jOOλ for Functional SQL in Java

Overview

jOOλ (jOOL) is a library that brings functional programming capabilities to Java, particularly useful for SQL-like operations on Java collections. It extends Java 8 Streams with missing functionality and provides SQL-inspired operations.

Dependencies

<dependencies>
<!-- jOOλ -->
<dependency>
<groupId>org.jooq</groupId>
<artifactId>jool</artifactId>
<version>0.9.15</version>
</dependency>
<!-- Java 8+ -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.24</version>
<optional>true</optional>
</dependency>
</dependencies>

Core Concepts

1. Seq - The Enhanced Stream

import org.jooq.lambda.Seq;
import org.jooq.lambda.tuple.Tuple;
import org.jooq.lambda.tuple.Tuple2;
public class JoolIntroduction {
public void basicSeqOperations() {
// Creating sequences
Seq<Integer> numbers = Seq.of(1, 2, 3, 4, 5);
Seq<String> names = Seq.of("John", "Jane", "Bob", "Alice");
// SQL-like operations
Seq<String> result = names
.filter(name -> name.startsWith("J"))
.map(String::toUpperCase)
.sorted();
result.forEach(System.out::println); // JANE, JOHN
}
}

SQL-Inspired Operations

1. Window Functions

public class WindowFunctions {
public void rankAndRowNumbers() {
Seq<Employee> employees = Seq.of(
new Employee("John", "IT", 5000),
new Employee("Jane", "IT", 6000),
new Employee("Bob", "HR", 4500),
new Employee("Alice", "HR", 5500),
new Employee("Charlie", "IT", 7000)
);
// Add row numbers
Seq<Tuple2<Long, Employee>> withRowNumbers = employees
.zipWithIndex()
.map(t -> Tuple.tuple(t.v1 + 1, t.v2));
// Rank employees by salary within department
Seq<Tuple3<String, String, Integer>> ranked = employees
.groupBy(Employee::getDepartment)
.flatMap((dept, deptEmployees) -> 
deptEmployees
.sorted(Comparator.comparing(Employee::getSalary).reversed())
.zipWithIndex()
.map(t -> Tuple.tuple(dept, t.v2.getName(), t.v1 + 1))
);
}
public void leadAndLag() {
Seq<Integer> numbers = Seq.of(1, 3, 5, 7, 9);
// Lead - next value
Seq<Tuple2<Integer, Integer>> withLead = numbers
.zip(numbers.skip(1));
// Lag - previous value
Seq<Tuple2<Integer, Integer>> withLag = numbers
.zip(numbers.limit(numbers.size() - 1));
withLead.forEach(t -> 
System.out.println("Current: " + t.v1 + ", Next: " + t.v2));
}
}

2. Advanced Grouping and Aggregation

public class AdvancedAggregations {
public void multiLevelGrouping() {
Seq<Sale> sales = Seq.of(
new Sale("NY", "Q1", "Electronics", 1000),
new Sale("NY", "Q1", "Clothing", 500),
new Sale("NY", "Q2", "Electronics", 1200),
new Sale("LA", "Q1", "Electronics", 800),
new Sale("LA", "Q2", "Clothing", 600)
);
// Group by multiple fields
Map<Tuple2<String, String>, Double> revenueByRegionAndQuarter = sales
.groupBy(s -> Tuple.tuple(s.getRegion(), s.getQuarter()))
.mapValues(seq -> seq.mapToDouble(Sale::getAmount).sum());
// Pivot-like operations
Map<String, Map<String, Double>> pivot = sales
.groupBy(Sale::getRegion)
.mapValues(regionalSales -> 
regionalSales.groupBy(Sale::getQuarter)
.mapValues(seq -> seq.mapToDouble(Sale::getAmount).sum())
);
}
public void rollupAndCube() {
Seq<Sale> sales = Seq.of(/* sales data */);
// Rollup - hierarchical aggregation
Seq<Tuple3<String, String, Double>> rollup = sales
.groupBy(s -> Tuple.tuple(s.getRegion(), s.getCategory()))
.map((tuple, seq) -> Tuple.tuple(
tuple.v1, 
tuple.v2, 
seq.mapToDouble(Sale::getAmount).sum()
));
// Add totals
Seq<Tuple3<String, String, Double>> withTotals = rollup
.concat(sales.groupBy(Sale::getRegion)
.map((region, seq) -> Tuple.tuple(region, "TOTAL", 
seq.mapToDouble(Sale::getAmount).sum())))
.concat(Seq.of(Tuple.tuple("TOTAL", "TOTAL", 
sales.mapToDouble(Sale::getAmount).sum())));
}
}

3. Joins and Set Operations

public class JoinOperations {
public void innerJoin() {
Seq<Employee> employees = Seq.of(
new Employee(1, "John", 1),
new Employee(2, "Jane", 2),
new Employee(3, "Bob", 1)
);
Seq<Department> departments = Seq.of(
new Department(1, "IT"),
new Department(2, "HR"),
new Department(3, "Finance")
);
// Inner join
Seq<Tuple2<Employee, Department>> joined = employees
.innerJoin(departments)
.on((emp, dept) -> emp.getDeptId() == dept.getId());
// More fluent join
Seq<EmployeeDTO> result = employees
.innerJoin(departments, (emp, dept) -> emp.getDeptId() == dept.getId())
.map(t -> new EmployeeDTO(t.v1.getName(), t.v2.getName()));
}
public void leftJoin() {
Seq<Employee> employees = Seq.of(/* employees */);
Seq<Department> departments = Seq.of(/* departments */);
// Left outer join
Seq<Tuple2<Employee, Optional<Department>>> leftJoined = employees
.leftOuterJoin(departments)
.on((emp, dept) -> emp.getDeptId() == dept.getId());
// Handle missing departments
Seq<EmployeeDTO> withDefaultDept = leftJoined
.map(t -> new EmployeeDTO(
t.v1.getName(),
t.v2.map(Department::getName).orElse("No Department")
));
}
public void crossJoin() {
Seq<String> categories = Seq.of("A", "B", "C");
Seq<Integer> years = Seq.of(2020, 2021, 2022);
// Cross join (cartesian product)
Seq<Tuple2<String, Integer>> allCombinations = categories
.crossJoin(years);
allCombinations.forEach(t -> 
System.out.println("Category: " + t.v1 + ", Year: " + t.v2));
}
public void setOperations() {
Seq<Integer> set1 = Seq.of(1, 2, 3, 4, 5);
Seq<Integer> set2 = Seq.of(4, 5, 6, 7, 8);
// Union
Seq<Integer> union = set1.union(set2); // 1,2,3,4,5,6,7,8
// Intersection
Seq<Integer> intersection = set1.intersect(set2); // 4,5
// Difference
Seq<Integer> difference = set1.subtract(set2); // 1,2,3
// Distinct with custom comparator
Seq<Employee> employees = Seq.of(/* employees with duplicates */);
Seq<Employee> distinctByName = employees.distinct(Employee::getName);
}
}

Functional SQL Patterns

1. Case-When Expressions

public class CaseWhenExpressions {
public void sqlCase() {
Seq<Employee> employees = Seq.of(/* employees */);
// SQL-like CASE WHEN
Seq<Tuple2<String, String>> withGrade = employees
.map(emp -> {
String grade = Seq.of(
Tuple.tuple(emp.getSalary() >= 8000, "A"),
Tuple.tuple(emp.getSalary() >= 6000, "B"),
Tuple.tuple(emp.getSalary() >= 4000, "C")
)
.filter(t -> t.v1)
.findFirst()
.map(Tuple2::v2)
.orElse("D");
return Tuple.tuple(emp.getName(), grade);
});
// More functional approach
Seq<Tuple2<String, String>> functionalCase = employees
.map(emp -> Tuple.tuple(
emp.getName(),
Seq.<Tuple2<Predicate<Employee>, String>>of(
Tuple.tuple(e -> e.getSalary() >= 8000, "A"),
Tuple.tuple(e -> e.getSalary() >= 6000, "B"),
Tuple.tuple(e -> e.getSalary() >= 4000, "C")
)
.filter(t -> t.v1.test(emp))
.findFirst()
.map(Tuple2::v2)
.orElse("D")
));
}
}

2. Windowing and Analytics

public class WindowingOperations {
public void movingAverages() {
Seq<Double> prices = Seq.of(10.5, 11.2, 12.8, 13.5, 14.2, 15.0, 14.8);
// 3-period moving average
Seq<Double> movingAvg = prices
.window(0, 2) // current, next two
.map(window -> window.avg().orElse(0.0));
// Cumulative sum
Seq<Double> cumulativeSum = prices
.scanLeft(0.0, Double::sum)
.skip(1); // skip initial 0.0
}
public void departmentRanking() {
Seq<Employee> employees = Seq.of(/* employees */);
// Rank employees within department by salary
Seq<Tuple4<String, String, Integer, Long>> ranked = employees
.groupBy(Employee::getDepartment)
.flatMap((dept, deptEmployees) -> 
deptEmployees
.sorted(Comparator.comparing(Employee::getSalary).reversed())
.zipWithIndex()
.map(t -> Tuple.tuple(
dept, 
t.v2.getName(), 
t.v2.getSalary(), 
t.v1 + 1L
))
);
// Top 3 earners per department
Seq<Employee> topEarners = employees
.groupBy(Employee::getDepartment)
.flatMap((dept, deptEmployees) -> 
deptEmployees
.sorted(Comparator.comparing(Employee::getSalary).reversed())
.limit(3)
);
}
}

Practical Use Cases

1. Data Transformation Pipeline

public class DataTransformationPipeline {
public Seq<SalesReport> generateSalesReport(Seq<Sale> rawSales) {
return rawSales
// Filter and clean data
.filter(sale -> sale.getAmount() > 0)
.filter(sale -> sale.getDate().isAfter(LocalDate.now().minusYears(1)))
// Enrich data
.map(sale -> {
String season = getSeason(sale.getDate());
boolean isWeekend = sale.getDate().getDayOfWeek()
.getValue() >= 6;
return Tuple.tuple(sale, season, isWeekend);
})
// Aggregate
.groupBy(t -> Tuple.tuple(t.v1.getRegion(), t.v2, t.v3))
.map((key, sales) -> {
double totalAmount = sales.mapToDouble(t -> t.v1.getAmount()).sum();
long transactionCount = sales.count();
double avgAmount = totalAmount / transactionCount;
return new SalesReport(
key.v1, // region
key.v2, // season
key.v3, // isWeekend
totalAmount,
transactionCount,
avgAmount
);
})
// Sort for reporting
.sorted(Comparator
.comparing(SalesReport::getRegion)
.thenComparing(SalesReport::getTotalAmount).reversed());
}
private String getSeason(LocalDate date) {
int month = date.getMonthValue();
if (month >= 3 && month <= 5) return "Spring";
if (month >= 6 && month <= 8) return "Summer";
if (month >= 9 && month <= 11) return "Autumn";
return "Winter";
}
}

2. ETL Process with jOOλ

public class ETLProcessor {
public void processCustomerData() {
Seq<Customer> customers = loadCustomers();
Seq<Order> orders = loadOrders();
Seq<Product> products = loadProducts();
Seq<CustomerAnalysis> analysis = customers
// Join with orders
.leftOuterJoin(orders, (cust, order) -> cust.getId() == order.getCustomerId())
.groupBy(t -> t.v1) // Group by customer
// Calculate customer metrics
.map((customer, customerOrders) -> {
Seq<Order> ordersSeq = customerOrders.map(Tuple2::v2);
double totalSpent = ordersSeq
.flatMap(order -> order.getItems().stream())
.mapToDouble(item -> item.getQuantity() * item.getUnitPrice())
.sum();
long orderCount = ordersSeq.count();
Set<String> purchasedCategories = ordersSeq
.flatMap(order -> order.getItems().stream())
.map(item -> products
.findFirst(p -> p.getId() == item.getProductId())
.map(Product::getCategory)
.orElse("Unknown"))
.toSet();
return new CustomerAnalysis(
customer.getId(),
customer.getName(),
totalSpent,
orderCount,
purchasedCategories,
calculateCustomerSegment(totalSpent, orderCount)
);
})
// Filter and sort
.filter(analysis -> analysis.getTotalSpent() > 1000)
.sorted(Comparator.comparing(CustomerAnalysis::getTotalSpent).reversed());
}
private String calculateCustomerSegment(double totalSpent, long orderCount) {
if (totalSpent > 5000) return "VIP";
if (totalSpent > 2000 && orderCount > 5) return "Loyal";
if (totalSpent > 1000) return "Regular";
return "Casual";
}
}

3. Financial Calculations

public class FinancialCalculations {
public Seq<PortfolioAnalysis> analyzePortfolio(Seq<Trade> trades) {
return trades
.groupBy(Trade::getSymbol)
.map((symbol, symbolTrades) -> {
Seq<Trade> sortedTrades = symbolTrades
.sorted(Comparator.comparing(Trade::getTimestamp));
// Calculate metrics
double totalVolume = sortedTrades
.mapToDouble(Trade::getVolume)
.sum();
double avgPrice = sortedTrades
.mapToDouble(Trade::getPrice)
.average()
.orElse(0.0);
// Price volatility (standard deviation)
double priceVariance = sortedTrades
.mapToDouble(trade -> Math.pow(trade.getPrice() - avgPrice, 2))
.average()
.orElse(0.0);
double volatility = Math.sqrt(priceVariance);
// Moving averages
Seq<Double> prices = sortedTrades.map(Trade::getPrice);
Seq<Double> shortMA = prices.window(0, 4).map(w -> w.avg().orElse(0.0));
Seq<Double> longMA = prices.window(0, 9).map(w -> w.avg().orElse(0.0));
return new PortfolioAnalysis(
symbol,
totalVolume,
avgPrice,
volatility,
calculateTrend(shortMA, longMA)
);
});
}
private String calculateTrend(Seq<Double> shortMA, Seq<Double> longMA) {
double lastShort = shortMA.findLast().orElse(0.0);
double lastLong = longMA.findLast().orElse(0.0);
if (lastShort > lastLong) return "Bullish";
if (lastShort < lastLong) return "Bearish";
return "Neutral";
}
}

Advanced Patterns

1. Recursive Queries

public class RecursiveOperations {
public Seq<Employee> findSubordinates(String managerId, Seq<Employee> allEmployees) {
return Seq.iterate(
allEmployees.filter(e -> e.getManagerId().equals(managerId)),
subordinates -> subordinates.isEmpty() ? Seq.empty() :
subordinates.flatMap(sub -> 
allEmployees.filter(e -> e.getManagerId().equals(sub.getId()))
)
)
.takeWhile(seq -> !seq.isEmpty())
.flatMap(Function.identity())
.distinct();
}
public Seq<String> findManagementChain(String employeeId, 
Seq<Employee> allEmployees) {
return Seq.iterate(
allEmployees.findFirst(e -> e.getId().equals(employeeId)),
current -> current.flatMap(emp -> 
allEmployees.findFirst(e -> e.getId().equals(emp.getManagerId()))
)
)
.takeWhile(Optional::isPresent)
.map(Optional::get)
.map(Employee::getName);
}
}

2. Time Series Analysis

public class TimeSeriesAnalysis {
public Seq<TimeSeriesPoint> analyzeTimeSeries(Seq<DataPoint> data) {
return data
.groupBy(dp -> dp.getTimestamp().truncatedTo(ChronoUnit.HOURS))
.map((timestamp, points) -> 
Tuple.tuple(
timestamp,
points.mapToDouble(DataPoint::getValue).avg().orElse(0.0),
points.mapToDouble(DataPoint::getValue).min().orElse(0.0),
points.mapToDouble(DataPoint::getValue).max().orElse(0.0),
points.count()
)
)
.sorted(Comparator.comparing(Tuple2::v1))
.window(1, 1) // Look at current and next point
.map(window -> {
Tuple5<Instant, Double, Double, Double, Long> current = window.get(0);
Tuple5<Instant, Double, Double, Double, Long> next = window.get(1);
double growthRate = (next.v2 - current.v2) / current.v2 * 100;
boolean isSpike = Math.abs(growthRate) > 50;
return new TimeSeriesPoint(
current.v1,
current.v2,
current.v3,
current.v4,
current.v5,
growthRate,
isSpike
);
});
}
}

Performance Optimization

1. Lazy Evaluation with Caching

public class OptimizedSequences {
public void optimizedOperations() {
Seq<Employee> employees = loadLargeEmployeeDataset();
// Use parallel processing for large datasets
Seq<String> highPaidDepartments = employees
.parallel() // Switch to parallel processing
.filter(emp -> emp.getSalary() > 50000)
.groupBy(Employee::getDepartment)
.filter((dept, deptEmployees) -> deptEmployees.count() > 10)
.map(Tuple2::v1)
.sequential(); // Switch back to sequential if needed
// Cache expensive operations
Seq<Employee> cachedSeq = employees.cache();
// Multiple operations on cached sequence
long highEarners = cachedSeq.filter(emp -> emp.getSalary() > 100000).count();
double avgSalary = cachedSeq.mapToDouble(Employee::getSalary).average().orElse(0.0);
// Batch processing for memory efficiency
cachedSeq
.batch(1000) // Process in batches of 1000
.forEach(batch -> processBatch(batch));
}
private void processBatch(Seq<Employee> batch) {
// Process batch
batch.forEach(this::processEmployee);
}
}

Testing jOOλ Code

public class JoolTest {
@Test
void testSequenceOperations() {
Seq<Integer> numbers = Seq.of(1, 2, 3, 4, 5);
Seq<Integer> result = numbers
.filter(n -> n % 2 == 0)
.map(n -> n * 2);
assertEquals(Seq.of(4, 8), result);
}
@Test
void testJoinOperations() {
Seq<Employee> employees = Seq.of(
new Employee(1, "John", 1),
new Employee(2, "Jane", 2)
);
Seq<Department> departments = Seq.of(
new Department(1, "IT"),
new Department(2, "HR")
);
Seq<String> result = employees
.innerJoin(departments, (e, d) -> e.getDeptId() == d.getId())
.map(t -> t.v1.getName() + " - " + t.v2.getName());
assertEquals(Seq.of("John - IT", "Jane - HR"), result);
}
}

Best Practices

  1. Use Lazy Evaluation: jOOλ sequences are lazy by default
  2. Cache Expensive Operations: Use .cache() for multiple terminal operations
  3. Parallel Processing: Use .parallel() for CPU-intensive operations on large datasets
  4. Batch Large Datasets: Use .batch(size) to avoid memory issues
  5. Prefer Immutable Operations: jOOλ operations are immutable
  6. Use Tuples for Complex Data: Leverage jOOλ's Tuple classes for multiple return values

jOOλ provides powerful SQL-like operations for Java collections, making data manipulation more expressive and functional while maintaining good performance characteristics.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper