The Java Stream API provides powerful collectors for partitioning and grouping data. Here's a comprehensive guide:
1. Basic Partitioning with partitioningBy()
Partitioning divides elements into two groups based on a predicate.
import java.util.*;
import java.util.stream.*;
import java.util.function.*;
/**
 * Demonstrates {@code Collectors.partitioningBy}: every stream element lands in
 * either the {@code true} bucket or the {@code false} bucket of the resulting map,
 * depending on the outcome of a predicate.
 */
public class PartitioningExamples {

    public static void main(String[] args) {
        partitionNumbers();
        partitionPeople();
    }

    /** Splits the integers 1 through 10 into even and odd values and prints both buckets. */
    private static void partitionNumbers() {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);

        Predicate<Integer> isEven = value -> value % 2 == 0;
        Map<Boolean, List<Integer>> byParity =
                numbers.stream().collect(Collectors.partitioningBy(isEven));

        System.out.println("Even numbers: " + byParity.get(true));
        System.out.println("Odd numbers: " + byParity.get(false));
    }

    /** Splits a small list of people on the "age is at least 18" test and prints both buckets. */
    private static void partitionPeople() {
        List<Person> people = Arrays.asList(
                new Person("Alice", 25, "New York"),
                new Person("Bob", 30, "London"),
                new Person("Charlie", 22, "New York"),
                new Person("Diana", 35, "Paris"),
                new Person("Eve", 28, "London"));

        Predicate<Person> isAdult = person -> person.getAge() >= 18;
        Map<Boolean, List<Person>> byAdulthood =
                people.stream().collect(Collectors.partitioningBy(isAdult));

        // Everyone in the sample list is 18 or older, so the "false" bucket prints as [].
        System.out.println("\nAdults: " + byAdulthood.get(true));
        System.out.println("Minors: " + byAdulthood.get(false));
    }
}
class Person {
private String name;
private int age;
private String city;
public Person(String name, int age, String city) {
this.name = name;
this.age = age;
this.city = city;
}
// getters, equals, hashCode, toString
public String getName() { return name; }
public int getAge() { return age; }
public String getCity() { return city; }
@Override
public String toString() {
return name + "(" + age + ")";
}
}
2. Advanced Partitioning
Partitioning with Downstream Collectors
/**
 * Shows {@code Collectors.partitioningBy} combined with downstream collectors:
 * counting, mapping to names, averaging ages and a nested grouping by city.
 * Every example partitions on the same "age is at least 18" predicate.
 */
public class AdvancedPartitioning {

    public static void main(String[] args) {
        List<Person> people = Arrays.asList(
                new Person("Alice", 25, "New York"),
                new Person("Bob", 30, "London"),
                new Person("Charlie", 22, "New York"),
                new Person("Diana", 35, "Paris"),
                new Person("Eve", 28, "London"),
                new Person("Frank", 17, "New York"));

        // One predicate shared by all four partitions below.
        Predicate<Person> isAdult = person -> person.getAge() >= 18;

        // How many people fall on each side of the predicate.
        Map<Boolean, Long> headCount = people.stream()
                .collect(Collectors.partitioningBy(isAdult, Collectors.counting()));
        System.out.println("Adult count: " + headCount.get(true));
        System.out.println("Minor count: " + headCount.get(false));

        // Keep only the names within each partition.
        Map<Boolean, List<String>> names = people.stream()
                .collect(Collectors.partitioningBy(
                        isAdult,
                        Collectors.mapping(Person::getName, Collectors.toList())));
        System.out.println("\nAdult names: " + names.get(true));
        System.out.println("Minor names: " + names.get(false));

        // Mean age per partition.
        Map<Boolean, Double> meanAge = people.stream()
                .collect(Collectors.partitioningBy(
                        isAdult,
                        Collectors.averagingInt(Person::getAge)));
        System.out.println("\nAverage adult age: " + meanAge.get(true));
        System.out.println("Average minor age: " + meanAge.get(false));

        // Within each partition, group the people by their city.
        Map<Boolean, Map<String, List<Person>>> cityBreakdown = people.stream()
                .collect(Collectors.partitioningBy(
                        isAdult,
                        Collectors.groupingBy(Person::getCity)));
        System.out.println("\nAdults by city: " + cityBreakdown.get(true));
        System.out.println("Minors by city: " + cityBreakdown.get(false));
    }
}
3. Basic Grouping with groupingBy()
Grouping divides elements into multiple groups based on a classifier function.
/**
 * Demonstrates single-level {@code Collectors.groupingBy}: once keyed by city and
 * once keyed by a derived age-range label.
 */
public class BasicGrouping {

    public static void main(String[] args) {
        List<Person> people = Arrays.asList(
                new Person("Alice", 25, "New York"),
                new Person("Bob", 30, "London"),
                new Person("Charlie", 22, "New York"),
                new Person("Diana", 35, "Paris"),
                new Person("Eve", 28, "London"),
                new Person("Frank", 40, "New York"));

        Map<String, List<Person>> byCity = people.stream()
                .collect(Collectors.groupingBy(Person::getCity));
        System.out.println("People by city:");
        printGroups(byCity);

        Map<String, List<Person>> byAgeRange = people.stream()
                .collect(Collectors.groupingBy(BasicGrouping::ageRangeOf));
        System.out.println("\nPeople by age range:");
        printGroups(byAgeRange);
    }

    /**
     * Classifies a person by age: below 25 is "Young", 25 through 34 is "Adult",
     * 35 and above is "Senior".
     */
    private static String ageRangeOf(Person person) {
        int age = person.getAge();
        if (age >= 35) {
            return "Senior";
        }
        return age < 25 ? "Young" : "Adult";
    }

    /** Prints one {@code key: members} line per group, in the map's iteration order. */
    private static void printGroups(Map<String, List<Person>> groups) {
        for (Map.Entry<String, List<Person>> group : groups.entrySet()) {
            System.out.println(group.getKey() + ": " + group.getValue());
        }
    }
}
4. Advanced Grouping
Grouping with Downstream Collectors
/**
 * Demonstrates {@code Collectors.groupingBy} keyed by city with a range of
 * downstream collectors: count, average, mapped names, maximum element and
 * summary statistics.
 */
public class AdvancedGrouping {

    public static void main(String[] args) {
        List<Person> people = Arrays.asList(
                new Person("Alice", 25, "New York"),
                new Person("Bob", 30, "London"),
                new Person("Charlie", 22, "New York"),
                new Person("Diana", 35, "Paris"),
                new Person("Eve", 28, "London"),
                new Person("Frank", 40, "New York"),
                new Person("Grace", 22, "Paris"));

        // Every example below groups on the same key.
        Function<Person, String> cityOf = Person::getCity;

        // Number of people per city.
        Map<String, Long> headCount = people.stream()
                .collect(Collectors.groupingBy(cityOf, Collectors.counting()));
        System.out.println("Count by city: " + headCount);

        // Mean age per city.
        Map<String, Double> meanAge = people.stream()
                .collect(Collectors.groupingBy(cityOf, Collectors.averagingInt(Person::getAge)));
        System.out.println("Average age by city: " + meanAge);

        // Names only, per city.
        Map<String, List<String>> names = people.stream()
                .collect(Collectors.groupingBy(
                        cityOf,
                        Collectors.mapping(Person::getName, Collectors.toList())));
        System.out.println("Names by city: " + names);

        // Oldest person per city; maxBy yields an Optional, which is printed as-is.
        Map<String, Optional<Person>> oldest = people.stream()
                .collect(Collectors.groupingBy(
                        cityOf,
                        Collectors.maxBy(Comparator.comparingInt(Person::getAge))));
        System.out.println("Oldest by city: " + oldest);

        // Count/sum/min/average/max of ages per city.
        Map<String, IntSummaryStatistics> ageStats = people.stream()
                .collect(Collectors.groupingBy(cityOf, Collectors.summarizingInt(Person::getAge)));
        for (Map.Entry<String, IntSummaryStatistics> entry : ageStats.entrySet()) {
            System.out.println(entry.getKey() + " - Stats: " + entry.getValue());
        }
    }
}
5. Multi-level Grouping
/**
 * Demonstrates two-level grouping: employees are grouped by department and, within
 * each department, by level. The second example replaces the inner lists with the
 * average salary of each level.
 */
public class MultiLevelGrouping {

    public static void main(String[] args) {
        List<Employee> employees = Arrays.asList(
                new Employee("Alice", "Engineering", "Senior", 75000),
                new Employee("Bob", "Engineering", "Junior", 55000),
                new Employee("Charlie", "Engineering", "Senior", 80000),
                new Employee("Diana", "Marketing", "Manager", 90000),
                new Employee("Eve", "Marketing", "Junior", 50000),
                new Employee("Frank", "Sales", "Senior", 70000),
                new Employee("Grace", "Sales", "Junior", 45000),
                new Employee("Henry", "Engineering", "Manager", 110000));

        // department -> level -> employees
        Map<String, Map<String, List<Employee>>> roster = employees.stream()
                .collect(Collectors.groupingBy(
                        Employee::getDepartment,
                        Collectors.groupingBy(Employee::getLevel)));

        System.out.println("Employees by department and level:");
        for (Map.Entry<String, Map<String, List<Employee>>> department : roster.entrySet()) {
            System.out.println("\n" + department.getKey() + ":");
            for (Map.Entry<String, List<Employee>> level : department.getValue().entrySet()) {
                System.out.println(" " + level.getKey() + ": " + level.getValue());
            }
        }

        // department -> level -> average salary
        Map<String, Map<String, Double>> meanSalary = employees.stream()
                .collect(Collectors.groupingBy(
                        Employee::getDepartment,
                        Collectors.groupingBy(
                                Employee::getLevel,
                                Collectors.averagingDouble(Employee::getSalary))));

        System.out.println("\nAverage salary by department and level:");
        for (Map.Entry<String, Map<String, Double>> department : meanSalary.entrySet()) {
            System.out.println("\n" + department.getKey() + ":");
            for (Map.Entry<String, Double> level : department.getValue().entrySet()) {
                System.out.printf(" %s: $%.2f%n", level.getKey(), level.getValue());
            }
        }
    }
}
/**
 * Read-only data holder for an employee: name, department, level and salary.
 */
class Employee {
    private final String name;
    private final String department;
    private final String level;
    private final double salary;

    /**
     * @param name       the employee's name
     * @param department the department the employee belongs to
     * @param level      the employee's level label (the examples use values such as "Senior")
     * @param salary     the employee's salary
     */
    public Employee(String name, String department, String level, double salary) {
        this.name = name;
        this.department = department;
        this.level = level;
        this.salary = salary;
    }

    /** @return the name passed to the constructor */
    public String getName() {
        return name;
    }

    /** @return the department passed to the constructor */
    public String getDepartment() {
        return department;
    }

    /** @return the level passed to the constructor */
    public String getLevel() {
        return level;
    }

    /** @return the salary passed to the constructor */
    public double getSalary() {
        return salary;
    }

    /** @return {@code name($salary)}, with the salary rendered as a plain {@code double} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(name).append("($").append(salary).append(")");
        return text.toString();
    }
}
6. Custom Collection Types in Grouping
/**
 * Demonstrates how to control the container types produced by
 * {@code Collectors.groupingBy}: a {@code TreeMap} for the outer map, a
 * {@code LinkedHashSet} for each group, and a joined string per group.
 */
public class CustomCollectionGrouping {

    public static void main(String[] args) {
        List<Person> people = Arrays.asList(
                new Person("Alice", 25, "New York"),
                new Person("Bob", 30, "London"),
                new Person("Charlie", 22, "New York"),
                new Person("Diana", 35, "Paris"),
                new Person("Eve", 28, "London"));

        Function<Person, String> cityOf = Person::getCity;

        // Supplying TreeMap::new as the map factory keeps the city keys sorted.
        Map<String, List<Person>> citiesInOrder = people.stream()
                .collect(Collectors.groupingBy(cityOf, TreeMap::new, Collectors.toList()));
        System.out.println("Sorted by city: " + citiesInOrder);

        // Each group is collected into a LinkedHashSet instead of the default list.
        Map<String, Set<Person>> residentsAsSets = people.stream()
                .collect(Collectors.groupingBy(
                        cityOf,
                        Collectors.toCollection(LinkedHashSet::new)));
        System.out.println("People set by city: " + residentsAsSets);

        // Each group becomes a single "[name, name]" string.
        Map<String, String> joinedNames = people.stream()
                .collect(Collectors.groupingBy(
                        cityOf,
                        Collectors.mapping(
                                Person::getName,
                                Collectors.joining(", ", "[", "]"))));
        System.out.println("Names by city: " + joinedNames);
    }
}
7. Real-World Examples
E-commerce Analytics
/**
 * Applies {@code Collectors.groupingBy} to a list of orders, producing four
 * per-category views: total revenue, average order value, order count and the
 * order with the highest total price.
 */
public class EcommerceAnalytics {

    public static void main(String[] args) {
        List<Order> orders = Arrays.asList(
                new Order("Laptop", "Electronics", 1200.0, 2),
                new Order("Phone", "Electronics", 800.0, 3),
                new Order("Book", "Education", 30.0, 5),
                new Order("Desk", "Furniture", 250.0, 1),
                new Order("Chair", "Furniture", 150.0, 4),
                new Order("Tablet", "Electronics", 500.0, 2),
                new Order("Notebook", "Education", 5.0, 10));

        // Shared key and value extractors for the reports below.
        Function<Order, String> categoryOf = Order::getCategory;
        ToDoubleFunction<Order> totalOf = Order::getTotalPrice;

        // Sum of order totals per category.
        Map<String, Double> revenue = orders.stream()
                .collect(Collectors.groupingBy(categoryOf, Collectors.summingDouble(totalOf)));
        System.out.println("Revenue by category: " + revenue);

        // Mean order total per category.
        Map<String, Double> meanOrderValue = orders.stream()
                .collect(Collectors.groupingBy(categoryOf, Collectors.averagingDouble(totalOf)));
        System.out.println("Average order value by category: " + meanOrderValue);

        // Number of orders per category.
        Map<String, Long> orderCount = orders.stream()
                .collect(Collectors.groupingBy(categoryOf, Collectors.counting()));
        System.out.println("Product count by category: " + orderCount);

        // Order with the largest total per category; maxBy yields an Optional, printed as-is.
        Map<String, Optional<Order>> bestSeller = orders.stream()
                .collect(Collectors.groupingBy(
                        categoryOf,
                        Collectors.maxBy(Comparator.comparingDouble(totalOf))));
        System.out.println("Top product by category: " + bestSeller);
    }
}
/**
 * Read-only data holder for one order line: product, category, unit price and quantity.
 */
class Order {
    private final String productName;
    private final String category;
    private final double unitPrice;
    private final int quantity;

    /**
     * @param productName name of the ordered product
     * @param category    category the product belongs to
     * @param unitPrice   price of a single unit
     * @param quantity    number of units ordered
     */
    public Order(String productName, String category, double unitPrice, int quantity) {
        this.productName = productName;
        this.category = category;
        this.unitPrice = unitPrice;
        this.quantity = quantity;
    }

    /** @return the product name passed to the constructor */
    public String getProductName() {
        return productName;
    }

    /** @return the category passed to the constructor */
    public String getCategory() {
        return category;
    }

    /** @return the unit price passed to the constructor */
    public double getUnitPrice() {
        return unitPrice;
    }

    /** @return the quantity passed to the constructor */
    public int getQuantity() {
        return quantity;
    }

    /** @return the unit price multiplied by the quantity */
    public double getTotalPrice() {
        return unitPrice * quantity;
    }

    /** @return {@code productName($total)}, where total is {@link #getTotalPrice()} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(productName).append("($").append(getTotalPrice()).append(")");
        return text.toString();
    }
}
Student Grade Analysis
/**
 * Applies {@code Collectors.groupingBy} to per-subject grade records: average grade
 * per subject, a subject-to-student grade table, and each student's overall average.
 */
public class StudentGradeAnalysis {

    public static void main(String[] args) {
        List<Student> students = Arrays.asList(
                new Student("Alice", "Math", 85),
                new Student("Alice", "Science", 92),
                new Student("Bob", "Math", 78),
                new Student("Bob", "Science", 88),
                new Student("Charlie", "Math", 95),
                new Student("Charlie", "Science", 90),
                new Student("Diana", "Math", 82),
                new Student("Diana", "Science", 79));

        ToIntFunction<Student> gradeOf = Student::getGrade;

        // Mean grade per subject.
        Map<String, Double> subjectAverages = students.stream()
                .collect(Collectors.groupingBy(
                        Student::getSubject,
                        Collectors.averagingInt(gradeOf)));
        System.out.println("Average grade by subject: " + subjectAverages);

        // subject -> (student name -> grade). The two-argument toMap rejects duplicate keys,
        // so this relies on each student appearing at most once per subject.
        Map<String, Map<String, Integer>> gradeTable = students.stream()
                .collect(Collectors.groupingBy(
                        Student::getSubject,
                        Collectors.toMap(Student::getName, Student::getGrade)));
        System.out.println("Grades by subject and student: " + gradeTable);

        // Mean grade per student across all of their subjects.
        Map<String, Double> studentAverages = students.stream()
                .collect(Collectors.groupingBy(
                        Student::getName,
                        Collectors.averagingInt(gradeOf)));
        System.out.println("Student overall average: " + studentAverages);
    }
}
/**
 * Read-only record of one grade: which student received which grade in which subject.
 */
class Student {
    private final String name;
    private final String subject;
    private final int grade;

    /**
     * @param name    the student's name
     * @param subject the subject the grade belongs to
     * @param grade   the grade achieved
     */
    public Student(String name, String subject, int grade) {
        this.name = name;
        this.subject = subject;
        this.grade = grade;
    }

    /** @return the name passed to the constructor */
    public String getName() {
        return name;
    }

    /** @return the subject passed to the constructor */
    public String getSubject() {
        return subject;
    }

    /** @return the grade passed to the constructor */
    public int getGrade() {
        return grade;
    }

    /** @return {@code name:grade}; the subject is not included */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(name).append(":").append(grade);
        return text.toString();
    }
}
8. Performance Considerations and Best Practices
/**
 * Rough timing comparison of a sequential {@code groupingBy} against a parallel
 * {@code groupingByConcurrent} over a randomly generated list of people.
 *
 * <p>This is an illustration rather than a rigorous benchmark: each variant runs
 * exactly once and there is no warm-up pass, so the run that executes first is
 * likely to be penalised by one-time JVM start-up costs.
 */
public class GroupingPerformance {

    /** Number of synthetic people generated for the timing run. */
    private static final int DATASET_SIZE = 100000;

    /** Nanoseconds per millisecond; used only when formatting elapsed times. */
    private static final long NANOS_PER_MILLI = 1_000_000L;

    public static void main(String[] args) {
        List<Person> largeList = createLargeDataset();

        // System.nanoTime() is intended for measuring elapsed time, unlike
        // currentTimeMillis(), which is coarse and follows the wall clock.
        long startNanos = System.nanoTime();
        Map<String, List<Person>> sequential = largeList.stream()
                .collect(Collectors.groupingBy(Person::getCity));
        long sequentialNanos = System.nanoTime() - startNanos;

        startNanos = System.nanoTime();
        Map<String, List<Person>> parallel = largeList.parallelStream()
                .collect(Collectors.groupingByConcurrent(Person::getCity));
        long parallelNanos = System.nanoTime() - startNanos;

        // Consume both results so the comparison is between equivalent work.
        if (!sequential.keySet().equals(parallel.keySet())) {
            throw new IllegalStateException(
                    "Sequential and parallel groupings disagree on keys: "
                            + sequential.keySet() + " vs " + parallel.keySet());
        }

        System.out.println("Sequential time: " + (sequentialNanos / NANOS_PER_MILLI) + "ms");
        System.out.println("Parallel time: " + (parallelNanos / NANOS_PER_MILLI) + "ms");
        if (parallelNanos > 0) {
            // Ratio is computed from nanoseconds so sub-millisecond runs do not divide by zero.
            System.out.println(
                    "Parallel speedup: " + (sequentialNanos / (double) parallelNanos) + "x");
        } else {
            System.out.println("Parallel speedup: n/a (parallel run too short to measure)");
        }
    }

    /**
     * Builds {@link #DATASET_SIZE} people named {@code Person0}, {@code Person1}, ... with a
     * random age from 18 to 97 inclusive and a city picked at random from a fixed list of five.
     *
     * @return a new mutable list of generated people
     */
    private static List<Person> createLargeDataset() {
        List<Person> people = new ArrayList<>(DATASET_SIZE);
        String[] cities = {"New York", "London", "Paris", "Tokyo", "Berlin"};
        Random random = new Random();
        for (int i = 0; i < DATASET_SIZE; i++) {
            String city = cities[random.nextInt(cities.length)];
            people.add(new Person("Person" + i, random.nextInt(80) + 18, city));
        }
        return people;
    }
}
Key Differences: Partitioning vs Grouping
| Aspect | Partitioning | Grouping |
|---|---|---|
| Number of groups | Always 2 (true/false) | Multiple (based on classifier) |
| Classifier | Predicate (boolean) | Function (any type) |
| Use case | Binary classification | Multi-category classification |
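| Performance / flexibility | Slightly faster; the result always contains both the `true` and `false` keys | More flexible; only keys that actually occur are present |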
Best Practices
- Use partitioning when you have a clear binary condition
- Use grouping for multiple categories
- Choose appropriate downstream collectors for your needs
- Consider parallel streams for large datasets
- Use `groupingByConcurrent` for parallel streams with unordered collections
- Be mindful of memory usage with large groupings
These partitioning and grouping operations are essential for data analysis and transformation in Java streams, providing powerful tools for organizing and aggregating data.
I don’t think the title of your article matches the content lol. Just kidding, mainly because I had some doubts after reading the article.