CSV Parsing and Generation in Java

This comprehensive guide covers various methods for parsing and generating CSV files in Java, including native approaches and popular libraries.

1. Native Java CSV Processing

Basic CSV Reader

import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Minimal CSV reader that relies only on the JDK.
 *
 * <p>Fields are separated by commas. A field may be wrapped in double quotes, in which case it
 * can contain commas, line breaks, and literal quotes written as two consecutive quotes
 * ({@code ""}).
 */
public class NativeCsvParser {

    /**
     * Reads the file at {@code filePath} and returns one {@code String[]} per CSV record.
     *
     * <p>The file is decoded as UTF-8 rather than the platform default charset, so the result
     * does not depend on the machine running the code. When a physical line ends inside a
     * quoted field, the following lines are appended (joined with {@code '\n'}) until the quote
     * is closed or the file ends; a quoted value containing a line break therefore stays in a
     * single field. Because lines are read with {@link BufferedReader#readLine()}, any line
     * terminator inside a quoted field is normalized to {@code '\n'}.
     *
     * @param filePath path of the CSV file to read
     * @return the parsed records in file order; an empty line yields a record with one empty field
     * @throws IOException if the file cannot be opened or read
     */
    public List<String[]> parseCsv(String filePath) throws IOException {
        List<String[]> records = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(filePath), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                StringBuilder record = new StringBuilder(line);
                // An odd number of quote characters means a quoted field is still open,
                // so the record continues on the next physical line.
                boolean quoteOpen = hasOddQuoteCount(line);
                String continuation;
                while (quoteOpen && (continuation = br.readLine()) != null) {
                    record.append('\n').append(continuation);
                    quoteOpen ^= hasOddQuoteCount(continuation);
                }
                records.add(parseCsvLine(record.toString()));
            }
        }
        return records;
    }

    /** Returns whether {@code text} contains an odd number of double-quote characters. */
    private static boolean hasOddQuoteCount(String text) {
        boolean odd = false;
        for (int i = 0; i < text.length(); i++) {
            if (text.charAt(i) == '"') {
                odd = !odd;
            }
        }
        return odd;
    }

    /**
     * Splits one complete CSV record into its fields.
     *
     * <p>Quote characters that delimit a field are removed; a doubled quote inside a quoted
     * field is emitted as a single quote. Commas only separate fields when outside quotes.
     */
    private String[] parseCsvLine(String line) {
        List<String> values = new ArrayList<>();
        StringBuilder currentValue = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '"') {
                // Handle escaped quotes ("")
                if (inQuotes && i < line.length() - 1 && line.charAt(i + 1) == '"') {
                    currentValue.append('"');
                    i++; // Skip next quote
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (c == ',' && !inQuotes) {
                values.add(currentValue.toString());
                currentValue.setLength(0);
            } else {
                currentValue.append(c);
            }
        }
        // The last field has no trailing comma, so it is flushed here.
        values.add(currentValue.toString());
        return values.toArray(new String[0]);
    }

    // Usage example
    public static void main(String[] args) {
        NativeCsvParser parser = new NativeCsvParser();
        try {
            List<String[]> data = parser.parseCsv("data.csv");
            for (String[] row : data) {
                System.out.println(String.join(" | ", row));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Basic CSV Writer

import java.io.*;
import java.util.List;
/**
 * Minimal CSV writer that relies only on the JDK.
 *
 * <p>Each {@code String[]} becomes one comma-separated line. Values containing a comma, a double
 * quote, or a line break are wrapped in double quotes, with embedded quotes doubled.
 */
public class NativeCsvWriter {

    /**
     * Writes {@code data} to {@code filePath}, replacing any existing file.
     *
     * <p>The output is encoded as UTF-8 rather than the platform default charset. Records are
     * terminated with the platform line separator ({@link BufferedWriter#newLine()}).
     *
     * @param filePath destination file
     * @param data records to write, one array per row; a {@code null} cell is written as an
     *     empty field
     * @throws IOException if the file cannot be created or written
     */
    public void writeCsv(String filePath, List<String[]> data) throws IOException {
        try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(filePath), java.nio.charset.StandardCharsets.UTF_8))) {
            for (String[] row : data) {
                bw.write(convertToCsvRow(row));
                bw.newLine();
            }
        }
    }

    /** Joins the escaped cells of {@code row} with commas. */
    private String convertToCsvRow(String[] row) {
        StringBuilder csvRow = new StringBuilder();
        for (int i = 0; i < row.length; i++) {
            if (i > 0) {
                csvRow.append(',');
            }
            csvRow.append(escapeField(row[i]));
        }
        return csvRow.toString();
    }

    /**
     * Returns {@code value} in CSV form: unchanged when it contains no special character,
     * otherwise wrapped in quotes with embedded quotes doubled. {@code null} maps to an empty
     * field instead of failing with a {@link NullPointerException}.
     */
    private static String escapeField(String value) {
        if (value == null) {
            return "";
        }
        // A bare carriage return also breaks line-based readers, so it forces quoting too.
        boolean needsQuoting = value.contains(",")
                || value.contains("\"")
                || value.contains("\n")
                || value.contains("\r");
        if (!needsQuoting) {
            return value;
        }
        // Escape quotes and wrap in quotes
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }

    // Usage example
    public static void main(String[] args) {
        NativeCsvWriter writer = new NativeCsvWriter();
        List<String[]> data = List.of(
                new String[]{"Name", "Age", "City"},
                new String[]{"John Doe", "30", "New York"},
                new String[]{"Jane Smith", "25", "San Francisco, CA"},
                new String[]{"Bob \"The Builder\"", "40", "Chicago"}
        );
        try {
            writer.writeCsv("output.csv", data);
            System.out.println("CSV file written successfully");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

2. Apache Commons CSV

Dependencies

<!-- Maven -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.10.0</version>
</dependency>
<!-- Gradle -->
implementation 'org.apache.commons:commons-csv:1.10.0'

CSV Parsing with Apache Commons CSV

import org.apache.commons.csv.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
 * Examples of reading CSV files with Apache Commons CSV.
 *
 * <p>Every method decodes its input as UTF-8 so that results do not depend on the platform
 * default charset.
 */
public class CommonsCsvParser {

    /**
     * Reads a CSV file and maps every record to a {@link Person}.
     *
     * <p>The format is {@code CSVFormat.DEFAULT} configured with {@code withFirstRecordAsHeader()},
     * {@code withIgnoreHeaderCase()} and {@code withTrim()}; values are looked up by the column
     * names {@code Name}, {@code Age}, {@code Email} and {@code City}.
     *
     * @param filePath path of the CSV file to read
     * @return one {@link Person} per record, in file order
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if an {@code Age} value is not a valid integer
     */
    public List<Person> parseCsvToObjects(String filePath) throws IOException {
        List<Person> people = new ArrayList<>();
        try (Reader reader = openUtf8Reader(filePath);
             CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT
                     .withFirstRecordAsHeader()
                     .withIgnoreHeaderCase()
                     .withTrim())) {
            for (CSVRecord record : parser) {
                Person person = new Person();
                person.setName(record.get("Name"));
                person.setAge(Integer.parseInt(record.get("Age")));
                person.setEmail(record.get("Email"));
                person.setCity(record.get("City"));
                people.add(person);
            }
        }
        return people;
    }

    /**
     * Reads a semicolon-delimited file with an explicitly supplied header and prints the
     * {@code Name}, {@code Age} and {@code Email} of each record to standard output.
     *
     * @param filePath path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void parseCsvWithCustomFormat(String filePath) throws IOException {
        CSVFormat format = CSVFormat.newFormat(';') // Custom delimiter
                .withQuote('"')
                .withHeader("Name", "Age", "Email", "City")
                .withIgnoreEmptyLines()
                .withIgnoreSurroundingSpaces();
        try (Reader reader = openUtf8Reader(filePath);
             CSVParser parser = new CSVParser(reader, format)) {
            for (CSVRecord record : parser) {
                System.out.printf("Name: %s, Age: %s, Email: %s%n",
                        record.get("Name"), record.get("Age"), record.get("Email"));
            }
        }
    }

    /**
     * Iterates over the records of a file one at a time, handing each to
     * {@code processRecord} and printing a progress line every 1000 records.
     *
     * @param filePath path of the CSV file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void parseLargeCsvFile(String filePath) throws IOException {
        try (Reader reader = openUtf8Reader(filePath);
             CSVParser parser = CSVFormat.DEFAULT.parse(reader)) {
            int rowCount = 0;
            for (CSVRecord record : parser) {
                // Process each record
                processRecord(record);
                rowCount++;
                if (rowCount % 1000 == 0) {
                    System.out.println("Processed " + rowCount + " records");
                }
            }
        }
    }

    /** Opens {@code filePath} for reading with an explicit UTF-8 decoder. */
    private static Reader openUtf8Reader(String filePath) throws IOException {
        return new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_8);
    }

    /** Copies the values of {@code record} into an array; placeholder for real processing. */
    private void processRecord(CSVRecord record) {
        // Custom record processing logic
        String[] values = new String[record.size()];
        for (int i = 0; i < record.size(); i++) {
            values[i] = record.get(i);
        }
        // Process the values...
    }
}
// Model class
/**
 * Mutable bean holding the name, age, email address and city of one person.
 */
class Person {
    private String name;
    private int age;
    private String email;
    private String city;

    /** Creates a person with every field set. */
    public Person(String name, int age, String email, String city) {
        this.name = name;
        this.age = age;
        this.email = email;
        this.city = city;
    }

    /** Creates a person whose fields keep their Java default values. */
    public Person() {
    }

    // --- name ---

    public void setName(String name) {
        this.name = name;
    }

    public String getName() {
        return this.name;
    }

    // --- age ---

    public void setAge(int age) {
        this.age = age;
    }

    public int getAge() {
        return this.age;
    }

    // --- email ---

    public void setEmail(String email) {
        this.email = email;
    }

    public String getEmail() {
        return this.email;
    }

    // --- city ---

    public void setCity(String city) {
        this.city = city;
    }

    public String getCity() {
        return this.city;
    }

    /** Renders all four fields in a {@code Person{...}} form. */
    @Override
    public String toString() {
        final String template = "Person{name='%s', age=%d, email='%s', city='%s'}";
        return String.format(template, this.name, this.age, this.email, this.city);
    }
}

CSV Generation with Apache Commons CSV

import org.apache.commons.csv.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
/**
 * Examples of writing CSV files with Apache Commons CSV.
 *
 * <p>Every method encodes its output as UTF-8 so that the generated files do not depend on the
 * platform default charset.
 */
public class CommonsCsvWriter {

    /**
     * Writes one record per {@link Person} under the header {@code Name, Age, Email, City}.
     *
     * @param people the people to export
     * @param filePath destination file, replaced if it exists
     * @throws IOException if the file cannot be created or written
     */
    public void writeObjectsToCsv(List<Person> people, String filePath) throws IOException {
        CSVFormat format = CSVFormat.DEFAULT
                .withHeader("Name", "Age", "Email", "City")
                .withQuote('"');
        try (Writer writer = openUtf8Writer(filePath);
             CSVPrinter printer = new CSVPrinter(writer, format)) {
            for (Person person : people) {
                printer.printRecord(
                        person.getName(),
                        person.getAge(),
                        person.getEmail(),
                        person.getCity()
                );
            }
            printer.flush();
        }
    }

    /**
     * Writes four sample product records using a pipe delimiter and {@code "N/A"} as the
     * configured null string.
     *
     * @param filePath destination file, replaced if it exists
     * @throws IOException if the file cannot be created or written
     */
    public void writeCsvWithCustomFormat(String filePath) throws IOException {
        CSVFormat format = CSVFormat.newFormat('|') // Pipe delimiter
                .withQuote('"')
                .withHeader("ID", "Product", "Price", "Category")
                .withNullString("N/A")
                .withIgnoreEmptyLines();
        try (Writer writer = openUtf8Writer(filePath);
             CSVPrinter printer = new CSVPrinter(writer, format)) {
            // Sample data
            printer.printRecord(1, "Laptop", 999.99, "Electronics");
            printer.printRecord(2, "Desk Chair", 199.50, "Furniture");
            printer.printRecord(3, null, 49.99, "Accessories"); // Will be "N/A"
            printer.printRecord(4, "Monitor", 299.99, "Electronics");
            printer.flush();
        }
    }

    /**
     * Writes a sales report with one row per sale followed by a grand-total row.
     *
     * <p>Each row's total is {@code quantity * unitPrice}. Money columns are formatted with
     * {@link java.util.Locale#ROOT} so the decimal separator is always a dot, whatever the
     * default locale of the machine producing the report.
     *
     * @param sales the sales to report
     * @param filePath destination file, replaced if it exists
     * @throws IOException if the file cannot be created or written
     */
    public void generateCsvReport(List<SalesRecord> sales, String filePath) throws IOException {
        CSVFormat format = CSVFormat.DEFAULT
                .withHeader("Date", "Product", "Quantity", "Unit Price", "Total")
                .withQuote('"');
        try (Writer writer = openUtf8Writer(filePath);
             CSVPrinter printer = new CSVPrinter(writer, format)) {
            double grandTotal = 0;
            for (SalesRecord sale : sales) {
                double total = sale.getQuantity() * sale.getUnitPrice();
                grandTotal += total;
                printer.printRecord(
                        sale.getDate(),
                        sale.getProduct(),
                        sale.getQuantity(),
                        formatMoney(sale.getUnitPrice()),
                        formatMoney(total)
                );
            }
            // Add summary row
            printer.printRecord("", "", "", "Grand Total:", formatMoney(grandTotal));
            printer.flush();
        }
    }

    /** Opens {@code filePath} for writing with an explicit UTF-8 encoder. */
    private static Writer openUtf8Writer(String filePath) throws IOException {
        return new OutputStreamWriter(new FileOutputStream(filePath), StandardCharsets.UTF_8);
    }

    /** Formats {@code amount} as a dollar value with two decimals, independent of locale. */
    private static String formatMoney(double amount) {
        return String.format(java.util.Locale.ROOT, "$%.2f", amount);
    }
}
/**
 * Mutable bean describing one sale: its date, product, quantity and unit price.
 */
class SalesRecord {
    private String date;
    private String product;
    private int quantity;
    private double unitPrice;

    /** Creates a sale with every field set. */
    public SalesRecord(String date, String product, int quantity, double unitPrice) {
        this.date = date;
        this.product = product;
        this.quantity = quantity;
        this.unitPrice = unitPrice;
    }

    // --- date ---

    public void setDate(String date) {
        this.date = date;
    }

    public String getDate() {
        return this.date;
    }

    // --- product ---

    public void setProduct(String product) {
        this.product = product;
    }

    public String getProduct() {
        return this.product;
    }

    // --- quantity ---

    public void setQuantity(int quantity) {
        this.quantity = quantity;
    }

    public int getQuantity() {
        return this.quantity;
    }

    // --- unit price ---

    public void setUnitPrice(double unitPrice) {
        this.unitPrice = unitPrice;
    }

    public double getUnitPrice() {
        return this.unitPrice;
    }
}

3. OpenCSV Library

Dependencies

<!-- Maven -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>5.8</version>
</dependency>
<!-- Gradle -->
implementation 'com.opencsv:opencsv:5.8'

CSV Parsing with OpenCSV

import com.opencsv.*;
import com.opencsv.bean.*;
import com.opencsv.exceptions.CsvException;
import java.io.*;
import java.util.List;
/**
 * Examples of reading CSV files with OpenCSV.
 */
public class OpenCsvParser {

    /**
     * Reads every record of the file into memory.
     *
     * @param filePath path of the CSV file to read
     * @return all records as returned by {@code CSVReader.readAll()}
     * @throws IOException if the file cannot be opened or read
     * @throws CsvException if OpenCSV rejects the content
     */
    public List<String[]> readAllData(String filePath) throws IOException, CsvException {
        try (CSVReader reader = new CSVReader(new FileReader(filePath))) {
            return reader.readAll();
        }
    }

    /**
     * Reads the file one record at a time and prints each record with {@code " | "} between
     * its fields.
     *
     * <p>In OpenCSV 5.x {@code CSVReader.readNext()} declares the checked
     * {@code CsvValidationException}. It is wrapped in an {@link IOException} here so this
     * method keeps its {@code throws IOException} signature.
     *
     * @param filePath path of the CSV file to read
     * @throws IOException if the file cannot be read or a line fails OpenCSV validation
     */
    public void readCsvLineByLine(String filePath) throws IOException {
        try (CSVReader reader = new CSVReader(new FileReader(filePath))) {
            String[] nextLine;
            while ((nextLine = reader.readNext()) != null) {
                // Process each line
                System.out.println(String.join(" | ", nextLine));
            }
        } catch (CsvException e) {
            throw new IOException("Invalid CSV content in " + filePath, e);
        }
    }

    /**
     * Maps the records of the file to {@link Employee} beans using {@code CsvToBean}.
     *
     * @param filePath path of the CSV file to read
     * @return the beans produced by {@code CsvToBean.parse()}
     * @throws IOException if the file cannot be opened or closed
     */
    // Using CsvToBean for object mapping
    public List<Employee> parseToObjects(String filePath) throws IOException {
        try (Reader reader = new FileReader(filePath)) {
            CsvToBean<Employee> csvToBean = new CsvToBeanBuilder<Employee>(reader)
                    .withType(Employee.class)
                    .withIgnoreLeadingWhiteSpace(true)
                    .withSeparator(',')
                    .build();
            return csvToBean.parse();
        }
    }

    /**
     * Reads a semicolon-separated file, skipping its first line, and prints each record with
     * {@code " | "} between its fields.
     *
     * <p>The {@link FileReader} is opened inside the try-with-resources statement so it is
     * closed even if building the {@code CSVReader} fails.
     *
     * @param filePath path of the file to read
     * @throws IOException if the file cannot be opened or read
     * @throws CsvException if OpenCSV rejects the content
     */
    // Custom CSV parser with special settings
    public void parseWithCustomSettings(String filePath) throws IOException, CsvException {
        CSVParser parser = new CSVParserBuilder()
                .withSeparator(';')
                .withIgnoreQuotations(false)
                .build();
        try (Reader fileReader = new FileReader(filePath);
             CSVReader reader = new CSVReaderBuilder(fileReader)
                     .withCSVParser(parser)
                     .withSkipLines(1) // Skip header
                     .build()) {
            List<String[]> allData = reader.readAll();
            for (String[] row : allData) {
                System.out.println(String.join(" | ", row));
            }
        }
    }
}
// Employee model with CSV annotations
/**
 * Employee bean bound to CSV columns by header name through OpenCSV annotations.
 *
 * <p>The hire date is declared as {@code java.util.Date} with its fully qualified name because
 * the accompanying import list does not import {@code Date}.
 */
class Employee {
    @CsvBindByName(column = "EMP_ID")
    private String employeeId;

    @CsvBindByName(column = "FIRST_NAME")
    private String firstName;

    @CsvBindByName(column = "LAST_NAME")
    private String lastName;

    @CsvBindByName(column = "DEPARTMENT")
    private String department;

    @CsvBindByName(column = "SALARY")
    private double salary;

    // Bound to the HIRE_DATE column using the yyyy-MM-dd pattern given to @CsvDate.
    @CsvDate(value = "yyyy-MM-dd")
    @CsvBindByName(column = "HIRE_DATE")
    private java.util.Date hireDate;

    /** No-argument constructor; leaves every field at its Java default value. */
    public Employee() {}

    // Getters and setters...
    public String getEmployeeId() { return employeeId; }
    public void setEmployeeId(String employeeId) { this.employeeId = employeeId; }

    public String getFirstName() { return firstName; }
    public void setFirstName(String firstName) { this.firstName = firstName; }

    public String getLastName() { return lastName; }
    public void setLastName(String lastName) { this.lastName = lastName; }

    public String getDepartment() { return department; }
    public void setDepartment(String department) { this.department = department; }

    public double getSalary() { return salary; }
    public void setSalary(double salary) { this.salary = salary; }

    public java.util.Date getHireDate() { return hireDate; }
    public void setHireDate(java.util.Date hireDate) { this.hireDate = hireDate; }

    /** Renders id, full name, department and salary; the hire date is not included. */
    @Override
    public String toString() {
        return String.format("Employee{id='%s', name='%s %s', department='%s', salary=%.2f}",
                employeeId, firstName, lastName, department, salary);
    }
}

CSV Generation with OpenCSV

import com.opencsv.*;
import com.opencsv.bean.*;
import java.io.*;
import java.util.Arrays;
import java.util.List;
public class OpenCsvWriter {
public void writeDataToCsv(List<String[]> data, String filePath) throws IOException {
try (CSVWriter writer = new CSVWriter(new FileWriter(filePath))) {
writer.writeAll(data);
}
}
public void writeObjectsToCsv(List<Employee> employees, String filePath) throws IOException {
try (Writer writer = new FileWriter(filePath)) {
StatefulBeanToCsv<Employee> beanToCsv = new StatefulBeanToCsvBuilder<Employee>(writer)
.withSeparator(',')
.withQuotechar(CSVWriter.NO_QUOTE_CHARACTER)
.build();
beanToCsv.write(employees);
} catch (CsvRequiredFieldEmptyException | CsvDataTypeMismatchException e) {
throw new IOException("Error writing CSV", e);
}
}
public void writeWithCustomSettings(String filePath) throws IOException {
CSVWriterBuilder writerBuilder = new CSVWriterBuilder(new FileWriter(filePath))
.withSeparator('|')
.withQuoteChar(CSVWriter.NO_QUOTE_CHARACTER)
.withEscapeChar(CSVWriter.DEFAULT_ESCAPE_CHARACTER)
.withLineEnd(CSVWriter.DEFAULT_LINE_END);
try (CSVWriter writer = writerBuilder.build()) {
// Write header
writer.writeNext(new String[]{"ID", "Product", "Price", "In Stock"});
// Write data
writer.writeNext(new String[]{"1", "Laptop", "999.99", "true"});
writer.writeNext(new String[]{"2", "Mouse", "29.99", "false"});
writer.writeNext(new String[]{"3", "Keyboard", "79.99", "true"});
writer.flush();
}
}
}

4. Advanced CSV Processing

CSV Processor with Validation

import java.io.*;
import java.util.*;
import java.util.function.Function;
/**
 * Line-oriented CSV processing with per-record validation, mapping and transformation.
 *
 * <p>Input is read one physical line at a time, so a quoted field that contains a line break is
 * not supported here. Files are read and written as UTF-8.
 */
public class AdvancedCsvProcessor {

    /**
     * Parses every line of the file, keeps the records accepted by {@code validator}, and maps
     * them with {@code mapper}.
     *
     * <p>A line is skipped and reported when the validator does not return {@code true}, or when
     * parsing, validating or mapping throws. Collected problems are printed to
     * {@code System.err} after the file has been read; they do not abort processing.
     *
     * @param filePath path of the CSV file to read
     * @param mapper converts the fields of one line into a result; an empty {@link Optional}
     *     drops the line silently
     * @param validator returns {@code true} for lines that should be mapped
     * @param <T> type of the mapped results
     * @return the mapped results in file order
     * @throws IOException if the file cannot be opened or read
     */
    public <T> List<T> processCsvWithValidation(
            String filePath,
            Function<String[], Optional<T>> mapper,
            Function<String[], Boolean> validator) throws IOException {
        List<T> results = new ArrayList<>();
        List<String> errors = new ArrayList<>();
        try (BufferedReader br = openReader(filePath)) {
            String line;
            int lineNumber = 0;
            while ((line = br.readLine()) != null) {
                lineNumber++;
                try {
                    String[] fields = parseCsvLine(line);
                    // Validate the record; a null verdict counts as a failed validation
                    // instead of surfacing as an unboxing NullPointerException.
                    if (!Boolean.TRUE.equals(validator.apply(fields))) {
                        errors.add("Line " + lineNumber + ": Validation failed");
                        continue;
                    }
                    // Map to object
                    Optional<T> result = mapper.apply(fields);
                    result.ifPresent(results::add);
                } catch (Exception e) {
                    // Best effort by design: one bad line must not stop the whole import.
                    errors.add("Line " + lineNumber + ": " + e.getMessage());
                }
            }
        }
        // Log errors
        if (!errors.isEmpty()) {
            System.err.println("Processing completed with errors:");
            errors.forEach(System.err::println);
        }
        return results;
    }

    /**
     * Copies {@code inputPath} to {@code outputPath}, passing every data line through
     * {@code transformer}. The first line is treated as the header and copied unchanged.
     *
     * @param inputPath CSV file to read
     * @param outputPath CSV file to write, replaced if it exists
     * @param transformer maps the parsed fields of a data line to the fields to write
     * @throws IOException if either file cannot be opened, read or written
     */
    public void processLargeCsvFile(String inputPath, String outputPath,
            Function<String[], String[]> transformer) throws IOException {
        try (BufferedReader reader = openReader(inputPath);
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(outputPath), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            boolean isHeader = true;
            while ((line = reader.readLine()) != null) {
                if (isHeader) {
                    // Write header as-is
                    writer.write(line);
                    isHeader = false;
                } else {
                    // Transform data
                    String[] transformed = transformer.apply(parseCsvLine(line));
                    writer.write(convertToCsvRow(transformed));
                }
                writer.newLine();
            }
        }
    }

    /** Opens {@code path} as a buffered UTF-8 reader. */
    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(
                new FileInputStream(path), java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Splits one line into fields.
     *
     * <p>Enclosing quotes are removed and a doubled quote inside a quoted field becomes a single
     * quote, so the values round-trip through {@link #convertToCsvRow(String[])}. Trailing empty
     * fields are kept: {@code "a,b,"} yields three fields.
     */
    private String[] parseCsvLine(String line) {
        List<String> fields = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '"') {
                if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') {
                    current.append('"');
                    i++; // consume the second quote of the escaped pair
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (c == ',' && !inQuotes) {
                fields.add(current.toString());
                current.setLength(0);
            } else {
                current.append(c);
            }
        }
        fields.add(current.toString());
        return fields.toArray(new String[0]);
    }

    /** Joins the escaped fields of {@code row} with commas. */
    private String convertToCsvRow(String[] row) {
        return String.join(",", Arrays.stream(row)
                .map(AdvancedCsvProcessor::escapeField)
                .toArray(String[]::new));
    }

    /**
     * Quotes {@code field} when it contains a comma, a quote or a line break, doubling embedded
     * quotes; {@code null} is written as an empty field.
     */
    private static String escapeField(String field) {
        if (field == null) {
            return "";
        }
        boolean needsQuoting = field.contains(",")
                || field.contains("\"")
                || field.contains("\n")
                || field.contains("\r");
        return needsQuoting ? "\"" + field.replace("\"", "\"\"") + "\"" : field;
    }
}

CSV to JSON Converter

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import java.io.*;
import java.util.*;
/**
 * Converts a CSV file whose first line is a header into a JSON array of objects, using Jackson
 * with indented output.
 */
public class CsvToJsonConverter {
    private final ObjectMapper objectMapper;

    /** Creates a converter whose {@link ObjectMapper} has {@code INDENT_OUTPUT} enabled. */
    public CsvToJsonConverter() {
        this.objectMapper = new ObjectMapper();
        this.objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    }

    /**
     * Reads {@code csvFilePath} and writes its data lines to {@code jsonFilePath} as a JSON
     * array with one object per line, keyed by the header names in column order.
     *
     * <p>Both files are handled as UTF-8. When a data line has fewer fields than the header,
     * the missing keys are omitted; surplus fields are ignored. Input is read one physical
     * line at a time, so quoted fields containing line breaks are not supported.
     *
     * @param csvFilePath CSV file to read
     * @param jsonFilePath JSON file to write, replaced if it exists
     * @throws IOException if either file cannot be opened, read or written
     */
    public void convertCsvToJson(String csvFilePath, String jsonFilePath) throws IOException {
        List<Map<String, String>> jsonData = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(csvFilePath), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            String[] headers = null;
            while ((line = br.readLine()) != null) {
                String[] fields = parseCsvLine(line);
                if (headers == null) {
                    // The first line supplies the JSON property names.
                    headers = fields;
                } else {
                    // LinkedHashMap keeps the properties in header order.
                    Map<String, String> record = new LinkedHashMap<>();
                    for (int i = 0; i < headers.length && i < fields.length; i++) {
                        record.put(headers[i], fields[i]);
                    }
                    jsonData.add(record);
                }
            }
        }
        // Write JSON
        try (Writer writer = new OutputStreamWriter(
                new FileOutputStream(jsonFilePath), java.nio.charset.StandardCharsets.UTF_8)) {
            objectMapper.writeValue(writer, jsonData);
        }
    }

    /**
     * Splits one line into fields, removing enclosing quotes and turning a doubled quote inside
     * a quoted field into a single literal quote.
     */
    private String[] parseCsvLine(String line) {
        List<String> fields = new ArrayList<>();
        StringBuilder field = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '"') {
                if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') {
                    // Escaped quote (""): keep one quote and skip its partner.
                    field.append('"');
                    i++;
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (c == ',' && !inQuotes) {
                fields.add(field.toString());
                field.setLength(0);
            } else {
                field.append(c);
            }
        }
        fields.add(field.toString());
        return fields.toArray(new String[0]);
    }
}

5. Spring Boot CSV Integration

Spring Boot CSV Controller

import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.http.ResponseEntity;
import java.io.*;
import java.util.List;
@RestController
@RequestMapping("/api/csv")
public class CsvController {
private final CsvProcessingService csvProcessingService;
public CsvController(CsvProcessingService csvProcessingService) {
this.csvProcessingService = csvProcessingService;
}
@PostMapping("/upload")
public ResponseEntity<String> uploadCsv(@RequestParam("file") MultipartFile file) {
try {
List<Employee> employees = csvProcessingService.processCsvFile(file);
return ResponseEntity.ok("Processed " + employees.size() + " records");
} catch (IOException e) {
return ResponseEntity.badRequest().body("Error processing file: " + e.getMessage());
}
}
@GetMapping("/download")
public ResponseEntity<byte[]> downloadCsv() throws IOException {
byte[] csvData = csvProcessingService.generateCsvReport();
return ResponseEntity.ok()
.header("Content-Type", "text/csv")
.header("Content-Disposition", "attachment; filename=employees.csv")
.body(csvData);
}
}
/**
 * Service behind the CSV endpoints: parses uploaded files into {@link Employee} beans with
 * OpenCSV and builds a sample report with Apache Commons CSV.
 *
 * <p>Uploads are decoded and reports are encoded as UTF-8, not the platform default charset,
 * so behavior is the same on every server.
 */
@Service
public class CsvProcessingService {

    /**
     * Parses the uploaded file into {@link Employee} beans.
     *
     * @param file the uploaded CSV file
     * @return the beans produced by {@code CsvToBean.parse()}
     * @throws IOException if the upload stream cannot be opened or closed
     */
    public List<Employee> processCsvFile(MultipartFile file) throws IOException {
        try (Reader reader = new InputStreamReader(
                file.getInputStream(), java.nio.charset.StandardCharsets.UTF_8)) {
            CsvToBean<Employee> csvToBean = new CsvToBeanBuilder<Employee>(reader)
                    .withType(Employee.class)
                    .withIgnoreLeadingWhiteSpace(true)
                    .build();
            return csvToBean.parse();
        }
    }

    /**
     * Builds an in-memory CSV report with the header {@code ID, Name, Department, Salary} and
     * three hard-coded sample rows.
     *
     * @return the report encoded as UTF-8 bytes
     * @throws IOException if writing to the in-memory stream fails
     */
    public byte[] generateCsvReport() throws IOException {
        try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
             Writer writer = new OutputStreamWriter(
                     outputStream, java.nio.charset.StandardCharsets.UTF_8);
             CSVPrinter printer = new CSVPrinter(writer, CSVFormat.DEFAULT
                     .withHeader("ID", "Name", "Department", "Salary"))) {
            // Sample data - replace with actual data from database
            printer.printRecord("1", "John Doe", "Engineering", "75000");
            printer.printRecord("2", "Jane Smith", "Marketing", "65000");
            printer.printRecord("3", "Bob Johnson", "Sales", "70000");
            // Flush before snapshotting so buffered characters reach the byte stream.
            printer.flush();
            return outputStream.toByteArray();
        }
    }
}

Best Practices Summary

  1. Use Established Libraries: Prefer Apache Commons CSV or OpenCSV over manual parsing
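  2. Handle Character Encoding: Always specify the encoding explicitly (UTF-8 recommended); FileReader and FileWriter created without a charset argument use the platform default encoding on Java versions before 18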
  3. Validate Input Data: Implement validation for CSV data
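  4. Use Streaming for Large Files: Process large files record by record instead of loading the whole file into memory (a quoted CSV field may span several physical lines)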
  5. Handle Exceptions Gracefully: Provide meaningful error messages
  6. Consider Performance: Use appropriate data structures and avoid unnecessary object creation
  7. Test Edge Cases: Test with various CSV formats and edge cases

This comprehensive guide provides multiple approaches for CSV parsing and generation in Java, from basic implementations to advanced processing with popular libraries.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper