Overview
Privacy-preserving analytics enables data analysis while protecting individual privacy through techniques like differential privacy, homomorphic encryption, secure multi-party computation, and federated learning.
Architecture
Core Components
- Data Anonymization: k-anonymity, l-diversity, t-closeness
- Differential Privacy: Adding calibrated noise
- Homomorphic Encryption: Compute on encrypted data
- Secure Multi-Party Computation (MPC): Distributed computation
- Federated Learning: Train models on-device
Dependencies
<dependencies>
  <!-- Differential Privacy -->
  <dependency>
    <groupId>com.google.differentialprivacy</groupId>
    <artifactId>privacy-on-beam</artifactId>
    <version>0.5.0</version>
  </dependency>
  <!-- Homomorphic Encryption -->
  <dependency>
    <groupId>com.microsoft</groupId>
    <artifactId>seal</artifactId>
    <version>3.7.2</version>
  </dependency>
  <!-- Apache Commons Math (numerical and statistical utilities) -->
  <dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-math3</artifactId>
    <version>3.6.1</version>
  </dependency>
  <!-- JSON processing -->
  <dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>2.15.2</version>
  </dependency>
  <!-- Big data processing -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.12</artifactId>
    <version>3.4.0</version>
  </dependency>
  <!-- Database connectivity -->
  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-jpa</artifactId>
  </dependency>
</dependencies>
Core Implementation
1. Differential Privacy Service
/**
 * Differential-privacy building blocks: the Laplace, Gaussian and exponential mechanisms plus
 * a few noisy aggregates (count, mean, histogram) built on the Laplace mechanism.
 *
 * <p>All mechanisms use the single {@code epsilon}/{@code delta} pair supplied at construction
 * time. Nothing in this class tracks cumulative privacy loss across calls; that is left to the
 * caller.
 *
 * <p>NOTE(review): noise is generated with ordinary {@code double} arithmetic. Floating-point
 * Laplace sampling is known to be attackable in adversarial settings; confirm whether that
 * threat model applies before production use.
 */
@Service
public class DifferentialPrivacyService {

    private final double epsilon;
    private final double delta;
    private final Random random;

    /**
     * @param epsilon privacy-loss parameter, bound to {@code privacy.epsilon} (default 1.0);
     *                must be finite and strictly positive
     * @param delta   failure probability for the Gaussian mechanism, bound to
     *                {@code privacy.delta} (default 0.00001); must lie in [0, 1)
     * @throws IllegalArgumentException if either value is outside its valid range
     */
    public DifferentialPrivacyService(
            @Value("${privacy.epsilon:1.0}") double epsilon,
            @Value("${privacy.delta:0.00001}") double delta) {
        // Written as negated comparisons so that NaN is rejected as well.
        if (!(epsilon > 0.0) || Double.isInfinite(epsilon)) {
            throw new IllegalArgumentException(
                    "privacy.epsilon must be finite and > 0, got " + epsilon);
        }
        if (!(delta >= 0.0 && delta < 1.0)) {
            throw new IllegalArgumentException("privacy.delta must be in [0, 1), got " + delta);
        }
        this.epsilon = epsilon;
        this.delta = delta;
        this.random = new SecureRandom();
    }

    /**
     * Laplace mechanism for numerical data.
     *
     * @param trueValue   the exact value to protect
     * @param sensitivity the query's sensitivity; the noise scale is {@code sensitivity / epsilon}
     * @return {@code trueValue} plus one Laplace draw
     * @throws IllegalArgumentException if {@code sensitivity} is negative, NaN or infinite
     */
    public double addLaplaceNoise(double trueValue, double sensitivity) {
        requireValidSensitivity(sensitivity);
        double scale = sensitivity / epsilon;
        return trueValue + laplace(scale);
    }

    /** Draws one sample from a zero-centred Laplace distribution by inverse-CDF sampling. */
    private double laplace(double scale) {
        // nextDouble() is in [0, 1), so u is in [-0.5, 0.5). The endpoint u == -0.5 would give
        // log(0) = -Infinity and therefore infinite noise, so it is rejected and redrawn.
        double u;
        do {
            u = random.nextDouble() - 0.5;
        } while (u == -0.5);
        // log1p(-2|u|) equals log(1 - 2|u|) but keeps precision when |u| is tiny.
        return -scale * Math.signum(u) * Math.log1p(-2.0 * Math.abs(u));
    }

    /**
     * Exponential mechanism for categorical data: picks one candidate with probability
     * proportional to {@code exp(epsilon * score / (2 * sensitivity))}.
     *
     * @param candidates    non-empty list of options to choose from
     * @param scoreFunction utility score of a candidate (higher is better)
     * @param sensitivity   sensitivity of the score function; must be strictly positive
     * @return the sampled candidate
     * @throws IllegalArgumentException if {@code candidates} is null or empty, if
     *                                  {@code sensitivity} is not a finite positive number, or
     *                                  if a score produces a non-finite exponent
     */
    public <T> T exponentialMechanism(List<T> candidates,
                                      Function<T, Double> scoreFunction,
                                      double sensitivity) {
        if (candidates == null || candidates.isEmpty()) {
            throw new IllegalArgumentException("candidates must not be null or empty");
        }
        if (!(sensitivity > 0.0) || Double.isInfinite(sensitivity)) {
            throw new IllegalArgumentException(
                    "sensitivity must be finite and > 0, got " + sensitivity);
        }

        double[] exponents = new double[candidates.size()];
        double maxExponent = Double.NEGATIVE_INFINITY;
        int index = 0;
        for (T candidate : candidates) {
            double exponent = epsilon * scoreFunction.apply(candidate) / (2 * sensitivity);
            if (Double.isNaN(exponent) || Double.isInfinite(exponent)) {
                throw new IllegalArgumentException(
                        "score of candidate " + index + " yields a non-finite exponent");
            }
            exponents[index++] = exponent;
            maxExponent = Math.max(maxExponent, exponent);
        }

        // Subtracting the maximum leaves the normalized probabilities unchanged but keeps
        // Math.exp from overflowing to Infinity (which used to turn every probability into NaN).
        double[] weights = new double[exponents.length];
        double total = 0.0;
        for (int i = 0; i < exponents.length; i++) {
            weights[i] = Math.exp(exponents[i] - maxExponent);
            total += weights[i];
        }

        // Normalize probabilities; total >= 1 because the best candidate has weight exp(0).
        double[] probabilities = new double[weights.length];
        for (int i = 0; i < weights.length; i++) {
            probabilities[i] = weights[i] / total;
        }

        // Sample according to probabilities
        return weightedRandomChoice(candidates, probabilities);
    }

    /**
     * Count with Laplace noise.
     *
     * @return the noisy count rounded to the nearest {@code long} and clamped to be non-negative
     */
    public long noisyCount(long trueCount, double sensitivity) {
        double noisyCount = addLaplaceNoise(trueCount, sensitivity);
        return Math.max(0L, Math.round(noisyCount)); // Ensure non-negative
    }

    /**
     * Mean with noise: perturbs the sum (sensitivity {@code range}) and the count
     * (sensitivity 1.0) independently and divides them.
     *
     * <p>Two Laplace draws are made, each at the full configured epsilon.
     *
     * @param values the values to average
     * @param range  sensitivity used for the sum
     * @return noisy sum divided by noisy count, or 0.0 when the noisy count is not positive
     */
    public double noisyMean(List<Double> values, double range) {
        double sum = values.stream().mapToDouble(Double::doubleValue).sum();
        double count = values.size();
        // Add noise to sum and count
        double noisySum = addLaplaceNoise(sum, range);
        double noisyCount = addLaplaceNoise(count, 1.0);
        return noisyCount > 0 ? noisySum / noisyCount : 0.0;
    }

    /**
     * Histogram with differential privacy: applies {@link #noisyCount(long, double)} to every
     * bucket present in {@code trueHistogram}. Buckets absent from the input stay absent.
     */
    public <T> Map<T, Long> noisyHistogram(Map<T, Long> trueHistogram, double sensitivity) {
        Map<T, Long> noisyHistogram = new HashMap<>();
        for (Map.Entry<T, Long> entry : trueHistogram.entrySet()) {
            noisyHistogram.put(entry.getKey(), noisyCount(entry.getValue(), sensitivity));
        }
        return noisyHistogram;
    }

    /**
     * Gaussian mechanism for (ε,δ)-differential privacy, with
     * {@code sigma = sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon}.
     *
     * <p>NOTE(review): this sigma formula is the classical calibration, which is usually stated
     * for epsilon below 1 — confirm it is appropriate for the configured epsilon.
     *
     * @throws IllegalStateException    if the service was configured with {@code delta == 0}
     * @throws IllegalArgumentException if {@code sensitivity} is negative, NaN or infinite
     */
    public double addGaussianNoise(double trueValue, double sensitivity) {
        requireValidSensitivity(sensitivity);
        if (delta == 0.0) {
            // ln(1.25 / 0) is infinite, which would make every result infinite or NaN.
            throw new IllegalStateException("Gaussian mechanism requires privacy.delta > 0");
        }
        double sigma = sensitivity * Math.sqrt(2 * Math.log(1.25 / delta)) / epsilon;
        double noise = random.nextGaussian() * sigma;
        return trueValue + noise;
    }

    /** Picks {@code items[i]} with probability {@code probabilities[i]}. */
    private <T> T weightedRandomChoice(List<T> items, double[] probabilities) {
        double randomValue = random.nextDouble();
        double cumulativeProbability = 0.0;
        for (int i = 0; i < items.size(); i++) {
            cumulativeProbability += probabilities[i];
            if (randomValue <= cumulativeProbability) {
                return items.get(i);
            }
        }
        // Rounding can leave the cumulative sum a hair below 1; fall back to the last item.
        return items.get(items.size() - 1);
    }

    private static void requireValidSensitivity(double sensitivity) {
        if (!(sensitivity >= 0.0) || Double.isInfinite(sensitivity)) {
            throw new IllegalArgumentException(
                    "sensitivity must be finite and >= 0, got " + sensitivity);
        }
    }
}
2. Data Anonymization Service
/**
 * Quasi-identifier based anonymization helpers: a generalization pass aimed at k-anonymity and
 * an l-diversity check.
 */
@Service
public class DataAnonymizationService {

    /** Plain decimal number: optional minus sign, digits, optional fractional part. */
    private static final java.util.regex.Pattern NUMERIC =
            java.util.regex.Pattern.compile("-?\\d+(\\.\\d+)?");

    /**
     * Groups records by their quasi-identifiers. Groups with at least {@code k} members keep
     * their quasi-identifiers; smaller groups get generalized quasi-identifiers. No record is
     * dropped and each result still wraps the original record.
     *
     * <p>NOTE(review): generalized groups are not re-checked, so a group can still hold fewer
     * than {@code k} records after generalization. Strict k-anonymity would need a further
     * generalization or suppression step.
     *
     * @param records          records to anonymize
     * @param quasiIdentifiers extracts the quasi-identifier values of a record
     * @param k                minimum equivalence-class size; must be at least 1
     * @return one {@code AnonymizedRecord} per input record
     * @throws IllegalArgumentException if {@code k < 1}
     */
    public <T> List<AnonymizedRecord<T>> applyKAnonymity(
            List<T> records,
            Function<T, List<String>> quasiIdentifiers,
            int k) {
        if (k < 1) {
            throw new IllegalArgumentException("k must be at least 1, got " + k);
        }
        // Group by quasi-identifiers
        Map<List<String>, List<T>> groups = records.stream()
                .collect(Collectors.groupingBy(quasiIdentifiers));

        List<AnonymizedRecord<T>> anonymizedRecords = new ArrayList<>(records.size());
        for (Map.Entry<List<String>, List<T>> entry : groups.entrySet()) {
            List<T> members = entry.getValue();
            // Large enough groups are published as-is; undersized ones are generalized.
            List<String> published = members.size() >= k
                    ? entry.getKey()
                    : generalizeQuasiIdentifiers(entry.getKey());
            for (T record : members) {
                anonymizedRecords.add(new AnonymizedRecord<>(record, published));
            }
        }
        return anonymizedRecords;
    }

    /**
     * l-diversity: each equivalence class has at least {@code l} distinct sensitive values.
     *
     * @return {@code true} if every group satisfies the bound (trivially true for no groups)
     */
    public <T> boolean checkLDiversity(
            Map<List<String>, List<T>> groups,
            Function<T, String> sensitiveAttribute,
            int l) {
        for (List<T> group : groups.values()) {
            long distinctSensitiveValues = group.stream()
                    .map(sensitiveAttribute)
                    .distinct()
                    .count();
            if (distinctSensitiveValues < l) {
                return false;
            }
        }
        return true;
    }

    /** Applies one step of generalization to every quasi-identifier value. */
    private List<String> generalizeQuasiIdentifiers(List<String> identifiers) {
        List<String> generalized = new ArrayList<>(identifiers.size());
        for (String identifier : identifiers) {
            if (identifier == null) {
                // A missing value carries nothing to generalize; previously this was an NPE.
                generalized.add(null);
            } else if (isNumeric(identifier)) {
                // Generalize numeric values to ranges
                generalized.add(generalizeNumeric(identifier));
            } else {
                // Generalize categorical values
                generalized.add(generalizeCategorical(identifier));
            }
        }
        return generalized;
    }

    /** Maps a number to its enclosing width-10 bucket, e.g. {@code "25"} becomes {@code "20-30"}. */
    private String generalizeNumeric(String value) {
        try {
            double num = Double.parseDouble(value);
            double lower = Math.floor(num / 10) * 10;
            double upper = lower + 10;
            return formatBound(lower) + "-" + formatBound(upper);
        } catch (NumberFormatException e) {
            return value; // Return original if not numeric
        }
    }

    /**
     * Renders a bucket bound without the trailing ".0" that {@code double} concatenation adds,
     * so the output matches the documented "20-30" form. Values too large to be held exactly
     * fall back to the default {@code double} rendering.
     */
    private static String formatBound(double bound) {
        if (Double.isNaN(bound) || Math.abs(bound) >= 1e15) {
            return Double.toString(bound);
        }
        return Long.toString((long) bound);
    }

    /** Keeps the first three characters of values longer than five characters. */
    private String generalizeCategorical(String value) {
        // Implement domain-specific generalization
        if (value.length() > 5) {
            return value.substring(0, 3) + "*"; // Partial masking
        }
        return value;
    }

    private boolean isNumeric(String str) {
        return NUMERIC.matcher(str).matches();
    }
}
3. Homomorphic Encryption Service
/**
 * Wrapper around a SEAL BFV context that encrypts values, combines ciphertexts and decrypts
 * results. The public and secret keys are both generated and held by this one service.
 */
@Service
public class HomomorphicEncryptionService {

    private final EncryptionParameters params;
    private final SEALContext context;
    private final KeyGenerator keyGenerator;
    private final Encryptor encryptor;
    private final Evaluator evaluator;
    private final Decryptor decryptor;

    public HomomorphicEncryptionService() {
        // Initialize SEAL context with BFV scheme
        this.params = createEncryptionParameters();
        this.context = new SEALContext(params);
        this.keyGenerator = new KeyGenerator(context);
        this.encryptor = new Encryptor(context, keyGenerator.getPublicKey());
        this.evaluator = new Evaluator(context);
        this.decryptor = new Decryptor(context, keyGenerator.getSecretKey());
    }

    /** Builds BFV parameters: polynomial degree 4096 with a 20-bit batching plain modulus. */
    private EncryptionParameters createEncryptionParameters() {
        EncryptionParameters params = new EncryptionParameters(SchemeType.BFV);
        params.setPolyModulusDegree(4096);
        params.setCoeffModulus(CoeffModulus.BFVDefault(4096));
        params.setPlainModulus(PlainModulus.Batching(4096, 20));
        return params;
    }

    /**
     * Encrypts a numerical value.
     *
     * <p>NOTE(review): the plaintext is built from {@code Double.toString(value)}. Whether the
     * binding's {@code Plaintext(String)} constructor accepts decimal text is not visible here;
     * confirm that {@link #decrypt(Ciphertext)} round-trips fractional values.
     *
     * @throws RuntimeException wrapping any failure raised while encrypting
     */
    public Ciphertext encrypt(double value) {
        try {
            Plaintext plain = new Plaintext(Double.toString(value));
            Ciphertext cipher = new Ciphertext();
            encryptor.encrypt(plain, cipher);
            return cipher;
        } catch (Exception e) {
            throw new RuntimeException("Encryption failed", e);
        }
    }

    /** Homomorphic addition; the inputs are left untouched and a new ciphertext is returned. */
    public Ciphertext add(Ciphertext a, Ciphertext b) {
        Ciphertext result = new Ciphertext();
        evaluator.add(a, b, result);
        return result;
    }

    /** Homomorphic multiplication; a new ciphertext is returned. */
    public Ciphertext multiply(Ciphertext a, Ciphertext b) {
        Ciphertext result = new Ciphertext();
        evaluator.multiply(a, b, result);
        return result;
    }

    /**
     * Homomorphic sum of encrypted values.
     *
     * @return an encryption of 0.0 for an empty list, the single element itself for a
     *         one-element list, otherwise the running sum of all elements
     */
    public Ciphertext sum(List<Ciphertext> encryptedValues) {
        if (encryptedValues.isEmpty()) {
            return encrypt(0.0);
        }
        Ciphertext sum = encryptedValues.get(0);
        for (int i = 1; i < encryptedValues.size(); i++) {
            sum = add(sum, encryptedValues.get(i));
        }
        return sum;
    }

    /**
     * Homomorphic mean calculation.
     *
     * <p>Despite its name, this returns the encrypted <em>sum</em>: no division is performed on
     * ciphertexts here. Callers have to decrypt the result and divide by
     * {@code encryptedValues.size()} themselves.
     */
    public Ciphertext mean(List<Ciphertext> encryptedValues) {
        // The element count used to be encrypted here and then discarded; that work was dead
        // and has been removed.
        return sum(encryptedValues);
    }

    /**
     * Decrypts a result.
     *
     * @throws RuntimeException wrapping any failure raised while decrypting or parsing
     */
    public double decrypt(Ciphertext cipher) {
        try {
            Plaintext plain = new Plaintext();
            decryptor.decrypt(cipher, plain);
            return Double.parseDouble(plain.toString());
        } catch (Exception e) {
            throw new RuntimeException("Decryption failed", e);
        }
    }

    /** Encrypts every value independently, preserving order. */
    public List<Ciphertext> encryptBatch(List<Double> values) {
        return values.stream()
                .map(this::encrypt)
                .collect(Collectors.toList());
    }
}
4. Secure Multi-Party Computation Service
/**
 * Simplified multi-party-computation helpers: secret sharing, a masked sum and a hash-based
 * set intersection. Several parts are placeholders, as noted on each method.
 */
@Service
public class MPCService {

    private final CryptoService cryptoService;

    public MPCService(CryptoService cryptoService) {
        this.cryptoService = cryptoService;
    }

    /**
     * Splits {@code secret} into one share per party using a random polynomial of degree
     * {@code threshold - 1} whose constant term is the secret (Shamir's scheme).
     *
     * <p>NOTE(review): the polynomial is evaluated over the plain integers, not modulo a prime.
     * Shamir's security argument relies on arithmetic in a finite field; without it, fewer than
     * {@code threshold} shares can leak information about the secret. Introducing a modulus
     * would change the share values, so it must be coordinated with the reconstruction code.
     *
     * @param secret    value to share
     * @param parties   number of shares to produce
     * @param threshold number of shares intended to be needed for reconstruction
     * @return map from {@code "party_<i>"} to the point {@code {x, y}} with {@code x = i}
     * @throws IllegalArgumentException if {@code secret} is null, {@code threshold < 1} or
     *                                  {@code parties < threshold}
     */
    public Map<String, BigInteger[]> secretShare(BigInteger secret, int parties, int threshold) {
        if (secret == null) {
            throw new IllegalArgumentException("secret must not be null");
        }
        if (threshold < 1) {
            throw new IllegalArgumentException("threshold must be at least 1, got " + threshold);
        }
        if (parties < threshold) {
            // With fewer shares than the threshold the secret could never be reconstructed.
            throw new IllegalArgumentException(
                    "parties (" + parties + ") must be >= threshold (" + threshold + ")");
        }

        BigInteger[] coefficients = generatePolynomial(secret, threshold - 1);
        Map<String, BigInteger[]> shares = new HashMap<>();
        for (int i = 1; i <= parties; i++) {
            BigInteger x = BigInteger.valueOf(i);
            BigInteger y = evaluatePolynomial(coefficients, x);
            shares.put("party_" + i, new BigInteger[]{x, y});
        }
        return shares;
    }

    /**
     * Secure sum protocol (local step only): adds up {@code localSums} and adds one random mask.
     *
     * <p>The mask is never removed or exchanged here, so the returned value is the masked sum,
     * not the true sum. {@code partyId} is currently unused.
     */
    public BigInteger secureSum(List<BigInteger> localSums, String partyId) {
        BigInteger randomMask = cryptoService.generateRandomNumber();
        BigInteger maskedValue = localSums.stream()
                .reduce(BigInteger.ZERO, BigInteger::add)
                .add(randomMask);
        // In practice, this would be sent to other parties
        // and reconstructed without revealing individual values
        return maskedValue;
    }

    /**
     * Private set intersection (simplified): hashes every local item with a fixed suffix and
     * keeps the items whose hash also appears in a remote party's hash set.
     *
     * <p>NOTE(review): every item is hashed with the same constant suffix {@code "_salt"}, so
     * a party can test guessed items against the hashes it receives. This is not a real PSI
     * protocol.
     *
     * @return local items whose hash was received from at least one other party
     */
    public Set<String> privateSetIntersection(Set<String> localSet, List<String> otherParties) {
        Map<String, String> localHashes = localSet.stream()
                .collect(Collectors.toMap(
                        item -> cryptoService.hash(item + "_salt"),
                        item -> item
                ));

        Set<String> intersection = new HashSet<>();
        for (String otherParty : otherParties) {
            // In practice, you'd receive hashes from other parties
            Set<String> otherHashes = receiveHashes(otherParty);
            for (Map.Entry<String, String> local : localHashes.entrySet()) {
                if (otherHashes.contains(local.getKey())) {
                    intersection.add(local.getValue());
                }
            }
        }
        return intersection;
    }

    /** Builds {@code degree + 1} coefficients; index 0 is the secret, the rest are random. */
    private BigInteger[] generatePolynomial(BigInteger secret, int degree) {
        BigInteger[] coefficients = new BigInteger[degree + 1];
        coefficients[0] = secret; // Constant term is the secret
        for (int i = 1; i <= degree; i++) {
            coefficients[i] = cryptoService.generateRandomNumber();
        }
        return coefficients;
    }

    /** Evaluates the polynomial at {@code x}; {@code coefficients[i]} multiplies {@code x^i}. */
    private BigInteger evaluatePolynomial(BigInteger[] coefficients, BigInteger x) {
        // Horner's rule: same value as summing coefficient * x^i, without recomputing powers.
        BigInteger result = BigInteger.ZERO;
        for (int i = coefficients.length - 1; i >= 0; i--) {
            result = result.multiply(x).add(coefficients[i]);
        }
        return result;
    }

    private Set<String> receiveHashes(String partyId) {
        // Mock implementation - in practice, this would receive from network
        return Set.of();
    }
}
Advanced Analytics Implementations
1. Privacy-Preserving Machine Learning
/**
 * Privacy-preserving model training, aggregation and prediction built on the differential
 * privacy and homomorphic encryption services.
 *
 * <p>NOTE(review): noise calls below pass {@code sensitivity / epsilon} as the
 * <em>sensitivity</em> argument of {@code DifferentialPrivacyService.addLaplaceNoise}, which
 * divides by its own configured epsilon again. The resulting scale is {@code sensitivity / epsilon}
 * only when that service is configured with epsilon 1.0 — confirm the configuration.
 */
@Service
public class PrivacyPreservingMLService {

    /** Pivots smaller than this are treated as zero when solving the normal equations. */
    private static final double SINGULARITY_TOLERANCE = 1e-12;

    private final DifferentialPrivacyService dpService;
    private final HomomorphicEncryptionService heService;

    public PrivacyPreservingMLService(DifferentialPrivacyService dpService,
                                      HomomorphicEncryptionService heService) {
        this.dpService = dpService;
        this.heService = heService;
    }

    /**
     * Differentially private linear regression: perturbs the sufficient statistics XᵀX and XᵀY
     * with half of {@code epsilon} each and solves the noisy normal equations.
     *
     * <p>No intercept column is added. The model only gets an intercept if the caller supplies
     * a constant feature.
     *
     * @param features one row per record; all rows must have the same length
     * @param targets  one target per record
     * @param epsilon  total privacy budget for this training run; must be positive
     * @throws IllegalArgumentException if the inputs are empty, ragged or mismatched in size,
     *                                  or if {@code epsilon} is not positive
     * @throws IllegalStateException    if the noisy normal equations are singular
     */
    public LinearRegressionModel trainLinearRegressionWithDP(
            List<double[]> features,
            double[] targets,
            double epsilon) {
        if (features == null || features.isEmpty()) {
            throw new IllegalArgumentException("features must not be null or empty");
        }
        if (targets == null || targets.length != features.size()) {
            throw new IllegalArgumentException("targets must have one entry per feature row");
        }
        requirePositive(epsilon);
        int width = features.get(0).length;
        for (double[] row : features) {
            if (row.length != width) {
                throw new IllegalArgumentException("all feature rows must have length " + width);
            }
        }

        // Add noise to sufficient statistics
        double[][] noisyXTX = addNoiseToXTX(computeXTX(features), epsilon / 2);
        double[] noisyXTY = addNoiseToXTY(computeXTY(features, targets), epsilon / 2);
        // Solve noisy normal equations
        double[] coefficients = solveLinearSystem(noisyXTX, noisyXTY);
        return new LinearRegressionModel(coefficients, epsilon);
    }

    /**
     * Federated learning aggregation with differential privacy: averages each parameter across
     * clients and adds Laplace noise to the average.
     *
     * @throws IllegalArgumentException if there are no client models, their parameter vectors
     *                                  differ in length, or {@code epsilon} is not positive
     */
    public FederatedModel aggregateModelsWithDP(
            List<FederatedModel> clientModels,
            double epsilon) {
        if (clientModels == null || clientModels.isEmpty()) {
            throw new IllegalArgumentException("clientModels must not be null or empty");
        }
        requirePositive(epsilon);

        int paramSize = clientModels.get(0).getParameters().length;
        double[] sums = new double[paramSize];
        for (FederatedModel model : clientModels) {
            double[] parameters = model.getParameters();
            if (parameters.length != paramSize) {
                throw new IllegalArgumentException(
                        "all client models must have " + paramSize + " parameters");
            }
            for (int i = 0; i < paramSize; i++) {
                sums[i] += parameters[i];
            }
        }

        double[] aggregatedParams = new double[paramSize];
        for (int i = 0; i < paramSize; i++) {
            double mean = sums[i] / clientModels.size();
            aggregatedParams[i] = dpService.addLaplaceNoise(mean, 1.0 / epsilon);
        }
        return new FederatedModel(aggregatedParams);
    }

    /**
     * Homomorphic encrypted prediction. {@code coefficients[0]} is used as the intercept and
     * {@code coefficients[i + 1]} weights encrypted feature {@code i}.
     *
     * @throws IllegalArgumentException if the model has fewer than {@code features + 1}
     *                                  coefficients for some input row
     */
    public List<Ciphertext> predictEncrypted(LinearRegressionModel model,
                                             List<Ciphertext[]> encryptedFeatures) {
        double[] coefficients = model.getCoefficients();
        List<Ciphertext> predictions = new ArrayList<>(encryptedFeatures.size());
        for (Ciphertext[] features : encryptedFeatures) {
            if (coefficients.length < features.length + 1) {
                throw new IllegalArgumentException(
                        "model has " + coefficients.length + " coefficients but "
                                + (features.length + 1) + " are required");
            }
            // Start with the intercept. This used to start from 0.0, which silently dropped
            // coefficients[0] even though the weights below are read from index i + 1.
            Ciphertext prediction = heService.encrypt(coefficients[0]);
            for (int i = 0; i < features.length; i++) {
                Ciphertext weightedFeature = heService.multiply(
                        features[i],
                        heService.encrypt(coefficients[i + 1])
                );
                prediction = heService.add(prediction, weightedFeature);
            }
            predictions.add(prediction);
        }
        return predictions;
    }

    /** Computes the d x d matrix XᵀX as the sum of outer products of the feature rows. */
    private double[][] computeXTX(List<double[]> features) {
        int d = features.get(0).length;
        double[][] XTX = new double[d][d];
        for (double[] x : features) {
            for (int i = 0; i < d; i++) {
                for (int j = 0; j < d; j++) {
                    XTX[i][j] += x[i] * x[j];
                }
            }
        }
        return XTX;
    }

    /** Computes the length-d vector XᵀY. */
    private double[] computeXTY(List<double[]> features, double[] targets) {
        int d = features.get(0).length;
        double[] XTY = new double[d];
        for (int k = 0; k < features.size(); k++) {
            double[] x = features.get(k);
            for (int i = 0; i < d; i++) {
                XTY[i] += x[i] * targets[k];
            }
        }
        return XTY;
    }

    /** Adds independent Laplace noise to every entry of XᵀX. */
    private double[][] addNoiseToXTX(double[][] XTX, double epsilon) {
        int d = XTX.length;
        double[][] noisyXTX = new double[d][d];
        double sensitivity = computeSensitivityXTX(d);
        for (int i = 0; i < d; i++) {
            for (int j = 0; j < d; j++) {
                noisyXTX[i][j] = dpService.addLaplaceNoise(XTX[i][j], sensitivity / epsilon);
            }
        }
        return noisyXTX;
    }

    /** Adds independent Laplace noise to every entry of XᵀY, mirroring the XᵀX treatment. */
    private double[] addNoiseToXTY(double[] XTY, double epsilon) {
        int d = XTY.length;
        double[] noisyXTY = new double[d];
        double sensitivity = computeSensitivityXTY(d);
        for (int i = 0; i < d; i++) {
            noisyXTY[i] = dpService.addLaplaceNoise(XTY[i], sensitivity / epsilon);
        }
        return noisyXTY;
    }

    private double computeSensitivityXTX(int d) {
        // Maximum possible change when adding/removing one record
        return 1.0; // This depends on data normalization
    }

    private double computeSensitivityXTY(int d) {
        // Same placeholder as for XᵀX; assumes features and targets are normalized so that one
        // record changes XᵀY by at most 1 — TODO confirm against the real data ranges.
        return 1.0;
    }

    /**
     * Solves {@code a * x = b} by Gaussian elimination with partial pivoting. The inputs are
     * not modified.
     *
     * @throws IllegalStateException if the matrix is singular to within
     *                               {@link #SINGULARITY_TOLERANCE}
     */
    private double[] solveLinearSystem(double[][] a, double[] b) {
        int n = b.length;
        // Work on an augmented copy [a | b] so the caller's arrays stay intact.
        double[][] m = new double[n][n + 1];
        for (int i = 0; i < n; i++) {
            System.arraycopy(a[i], 0, m[i], 0, n);
            m[i][n] = b[i];
        }

        for (int col = 0; col < n; col++) {
            // Choose the largest pivot in this column for numerical stability.
            int pivot = col;
            for (int row = col + 1; row < n; row++) {
                if (Math.abs(m[row][col]) > Math.abs(m[pivot][col])) {
                    pivot = row;
                }
            }
            // Negated comparison so that a NaN pivot is rejected as well.
            if (!(Math.abs(m[pivot][col]) >= SINGULARITY_TOLERANCE)) {
                throw new IllegalStateException(
                        "noisy normal equations are singular; cannot fit the model");
            }
            double[] swap = m[col];
            m[col] = m[pivot];
            m[pivot] = swap;

            for (int row = col + 1; row < n; row++) {
                double factor = m[row][col] / m[col][col];
                for (int k = col; k <= n; k++) {
                    m[row][k] -= factor * m[col][k];
                }
            }
        }

        // Back substitution on the upper-triangular system.
        double[] x = new double[n];
        for (int i = n - 1; i >= 0; i--) {
            double acc = m[i][n];
            for (int j = i + 1; j < n; j++) {
                acc -= m[i][j] * x[j];
            }
            x[i] = acc / m[i][i];
        }
        return x;
    }

    private static void requirePositive(double epsilon) {
        if (!(epsilon > 0.0) || Double.isInfinite(epsilon)) {
            throw new IllegalArgumentException("epsilon must be finite and > 0, got " + epsilon);
        }
    }
}
2. Privacy-Preserving Analytics Engine
/**
 * Runs analytics queries with differential-privacy noise on top of a k-anonymity pass.
 *
 * <p>Convention used throughout this class: {@code DifferentialPrivacyService} takes a
 * <em>sensitivity</em> as the second argument of its noisy operations and divides it by its own
 * configured epsilon. To apply a per-query epsilon, this class passes
 * {@code 1.0 / epsilon} there. NOTE(review): the resulting noise scale equals
 * {@code 1 / epsilon} only when the service itself is configured with epsilon 1.0 — confirm.
 */
@Service
public class PrivacyPreservingAnalyticsEngine {

    /** Minimum equivalence-class size requested from the anonymization service. */
    private static final int K_ANONYMITY = 5;

    private final DifferentialPrivacyService dpService;
    private final DataAnonymizationService anonymizationService;

    public PrivacyPreservingAnalyticsEngine(DifferentialPrivacyService dpService,
                                            DataAnonymizationService anonymizationService) {
        this.dpService = dpService;
        this.anonymizationService = anonymizationService;
    }

    /**
     * Computes a noisy mean, count and category histogram, giving each a third of the budget.
     *
     * <p>NOTE(review): the statistics are computed from {@code getOriginal()} of each
     * anonymized record, so the k-anonymity pass does not alter the values being aggregated.
     *
     * @throws IllegalArgumentException if the budget's epsilon is not positive
     */
    public AnalyticsResult computePrivateStatistics(List<AnalyticsRecord> records,
                                                    PrivacyBudget budget) {
        double epsilonPerQuery = budget.getEpsilon() / 3;
        requirePositive(epsilonPerQuery, "budget epsilon");
        // Previously epsilon / 3 itself was passed as the sensitivity argument, which made the
        // noise grow with the budget. Pass its reciprocal, as addDifferentialPrivacy does.
        // Assumes unit sensitivity for each query (values normalized to [0, 1]) — TODO confirm.
        double noiseArgument = 1.0 / epsilonPerQuery;

        // Apply k-anonymity first
        List<AnonymizedRecord<AnalyticsRecord>> anonymizedRecords =
                anonymizationService.applyKAnonymity(
                        records,
                        this::extractQuasiIdentifiers,
                        K_ANONYMITY
                );

        // Extract sensitive attributes for analysis
        List<Double> sensitiveValues = anonymizedRecords.stream()
                .map(record -> record.getOriginal().getSensitiveValue())
                .collect(Collectors.toList());

        // Compute statistics with differential privacy
        double privateMean = dpService.noisyMean(sensitiveValues, noiseArgument);
        long privateCount = dpService.noisyCount(sensitiveValues.size(), noiseArgument);

        // Compute histogram with remaining budget
        Map<String, Long> categories = extractCategories(anonymizedRecords);
        Map<String, Long> privateHistogram = dpService.noisyHistogram(categories, noiseArgument);

        return new AnalyticsResult(privateMean, privateCount, privateHistogram);
    }

    /**
     * Adds noise to every contributor's local aggregate and sums the noisy values.
     * {@code function} is currently unused.
     *
     * @throws IllegalArgumentException if {@code epsilon} is not positive
     */
    public PrivateAggregationResult aggregatePrivateData(
            List<PrivateDataContributor> contributors,
            AggregationFunction function,
            double epsilon) {
        requirePositive(epsilon, "epsilon");

        // Secure aggregation using MPC techniques
        List<BigInteger> localAggregates = contributors.stream()
                .map(contributor -> contributor.computeLocalAggregate())
                .collect(Collectors.toList());

        // Add differential privacy noise
        List<BigInteger> noisyAggregates = localAggregates.stream()
                .map(agg -> addDifferentialPrivacy(agg, epsilon))
                .collect(Collectors.toList());

        // Combine aggregates (in practice, this would be done securely)
        BigInteger finalResult = noisyAggregates.stream()
                .reduce(BigInteger.ZERO, BigInteger::add);

        return new PrivateAggregationResult(finalResult, epsilon);
    }

    /**
     * Time-series analytics with privacy: buckets events by {@code interval} and reports a
     * noisy count per non-empty bucket, in chronological order. The budget is split evenly
     * across the buckets.
     *
     * <p>NOTE(review): only buckets that contain at least one event are reported, so the set of
     * reported timestamps itself depends on the data.
     *
     * @throws IllegalArgumentException if {@code epsilon} is not positive or {@code interval}
     *                                  is shorter than one millisecond
     */
    public List<TimeSeriesPoint> computePrivateTimeSeries(
            List<Event> events,
            Duration interval,
            double epsilon) {
        requirePositive(epsilon, "epsilon");
        if (interval.toMillis() <= 0) {
            throw new IllegalArgumentException(
                    "interval must be at least one millisecond, got " + interval);
        }

        Map<Instant, List<Event>> grouped = events.stream()
                .collect(Collectors.groupingBy(event ->
                        roundToInterval(event.getTimestamp(), interval)));
        // Sorted copy so the series comes out in time order, not in hash order.
        Map<Instant, List<Event>> timeBuckets = new java.util.TreeMap<>(grouped);

        List<TimeSeriesPoint> privateSeries = new ArrayList<>(timeBuckets.size());
        if (timeBuckets.isEmpty()) {
            return privateSeries;
        }

        double epsilonPerPoint = epsilon / timeBuckets.size();
        // Reciprocal for the same reason as in computePrivateStatistics.
        double noiseArgument = 1.0 / epsilonPerPoint;
        for (Map.Entry<Instant, List<Event>> entry : timeBuckets.entrySet()) {
            long trueCount = entry.getValue().size();
            long privateCount = dpService.noisyCount(trueCount, noiseArgument);
            privateSeries.add(new TimeSeriesPoint(entry.getKey(), privateCount));
        }
        return privateSeries;
    }

    /** Quasi-identifiers of a record: age group, up to the first three zip characters, gender. */
    private List<String> extractQuasiIdentifiers(AnalyticsRecord record) {
        String zip = record.getZipCode();
        // Guard against zip codes shorter than three characters, which used to throw.
        String zipPrefix = zip.substring(0, Math.min(3, zip.length()));
        return Arrays.asList(
                record.getAgeGroup(),
                zipPrefix,
                record.getGender()
        );
    }

    /** Counts records per category. */
    private Map<String, Long> extractCategories(List<AnonymizedRecord<AnalyticsRecord>> records) {
        return records.stream()
                .map(record -> record.getOriginal().getCategory())
                .collect(Collectors.groupingBy(
                        category -> category,
                        Collectors.counting()
                ));
    }

    /**
     * Adds Laplace noise to {@code value}, rounds to the nearest whole number and clamps at
     * zero. The value is converted through {@code double}, so very large values lose precision.
     */
    private BigInteger addDifferentialPrivacy(BigInteger value, double epsilon) {
        double noisyValue = dpService.addLaplaceNoise(
                value.doubleValue(),
                1.0 / epsilon
        );
        return BigInteger.valueOf(Math.max(0L, Math.round(noisyValue)));
    }

    /** Rounds {@code timestamp} down to the start of its {@code interval}-sized bucket. */
    private Instant roundToInterval(Instant timestamp, Duration interval) {
        long intervalMillis = interval.toMillis();
        // floorDiv rounds toward negative infinity, so pre-1970 timestamps also round down;
        // plain division would round them up toward the epoch.
        long rounded = Math.floorDiv(timestamp.toEpochMilli(), intervalMillis) * intervalMillis;
        return Instant.ofEpochMilli(rounded);
    }

    private static void requirePositive(double value, String name) {
        if (!(value > 0.0) || Double.isInfinite(value)) {
            throw new IllegalArgumentException(name + " must be finite and > 0, got " + value);
        }
    }
}
Configuration and Security
application.yml
privacy:
  epsilon: 1.0
  delta: 0.00001
  k-anonymity: 5
  l-diversity: 2
analytics:
  max-records: 1000000
  allowed-queries:
    - COUNT
    - SUM
    - MEAN
    - HISTOGRAM
security:
  data-retention-days: 30
  audit-logging: true
  access-control: strict
Security Configuration
/**
 * HTTP security rules for the analytics API.
 *
 * <ul>
 *   <li>{@code /api/analytics/public} is open to everyone.</li>
 *   <li>{@code /api/analytics/private} requires the {@code ANALYST} role.</li>
 *   <li>{@code /api/analytics/raw} requires the {@code ADMIN} role.</li>
 *   <li>Every other request requires an authenticated user.</li>
 * </ul>
 */
@Configuration
@EnableWebSecurity
public class PrivacySecurityConfig extends WebSecurityConfigurerAdapter {

    @Override
    protected void configure(HttpSecurity http) throws Exception {
        // Access rules; the catch-all is declared last, after the specific paths.
        http.authorizeRequests()
                .antMatchers("/api/analytics/public").permitAll()
                .antMatchers("/api/analytics/private").hasRole("ANALYST")
                .antMatchers("/api/analytics/raw").hasRole("ADMIN")
                .anyRequest().authenticated();

        // For API, consider keeping CSRF for web interfaces
        http.csrf().disable();

        // Only allow content loaded from this application's own origin.
        http.headers().contentSecurityPolicy("default-src 'self'");
    }
}
Monitoring and Audit
/**
 * Records privacy-relevant query executions and derives budget and compliance figures from
 * the stored audit trail.
 */
@Service
public class PrivacyAuditService {

    private final AuditRepository auditRepository;

    public PrivacyAuditService(AuditRepository auditRepository) {
        this.auditRepository = auditRepository;
    }

    /**
     * Persists one audit entry for an executed query, stamped with a fresh random id and the
     * current time.
     *
     * @param userId           user who ran the query
     * @param queryType        kind of query that was run
     * @param epsilonUsed      privacy budget the query consumed
     * @param recordsProcessed number of records the query touched
     */
    public void logQueryExecution(String userId, String queryType,
                                  double epsilonUsed, int recordsProcessed) {
        String auditId = UUID.randomUUID().toString();
        Instant executedAt = Instant.now();
        auditRepository.save(new AuditRecord(
                auditId, userId, queryType, executedAt,
                epsilonUsed, recordsProcessed, "QUERY_EXECUTED"));
    }

    /**
     * Computes how much of a fixed allowance of 10.0 epsilon the user has left, counting every
     * audited query newer than {@code now - period}.
     *
     * @return the remaining budget (never negative) together with {@code period}
     */
    public PrivacyBudget getRemainingBudget(String userId, Duration period) {
        Instant windowStart = Instant.now().minus(period);
        double spent = auditRepository
                .findByUserIdAndTimestampAfter(userId, windowStart)
                .stream()
                .mapToDouble(AuditRecord::getEpsilonUsed)
                .sum();

        double allowance = 10.0; // Daily budget
        double left = Math.max(0.0, allowance - spent);
        return new PrivacyBudget(left, period);
    }

    /**
     * Summarizes the audit entries between {@code from} and {@code to}: the number of entries
     * and the total epsilon consumed per user.
     */
    public ComplianceReport generateComplianceReport(Instant from, Instant to) {
        List<AuditRecord> entries = auditRepository.findByTimestampBetween(from, to);

        Map<String, Double> epsilonByUser = entries.stream().collect(
                Collectors.groupingBy(
                        AuditRecord::getUserId,
                        Collectors.summingDouble(AuditRecord::getEpsilonUsed)));

        int entryCount = entries.size();
        return new ComplianceReport(entryCount, epsilonByUser);
    }
}
Testing
/**
 * Integration tests for the privacy-preserving analytics services.
 */
@SpringBootTest
class PrivacyPreservingAnalyticsTest {

    @Autowired
    private DifferentialPrivacyService dpService;

    @Autowired
    private PrivacyPreservingAnalyticsEngine analyticsEngine;

    // These two services are used by the tests below but were never declared.
    @Autowired
    private DataAnonymizationService anonymizationService;

    @Autowired
    private HomomorphicEncryptionService heService;

    @Test
    void testDifferentialPrivacy() {
        // 1000 values cycling through 10, 20, 30, 40, 50 (true mean 30). With only a handful
        // of values the noise on the count alone moves the mean by more than the tolerance in
        // a noticeable fraction of runs, which made this test flaky.
        List<Double> values = new ArrayList<>();
        for (int i = 0; i < 1000; i++) {
            values.add(10.0 * (1 + i % 5));
        }
        double trueMean = values.stream().mapToDouble(Double::doubleValue).average().orElse(0.0);

        double privateMean = dpService.noisyMean(values, 1.0);

        // Assumes the default privacy.epsilon of 1.0; at that setting a deviation of 1.0 would
        // need count noise of roughly 30, which is vanishingly unlikely.
        assertTrue(Math.abs(privateMean - trueMean) < 1.0);
    }

    @Test
    void testKAnonymity() {
        List<TestRecord> records = createTestRecords();
        int k = 3;

        List<AnonymizedRecord<TestRecord>> anonymized =
                anonymizationService.applyKAnonymity(records, this::getQuasiIdentifiers, k);

        // No record is dropped by the anonymization pass.
        assertEquals(records.size(), anonymized.size());

        // Every published equivalence class must hold at least k records. The fixture is built
        // so that the undersized groups collapse into one class once generalized.
        Map<List<String>, Long> groupSizes = anonymized.stream()
                .collect(Collectors.groupingBy(
                        AnonymizedRecord::getQuasiIdentifiers,
                        Collectors.counting()
                ));
        assertTrue(groupSizes.values().stream().allMatch(size -> size >= k));
    }

    @Test
    void testHomomorphicOperations() {
        double a = 5.0;
        double b = 3.0;

        Ciphertext encryptedA = heService.encrypt(a);
        Ciphertext encryptedB = heService.encrypt(b);
        Ciphertext encryptedSum = heService.add(encryptedA, encryptedB);

        double decryptedSum = heService.decrypt(encryptedSum);
        assertEquals(a + b, decryptedSum, 0.001);
    }

    /**
     * Fixture for the k-anonymity test: one group that already has three members, plus three
     * singleton groups whose ages (21, 25, 28) fall into the same width-10 bucket, so they
     * merge into one class of three after generalization.
     */
    private List<TestRecord> createTestRecords() {
        List<TestRecord> records = new ArrayList<>();
        for (int i = 0; i < 3; i++) {
            records.add(new TestRecord("30-39", "941", "F"));
        }
        records.add(new TestRecord("21", "100", "M"));
        records.add(new TestRecord("25", "100", "M"));
        records.add(new TestRecord("28", "100", "M"));
        return records;
    }

    private List<String> getQuasiIdentifiers(TestRecord record) {
        return Arrays.asList(record.age, record.zipPrefix, record.gender);
    }

    /** Minimal record carrying only the quasi-identifiers the test needs. */
    private static final class TestRecord {
        private final String age;
        private final String zipPrefix;
        private final String gender;

        private TestRecord(String age, String zipPrefix, String gender) {
            this.age = age;
            this.zipPrefix = zipPrefix;
            this.gender = gender;
        }
    }
}
Best Practices
- Privacy Budget Management: Track and limit epsilon consumption
- Data Minimization: Only collect necessary data
- Access Controls: Strict controls on raw data access
- Audit Logging: Comprehensive logging of all data accesses
- Encryption: Encrypt data at rest and in transit
- Regular Audits: Conduct privacy impact assessments
- Transparency: Document privacy practices and techniques used
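This implementation provides a starting point for privacy-preserving analytics in Java, showing how differential privacy, anonymization, homomorphic encryption, and secure multi-party computation can be combined to derive insights from data while protecting individual privacy. Several parts are deliberately simplified (the secure-sum, private-set-intersection, and homomorphic-mean routines, and the sensitivity constants) and must be replaced with vetted protocols and data-specific bounds before production use.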