K-Anonymity Implementation in Java

Introduction

K-Anonymity is a privacy-preserving data anonymization technique that ensures each record in a dataset is indistinguishable from at least k-1 other records. This implementation provides comprehensive K-Anonymity with generalization, suppression, and risk analysis.

Core K-Anonymity Models

Data Structures and Domain Models

package com.kanonymity.core;
import java.util.*;
import java.util.stream.Collectors;
/**
 * One dataset row: a unique id plus a map of attribute values, together with
 * the attribute-name sets that mark which attributes are quasi-identifiers
 * and which are sensitive.
 *
 * <p>Identity (equals/hashCode) is based on {@code id} alone; attribute
 * values remain mutable through {@link #setAttribute}.
 */
public class DataRecord {

    private final String id;
    private final Map<String, Object> attributes;
    private final Set<String> quasiIdentifiers;
    private final Set<String> sensitiveAttributes;

    /**
     * @param id                  unique identifier used for equality
     * @param attributes          attribute name to value (defensively copied)
     * @param quasiIdentifiers    quasi-identifier attribute names (copied)
     * @param sensitiveAttributes sensitive attribute names (copied)
     */
    public DataRecord(String id, Map<String, Object> attributes,
            Set<String> quasiIdentifiers, Set<String> sensitiveAttributes) {
        this.id = id;
        this.attributes = new LinkedHashMap<>(attributes);
        this.quasiIdentifiers = new HashSet<>(quasiIdentifiers);
        this.sensitiveAttributes = new HashSet<>(sensitiveAttributes);
    }

    /** The stored value for {@code attribute}, or {@code null} when absent. */
    public Object getAttribute(String attribute) {
        return attributes.get(attribute);
    }

    /** Stores or replaces the value for {@code attribute}. */
    public void setAttribute(String attribute, Object value) {
        attributes.put(attribute, value);
    }

    /** True when {@code attribute} is declared a quasi-identifier. */
    public boolean isQuasiIdentifier(String attribute) {
        return quasiIdentifiers.contains(attribute);
    }

    /** True when {@code attribute} is declared sensitive. */
    public boolean isSensitiveAttribute(String attribute) {
        return sensitiveAttributes.contains(attribute);
    }

    /** The subset of attributes declared as quasi-identifiers. */
    public Map<String, Object> getQuasiIdentifierValues() {
        return attributes.keySet().stream()
                .filter(quasiIdentifiers::contains)
                .collect(Collectors.toMap(name -> name, attributes::get));
    }

    /** The subset of attributes declared as sensitive. */
    public Map<String, Object> getSensitiveValues() {
        return attributes.keySet().stream()
                .filter(sensitiveAttributes::contains)
                .collect(Collectors.toMap(name -> name, attributes::get));
    }

    // --- Accessors; collections are returned as defensive copies ---

    public String getId() {
        return id;
    }

    public Map<String, Object> getAttributes() {
        return new LinkedHashMap<>(attributes);
    }

    public Set<String> getQuasiIdentifiers() {
        return new HashSet<>(quasiIdentifiers);
    }

    public Set<String> getSensitiveAttributes() {
        return new HashSet<>(sensitiveAttributes);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        return id.equals(((DataRecord) o).id);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id);
    }

    @Override
    public String toString() {
        return "DataRecord{id='" + id + "', attributes=" + attributes + "}";
    }
}
/**
 * A group of records that share identical (generalized) quasi-identifier
 * values. K-anonymity requires every published class to contain at least
 * k records.
 */
public class EquivalenceClass {

    private final Map<String, Object> quasiIdentifierValues;
    private final List<DataRecord> records;
    // Fix: removed the unused 'size' field — it was final, initialized to 0
    // and never read; the class size is records.size() (see size()).

    /** @param quasiIdentifierValues shared QI values (defensively copied) */
    public EquivalenceClass(Map<String, Object> quasiIdentifierValues) {
        this.quasiIdentifierValues = new LinkedHashMap<>(quasiIdentifierValues);
        this.records = new ArrayList<>();
    }

    /** Adds a record; the caller is responsible for QI-value consistency. */
    public void addRecord(DataRecord record) {
        records.add(record);
    }

    /** Number of records currently in the class. */
    public int size() {
        return records.size();
    }

    public boolean isEmpty() {
        return records.isEmpty();
    }

    /** Defensive copy of the shared quasi-identifier values. */
    public Map<String, Object> getQuasiIdentifierValues() {
        return new LinkedHashMap<>(quasiIdentifierValues);
    }

    /** Defensive copy of the member records. */
    public List<DataRecord> getRecords() {
        return new ArrayList<>(records);
    }

    /**
     * Collects, per sensitive attribute, the list of values observed across
     * all member records (duplicates preserved).
     */
    public Map<String, List<Object>> getSensitiveAttributeDistributions() {
        Map<String, List<Object>> distributions = new HashMap<>();
        for (DataRecord record : records) {
            Map<String, Object> sensitiveValues = record.getSensitiveValues();
            for (Map.Entry<String, Object> entry : sensitiveValues.entrySet()) {
                distributions.computeIfAbsent(entry.getKey(), key -> new ArrayList<>())
                        .add(entry.getValue());
            }
        }
        return distributions;
    }

    /**
     * Distinct-value ratio of {@code sensitiveAttribute} within this class
     * (distinct / total); 0.0 when the attribute has no observed values.
     * Higher is more diverse (less vulnerable to homogeneity attacks).
     */
    public double calculateDiversity(String sensitiveAttribute) {
        List<Object> values = getSensitiveAttributeDistributions()
                .getOrDefault(sensitiveAttribute, Collections.emptyList());
        if (values.isEmpty()) {
            return 0.0;
        }
        long distinctCount = values.stream().distinct().count();
        return (double) distinctCount / values.size();
    }

    @Override
    public String toString() {
        return String.format("EquivalenceClass{size=%d, QI=%s}", size(), quasiIdentifierValues);
    }
}

Generalization Hierarchies

Domain Generalization Implementation

package com.kanonymity.generalization;
import java.util.*;
/**
 * A value-generalization scheme for a single attribute. Implementations map
 * raw values to progressively coarser representations indexed by an integer
 * level; the exact meaning of each level is implementation-defined.
 */
public interface GeneralizationHierarchy {
/** Human-readable name identifying this hierarchy instance. */
String getName();
/** The attribute (column) name this hierarchy applies to. */
String getAttribute();
/**
 * Returns the generalized form of {@code value} at the given level.
 * Implementations throw IllegalArgumentException for out-of-range levels
 * or unsupported value types.
 */
Object generalize(Object value, int level);
/** The highest level accepted by {@link #generalize}. */
int getMaxLevel();
/** Whether this hierarchy can generalize the given raw value. */
boolean supportsValue(Object value);
/** The values this hierarchy works over (implementation-defined contents). */
List<Object> getDomain();
}
/**
 * Generalizes a numeric attribute into one of a fixed list of ranges.
 * Level {@code i} maps a value to the textual form of the i-th range when
 * the value falls inside it, or the literal string "OutOfRange" otherwise.
 */
public class NumericRangeHierarchy implements GeneralizationHierarchy {

    private final String attribute;
    private final List<Range> levels;
    // NOTE(review): 'format' is accepted and stored but never consulted;
    // confirm whether Range rendering was meant to use it.
    private final String format;

    public NumericRangeHierarchy(String attribute, List<Range> levels, String format) {
        this.attribute = attribute;
        this.levels = new ArrayList<>(levels);
        this.format = format;
    }

    @Override
    public String getName() {
        return "NumericRangeHierarchy-" + attribute;
    }

    @Override
    public String getAttribute() {
        return attribute;
    }

    /**
     * @throws IllegalArgumentException if the level is out of bounds or the
     *         value is not a {@link Number}
     */
    @Override
    public Object generalize(Object value, int level) {
        if (level < 0 || level >= levels.size()) {
            throw new IllegalArgumentException("Invalid generalization level: " + level);
        }
        if (!(value instanceof Number)) {
            throw new IllegalArgumentException("Value must be numeric: " + value);
        }
        double numericValue = ((Number) value).doubleValue();
        Range range = levels.get(level);
        return range.contains(numericValue) ? range.toString() : "OutOfRange";
    }

    @Override
    public int getMaxLevel() {
        return levels.size() - 1;
    }

    @Override
    public boolean supportsValue(Object value) {
        return value instanceof Number;
    }

    /** Textual form of every configured range, in level order. */
    @Override
    public List<Object> getDomain() {
        // Fix: plain loop instead of a stream — this file section imports only
        // java.util.*, so the previous Collectors.toList() call did not compile.
        List<Object> domain = new ArrayList<>(levels.size());
        for (Range range : levels) {
            domain.add(range.toString());
        }
        return domain;
    }

    /** A numeric interval with configurable bound inclusivity. */
    public static class Range {
        private final double min;
        private final double max;
        private final boolean inclusiveMin;
        private final boolean inclusiveMax;

        public Range(double min, double max, boolean inclusiveMin, boolean inclusiveMax) {
            this.min = min;
            this.max = max;
            this.inclusiveMin = inclusiveMin;
            this.inclusiveMax = inclusiveMax;
        }

        /** Whether {@code value} lies inside this interval. */
        public boolean contains(double value) {
            boolean lowerBound = inclusiveMin ? value >= min : value > min;
            boolean upperBound = inclusiveMax ? value <= max : value < max;
            return lowerBound && upperBound;
        }

        /**
         * Renders e.g. "[10.0-19.9]". NOTE(review): %.1f is locale-sensitive
         * (decimal separator) — fine for display, risky as a stable key.
         */
        @Override
        public String toString() {
            String lower = inclusiveMin ? "[" : "(";
            String upper = inclusiveMax ? "]" : ")";
            return String.format("%s%.1f-%.1f%s", lower, min, max, upper);
        }
    }
}
/**
 * Generalizes categorical values by walking child-to-parent links. The
 * hierarchy map goes from a parent value to its list of children; level n
 * replaces a value with its n-th ancestor, or the literal "OTHER" when the
 * value has no ancestor at that depth.
 */
public class CategoricalHierarchy implements GeneralizationHierarchy {

    private final String attribute;
    private final Map<Object, List<Object>> hierarchy;
    private final int maxLevel;

    /** @param hierarchy parent -> children; values are defensively copied */
    public CategoricalHierarchy(String attribute, Map<Object, List<Object>> hierarchy) {
        this.attribute = attribute;
        this.hierarchy = new HashMap<>();
        for (Map.Entry<Object, List<Object>> entry : hierarchy.entrySet()) {
            this.hierarchy.put(entry.getKey(), new ArrayList<>(entry.getValue()));
        }
        this.maxLevel = calculateMaxLevel();
    }

    @Override
    public String getName() {
        return "CategoricalHierarchy-" + attribute;
    }

    @Override
    public String getAttribute() {
        return attribute;
    }

    /**
     * Walks {@code level} parent links up from {@code value}.
     *
     * @throws IllegalArgumentException if level is outside [0, maxLevel]
     */
    @Override
    public Object generalize(Object value, int level) {
        if (level < 0 || level > maxLevel) {
            throw new IllegalArgumentException("Invalid generalization level: " + level);
        }
        if (level == 0) {
            return value; // No generalization
        }
        Object current = value;
        for (int i = 0; i < level; i++) {
            current = getParent(current);
            if (current == null) {
                return "OTHER"; // Ran out of ancestors before reaching the level
            }
        }
        return current;
    }

    @Override
    public int getMaxLevel() {
        return maxLevel;
    }

    @Override
    public boolean supportsValue(Object value) {
        return hierarchy.containsKey(value) || isChildValue(value);
    }

    /** Every value appearing in the hierarchy, as a key or as a child. */
    @Override
    public List<Object> getDomain() {
        Set<Object> domain = new HashSet<>();
        domain.addAll(hierarchy.keySet());
        hierarchy.values().forEach(domain::addAll);
        return new ArrayList<>(domain);
    }

    /** Linear scan for the parent of {@code child}; null at a root. */
    private Object getParent(Object child) {
        for (Map.Entry<Object, List<Object>> entry : hierarchy.entrySet()) {
            if (entry.getValue().contains(child)) {
                return entry.getKey();
            }
        }
        return null;
    }

    private boolean isChildValue(Object value) {
        return hierarchy.values().stream()
                .anyMatch(children -> children.contains(value));
    }

    /**
     * Deepest value anywhere in the hierarchy. Fix: the previous version only
     * examined map keys, so leaf-only values (children that are never
     * parents) were ignored and their valid generalization levels rejected.
     */
    private int calculateMaxLevel() {
        int max = 0;
        for (Object value : getDomain()) {
            max = Math.max(max, calculateDepth(value));
        }
        return max;
    }

    /**
     * Number of parent links from {@code value} to its root. Calls
     * getParent() once per step (previously twice) and fails fast on cyclic
     * input instead of looping forever.
     */
    private int calculateDepth(Object value) {
        int depth = 0;
        Object current = getParent(value);
        while (current != null) {
            depth++;
            if (depth > hierarchy.size() + 1) {
                // An acyclic parent chain can involve each key at most once.
                throw new IllegalStateException(
                        "Cycle detected in hierarchy for attribute: " + attribute);
            }
            current = getParent(current);
        }
        return depth;
    }
}
/**
 * Generalizes {@link java.util.Date} values into coarser textual forms
 * (full date, year-quarter, year-month, year).
 *
 * <p>NOTE(review): Calendar.getInstance() uses the JVM's default time zone
 * and locale, so the same instant may generalize differently across
 * machines — confirm whether a fixed zone (e.g. UTC) is required.
 */
public class DateHierarchy implements GeneralizationHierarchy {

    private final String attribute;
    private final List<DateGranularity> levels;

    /** @param levels granularity per level, index 0 first; defensively copied */
    public DateHierarchy(String attribute, List<DateGranularity> levels) {
        this.attribute = attribute;
        this.levels = new ArrayList<>(levels);
    }

    @Override
    public String getName() {
        return "DateHierarchy-" + attribute;
    }

    @Override
    public String getAttribute() {
        return attribute;
    }

    /**
     * Formats the date at the granularity configured for {@code level}.
     *
     * @throws IllegalArgumentException if the level is out of bounds or the
     *         value is not a {@link Date}
     */
    @Override
    public Object generalize(Object value, int level) {
        if (level < 0 || level >= levels.size()) {
            throw new IllegalArgumentException("Invalid generalization level: " + level);
        }
        if (!(value instanceof Date)) {
            throw new IllegalArgumentException("Value must be a Date: " + value);
        }
        Date date = (Date) value;
        DateGranularity granularity = levels.get(level);
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        switch (granularity) {
            case YEAR:
                return String.format("%d", calendar.get(Calendar.YEAR));
            case YEAR_MONTH:
                // Calendar.MONTH is 0-based, hence the +1.
                return String.format("%d-%02d",
                        calendar.get(Calendar.YEAR),
                        calendar.get(Calendar.MONTH) + 1);
            case YEAR_QUARTER:
                int quarter = (calendar.get(Calendar.MONTH) / 3) + 1;
                return String.format("%d-Q%d", calendar.get(Calendar.YEAR), quarter);
            case FULL:
            default:
                return date.toString();
        }
    }

    @Override
    public int getMaxLevel() {
        return levels.size() - 1;
    }

    @Override
    public boolean supportsValue(Object value) {
        return value instanceof Date;
    }

    /** Names of the configured granularities, in level order. */
    @Override
    public List<Object> getDomain() {
        // Fix: plain loop instead of a stream — this file section imports only
        // java.util.*, so the previous Collectors.toList() call did not compile.
        List<Object> names = new ArrayList<>(levels.size());
        for (DateGranularity granularity : levels) {
            names.add(granularity.name());
        }
        return names;
    }

    /** Supported granularities. */
    public enum DateGranularity {
        FULL, YEAR_QUARTER, YEAR_MONTH, YEAR
    }
}

K-Anonymity Algorithm Implementation

Core K-Anonymity Engine

package com.kanonymity.algorithm;
import com.kanonymity.core.*;
import com.kanonymity.generalization.GeneralizationHierarchy;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Core k-anonymity engine: generalizes quasi-identifier (QI) values using
 * the configured hierarchies, groups records into equivalence classes,
 * applies the suppression strategy, and reports quality metrics.
 *
 * <p>Not safe for concurrent use when the supplied SuppressionStrategy is
 * stateful.
 */
public class KAnonymityEngine {

    private final int k;
    private final List<DataRecord> dataset;
    private final Set<String> quasiIdentifiers;
    private final Map<String, GeneralizationHierarchy> hierarchies;
    private final SuppressionStrategy suppressionStrategy;

    /**
     * @param k                   minimum equivalence-class size (&gt;= 1)
     * @param dataset             input records (defensively copied)
     * @param quasiIdentifiers    QI attribute names (copied)
     * @param hierarchies         per-attribute hierarchies (copied)
     * @param suppressionStrategy decides which classes are dropped
     * @throws IllegalArgumentException if {@code k < 1}
     */
    public KAnonymityEngine(int k, List<DataRecord> dataset,
            Set<String> quasiIdentifiers,
            Map<String, GeneralizationHierarchy> hierarchies,
            SuppressionStrategy suppressionStrategy) {
        if (k < 1) {
            throw new IllegalArgumentException("K must be at least 1");
        }
        this.k = k;
        this.dataset = new ArrayList<>(dataset);
        this.quasiIdentifiers = new HashSet<>(quasiIdentifiers);
        this.hierarchies = new HashMap<>(hierarchies);
        this.suppressionStrategy = suppressionStrategy;
    }

    /** Anonymizes using generalization level 0 for every attribute. */
    public KAnonymityResult anonymize() {
        return anonymizeWithLevels(Collections.emptyMap());
    }

    /**
     * Runs the full pipeline at the given per-attribute generalization
     * levels; attributes absent from the map default to level 0.
     */
    public KAnonymityResult anonymizeWithLevels(Map<String, Integer> generalizationLevels) {
        Map<String, Integer> actualLevels = new HashMap<>(generalizationLevels);
        // 1. Generalize QI values.
        List<DataRecord> generalizedData = applyGeneralization(actualLevels);
        // 2. Group records sharing identical generalized QI values.
        List<EquivalenceClass> equivalenceClasses = formEquivalenceClasses(generalizedData);
        // 3. Drop classes rejected by the suppression strategy.
        List<EquivalenceClass> suppressedClasses = applySuppression(equivalenceClasses);
        // 4. Materialize anonymized records from the surviving classes.
        List<DataRecord> anonymizedData = new ArrayList<>();
        for (EquivalenceClass eqClass : suppressedClasses) {
            for (DataRecord record : eqClass.getRecords()) {
                anonymizedData.add(createAnonymizedRecord(record, eqClass));
            }
        }
        // 5. Summarize quality metrics.
        KAnonymityMetrics metrics = calculateMetrics(suppressedClasses, actualLevels);
        return new KAnonymityResult(anonymizedData, suppressedClasses, metrics, actualLevels);
    }

    /**
     * Returns a copy of the dataset with each QI value replaced by its
     * generalized form. QIs without a hierarchy, and values a hierarchy does
     * not support, are left unchanged — NOTE(review): this can leave raw,
     * potentially identifying values in place; confirm that is intended.
     */
    private List<DataRecord> applyGeneralization(Map<String, Integer> levels) {
        List<DataRecord> generalized = new ArrayList<>();
        for (DataRecord record : dataset) {
            DataRecord generalizedRecord = new DataRecord(
                    record.getId(),
                    record.getAttributes(),
                    record.getQuasiIdentifiers(),
                    record.getSensitiveAttributes());
            for (String qi : quasiIdentifiers) {
                GeneralizationHierarchy hierarchy = hierarchies.get(qi);
                if (hierarchy != null) {
                    Object originalValue = record.getAttribute(qi);
                    int level = levels.getOrDefault(qi, 0);
                    if (hierarchy.supportsValue(originalValue)) {
                        generalizedRecord.setAttribute(qi, hierarchy.generalize(originalValue, level));
                    }
                }
            }
            generalized.add(generalizedRecord);
        }
        return generalized;
    }

    /** Groups records by their serialized QI values. */
    private List<EquivalenceClass> formEquivalenceClasses(List<DataRecord> data) {
        Map<String, EquivalenceClass> classMap = new HashMap<>();
        for (DataRecord record : data) {
            Map<String, Object> qiValues = record.getQuasiIdentifierValues();
            String key = generateEquivalenceKey(qiValues);
            // Fix: lambda parameter renamed — it was 'k', shadowing the field 'k'.
            EquivalenceClass eqClass = classMap.computeIfAbsent(key,
                    unused -> new EquivalenceClass(qiValues));
            eqClass.addRecord(record);
        }
        return new ArrayList<>(classMap.values());
    }

    /** Keeps only classes the strategy accepts for the configured k. */
    private List<EquivalenceClass> applySuppression(List<EquivalenceClass> classes) {
        return classes.stream()
                .filter(eqClass -> suppressionStrategy.shouldKeep(eqClass, k))
                .collect(Collectors.toList());
    }

    /** Copies a record, overwriting its QI values with the class's shared values. */
    private DataRecord createAnonymizedRecord(DataRecord original, EquivalenceClass eqClass) {
        Map<String, Object> anonymizedAttributes = new LinkedHashMap<>(original.getAttributes());
        Map<String, Object> qiValues = eqClass.getQuasiIdentifierValues();
        for (String qi : quasiIdentifiers) {
            if (qiValues.containsKey(qi)) {
                anonymizedAttributes.put(qi, qiValues.get(qi));
            }
        }
        return new DataRecord(
                original.getId(),
                anonymizedAttributes,
                original.getQuasiIdentifiers(),
                original.getSensitiveAttributes());
    }

    /**
     * Computes summary metrics over the surviving classes. Fix: guards the
     * suppression-rate division so an empty input dataset yields 0.0 instead
     * of NaN (0/0).
     */
    private KAnonymityMetrics calculateMetrics(List<EquivalenceClass> classes,
            Map<String, Integer> levels) {
        int totalRecords = classes.stream().mapToInt(EquivalenceClass::size).sum();
        int suppressedRecords = dataset.size() - totalRecords;
        double suppressionRate = dataset.isEmpty()
                ? 0.0
                : (double) suppressedRecords / dataset.size();
        double avgClassSize = classes.stream()
                .mapToInt(EquivalenceClass::size)
                .average()
                .orElse(0.0);
        int minClassSize = classes.stream()
                .mapToInt(EquivalenceClass::size)
                .min()
                .orElse(0);
        // Vacuously true when there are no classes, as before.
        boolean isKAnonymous = classes.stream()
                .allMatch(eqClass -> eqClass.size() >= k);
        double informationLoss = calculateInformationLoss(levels);
        return new KAnonymityMetrics(
                totalRecords,
                suppressedRecords,
                suppressionRate,
                classes.size(),
                avgClassSize,
                minClassSize,
                isKAnonymous,
                informationLoss);
    }

    /**
     * Mean of level/maxLevel across attributes whose hierarchy has at least
     * one generalization level; 0.0 when none do.
     */
    private double calculateInformationLoss(Map<String, Integer> levels) {
        double totalLoss = 0.0;
        int count = 0;
        for (String qi : quasiIdentifiers) {
            GeneralizationHierarchy hierarchy = hierarchies.get(qi);
            if (hierarchy != null) {
                int level = levels.getOrDefault(qi, 0);
                int maxLevel = hierarchy.getMaxLevel();
                if (maxLevel > 0) {
                    totalLoss += (double) level / maxLevel;
                    count++;
                }
            }
        }
        return count > 0 ? totalLoss / count : 0.0;
    }

    /** Deterministic class key: sorted "name=value" pairs joined by '|'. */
    private String generateEquivalenceKey(Map<String, Object> qiValues) {
        return qiValues.entrySet().stream()
                .sorted(Map.Entry.comparingByKey())
                .map(entry -> entry.getKey() + "=" + entry.getValue())
                .collect(Collectors.joining("|"));
    }
}
/**
 * Result of an anonymization run: the anonymized records, the surviving
 * equivalence classes, summary metrics, and the generalization levels that
 * were applied.
 */
public class KAnonymityResult {

    private final List<DataRecord> anonymizedData;
    private final List<EquivalenceClass> equivalenceClasses;
    private final KAnonymityMetrics metrics;
    private final Map<String, Integer> generalizationLevels;

    public KAnonymityResult(List<DataRecord> anonymizedData,
            List<EquivalenceClass> equivalenceClasses,
            KAnonymityMetrics metrics,
            Map<String, Integer> generalizationLevels) {
        this.anonymizedData = new ArrayList<>(anonymizedData);
        this.equivalenceClasses = new ArrayList<>(equivalenceClasses);
        this.metrics = metrics;
        this.generalizationLevels = new HashMap<>(generalizationLevels);
    }

    // Fix: getters now return defensive copies — the previous versions handed
    // out the internal mutable collections, defeating the defensive copies
    // taken in the constructor (and inconsistent with DataRecord/EquivalenceClass).

    public List<DataRecord> getAnonymizedData() {
        return new ArrayList<>(anonymizedData);
    }

    public List<EquivalenceClass> getEquivalenceClasses() {
        return new ArrayList<>(equivalenceClasses);
    }

    public KAnonymityMetrics getMetrics() {
        return metrics;
    }

    public Map<String, Integer> getGeneralizationLevels() {
        return new HashMap<>(generalizationLevels);
    }
}
/**
 * Immutable summary of a k-anonymization run: record counts, suppression
 * statistics, equivalence-class sizes, compliance, and information loss.
 */
public class KAnonymityMetrics {

    private final int totalRecords;          // records surviving suppression
    private final int suppressedRecords;     // records dropped
    private final double suppressionRate;    // suppressed / original size
    private final int numEquivalenceClasses;
    private final double avgClassSize;
    private final int minClassSize;
    private final boolean isKAnonymous;      // every class has size >= k
    private final double informationLoss;    // mean level/maxLevel in [0, 1]

    public KAnonymityMetrics(int totalRecords, int suppressedRecords, double suppressionRate,
            int numEquivalenceClasses, double avgClassSize, int minClassSize,
            boolean isKAnonymous, double informationLoss) {
        this.totalRecords = totalRecords;
        this.suppressedRecords = suppressedRecords;
        this.suppressionRate = suppressionRate;
        this.numEquivalenceClasses = numEquivalenceClasses;
        this.avgClassSize = avgClassSize;
        this.minClassSize = minClassSize;
        this.isKAnonymous = isKAnonymous;
        this.informationLoss = informationLoss;
    }

    // --- Accessors (value object; all state is final) ---

    public int getTotalRecords() {
        return totalRecords;
    }

    public int getSuppressedRecords() {
        return suppressedRecords;
    }

    public double getSuppressionRate() {
        return suppressionRate;
    }

    public int getNumEquivalenceClasses() {
        return numEquivalenceClasses;
    }

    public double getAvgClassSize() {
        return avgClassSize;
    }

    public int getMinClassSize() {
        return minClassSize;
    }

    public boolean isKAnonymous() {
        return isKAnonymous;
    }

    public double getInformationLoss() {
        return informationLoss;
    }

    @Override
    public String toString() {
        return String.format(
                "KAnonymityMetrics{records=%d, suppressed=%d, suppressionRate=%.2f, " +
                "classes=%d, avgSize=%.2f, minSize=%d, kAnonymous=%s, infoLoss=%.2f}",
                totalRecords, suppressedRecords, suppressionRate, numEquivalenceClasses,
                avgClassSize, minClassSize, isKAnonymous, informationLoss);
    }
}

Suppression Strategies

Flexible Suppression Implementation

package com.kanonymity.suppression;
import com.kanonymity.core.EquivalenceClass;
/**
 * Decides, per equivalence class, whether the class is kept in the
 * anonymized output or suppressed (dropped entirely, including all of its
 * records).
 */
public interface SuppressionStrategy {
/**
 * @param eqClass the candidate equivalence class
 * @param k       the k-anonymity parameter in force
 * @return true to keep the class, false to suppress it
 */
boolean shouldKeep(EquivalenceClass eqClass, int k);
/** Strategy name for reporting; defaults to the simple class name. */
default String getName() {
return this.getClass().getSimpleName();
}
}
/**
 * Keeps an equivalence class if and only if it already satisfies the
 * k-anonymity size requirement; smaller classes are suppressed outright.
 *
 * <p>NOTE(review): {@code suppressionThreshold} is validated, stored and
 * exposed via its getter, but {@link #shouldKeep} never consults it —
 * confirm whether a global suppression-budget check was intended here.
 */
public class ThresholdSuppression implements SuppressionStrategy {

    private final double suppressionThreshold;

    /**
     * @param suppressionThreshold fraction in [0, 1]; currently informational only
     * @throws IllegalArgumentException if outside [0, 1]
     */
    public ThresholdSuppression(double suppressionThreshold) {
        if (suppressionThreshold < 0 || suppressionThreshold > 1) {
            throw new IllegalArgumentException("Suppression threshold must be between 0 and 1");
        }
        this.suppressionThreshold = suppressionThreshold;
    }

    /** A class survives exactly when it holds at least k records. */
    @Override
    public boolean shouldKeep(EquivalenceClass eqClass, int k) {
        return eqClass.size() >= k;
    }

    /** The configured (currently unused) suppression threshold. */
    public double getSuppressionThreshold() {
        return suppressionThreshold;
    }
}
/**
 * Suppression that additionally demands a minimum diversity of one sensitive
 * attribute inside each class, mitigating homogeneity attacks: a class must
 * be both large enough (&gt;= k) and diverse enough to survive.
 */
public class DiversityAwareSuppression implements SuppressionStrategy {

    private final double minDiversity;
    private final String sensitiveAttribute;

    /**
     * @param sensitiveAttribute the attribute whose diversity is checked
     * @param minDiversity       required distinct-value ratio in [0, 1]
     * @throws IllegalArgumentException if minDiversity is outside [0, 1]
     */
    public DiversityAwareSuppression(String sensitiveAttribute, double minDiversity) {
        if (minDiversity < 0 || minDiversity > 1) {
            throw new IllegalArgumentException("Minimum diversity must be between 0 and 1");
        }
        this.sensitiveAttribute = sensitiveAttribute;
        this.minDiversity = minDiversity;
    }

    @Override
    public boolean shouldKeep(EquivalenceClass eqClass, int k) {
        // Short-circuit keeps the original two-step order: diversity is only
        // computed for classes that already meet the size requirement.
        return eqClass.size() >= k
                && eqClass.calculateDiversity(sensitiveAttribute) >= minDiversity;
    }

    public double getMinDiversity() {
        return minDiversity;
    }

    public String getSensitiveAttribute() {
        return sensitiveAttribute;
    }
}
/**
 * Suppression bounded by a global budget: under-k classes are suppressed
 * only while the running suppression rate stays within
 * {@code maxSuppressionPercent}; once the budget is exhausted, under-k
 * classes are KEPT — note this deliberately trades strict k-anonymity for a
 * bounded data loss.
 *
 * <p>Stateful and NOT thread-safe: call {@link #reset()} between runs and do
 * not share an instance across concurrent anonymizations.
 */
public class AdaptiveSuppression implements SuppressionStrategy {

    private final int maxSuppressionPercent;
    private int currentSuppressed;   // records suppressed so far
    private int totalProcessed;      // records seen so far

    /**
     * @param maxSuppressionPercent allowed suppression budget in [0, 100]
     * @throws IllegalArgumentException if outside [0, 100]
     */
    public AdaptiveSuppression(int maxSuppressionPercent) {
        if (maxSuppressionPercent < 0 || maxSuppressionPercent > 100) {
            throw new IllegalArgumentException("Max suppression percent must be between 0 and 100");
        }
        this.maxSuppressionPercent = maxSuppressionPercent;
        this.currentSuppressed = 0;
        this.totalProcessed = 0;
    }

    @Override
    public boolean shouldKeep(EquivalenceClass eqClass, int k) {
        int classSize = eqClass.size();
        totalProcessed += classSize;
        if (classSize >= k) {
            return true;
        }
        // Fix: if nothing has been processed yet (first class is empty), the
        // rate below would be 0/0 = NaN; suppressing an empty class costs nothing.
        if (totalProcessed == 0) {
            return false;
        }
        // Projected rate if this class were suppressed, as a percentage.
        // Algebraically identical to the original currentRate + classShare sum.
        double projectedRate = (double) (currentSuppressed + classSize) / totalProcessed * 100;
        if (projectedRate <= maxSuppressionPercent) {
            currentSuppressed += classSize;
            return false;
        }
        return true; // Keep to avoid exceeding suppression limit
    }

    /** Clears the running counters; call before reusing the instance. */
    public void reset() {
        currentSuppressed = 0;
        totalProcessed = 0;
    }

    /** Current suppression rate as a percentage of records seen; 0 before any input. */
    public double getCurrentSuppressionRate() {
        return totalProcessed > 0 ? (double) currentSuppressed / totalProcessed * 100 : 0;
    }
}

Risk Analysis and Metrics

Privacy Risk Assessment

package com.kanonymity.risk;
import com.kanonymity.core.DataRecord;
import com.kanonymity.core.EquivalenceClass;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Computes re-identification and homogeneity risk statistics over the
 * equivalence classes of an anonymized dataset. Per-class re-identification
 * risk is modeled as 1/size. Fix: empty classes are now skipped so they
 * cannot produce infinite (1/0) risk values.
 */
public class RiskAnalyzer {

    /** Bundles all risk measures into one immutable assessment. */
    public RiskAssessment analyzeReidentificationRisk(List<EquivalenceClass> equivalenceClasses,
            List<DataRecord> originalData) {
        double avgRisk = calculateAverageReidentificationRisk(equivalenceClasses);
        double maxRisk = calculateMaximumReidentificationRisk(equivalenceClasses);
        double minRisk = calculateMinimumReidentificationRisk(equivalenceClasses);
        Map<String, Double> attributeRisk = calculateAttributeRiskContribution(equivalenceClasses);
        double homogeneityRisk = calculateHomogeneityRisk(equivalenceClasses);
        return new RiskAssessment(avgRisk, maxRisk, minRisk, attributeRisk, homogeneityRisk);
    }

    /** Mean of 1/size over non-empty classes; 0.0 when there are none. */
    public double calculateAverageReidentificationRisk(List<EquivalenceClass> equivalenceClasses) {
        return equivalenceClasses.stream()
                .filter(eqClass -> eqClass.size() > 0) // guard 1/0
                .mapToDouble(eqClass -> 1.0 / eqClass.size())
                .average()
                .orElse(0.0);
    }

    /** Worst case: 1/size of the smallest non-empty class; 0.0 when none. */
    public double calculateMaximumReidentificationRisk(List<EquivalenceClass> equivalenceClasses) {
        return equivalenceClasses.stream()
                .filter(eqClass -> eqClass.size() > 0) // guard 1/0
                .mapToDouble(eqClass -> 1.0 / eqClass.size())
                .max()
                .orElse(0.0);
    }

    /** Best case: 1/size of the largest non-empty class; 0.0 when none. */
    public double calculateMinimumReidentificationRisk(List<EquivalenceClass> equivalenceClasses) {
        return equivalenceClasses.stream()
                .filter(eqClass -> eqClass.size() > 0) // guard 1/0
                .mapToDouble(eqClass -> 1.0 / eqClass.size())
                .min()
                .orElse(0.0);
    }

    /**
     * Rough per-attribute risk contribution. Simplified placeholder: assigns
     * every QI attribute of the first class an equal 1/n share; a real
     * implementation would measure each attribute's distinguishing power.
     */
    public Map<String, Double> calculateAttributeRiskContribution(List<EquivalenceClass> equivalenceClasses) {
        Map<String, Double> riskContributions = new HashMap<>();
        if (!equivalenceClasses.isEmpty()) {
            Map<String, Object> qiValues = equivalenceClasses.get(0).getQuasiIdentifierValues();
            if (!qiValues.isEmpty()) { // guard the 1/size division
                double baseRisk = 1.0 / qiValues.size();
                for (String attribute : qiValues.keySet()) {
                    riskContributions.put(attribute, baseRisk);
                }
            }
        }
        return riskContributions;
    }

    /**
     * Mean homogeneity (1 - distinct/total) over every sensitive-attribute
     * distribution across all classes; 1.0 means a class leaks its sensitive
     * value to anyone who can place a person in it.
     */
    public double calculateHomogeneityRisk(List<EquivalenceClass> equivalenceClasses) {
        double totalHomogeneityRisk = 0.0;
        int count = 0;
        for (EquivalenceClass eqClass : equivalenceClasses) {
            Map<String, List<Object>> distributions = eqClass.getSensitiveAttributeDistributions();
            for (List<Object> values : distributions.values()) {
                if (!values.isEmpty()) {
                    long distinctCount = values.stream().distinct().count();
                    totalHomogeneityRisk += 1.0 - ((double) distinctCount / values.size());
                    count++;
                }
            }
        }
        return count > 0 ? totalHomogeneityRisk / count : 0.0;
    }

    /**
     * Distinct-value ratio per QI attribute (distinct / total records); a
     * value near 1.0 means the attribute is highly identifying. Fix: returns
     * an empty map for an empty dataset (previously NaN for every attribute).
     */
    public Map<String, Double> calculateAttributeSensitivity(List<DataRecord> dataset,
            Set<String> quasiIdentifiers) {
        Map<String, Double> sensitivityScores = new HashMap<>();
        if (dataset.isEmpty()) {
            return sensitivityScores;
        }
        for (String qi : quasiIdentifiers) {
            double distinctValues = dataset.stream()
                    .map(record -> record.getAttribute(qi))
                    .distinct()
                    .count();
            sensitivityScores.put(qi, distinctValues / dataset.size());
        }
        return sensitivityScores;
    }
}
/**
 * Immutable summary of privacy risk for an anonymized dataset, produced by
 * a risk analyzer. Re-identification risk of a class of size n is modeled
 * as 1/n, so all risk values lie in [0, 1].
 */
public class RiskAssessment {

    private final double averageReidentificationRisk;
    private final double maximumReidentificationRisk;
    private final double minimumReidentificationRisk;
    private final Map<String, Double> attributeRiskContributions;
    private final double homogeneityRisk;
    private final Date assessmentDate; // time of construction

    public RiskAssessment(double averageReidentificationRisk, double maximumReidentificationRisk,
            double minimumReidentificationRisk, Map<String, Double> attributeRiskContributions,
            double homogeneityRisk) {
        this.averageReidentificationRisk = averageReidentificationRisk;
        this.maximumReidentificationRisk = maximumReidentificationRisk;
        this.minimumReidentificationRisk = minimumReidentificationRisk;
        this.attributeRiskContributions = new HashMap<>(attributeRiskContributions);
        this.homogeneityRisk = homogeneityRisk;
        this.assessmentDate = new Date();
    }

    // Getters
    public double getAverageReidentificationRisk() {
        return averageReidentificationRisk;
    }

    public double getMaximumReidentificationRisk() {
        return maximumReidentificationRisk;
    }

    public double getMinimumReidentificationRisk() {
        return minimumReidentificationRisk;
    }

    /** Defensive copy of the per-attribute contribution map. */
    public Map<String, Double> getAttributeRiskContributions() {
        return new HashMap<>(attributeRiskContributions);
    }

    public double getHomogeneityRisk() {
        return homogeneityRisk;
    }

    /**
     * Fix: returns a defensive copy — {@link java.util.Date} is mutable and
     * the previous version handed out the internal instance, letting callers
     * alter this "immutable" assessment.
     */
    public Date getAssessmentDate() {
        return new Date(assessmentDate.getTime());
    }

    /** True when the worst-case (maximum) re-identification risk is within threshold. */
    public boolean isAcceptableRisk(double threshold) {
        return maximumReidentificationRisk <= threshold;
    }

    @Override
    public String toString() {
        return String.format(
                "RiskAssessment{avgRisk=%.4f, maxRisk=%.4f, minRisk=%.4f, homogeneityRisk=%.4f}",
                averageReidentificationRisk, maximumReidentificationRisk,
                minimumReidentificationRisk, homogeneityRisk);
    }
}

Spring Boot Integration

Configuration and REST API

package com.kanonymity.spring;
import com.kanonymity.algorithm.KAnonymityEngine;
import com.kanonymity.algorithm.KAnonymityResult;
import com.kanonymity.core.DataRecord;
import com.kanonymity.core.EquivalenceClass;
import com.kanonymity.generalization.GeneralizationHierarchy;
import com.kanonymity.risk.RiskAnalyzer;
import com.kanonymity.risk.RiskAssessment;
import com.kanonymity.suppression.SuppressionStrategy;
import com.kanonymity.suppression.ThresholdSuppression;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.*;
/**
 * Spring configuration bound to the "kanonymity" property prefix; also
 * declares the suppression-strategy and risk-analyzer beans.
 *
 * NOTE(review): this class is both @Configuration and @ConfigurationProperties;
 * verify that property binding completes before suppressionStrategy() reads
 * defaultSuppressionThreshold. Also confirm that SuppressionStrategy,
 * ThresholdSuppression and RiskAnalyzer are imported in this section's
 * import block — they are not visible here.
 */
@Configuration
@ConfigurationProperties(prefix = "kanonymity")
public class KAnonymityConfig {
// Defaults used when no kanonymity.* properties are supplied.
private int defaultK = 3;
private double defaultSuppressionThreshold = 0.1;
private Map<String, HierarchyConfig> hierarchies = new HashMap<>();
// Threshold-based suppression built from the configured threshold.
@Bean
public SuppressionStrategy suppressionStrategy() {
return new ThresholdSuppression(defaultSuppressionThreshold);
}
// Stateless analyzer, safe to share across requests.
@Bean
public RiskAnalyzer riskAnalyzer() {
return new RiskAnalyzer();
}
// Getters and setters (required for @ConfigurationProperties binding)
public int getDefaultK() { return defaultK; }
public void setDefaultK(int defaultK) { this.defaultK = defaultK; }
public double getDefaultSuppressionThreshold() { return defaultSuppressionThreshold; }
public void setDefaultSuppressionThreshold(double threshold) { 
this.defaultSuppressionThreshold = threshold; 
}
public Map<String, HierarchyConfig> getHierarchies() { return hierarchies; }
public void setHierarchies(Map<String, HierarchyConfig> hierarchies) { 
this.hierarchies = hierarchies; 
}
// Per-attribute hierarchy settings: a type tag plus free-form properties.
public static class HierarchyConfig {
private String type;
private Map<String, Object> properties = new HashMap<>();
// Getters and setters
public String getType() { return type; }
public void setType(String type) { this.type = type; }
public Map<String, Object> getProperties() { return properties; }
public void setProperties(Map<String, Object> properties) { 
this.properties = properties; 
}
}
}
/**
 * REST endpoints for running k-anonymization and risk analysis.
 */
@RestController
@RequestMapping("/api/kanonymity")
public class KAnonymityController {

    private final SuppressionStrategy suppressionStrategy;
    private final RiskAnalyzer riskAnalyzer;

    /**
     * Fix: constructor injection instead of field {@code @Autowired} —
     * dependencies are final and the controller is constructible in tests
     * without a Spring context. Spring autowires a single constructor
     * automatically.
     */
    public KAnonymityController(SuppressionStrategy suppressionStrategy,
            RiskAnalyzer riskAnalyzer) {
        this.suppressionStrategy = suppressionStrategy;
        this.riskAnalyzer = riskAnalyzer;
    }

    /**
     * Anonymizes the posted dataset with the requested k and optional
     * per-attribute generalization levels, then attaches a risk assessment.
     */
    @PostMapping("/anonymize")
    public ResponseEntity<KAnonymityResponse> anonymizeData(
            @RequestBody KAnonymityRequest request) {
        try {
            Map<String, GeneralizationHierarchy> hierarchies =
                    buildHierarchies(request.getHierarchyConfigs());
            KAnonymityEngine engine = new KAnonymityEngine(
                    request.getK(),
                    request.getDataset(),
                    request.getQuasiIdentifiers(),
                    hierarchies,
                    suppressionStrategy);
            KAnonymityResult result = request.getGeneralizationLevels() != null
                    ? engine.anonymizeWithLevels(request.getGeneralizationLevels())
                    : engine.anonymize();
            RiskAssessment riskAssessment = riskAnalyzer.analyzeReidentificationRisk(
                    result.getEquivalenceClasses(), request.getDataset());
            return ResponseEntity.ok(new KAnonymityResponse(result, riskAssessment, "SUCCESS"));
        } catch (Exception e) {
            // Boundary catch: surface the failure as a 400 rather than a 500.
            // NOTE(review): e.getMessage() may leak internals to clients;
            // consider a sanitized error code for production.
            KAnonymityResponse response = new KAnonymityResponse(
                    null, null, "ERROR: " + e.getMessage());
            return ResponseEntity.badRequest().body(response);
        }
    }

    /** Runs the risk analyzer over caller-supplied equivalence classes. */
    @PostMapping("/analyze-risk")
    public ResponseEntity<RiskAssessment> analyzeRisk(@RequestBody RiskAnalysisRequest request) {
        try {
            RiskAssessment assessment = riskAnalyzer.analyzeReidentificationRisk(
                    request.getEquivalenceClasses(), request.getOriginalData());
            return ResponseEntity.ok(assessment);
        } catch (Exception e) {
            return ResponseEntity.badRequest().build();
        }
    }

    /**
     * TODO: stub — always returns an empty map, so requests through this
     * controller currently run with NO generalization. Build concrete
     * hierarchies from the posted configuration.
     */
    private Map<String, GeneralizationHierarchy> buildHierarchies(
            Map<String, Object> hierarchyConfigs) {
        return new HashMap<>();
    }
}
// Request/Response DTOs
/**
 * Payload for POST /api/kanonymity/anonymize: the raw dataset plus the
 * parameters controlling anonymization. Plain mutable DTO for JSON binding.
 */
class KAnonymityRequest {

    private int k;
    private List<DataRecord> dataset;
    private Set<String> quasiIdentifiers;
    private Set<String> sensitiveAttributes;
    private Map<String, Object> hierarchyConfigs;
    private Map<String, Integer> generalizationLevels; // null means "use level 0 everywhere"

    public int getK() {
        return k;
    }

    public void setK(int k) {
        this.k = k;
    }

    public List<DataRecord> getDataset() {
        return dataset;
    }

    public void setDataset(List<DataRecord> dataset) {
        this.dataset = dataset;
    }

    public Set<String> getQuasiIdentifiers() {
        return quasiIdentifiers;
    }

    public void setQuasiIdentifiers(Set<String> quasiIdentifiers) {
        this.quasiIdentifiers = quasiIdentifiers;
    }

    public Set<String> getSensitiveAttributes() {
        return sensitiveAttributes;
    }

    public void setSensitiveAttributes(Set<String> sensitiveAttributes) {
        this.sensitiveAttributes = sensitiveAttributes;
    }

    public Map<String, Object> getHierarchyConfigs() {
        return hierarchyConfigs;
    }

    public void setHierarchyConfigs(Map<String, Object> hierarchyConfigs) {
        this.hierarchyConfigs = hierarchyConfigs;
    }

    public Map<String, Integer> getGeneralizationLevels() {
        return generalizationLevels;
    }

    public void setGeneralizationLevels(Map<String, Integer> generalizationLevels) {
        this.generalizationLevels = generalizationLevels;
    }
}
/**
 * Response for the anonymize endpoint: the anonymization result, its risk
 * assessment, and a status string ("SUCCESS" or "ERROR: ..."). Result and
 * assessment are null on failure.
 */
class KAnonymityResponse {

    private KAnonymityResult result;
    private RiskAssessment riskAssessment;
    private String status;

    KAnonymityResponse(KAnonymityResult result, RiskAssessment riskAssessment, String status) {
        this.result = result;
        this.riskAssessment = riskAssessment;
        this.status = status;
    }

    public KAnonymityResult getResult() {
        return result;
    }

    public void setResult(KAnonymityResult result) {
        this.result = result;
    }

    public RiskAssessment getRiskAssessment() {
        return riskAssessment;
    }

    public void setRiskAssessment(RiskAssessment riskAssessment) {
        this.riskAssessment = riskAssessment;
    }

    public String getStatus() {
        return status;
    }

    public void setStatus(String status) {
        this.status = status;
    }
}
/**
 * Payload for POST /api/kanonymity/analyze-risk: equivalence classes to
 * assess plus the original records they were derived from.
 */
class RiskAnalysisRequest {

    private List<EquivalenceClass> equivalenceClasses;
    private List<DataRecord> originalData;

    public List<EquivalenceClass> getEquivalenceClasses() {
        return equivalenceClasses;
    }

    public void setEquivalenceClasses(List<EquivalenceClass> equivalenceClasses) {
        this.equivalenceClasses = equivalenceClasses;
    }

    public List<DataRecord> getOriginalData() {
        return originalData;
    }

    public void setOriginalData(List<DataRecord> originalData) {
        this.originalData = originalData;
    }
}

Example Usage and Testing

Comprehensive Example

package com.kanonymity.example;
import com.kanonymity.algorithm.*;
import com.kanonymity.core.*;
import com.kanonymity.generalization.*;
import com.kanonymity.suppression.SuppressionStrategy;
import com.kanonymity.suppression.ThresholdSuppression;
import java.util.*;
/**
 * End-to-end demonstration of the k-anonymity pipeline: builds a synthetic
 * medical dataset, configures generalization hierarchies and a suppression
 * strategy, runs the engine with k = 3, and prints the resulting metrics
 * and equivalence classes.
 */
public class KAnonymityExample {

    /** Runs the demo; writes a summary of the anonymization to stdout. */
    public static void main(String[] args) {
        List<DataRecord> dataset = createSampleDataset();

        // Attributes that could link records to individuals vs. the values we protect.
        Set<String> quasiIdentifiers = Set.of("age", "zipcode", "gender");
        Set<String> sensitiveAttributes = Set.of("disease");

        Map<String, GeneralizationHierarchy> hierarchies = createHierarchies();

        // Suppress records whose equivalence class would exceed a 10% loss threshold.
        SuppressionStrategy suppression = new ThresholdSuppression(0.1);

        KAnonymityEngine engine = new KAnonymityEngine(
                3, dataset, quasiIdentifiers, hierarchies, suppression);

        KAnonymityResult result = engine.anonymize();

        System.out.println("Original dataset size: " + dataset.size());
        System.out.println("Anonymized dataset size: " + result.getAnonymizedData().size());
        System.out.println("Metrics: " + result.getMetrics());

        System.out.println("\nEquivalence Classes:");
        for (EquivalenceClass eqClass : result.getEquivalenceClasses()) {
            System.out.println(eqClass);
            System.out.println("  Sensitive distribution: "
                    + eqClass.getSensitiveAttributeDistributions());
        }
    }

    /**
     * Builds 100 synthetic patient records with a fixed seed so runs are
     * reproducible. Quasi-identifiers: age, zipcode, gender; sensitive: disease.
     *
     * @return a mutable list of generated records
     */
    private static List<DataRecord> createSampleDataset() {
        List<DataRecord> dataset = new ArrayList<>();
        String[] diseases = {"Flu", "Cold", "COVID", "Allergy", "Migraine"};
        String[] genders = {"Male", "Female"};
        Random random = new Random(42); // fixed seed for deterministic output

        for (int i = 0; i < 100; i++) {
            Map<String, Object> attributes = new HashMap<>();
            attributes.put("id", "P" + i);
            attributes.put("age", 20 + random.nextInt(50)); // 20-69
            // BUGFIX: store zipcode as a String. The categorical zipcode hierarchy
            // below maps String leaf values ("10000", ...), so an Integer zipcode
            // would never match any hierarchy entry during generalization.
            // Range is 10000-98999 (nextInt(89999) yields 0-89998).
            attributes.put("zipcode", String.valueOf(10000 + random.nextInt(89999)));
            attributes.put("gender", genders[random.nextInt(genders.length)]);
            attributes.put("disease", diseases[random.nextInt(diseases.length)]);
            attributes.put("treatment", "Treatment" + random.nextInt(5));

            DataRecord record = new DataRecord(
                    "P" + i,
                    attributes,
                    Set.of("age", "zipcode", "gender"), // quasi-identifiers
                    Set.of("disease")                   // sensitive attribute
            );
            dataset.add(record);
        }
        return dataset;
    }

    /**
     * Configures the generalization hierarchy for each quasi-identifier:
     * numeric 5-year buckets for age (with a wider terminal 60-69 bucket),
     * prefix-masked zipcodes, and a single "Person" root for gender.
     *
     * @return hierarchies keyed by attribute name
     */
    private static Map<String, GeneralizationHierarchy> createHierarchies() {
        Map<String, GeneralizationHierarchy> hierarchies = new HashMap<>();

        // Age hierarchy: closed [lo, hi] intervals covering the generated 20-69 range.
        List<NumericRangeHierarchy.Range> ageRanges = Arrays.asList(
                new NumericRangeHierarchy.Range(20, 24, true, true),
                new NumericRangeHierarchy.Range(25, 29, true, true),
                new NumericRangeHierarchy.Range(30, 34, true, true),
                new NumericRangeHierarchy.Range(35, 39, true, true),
                new NumericRangeHierarchy.Range(40, 44, true, true),
                new NumericRangeHierarchy.Range(45, 49, true, true),
                new NumericRangeHierarchy.Range(50, 54, true, true),
                new NumericRangeHierarchy.Range(55, 59, true, true),
                new NumericRangeHierarchy.Range(60, 69, true, true)
        );
        hierarchies.put("age", new NumericRangeHierarchy("age", ageRanges, "%.0f"));

        // Zipcode hierarchy: mask to the first three digits. NOTE: only the
        // 100** and 200** prefixes are configured; extend for full coverage
        // of the generated 10000-98999 range.
        Map<Object, List<Object>> zipcodeHierarchy = new HashMap<>();
        zipcodeHierarchy.put("100**", Arrays.asList("10000", "10001", "10002", "10003"));
        zipcodeHierarchy.put("200**", Arrays.asList("20000", "20001", "20002", "20003"));
        hierarchies.put("zipcode", new CategoricalHierarchy("zipcode", zipcodeHierarchy));

        // Gender hierarchy: both values generalize to the single root "Person".
        Map<Object, List<Object>> genderHierarchy = new HashMap<>();
        genderHierarchy.put("Person", Arrays.asList("Male", "Female"));
        hierarchies.put("gender", new CategoricalHierarchy("gender", genderHierarchy));

        return hierarchies;
    }
}

This K-Anonymity implementation provides the following components:

  1. Core Data Models - Records, equivalence classes, and hierarchies
  2. Generalization - Numeric, categorical, and date generalization
  3. Suppression Strategies - Multiple suppression approaches
  4. Risk Analysis - Re-identification risk assessment
  5. Spring Boot Integration - REST API and configuration
  6. Metrics - Comprehensive anonymization metrics
  7. Flexible Configuration - Customizable hierarchies and strategies

The implementation follows established best practices for privacy-preserving data publishing and can be extended to domain-specific use cases such as healthcare, finance, or government data.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper