Introduction
Canary testing is a deployment strategy where new software versions are released to a small subset of users before rolling out to the entire user base. This framework provides comprehensive canary testing capabilities for Java applications.
Dependencies and Setup
1. Maven Dependencies
<!-- Library versions are declared once here and referenced below through ${...} placeholders. -->
<!-- NOTE(review): Spring Boot 3.x is built on Jakarta EE (jakarta.servlet.*), while the Java sources in this
     document import javax.servlet.http.*; confirm which servlet API is intended before building. -->
<properties>
<spring-boot.version>3.2.0</spring-boot.version>
<micrometer.version>1.12.0</micrometer.version>
<resilience4j.version>2.1.0</resilience4j.version>
</properties>
<!-- Dependencies of the canary framework; every version resolves from the <properties> placeholders. -->
<dependencies>
<!-- Spring Boot -->
<!-- Web starter: the routing code uses servlet request types and a Spring MVC HandlerInterceptor. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<version>${spring-boot.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
<version>${spring-boot.version}</version>
</dependency>
<!-- NOTE(review): no AOP usage is visible in the sources shown here; presumably used elsewhere, confirm. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-aop</artifactId>
<version>${spring-boot.version}</version>
</dependency>
<!-- Metrics and Monitoring -->
<!-- micrometer-core supplies the MeterRegistry, Counter and Timer types used by MetricsCollectionService. -->
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-core</artifactId>
<version>${micrometer.version}</version>
</dependency>
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-registry-prometheus</artifactId>
<version>${micrometer.version}</version>
</dependency>
<!-- Resilience4j for Circuit Breaker -->
<dependency>
<groupId>io.github.resilience4j</groupId>
<artifactId>resilience4j-spring-boot3</artifactId>
<version>${resilience4j.version}</version>
</dependency>
<dependency>
<groupId>io.github.resilience4j</groupId>
<artifactId>resilience4j-micrometer</artifactId>
<version>${resilience4j.version}</version>
</dependency>
<!-- Testing -->
<!-- Test scope only: not packaged with the application. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<version>${spring-boot.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
Core Framework Models
2. Canary Configuration Models
package com.example.canary.model;
import com.fasterxml.jackson.annotation.JsonInclude;
import java.time.Duration;
import java.util.Map;
/**
 * Mutable configuration bean describing one canary rollout: identity fields,
 * traffic splitting, metric thresholds and rollout stepping.
 *
 * <p>Null properties are omitted when the bean is serialised with Jackson
 * ({@code JsonInclude.Include.NON_NULL}). The class performs no validation and
 * is not synchronised.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public class CanaryConfig {

    private String id;
    private String name;
    private String description;
    private TrafficConfig traffic;
    private MetricsConfig metrics;
    private RolloutConfig rollout;
    private Map<String, Object> customProperties;

    /** Creates an empty configuration; every property starts out {@code null}. */
    public CanaryConfig() {
    }

    /**
     * Creates a configuration with its main sections populated.
     * {@code description} and {@code customProperties} are left {@code null}.
     */
    public CanaryConfig(String id, String name, TrafficConfig traffic,
                        MetricsConfig metrics, RolloutConfig rollout) {
        this.id = id;
        this.name = name;
        this.traffic = traffic;
        this.metrics = metrics;
        this.rollout = rollout;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public TrafficConfig getTraffic() {
        return traffic;
    }

    public void setTraffic(TrafficConfig traffic) {
        this.traffic = traffic;
    }

    public MetricsConfig getMetrics() {
        return metrics;
    }

    public void setMetrics(MetricsConfig metrics) {
        this.metrics = metrics;
    }

    public RolloutConfig getRollout() {
        return rollout;
    }

    public void setRollout(RolloutConfig rollout) {
        this.rollout = rollout;
    }

    /** Returns the stored map itself (no copy); may be {@code null}. */
    public Map<String, Object> getCustomProperties() {
        return customProperties;
    }

    public void setCustomProperties(Map<String, Object> customProperties) {
        this.customProperties = customProperties;
    }

    /**
     * Traffic-splitting settings. Defaults: 10% initial traffic, 50% maximum,
     * a 5 minute evaluation interval and the {@code "RANDOM"} strategy.
     */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class TrafficConfig {

        private int initialPercentage = 10;
        private int maxPercentage = 50;
        private Duration evaluationInterval = Duration.ofMinutes(5);
        // One of: RANDOM, USER_ID, SESSION_ID, HEADER
        private String routingStrategy = "RANDOM";

        public int getInitialPercentage() {
            return initialPercentage;
        }

        public void setInitialPercentage(int initialPercentage) {
            this.initialPercentage = initialPercentage;
        }

        public int getMaxPercentage() {
            return maxPercentage;
        }

        public void setMaxPercentage(int maxPercentage) {
            this.maxPercentage = maxPercentage;
        }

        public Duration getEvaluationInterval() {
            return evaluationInterval;
        }

        public void setEvaluationInterval(Duration evaluationInterval) {
            this.evaluationInterval = evaluationInterval;
        }

        public String getRoutingStrategy() {
            return routingStrategy;
        }

        public void setRoutingStrategy(String routingStrategy) {
            this.routingStrategy = routingStrategy;
        }
    }

    /**
     * Metric thresholds. Defaults: 5 minute collection window, 2.0 (percent)
     * error rate, 500.0 (milliseconds) p95 latency, 100.0 (requests per second)
     * throughput, custom metrics disabled.
     */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class MetricsConfig {

        private Duration collectionWindow = Duration.ofMinutes(5);
        private double errorRateThreshold = 2.0;    // percentage
        private double p95LatencyThreshold = 500.0; // milliseconds
        private double throughputThreshold = 100.0; // requests per second
        private boolean enableCustomMetrics = false;

        public Duration getCollectionWindow() {
            return collectionWindow;
        }

        public void setCollectionWindow(Duration collectionWindow) {
            this.collectionWindow = collectionWindow;
        }

        public double getErrorRateThreshold() {
            return errorRateThreshold;
        }

        public void setErrorRateThreshold(double errorRateThreshold) {
            this.errorRateThreshold = errorRateThreshold;
        }

        public double getP95LatencyThreshold() {
            return p95LatencyThreshold;
        }

        public void setP95LatencyThreshold(double p95LatencyThreshold) {
            this.p95LatencyThreshold = p95LatencyThreshold;
        }

        public double getThroughputThreshold() {
            return throughputThreshold;
        }

        public void setThroughputThreshold(double throughputThreshold) {
            this.throughputThreshold = throughputThreshold;
        }

        public boolean isEnableCustomMetrics() {
            return enableCustomMetrics;
        }

        public void setEnableCustomMetrics(boolean enableCustomMetrics) {
            this.enableCustomMetrics = enableCustomMetrics;
        }
    }

    /**
     * Rollout stepping settings. Defaults: 10 minute step interval, 10% step,
     * automatic promotion and rollback enabled, 3 consecutive successful steps.
     */
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public static class RolloutConfig {

        private Duration stepInterval = Duration.ofMinutes(10);
        private int stepPercentage = 10;
        private boolean autoPromote = true;
        private boolean autoRollback = true;
        private int consecutiveSuccessSteps = 3;

        public Duration getStepInterval() {
            return stepInterval;
        }

        public void setStepInterval(Duration stepInterval) {
            this.stepInterval = stepInterval;
        }

        public int getStepPercentage() {
            return stepPercentage;
        }

        public void setStepPercentage(int stepPercentage) {
            this.stepPercentage = stepPercentage;
        }

        public boolean isAutoPromote() {
            return autoPromote;
        }

        public void setAutoPromote(boolean autoPromote) {
            this.autoPromote = autoPromote;
        }

        public boolean isAutoRollback() {
            return autoRollback;
        }

        public void setAutoRollback(boolean autoRollback) {
            this.autoRollback = autoRollback;
        }

        public int getConsecutiveSuccessSteps() {
            return consecutiveSuccessSteps;
        }

        public void setConsecutiveSuccessSteps(int consecutiveSuccessSteps) {
            this.consecutiveSuccessSteps = consecutiveSuccessSteps;
        }
    }
}
3. Canary Deployment State
package com.example.canary.model;
import java.time.LocalDateTime;
import java.util.Map;
/**
 * Mutable state of a single canary deployment: which versions are compared,
 * the current lifecycle status, the share of traffic sent to the canary and
 * the timestamps and metrics gathered along the way.
 *
 * <p>The class is a plain bean; it performs no validation and no synchronisation.
 */
public class CanaryDeployment {

    private String id;
    private String canaryConfigId;
    private String serviceName;
    private String baselineVersion;
    private String canaryVersion;
    private DeploymentStatus status;
    private int currentTrafficPercentage;
    private LocalDateTime startTime;
    private LocalDateTime lastEvaluationTime;
    private Map<String, Object> metrics;
    private Map<String, Object> metadata;

    /** Creates an empty deployment; reference fields are {@code null} and traffic is 0. */
    public CanaryDeployment() {
    }

    /**
     * Creates a deployment in {@link DeploymentStatus#PENDING} state with 0% canary
     * traffic and the start time set to the current local date-time.
     */
    public CanaryDeployment(String id, String canaryConfigId, String serviceName,
                            String baselineVersion, String canaryVersion) {
        this.id = id;
        this.canaryConfigId = canaryConfigId;
        this.serviceName = serviceName;
        this.baselineVersion = baselineVersion;
        this.canaryVersion = canaryVersion;
        this.status = DeploymentStatus.PENDING;
        this.currentTrafficPercentage = 0;
        this.startTime = LocalDateTime.now();
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getCanaryConfigId() {
        return canaryConfigId;
    }

    public void setCanaryConfigId(String canaryConfigId) {
        this.canaryConfigId = canaryConfigId;
    }

    public String getServiceName() {
        return serviceName;
    }

    public void setServiceName(String serviceName) {
        this.serviceName = serviceName;
    }

    public String getBaselineVersion() {
        return baselineVersion;
    }

    public void setBaselineVersion(String baselineVersion) {
        this.baselineVersion = baselineVersion;
    }

    public String getCanaryVersion() {
        return canaryVersion;
    }

    public void setCanaryVersion(String canaryVersion) {
        this.canaryVersion = canaryVersion;
    }

    public DeploymentStatus getStatus() {
        return status;
    }

    public void setStatus(DeploymentStatus status) {
        this.status = status;
    }

    public int getCurrentTrafficPercentage() {
        return currentTrafficPercentage;
    }

    public void setCurrentTrafficPercentage(int currentTrafficPercentage) {
        this.currentTrafficPercentage = currentTrafficPercentage;
    }

    public LocalDateTime getStartTime() {
        return startTime;
    }

    public void setStartTime(LocalDateTime startTime) {
        this.startTime = startTime;
    }

    public LocalDateTime getLastEvaluationTime() {
        return lastEvaluationTime;
    }

    public void setLastEvaluationTime(LocalDateTime lastEvaluationTime) {
        this.lastEvaluationTime = lastEvaluationTime;
    }

    /** Returns the stored metrics map itself (no copy); may be {@code null}. */
    public Map<String, Object> getMetrics() {
        return metrics;
    }

    public void setMetrics(Map<String, Object> metrics) {
        this.metrics = metrics;
    }

    /** Returns the stored metadata map itself (no copy); may be {@code null}. */
    public Map<String, Object> getMetadata() {
        return metadata;
    }

    public void setMetadata(Map<String, Object> metadata) {
        this.metadata = metadata;
    }

    /** Lifecycle states a canary deployment can be in. */
    public enum DeploymentStatus {
        PENDING,
        RUNNING,
        PROMOTED,
        ROLLED_BACK,
        FAILED,
        PAUSED
    }
}
4. Canary Metrics and Analysis
package com.example.canary.model;
import java.time.LocalDateTime;
import java.util.Map;
public class CanaryAnalysis {
private String deploymentId;
private LocalDateTime analysisTime;
private AnalysisResult result;
private Map<String, MetricAnalysis> metrics;
private String recommendation;
private double confidenceScore;
// Constructors
public CanaryAnalysis() {}
public CanaryAnalysis(String deploymentId, AnalysisResult result) {
this.deploymentId = deploymentId;
this.analysisTime = LocalDateTime.now();
this.result = result;
}
// Getters and Setters
public String getDeploymentId() { return deploymentId; }
public void setDeploymentId(String deploymentId) { this.deploymentId = deploymentId; }
public LocalDateTime getAnalysisTime() { return analysisTime; }
public void setAnalysisTime(LocalDateTime analysisTime) { this.analysisTime = analysisTime; }
public AnalysisResult getResult() { return result; }
public void setResult(AnalysisResult result) { this.result = result; }
public Map<String, MetricAnalysis> getMetrics() { return metrics; }
public void setMetrics(Map<String, MetricAnalysis> metrics) { this.metrics = metrics; }
public String getRecommendation() { return recommendation; }
public void setRecommendation(String recommendation) { this.recommendation = recommendation; }
public double getConfidenceScore() { return confidenceScore; }
public void setConfidenceScore(double confidenceScore) { this.confidenceScore = confidenceScore; }
public enum AnalysisResult {
PASS, FAIL, INCONCLUSIVE, NEEDS_MORE_DATA
}
public static class MetricAnalysis {
private String name;
private double baselineValue;
private double canaryValue;
private double threshold;
private boolean passed;
private double deviation;
private String unit;
// Constructors
public MetricAnalysis() {}
public MetricAnalysis(String name, double baselineValue, double canaryValue,
double threshold, String unit) {
this.name = name;
this.baselineValue = baselineValue;
this.canaryValue = canaryValue;
this.threshold = threshold;
this.unit = unit;
this.deviation = calculateDeviation();
this.passed = Math.abs(deviation) <= threshold;
}
private double calculateDeviation() {
if (baselineValue == 0) return 0;
return ((canaryValue - baselineValue) / baselineValue) * 100;
}
// Getters and Setters
public String getName() { return name; }
public void setName(String name) { this.name = name; }
public double getBaselineValue() { return baselineValue; }
public void setBaselineValue(double baselineValue) { this.baselineValue = baselineValue; }
public double getCanaryValue() { return canaryValue; }
public void setCanaryValue(double canaryValue) { this.canaryValue = canaryValue; }
public double getThreshold() { return threshold; }
public void setThreshold(double threshold) { this.threshold = threshold; }
public boolean isPassed() { return passed; }
public void setPassed(boolean passed) { this.passed = passed; }
public double getDeviation() { return deviation; }
public void setDeviation(double deviation) { this.deviation = deviation; }
public String getUnit() { return unit; }
public void setUnit(String unit) { this.unit = unit; }
}
}
Core Framework Components
5. Traffic Routing Service
package com.example.canary.service;
import com.example.canary.model.CanaryConfig;
import com.example.canary.model.CanaryDeployment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import javax.servlet.http.HttpServletRequest;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ThreadLocalRandom;
@Service
public class TrafficRoutingService {
private static final Logger log = LoggerFactory.getLogger(TrafficRoutingService.class);
private final Map<String, CanaryDeployment> activeDeployments = new ConcurrentHashMap<>();
private final Map<String, CanaryConfig> canaryConfigs = new ConcurrentHashMap<>();
public boolean shouldRouteToCanary(String deploymentId, HttpServletRequest request) {
CanaryDeployment deployment = activeDeployments.get(deploymentId);
if (deployment == null || deployment.getCurrentTrafficPercentage() <= 0) {
return false;
}
CanaryConfig config = canaryConfigs.get(deployment.getCanaryConfigId());
if (config == null) {
log.warn("No configuration found for deployment: {}", deploymentId);
return false;
}
return evaluateRouting(deployment, config, request);
}
private boolean evaluateRouting(CanaryDeployment deployment, CanaryConfig config,
HttpServletRequest request) {
String routingStrategy = config.getTraffic().getRoutingStrategy();
int trafficPercentage = deployment.getCurrentTrafficPercentage();
switch (routingStrategy.toUpperCase()) {
case "RANDOM":
return randomRouting(trafficPercentage);
case "USER_ID":
return userIdRouting(request, trafficPercentage);
case "SESSION_ID":
return sessionIdRouting(request, trafficPercentage);
case "HEADER":
return headerBasedRouting(request, trafficPercentage,
config.getTraffic().getCustomProperties());
default:
log.warn("Unknown routing strategy: {}, falling back to random", routingStrategy);
return randomRouting(trafficPercentage);
}
}
private boolean randomRouting(int trafficPercentage) {
return ThreadLocalRandom.current().nextInt(100) < trafficPercentage;
}
private boolean userIdRouting(HttpServletRequest request, int trafficPercentage) {
String userId = extractUserId(request);
if (userId == null) {
return randomRouting(trafficPercentage);
}
// Use consistent hashing for user-based routing
int hash = Math.abs(userId.hashCode()) % 100;
return hash < trafficPercentage;
}
private boolean sessionIdRouting(HttpServletRequest request, int trafficPercentage) {
String sessionId = request.getSession(false) != null ?
request.getSession().getId() : null;
if (sessionId == null) {
return randomRouting(trafficPercentage);
}
int hash = Math.abs(sessionId.hashCode()) % 100;
return hash < trafficPercentage;
}
private boolean headerBasedRouting(HttpServletRequest request, int trafficPercentage,
Map<String, Object> customProperties) {
String headerName = (String) customProperties.get("routingHeader");
if (headerName == null) {
return randomRouting(trafficPercentage);
}
String headerValue = request.getHeader(headerName);
if (headerValue == null) {
return randomRouting(trafficPercentage);
}
int hash = Math.abs(headerValue.hashCode()) % 100;
return hash < trafficPercentage;
}
private String extractUserId(HttpServletRequest request) {
// Extract user ID from authentication context, header, or parameter
// This is application-specific
return request.getHeader("X-User-ID");
}
public void registerDeployment(CanaryDeployment deployment, CanaryConfig config) {
activeDeployments.put(deployment.getId(), deployment);
canaryConfigs.put(deployment.getCanaryConfigId(), config);
log.info("Registered canary deployment: {} with {}% traffic",
deployment.getId(), deployment.getCurrentTrafficPercentage());
}
public void updateTrafficPercentage(String deploymentId, int newPercentage) {
CanaryDeployment deployment = activeDeployments.get(deploymentId);
if (deployment != null) {
deployment.setCurrentTrafficPercentage(newPercentage);
log.info("Updated traffic percentage for deployment {} to {}%",
deploymentId, newPercentage);
}
}
public void removeDeployment(String deploymentId) {
CanaryDeployment deployment = activeDeployments.remove(deploymentId);
if (deployment != null) {
canaryConfigs.remove(deployment.getCanaryConfigId());
log.info("Removed canary deployment: {}", deploymentId);
}
}
}
6. Metrics Collection Service
package com.example.canary.service;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import org.springframework.stereotype.Service;
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Records per-deployment request, error, latency and custom metrics in a
 * Micrometer {@link MeterRegistry}.
 *
 * <p>Meters are created lazily, one set per deployment id, and tagged with
 * {@code deployment}, {@code version} and (for request metrics) {@code endpoint}.
 * NOTE(review): the {@code endpoint} tag takes whatever string the caller passes;
 * if raw request paths are used the tag cardinality is unbounded. Confirm callers
 * pass route templates.
 */
@Service
public class MetricsCollectionService {

    private final MeterRegistry meterRegistry;
    private final Map<String, DeploymentMetrics> deploymentMetrics = new ConcurrentHashMap<>();

    public MetricsCollectionService(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    /**
     * Records one handled request.
     *
     * @param deploymentId deployment the request belongs to
     * @param version      version that served the request
     * @param isSuccess    {@code false} additionally increments the error counter
     * @param durationMs   request duration in milliseconds
     * @param endpoint     endpoint label used as a tag
     */
    public void recordRequest(String deploymentId, String version, boolean isSuccess,
                              long durationMs, String endpoint) {
        metricsFor(deploymentId).recordRequest(version, isSuccess, durationMs, endpoint);
    }

    /** Sets the current value of the custom gauge {@code canary.custom.<metricName>} for the given version. */
    public void recordCustomMetric(String deploymentId, String metricName,
                                   double value, String version) {
        metricsFor(deploymentId).recordCustomMetric(metricName, value, version);
    }

    /** Returns the deployment's snapshot, or an empty immutable map when nothing was recorded for it. */
    public Map<String, Object> getMetricsSnapshot(String deploymentId) {
        DeploymentMetrics metrics = deploymentMetrics.get(deploymentId);
        if (metrics != null) {
            return metrics.getSnapshot();
        }
        return Map.of();
    }

    /**
     * Forgets a deployment and removes the meters created for it from the
     * registry, so finished deployments do not accumulate in the registry.
     */
    public void cleanupMetrics(String deploymentId) {
        DeploymentMetrics removed = deploymentMetrics.remove(deploymentId);
        if (removed != null) {
            removed.unregisterMeters();
        }
    }

    /** Returns the metrics holder for a deployment, creating it on first use. */
    private DeploymentMetrics metricsFor(String deploymentId) {
        return deploymentMetrics.computeIfAbsent(
                deploymentId, id -> new DeploymentMetrics(meterRegistry, id));
    }

    /** Meters belonging to one deployment, cached by {@code version:endpoint} (or {@code version:metricName}). */
    private static class DeploymentMetrics {

        private final String deploymentId;
        private final MeterRegistry meterRegistry;
        private final Map<String, Counter> requestCounters = new ConcurrentHashMap<>();
        private final Map<String, Counter> errorCounters = new ConcurrentHashMap<>();
        private final Map<String, Timer> latencyTimers = new ConcurrentHashMap<>();
        private final Map<String, CustomGauge> customGauges = new ConcurrentHashMap<>();

        public DeploymentMetrics(MeterRegistry meterRegistry, String deploymentId) {
            this.meterRegistry = meterRegistry;
            this.deploymentId = deploymentId;
        }

        /** Increments the request counter, the error counter on failure, and records the latency. */
        public void recordRequest(String version, boolean isSuccess,
                                  long durationMs, String endpoint) {
            String counterKey = version + ":" + endpoint;

            requestCounters.computeIfAbsent(counterKey,
                    key -> Counter.builder("canary.requests")
                            .tag("deployment", deploymentId)
                            .tag("version", version)
                            .tag("endpoint", endpoint)
                            .register(meterRegistry))
                    .increment();

            if (!isSuccess) {
                errorCounters.computeIfAbsent(counterKey,
                        key -> Counter.builder("canary.errors")
                                .tag("deployment", deploymentId)
                                .tag("version", version)
                                .tag("endpoint", endpoint)
                                .register(meterRegistry))
                        .increment();
            }

            latencyTimers.computeIfAbsent(counterKey,
                    key -> Timer.builder("canary.latency")
                            .tag("deployment", deploymentId)
                            .tag("version", version)
                            .tag("endpoint", endpoint)
                            .register(meterRegistry))
                    .record(durationMs, TimeUnit.MILLISECONDS);
        }

        /**
         * Publishes {@code value} through a gauge named {@code canary.custom.<metricName>}.
         *
         * <p>The gauge reads from a holder that this object keeps a strong reference
         * to: a gauge registered on a bare boxed number only holds it weakly and is
         * never updated by later calls. Subsequent calls for the same metric and
         * version update the holder instead of registering a second gauge.
         */
        public void recordCustomMetric(String metricName, double value, String version) {
            String gaugeKey = version + ":" + metricName;
            CustomGauge gauge = customGauges.computeIfAbsent(gaugeKey, key -> {
                AtomicReference<Double> holder = new AtomicReference<Double>(value);
                Gauge meter = Gauge.builder("canary.custom." + metricName, holder, ref -> ref.get())
                        .tag("deployment", deploymentId)
                        .tag("version", version)
                        .register(meterRegistry);
                return new CustomGauge(holder, meter);
            });
            gauge.value.set(value);
        }

        /**
         * Returns an immutable map with the deployment id and the current time in
         * milliseconds. Placeholder: it does not yet aggregate the recorded meters.
         */
        public Map<String, Object> getSnapshot() {
            return Map.of(
                    "deploymentId", deploymentId,
                    "timestamp", System.currentTimeMillis()
            );
        }

        /** Removes every meter this object registered from the registry and clears the caches. */
        void unregisterMeters() {
            requestCounters.values().forEach(counter -> meterRegistry.remove(counter));
            errorCounters.values().forEach(counter -> meterRegistry.remove(counter));
            latencyTimers.values().forEach(timer -> meterRegistry.remove(timer));
            customGauges.values().forEach(gauge -> meterRegistry.remove(gauge.meter));
            requestCounters.clear();
            errorCounters.clear();
            latencyTimers.clear();
            customGauges.clear();
        }

        /** Pairs a registered gauge with the strongly-held value it reads from. */
        private static final class CustomGauge {
            private final AtomicReference<Double> value;
            private final Gauge meter;

            private CustomGauge(AtomicReference<Double> value, Gauge meter) {
                this.value = value;
                this.meter = meter;
            }
        }
    }
}
7. Canary Analysis Engine
package com.example.canary.service;
import com.example.canary.model.CanaryAnalysis;
import com.example.canary.model.CanaryConfig;
import com.example.canary.model.CanaryDeployment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
/**
 * Compares baseline and canary metrics of a deployment and produces a
 * {@link CanaryAnalysis} with a verdict, per-metric details, a recommendation
 * and a weighted confidence score.
 *
 * <p>The metric values themselves are currently mock data generated with
 * {@code Math.random()}; see {@link #getMetricsForVersion}.
 */
@Service
public class CanaryAnalysisEngine {

    private static final Logger log = LoggerFactory.getLogger(CanaryAnalysisEngine.class);

    private final MetricsCollectionService metricsService;

    public CanaryAnalysisEngine(MetricsCollectionService metricsService) {
        this.metricsService = metricsService;
    }

    /**
     * Analyses one deployment against the thresholds in its configuration.
     *
     * <p>The verdict is {@code FAIL} when the error-rate or the p95-latency
     * comparison fails, otherwise {@code PASS}. The throughput comparison is
     * reported and counted in the confidence score but does not affect the verdict.
     *
     * @param deployment deployment whose baseline and canary versions are compared
     * @param config     configuration supplying the metric thresholds
     * @return the completed analysis
     */
    public CanaryAnalysis analyzeDeployment(CanaryDeployment deployment, CanaryConfig config) {
        log.info("Analyzing canary deployment: {}", deployment.getId());

        Map<String, Object> baselineMetrics =
                getMetricsForVersion(deployment, deployment.getBaselineVersion());
        Map<String, Object> canaryMetrics =
                getMetricsForVersion(deployment, deployment.getCanaryVersion());

        Map<String, CanaryAnalysis.MetricAnalysis> metricAnalysis = new HashMap<>();

        CanaryAnalysis.MetricAnalysis errorRateAnalysis = analyzeErrorRate(
                baselineMetrics, canaryMetrics, config.getMetrics().getErrorRateThreshold());
        metricAnalysis.put("error_rate", errorRateAnalysis);

        CanaryAnalysis.MetricAnalysis latencyAnalysis = analyzeLatency(
                baselineMetrics, canaryMetrics, config.getMetrics().getP95LatencyThreshold());
        metricAnalysis.put("p95_latency", latencyAnalysis);

        CanaryAnalysis.MetricAnalysis throughputAnalysis = analyzeThroughput(
                baselineMetrics, canaryMetrics, config.getMetrics().getThroughputThreshold());
        metricAnalysis.put("throughput", throughputAnalysis);

        // Only error rate and latency gate the verdict.
        boolean gatesPassed = errorRateAnalysis.isPassed() && latencyAnalysis.isPassed();
        CanaryAnalysis.AnalysisResult overallResult = gatesPassed
                ? CanaryAnalysis.AnalysisResult.PASS
                : CanaryAnalysis.AnalysisResult.FAIL;

        CanaryAnalysis analysis = new CanaryAnalysis(deployment.getId(), overallResult);
        analysis.setMetrics(metricAnalysis);
        analysis.setRecommendation(generateRecommendation(analysis, deployment));
        analysis.setConfidenceScore(calculateConfidenceScore(metricAnalysis));

        log.info("Analysis completed for deployment {}: {}", deployment.getId(), overallResult);
        return analysis;
    }

    /** Builds the {@code error_rate} comparison (unit {@code %}); missing values count as 0. */
    private CanaryAnalysis.MetricAnalysis analyzeErrorRate(Map<String, Object> baselineMetrics,
                                                           Map<String, Object> canaryMetrics,
                                                           double threshold) {
        double baselineErrorRate = getMetricValue(baselineMetrics, "error_rate", 0.0);
        double canaryErrorRate = getMetricValue(canaryMetrics, "error_rate", 0.0);
        return new CanaryAnalysis.MetricAnalysis(
                "error_rate", baselineErrorRate, canaryErrorRate, threshold, "%");
    }

    /** Builds the {@code p95_latency} comparison (unit {@code ms}); missing values count as 0. */
    private CanaryAnalysis.MetricAnalysis analyzeLatency(Map<String, Object> baselineMetrics,
                                                         Map<String, Object> canaryMetrics,
                                                         double threshold) {
        double baselineLatency = getMetricValue(baselineMetrics, "p95_latency", 0.0);
        double canaryLatency = getMetricValue(canaryMetrics, "p95_latency", 0.0);
        return new CanaryAnalysis.MetricAnalysis(
                "p95_latency", baselineLatency, canaryLatency, threshold, "ms");
    }

    /** Builds the {@code throughput} comparison (unit {@code rps}); missing values count as 0. */
    private CanaryAnalysis.MetricAnalysis analyzeThroughput(Map<String, Object> baselineMetrics,
                                                            Map<String, Object> canaryMetrics,
                                                            double threshold) {
        double baselineThroughput = getMetricValue(baselineMetrics, "throughput", 0.0);
        double canaryThroughput = getMetricValue(canaryMetrics, "throughput", 0.0);
        return new CanaryAnalysis.MetricAnalysis(
                "throughput", baselineThroughput, canaryThroughput, threshold, "rps");
    }

    /** Reads a numeric metric from the map, returning {@code defaultValue} when absent or not a number. */
    private double getMetricValue(Map<String, Object> metrics, String metricName, double defaultValue) {
        Object value = metrics.get(metricName);
        if (value instanceof Number) {
            return ((Number) value).doubleValue();
        }
        return defaultValue;
    }

    /**
     * Returns the metrics for one version of a deployment.
     *
     * <p>Mock implementation: starts from the service snapshot and adds randomly
     * generated values; the {@code version} argument is not yet used. The snapshot
     * is copied into a fresh {@link HashMap} first because
     * {@code getMetricsSnapshot} returns immutable maps, on which {@code put}
     * throws {@link UnsupportedOperationException}.
     */
    private Map<String, Object> getMetricsForVersion(CanaryDeployment deployment, String version) {
        Map<String, Object> metrics =
                new HashMap<>(metricsService.getMetricsSnapshot(deployment.getId()));
        // Version-specific metrics would come from the metrics backend.
        metrics.put("error_rate", Math.random() * 5); // 0-5% error rate
        metrics.put("p95_latency", 100 + Math.random() * 400); // 100-500ms latency
        metrics.put("throughput", 50 + Math.random() * 150); // 50-200 requests per second
        return metrics;
    }

    /** Maps the analysis verdict to a fixed human-readable recommendation. */
    private String generateRecommendation(CanaryAnalysis analysis, CanaryDeployment deployment) {
        switch (analysis.getResult()) {
            case PASS:
                return "Canary is performing well. Consider increasing traffic or promoting.";
            case FAIL:
                return "Canary is underperforming. Consider rolling back or reducing traffic.";
            case INCONCLUSIVE:
                return "Results are inconclusive. Continue monitoring with current traffic.";
            case NEEDS_MORE_DATA:
                return "Insufficient data for analysis. Continue data collection.";
            default:
                return "Unknown analysis result.";
        }
    }

    /**
     * Returns the weighted share of passed metrics, from 0 to 100.
     * An empty map yields 0.
     */
    private double calculateConfidenceScore(Map<String, CanaryAnalysis.MetricAnalysis> metrics) {
        if (metrics.isEmpty()) {
            return 0.0;
        }
        double totalWeight = 0.0;
        double weightedScore = 0.0;
        for (CanaryAnalysis.MetricAnalysis metric : metrics.values()) {
            double weight = getMetricWeight(metric.getName());
            double score = metric.isPassed() ? 1.0 : 0.0;
            weightedScore += weight * score;
            totalWeight += weight;
        }
        return totalWeight > 0 ? (weightedScore / totalWeight) * 100 : 0.0;
    }

    /** Weight of a metric in the confidence score; unknown metrics weigh 0.1. */
    private double getMetricWeight(String metricName) {
        switch (metricName) {
            case "error_rate": return 0.5;  // Highest weight for errors
            case "p95_latency": return 0.3; // Medium weight for latency
            case "throughput": return 0.2;  // Lower weight for throughput
            default: return 0.1;
        }
    }
}
8. Canary Deployment Manager
package com.example.canary.service;
import com.example.canary.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import java.time.LocalDateTime;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
@Service
public class CanaryDeploymentManager {
private static final Logger log = LoggerFactory.getLogger(CanaryDeploymentManager.class);
private final TrafficRoutingService routingService;
private final CanaryAnalysisEngine analysisEngine;
private final MetricsCollectionService metricsService;
private final Map<String, CanaryDeployment> activeDeployments = new ConcurrentHashMap<>();
private final Map<String, CanaryConfig> canaryConfigs = new ConcurrentHashMap<>();
private final AtomicInteger successStepCount = new AtomicInteger(0);
public CanaryDeploymentManager(TrafficRoutingService routingService,
CanaryAnalysisEngine analysisEngine,
MetricsCollectionService metricsService) {
this.routingService = routingService;
this.analysisEngine = analysisEngine;
this.metricsService = metricsService;
}
public CanaryDeployment startCanaryDeployment(String configId, String serviceName,
String baselineVersion, String canaryVersion) {
CanaryConfig config = canaryConfigs.get(configId);
if (config == null) {
throw new IllegalArgumentException("Canary configuration not found: " + configId);
}
String deploymentId = generateDeploymentId(serviceName, canaryVersion);
CanaryDeployment deployment = new CanaryDeployment(
deploymentId, configId, serviceName, baselineVersion, canaryVersion);
deployment.setCurrentTrafficPercentage(config.getTraffic().getInitialPercentage());
deployment.setStatus(CanaryDeployment.DeploymentStatus.RUNNING);
activeDeployments.put(deploymentId, deployment);
routingService.registerDeployment(deployment, config);
log.info("Started canary deployment: {} with {}% traffic",
deploymentId, deployment.getCurrentTrafficPercentage());
return deployment;
}
public void updateTrafficPercentage(String deploymentId, int newPercentage) {
CanaryDeployment deployment = activeDeployments.get(deploymentId);
if (deployment != null) {
CanaryConfig config = canaryConfigs.get(deployment.getCanaryConfigId());
int maxPercentage = config.getTraffic().getMaxPercentage();
if (newPercentage > maxPercentage) {
log.warn("Requested traffic percentage {} exceeds maximum {}. Using maximum.",
newPercentage, maxPercentage);
newPercentage = maxPercentage;
}
deployment.setCurrentTrafficPercentage(newPercentage);
routingService.updateTrafficPercentage(deploymentId, newPercentage);
log.info("Updated traffic percentage for deployment {} to {}%",
deploymentId, newPercentage);
}
}
public void promoteDeployment(String deploymentId) {
CanaryDeployment deployment = activeDeployments.get(deploymentId);
if (deployment != null) {
deployment.setStatus(CanaryDeployment.DeploymentStatus.PROMOTED);
routingService.removeDeployment(deploymentId);
metricsService.cleanupMetrics(deploymentId);
log.info("Promoted canary deployment: {}", deploymentId);
}
}
public void rollbackDeployment(String deploymentId) {
CanaryDeployment deployment = activeDeployments.get(deploymentId);
if (deployment != null) {
deployment.setStatus(CanaryDeployment.DeploymentStatus.ROLLED_BACK);
routingService.removeDeployment(deploymentId);
metricsService.cleanupMetrics(deploymentId);
log.info("Rolled back canary deployment: {}", deploymentId);
}
}
@Scheduled(fixedRate = 300000) // Run every 5 minutes
public void evaluateActiveDeployments() {
for (CanaryDeployment deployment : activeDeployments.values()) {
if (deployment.getStatus() != CanaryDeployment.DeploymentStatus.RUNNING) {
continue;
}
try {
evaluateDeployment(deployment);
} catch (Exception e) {
log.error("Error evaluating deployment: {}", deployment.getId(), e);
}
}
}
private void evaluateDeployment(CanaryDeployment deployment) {
CanaryConfig config = canaryConfigs.get(deployment.getCanaryConfigId());
if (config == null) {
log.warn("No configuration found for deployment: {}", deployment.getId());
return;
}
CanaryAnalysis analysis = analysisEngine.analyzeDeployment(deployment, config);
deployment.setLastEvaluationTime(LocalDateTime.now());
deployment.setMetrics(Map.of("last_analysis", analysis));
log.info("Deployment {} analysis: {}", deployment.getId(), analysis.getResult());
if (config.getRollout().isAutoPromote() || config.getRollout().isAutoRollback()) {
handleAutomaticActions(deployment, analysis, config);
}
}
private void handleAutomaticActions(CanaryDeployment deployment, CanaryAnalysis analysis,
CanaryConfig config) {
switch (analysis.getResult()) {
case PASS:
handleSuccessfulAnalysis(deployment, config);
break;
case FAIL:
handleFailedAnalysis(deployment, config);
break;
case INCONCLUSIVE:
case NEEDS_MORE_DATA:
// No automatic action, continue monitoring
break;
}
}
/**
 * Reacts to a passing analysis by counting it and, once enough passes have accumulated,
 * either stepping traffic up or promoting the deployment.
 *
 * <p>When the pass count reaches the configured number of consecutive success steps:
 * <ul>
 *   <li>if current traffic is below the configured maximum, traffic is raised by one step
 *       (capped at the maximum) and the counter is reset;</li>
 *   <li>if current traffic is already at or above the maximum and auto-promote is enabled,
 *       the deployment is promoted and the counter is reset;</li>
 *   <li>otherwise nothing changes.</li>
 * </ul>
 *
 * @param deployment the deployment whose analysis passed
 * @param config     the configuration supplying rollout and traffic limits
 */
private void handleSuccessfulAnalysis(CanaryDeployment deployment, CanaryConfig config) {
    // Decide on the value returned by the atomic increment itself; a separate get() could
    // observe a value changed by another caller in between.
    long consecutiveSuccesses = successStepCount.incrementAndGet();
    if (consecutiveSuccesses < config.getRollout().getConsecutiveSuccessSteps()) {
        return;
    }

    int currentTraffic = deployment.getCurrentTrafficPercentage();
    int maxPercentage = config.getTraffic().getMaxPercentage();

    if (currentTraffic >= maxPercentage) {
        if (config.getRollout().isAutoPromote()) {
            promoteDeployment(deployment.getId());
            // Reset after a completed promotion so the accumulated count cannot satisfy
            // the threshold on the very next passing analysis.
            // NOTE(review): successStepCount is declared outside this excerpt and appears
            // to be shared rather than per-deployment - confirm.
            successStepCount.set(0);
        }
        return;
    }

    int stepPercentage = config.getRollout().getStepPercentage();
    int newTraffic = Math.min(currentTraffic + stepPercentage, maxPercentage);
    updateTrafficPercentage(deployment.getId(), newTraffic);
    successStepCount.set(0); // Reset for next step
}
/**
 * Reacts to a failing analysis.
 *
 * <p>The success counter is always cleared. With auto-rollback enabled the deployment is
 * rolled back; otherwise its traffic share is lowered by one rollout step, never below 0.
 *
 * @param deployment the deployment whose analysis failed
 * @param config     the configuration supplying the rollout settings
 */
private void handleFailedAnalysis(CanaryDeployment deployment, CanaryConfig config) {
    successStepCount.set(0); // Reset success counter

    if (!config.getRollout().isAutoRollback()) {
        // Reduce traffic or take other mitigation actions
        int reducedTraffic = Math.max(
                0,
                deployment.getCurrentTrafficPercentage() - config.getRollout().getStepPercentage());
        updateTrafficPercentage(deployment.getId(), reducedTraffic);
        log.warn("Reduced traffic for deployment {} to {}% due to failed analysis",
                deployment.getId(), reducedTraffic);
        return;
    }

    rollbackDeployment(deployment.getId());
    log.warn("Automatically rolled back deployment due to failed analysis: {}",
            deployment.getId());
}
/**
 * Builds a deployment id of the form {@code <serviceName>-<version>-<epochMillis>},
 * where the last part is the current value of {@link System#currentTimeMillis()}.
 *
 * @param serviceName name of the service being deployed
 * @param version     version string included in the id
 * @return the generated id
 */
private String generateDeploymentId(String serviceName, String version) {
    final String prefix = serviceName + "-" + version;
    final long createdAtMillis = System.currentTimeMillis();
    return prefix + "-" + createdAtMillis;
}
/**
 * Stores the given configuration under its id and logs the registration.
 *
 * @param config the configuration to register
 */
public void registerConfig(CanaryConfig config) {
    final String configId = config.getId();
    canaryConfigs.put(configId, config);
    log.info("Registered canary configuration: {}", configId);
}
/**
 * Looks up a tracked deployment by id.
 *
 * @param deploymentId id of the deployment to look up
 * @return whatever the active-deployment map holds for that id
 */
public CanaryDeployment getDeployment(String deploymentId) {
    final CanaryDeployment tracked = activeDeployments.get(deploymentId);
    return tracked;
}
/**
 * Returns an unmodifiable copy of the tracked deployments, keyed by deployment id.
 *
 * @return a copy of the active-deployment map taken at call time
 */
public Map<String, CanaryDeployment> getActiveDeployments() {
    final Map<String, CanaryDeployment> snapshot = Map.copyOf(activeDeployments);
    return snapshot;
}
}
Spring Boot Integration
9. Web Interceptor for Traffic Routing
package com.example.canary.interceptor;
import com.example.canary.service.TrafficRoutingService;
import org.springframework.stereotype.Component;
import org.springframework.web.servlet.HandlerInterceptor;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
@Component
public class CanaryRoutingInterceptor implements HandlerInterceptor {
private final TrafficRoutingService routingService;
private final String CANARY_DEPLOYMENT_HEADER = "X-Canary-Deployment";
private final String CANARY_VERSION_HEADER = "X-Canary-Version";
public CanaryRoutingInterceptor(TrafficRoutingService routingService) {
this.routingService = routingService;
}
@Override
public boolean preHandle(HttpServletRequest request, HttpServletResponse response,
Object handler) throws Exception {
// Check if request should be routed to canary
// This is a simplified example - in reality, you'd have multiple deployments
String deploymentId = getActiveDeploymentId(request);
if (deploymentId != null && routingService.shouldRouteToCanary(deploymentId, request)) {
response.setHeader(CANARY_DEPLOYMENT_HEADER, deploymentId);
// In a real scenario, you might route to a different service instance
}
return true;
}
private String getActiveDeploymentId(HttpServletRequest request) {
// In a real implementation, you'd determine which deployment is active
// based on the request path, service, or other criteria
return "example-deployment-1";
}
}
10. Configuration
package com.example.canary.config;
import com.example.canary.interceptor.CanaryRoutingInterceptor;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.servlet.config.annotation.InterceptorRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
/**
 * MVC configuration that installs the canary routing interceptor on the API paths.
 */
@Configuration
public class WebConfig implements WebMvcConfigurer {

    /** Only requests matching this pattern pass through the canary interceptor. */
    private static final String INTERCEPTED_PATHS = "/api/**";

    private final CanaryRoutingInterceptor routingInterceptor;

    public WebConfig(CanaryRoutingInterceptor canaryRoutingInterceptor) {
        this.routingInterceptor = canaryRoutingInterceptor;
    }

    /** Registers the interceptor and restricts it to {@code /api/**}. */
    @Override
    public void addInterceptors(InterceptorRegistry registry) {
        var registration = registry.addInterceptor(routingInterceptor);
        registration.addPathPatterns(INTERCEPTED_PATHS);
    }
}
REST API Controllers
11. Canary Management API
package com.example.canary.controller;
import com.example.canary.model.CanaryConfig;
import com.example.canary.model.CanaryDeployment;
import com.example.canary.service.CanaryDeploymentManager;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.Map;
/**
 * REST endpoints under {@code /api/canary} for managing canary configurations and
 * deployments. Every operation is delegated to {@link CanaryDeploymentManager}.
 */
@RestController
@RequestMapping("/api/canary")
public class CanaryController {

    /** Inclusive bounds for a traffic percentage supplied by a client. */
    private static final int MIN_TRAFFIC_PERCENTAGE = 0;
    private static final int MAX_TRAFFIC_PERCENTAGE = 100;

    private final CanaryDeploymentManager deploymentManager;

    public CanaryController(CanaryDeploymentManager deploymentManager) {
        this.deploymentManager = deploymentManager;
    }

    /**
     * Registers a canary configuration.
     *
     * @param config the configuration from the request body
     * @return 200 with the same configuration
     */
    @PostMapping("/config")
    public ResponseEntity<CanaryConfig> createConfig(@RequestBody CanaryConfig config) {
        deploymentManager.registerConfig(config);
        return ResponseEntity.ok(config);
    }

    /**
     * Starts a canary deployment from the ids and versions in the request.
     *
     * @param request config id, service name, baseline version and canary version
     * @return 200 with the deployment returned by the manager
     */
    @PostMapping("/deployments")
    public ResponseEntity<CanaryDeployment> startDeployment(
            @RequestBody StartDeploymentRequest request) {
        CanaryDeployment deployment = deploymentManager.startCanaryDeployment(
                request.getConfigId(),
                request.getServiceName(),
                request.getBaselineVersion(),
                request.getCanaryVersion()
        );
        return ResponseEntity.ok(deployment);
    }

    /**
     * Sets the traffic percentage of a deployment.
     *
     * <p>The percentage comes straight from the client, so values outside 0-100 are
     * rejected with 400 before the manager is called.
     *
     * @param deploymentId id of the deployment to change
     * @param request      body carrying the requested percentage
     * @return 200 with the applied values, or 400 when the percentage is out of range
     */
    @PutMapping("/deployments/{deploymentId}/traffic")
    public ResponseEntity<Map<String, Object>> updateTraffic(
            @PathVariable String deploymentId,
            @RequestBody UpdateTrafficRequest request) {
        int percentage = request.getPercentage();
        if (percentage < MIN_TRAFFIC_PERCENTAGE || percentage > MAX_TRAFFIC_PERCENTAGE) {
            return ResponseEntity.badRequest().body(Map.of(
                    "deploymentId", deploymentId,
                    "trafficPercentage", percentage,
                    "status", "rejected",
                    "error", "percentage must be between 0 and 100"
            ));
        }
        deploymentManager.updateTrafficPercentage(deploymentId, percentage);
        return ResponseEntity.ok(Map.of(
                "deploymentId", deploymentId,
                "trafficPercentage", percentage,
                "status", "updated"
        ));
    }

    /**
     * Promotes a deployment.
     *
     * @param deploymentId id of the deployment to promote
     * @return 200 with the id and status {@code "promoted"}
     */
    @PostMapping("/deployments/{deploymentId}/promote")
    public ResponseEntity<Map<String, String>> promoteDeployment(
            @PathVariable String deploymentId) {
        deploymentManager.promoteDeployment(deploymentId);
        return ResponseEntity.ok(Map.of(
                "deploymentId", deploymentId,
                "status", "promoted"
        ));
    }

    /**
     * Rolls back a deployment.
     *
     * @param deploymentId id of the deployment to roll back
     * @return 200 with the id and status {@code "rolled_back"}
     */
    @PostMapping("/deployments/{deploymentId}/rollback")
    public ResponseEntity<Map<String, String>> rollbackDeployment(
            @PathVariable String deploymentId) {
        deploymentManager.rollbackDeployment(deploymentId);
        return ResponseEntity.ok(Map.of(
                "deploymentId", deploymentId,
                "status", "rolled_back"
        ));
    }

    /**
     * Fetches one deployment.
     *
     * @param deploymentId id of the deployment to fetch
     * @return 200 with the deployment, or 404 when the manager returns {@code null}
     */
    @GetMapping("/deployments/{deploymentId}")
    public ResponseEntity<CanaryDeployment> getDeployment(
            @PathVariable String deploymentId) {
        CanaryDeployment deployment = deploymentManager.getDeployment(deploymentId);
        if (deployment == null) {
            return ResponseEntity.notFound().build();
        }
        return ResponseEntity.ok(deployment);
    }

    /**
     * Lists the deployments currently held by the manager.
     *
     * @return 200 with the deployments keyed by id
     */
    @GetMapping("/deployments")
    public ResponseEntity<Map<String, CanaryDeployment>> getActiveDeployments() {
        return ResponseEntity.ok(deploymentManager.getActiveDeployments());
    }

    // Request DTOs

    /** Request body for starting a deployment. */
    public static class StartDeploymentRequest {
        private String configId;
        private String serviceName;
        private String baselineVersion;
        private String canaryVersion;

        // Getters and Setters
        public String getConfigId() { return configId; }
        public void setConfigId(String configId) { this.configId = configId; }
        public String getServiceName() { return serviceName; }
        public void setServiceName(String serviceName) { this.serviceName = serviceName; }
        public String getBaselineVersion() { return baselineVersion; }
        public void setBaselineVersion(String baselineVersion) { this.baselineVersion = baselineVersion; }
        public String getCanaryVersion() { return canaryVersion; }
        public void setCanaryVersion(String canaryVersion) { this.canaryVersion = canaryVersion; }
    }

    /** Request body for changing a deployment's traffic percentage. */
    public static class UpdateTrafficRequest {
        private int percentage;

        // Getters and Setters
        public int getPercentage() { return percentage; }
        public void setPercentage(int percentage) { this.percentage = percentage; }
    }
}
Testing
12. Unit Tests
package com.example.canary.test;
import com.example.canary.model.CanaryConfig;
import com.example.canary.model.CanaryDeployment;
import com.example.canary.service.CanaryAnalysisEngine;
import com.example.canary.service.CanaryDeploymentManager;
import com.example.canary.service.MetricsCollectionService;
import com.example.canary.service.TrafficRoutingService;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.*;
/**
 * Unit tests for {@link CanaryDeploymentManager} with its three collaborators mocked.
 */
@ExtendWith(MockitoExtension.class)
class CanaryDeploymentManagerTest {

    private static final String CONFIG_ID = "test-config";
    private static final String SERVICE_NAME = "test-service";
    private static final String BASELINE_VERSION = "v1.0.0";
    private static final String CANARY_VERSION = "v1.1.0";

    @Mock
    private TrafficRoutingService routingService;
    @Mock
    private CanaryAnalysisEngine analysisEngine;
    @Mock
    private MetricsCollectionService metricsService;

    private CanaryDeploymentManager deploymentManager;
    private CanaryConfig testConfig;

    /** Builds a manager over the mocks and registers a config with 10% initial / 50% max traffic. */
    @BeforeEach
    void setUp() {
        deploymentManager = new CanaryDeploymentManager(
                routingService, analysisEngine, metricsService);

        CanaryConfig.TrafficConfig traffic = new CanaryConfig.TrafficConfig();
        traffic.setInitialPercentage(10);
        traffic.setMaxPercentage(50);

        testConfig = new CanaryConfig();
        testConfig.setId(CONFIG_ID);
        testConfig.setName("Test Configuration");
        testConfig.setTraffic(traffic);

        deploymentManager.registerConfig(testConfig);
    }

    /** Starting a deployment yields a RUNNING deployment at the initial traffic share. */
    @Test
    void testStartCanaryDeployment() {
        // When
        CanaryDeployment started = startTestDeployment();

        // Then
        assertThat(started).isNotNull();
        assertThat(started.getServiceName()).isEqualTo(SERVICE_NAME);
        assertThat(started.getCurrentTrafficPercentage()).isEqualTo(10);
        assertThat(started.getStatus()).isEqualTo(CanaryDeployment.DeploymentStatus.RUNNING);
        verify(routingService).registerDeployment(any(CanaryDeployment.class), any(CanaryConfig.class));
    }

    /** Updating traffic changes the stored deployment and is forwarded to the routing service. */
    @Test
    void testUpdateTrafficPercentage() {
        // Given
        String deploymentId = startTestDeployment().getId();

        // When
        deploymentManager.updateTrafficPercentage(deploymentId, 25);

        // Then
        CanaryDeployment reloaded = deploymentManager.getDeployment(deploymentId);
        assertThat(reloaded.getCurrentTrafficPercentage()).isEqualTo(25);
        verify(routingService).updateTrafficPercentage(deploymentId, 25);
    }

    /** Starts a deployment against the config registered in {@link #setUp()}. */
    private CanaryDeployment startTestDeployment() {
        return deploymentManager.startCanaryDeployment(
                CONFIG_ID, SERVICE_NAME, BASELINE_VERSION, CANARY_VERSION);
    }
}
Configuration Files
13. Application Configuration
# application.yml
spring:
  application:
    name: canary-framework
server:
  port: 8080
management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus
  endpoint:
    health:
      show-details: always
    metrics:
      enabled: true
    prometheus:
      enabled: true
canary:
  default:
    initial-traffic-percentage: 10
    max-traffic-percentage: 50
    evaluation-interval: PT5M
    auto-rollback: true
    auto-promote: true
logging:
  level:
    com.example.canary: DEBUG
Usage Example
14. Example Canary Deployment
package com.example.canary.example;
import com.example.canary.controller.CanaryController;
import com.example.canary.model.CanaryConfig;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
/**
 * Startup example that builds a canary configuration for a user service and registers it
 * through {@link CanaryController}.
 */
@Component
public class CanaryExample implements CommandLineRunner {

    private final CanaryController canaryController;

    public CanaryExample(CanaryController canaryController) {
        this.canaryController = canaryController;
    }

    /** Assembles the example configuration, registers it and prints its id. */
    @Override
    public void run(String... args) throws Exception {
        // Create canary configuration
        CanaryConfig exampleConfig = new CanaryConfig();
        exampleConfig.setId("user-service-config");
        exampleConfig.setName("User Service Canary Configuration");
        exampleConfig.setDescription("Canary deployment configuration for user service");
        exampleConfig.setTraffic(buildTrafficConfig());
        exampleConfig.setMetrics(buildMetricsConfig());
        exampleConfig.setRollout(buildRolloutConfig());

        // Register configuration
        canaryController.createConfig(exampleConfig);
        System.out.println("Canary configuration created: " + exampleConfig.getId());
    }

    /** Traffic settings: start at 5%, allow up to 100%, routing strategy "USER_ID". */
    private static CanaryConfig.TrafficConfig buildTrafficConfig() {
        CanaryConfig.TrafficConfig traffic = new CanaryConfig.TrafficConfig();
        traffic.setInitialPercentage(5);
        traffic.setMaxPercentage(100);
        traffic.setRoutingStrategy("USER_ID");
        return traffic;
    }

    /** Metric thresholds: error rate 1.0, p95 latency 300.0, throughput 80.0. */
    private static CanaryConfig.MetricsConfig buildMetricsConfig() {
        CanaryConfig.MetricsConfig metrics = new CanaryConfig.MetricsConfig();
        metrics.setErrorRateThreshold(1.0);
        metrics.setP95LatencyThreshold(300.0);
        metrics.setThroughputThreshold(80.0);
        return metrics;
    }

    /** Rollout settings: 10-minute interval, 10-point steps, auto promote/rollback, 2 successes per step. */
    private static CanaryConfig.RolloutConfig buildRolloutConfig() {
        CanaryConfig.RolloutConfig rollout = new CanaryConfig.RolloutConfig();
        rollout.setStepInterval(java.time.Duration.ofMinutes(10));
        rollout.setStepPercentage(10);
        rollout.setAutoPromote(true);
        rollout.setAutoRollback(true);
        rollout.setConsecutiveSuccessSteps(2);
        return rollout;
    }
}
Best Practices
- Start Small: Begin with low traffic percentages (1-5%)
- Monitor Closely: Watch key metrics during canary deployment
- Set Clear Thresholds: Define clear pass/fail criteria for metrics
- Automate Rollbacks: Implement automatic rollback for critical failures
- Use Multiple Metrics: Combine error rates, latency, and business metrics
- Test in Staging: Always test the canary process in staging first
- Document Procedures: Document rollback and escalation procedures
- Involve Stakeholders: Keep relevant teams informed during canary deployments
Conclusion
This comprehensive canary testing framework provides:
- Traffic Routing: Multiple strategies for routing traffic to canary instances
- Metrics Collection: Comprehensive monitoring of key performance indicators
- Automated Analysis: Real-time analysis of canary performance vs baseline
- Progressive Rollout: Automated traffic increase based on success criteria
- Safety Mechanisms: Automatic rollback for failing deployments
- REST API: Full management API for canary deployments
- Spring Integration: Seamless integration with Spring Boot applications
The framework enables safe, controlled deployment of new software versions with minimal risk to production systems.