Error Budgets and SLOs in Java Applications

Comprehensive Implementation Guide

1. Core SLO Configuration Classes

// SLOConfig.java
package com.example.slo.config;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Root SLO configuration, bound from properties under the {@code slo} prefix.
 *
 * <p>Holds one {@link ServiceLevelObjective} per service name plus the alerting and
 * export settings. Setters never leave a field {@code null}: a {@code null} argument
 * resets the field to an empty/default value so readers such as
 * {@code getAlerting().isEnabled()} cannot fail with a NullPointerException.
 */
@Component
@ConfigurationProperties(prefix = "slo")
public class SLOConfig {

    /** Objectives keyed by service name; always a concurrent map owned by this object. */
    private Map<String, ServiceLevelObjective> objectives = new ConcurrentHashMap<>();
    private AlertingConfig alerting = new AlertingConfig();
    private ExportConfig export = new ExportConfig();

    /**
     * Looks up the objective registered for a service.
     *
     * @param serviceName key used in {@code slo.objectives}; may be {@code null}
     * @return the objective, or {@code null} when none is registered (or the name is null)
     */
    public ServiceLevelObjective getObjective(String serviceName) {
        // ConcurrentHashMap.get rejects null keys, so answer "not found" instead of throwing.
        return serviceName == null ? null : objectives.get(serviceName);
    }

    /**
     * Registers or replaces the objective for a service.
     *
     * @throws NullPointerException if either argument is {@code null}
     *         (the backing ConcurrentHashMap does not accept null keys or values)
     */
    public void addObjective(String serviceName, ServiceLevelObjective objective) {
        objectives.put(serviceName, objective);
    }

    public Map<String, ServiceLevelObjective> getObjectives() {
        return objectives;
    }

    /**
     * Replaces all objectives with a concurrent copy of the given map.
     *
     * @param objectives new objectives; {@code null} clears the configuration
     */
    public void setObjectives(Map<String, ServiceLevelObjective> objectives) {
        this.objectives = concurrentCopy(objectives);
    }

    public AlertingConfig getAlerting() {
        return alerting;
    }

    /** @param alerting new alerting settings; {@code null} restores the defaults */
    public void setAlerting(AlertingConfig alerting) {
        this.alerting = alerting != null ? alerting : new AlertingConfig();
    }

    public ExportConfig getExport() {
        return export;
    }

    /** @param export new export settings; {@code null} restores the defaults */
    public void setExport(ExportConfig export) {
        this.export = export != null ? export : new ExportConfig();
    }

    /**
     * Copies {@code source} into a fresh ConcurrentHashMap so callers cannot swap the
     * thread-safe map for a non-concurrent one. Entries with a null key or value are
     * skipped because ConcurrentHashMap cannot store them.
     */
    private static <V> Map<String, V> concurrentCopy(Map<String, V> source) {
        Map<String, V> copy = new ConcurrentHashMap<>();
        if (source != null) {
            source.forEach((key, value) -> {
                if (key != null && value != null) {
                    copy.put(key, value);
                }
            });
        }
        return copy;
    }

    /** A single availability objective for one service. */
    public static class ServiceLevelObjective {

        private String name;
        private String description;
        /** Target availability as a fraction, e.g. 0.999 for 99.9%. */
        private double target;
        private Duration measurementWindow = Duration.ofDays(30);
        private Duration rollingWindow = Duration.ofHours(1);
        private Map<String, String> labels = new ConcurrentHashMap<>();
        private BurnRateConfig burnRate = new BurnRateConfig();

        /**
         * @return the allowed error fraction, {@code 1.0 - target}
         */
        public double getErrorBudget() {
            return 1.0 - target;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getDescription() {
            return description;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public double getTarget() {
            return target;
        }

        /**
         * Sets the availability target.
         *
         * @param target fraction in {@code [0.0, 1.0]}, e.g. 0.999
         * @throws IllegalArgumentException if the value is NaN or outside that range;
         *         a percentage such as 99.9 would otherwise yield a negative error budget
         */
        public void setTarget(double target) {
            // Written as a positive range test so that NaN is rejected as well.
            if (!(target >= 0.0 && target <= 1.0)) {
                throw new IllegalArgumentException(
                        "SLO target must be a fraction between 0.0 and 1.0 (e.g. 0.999), got: " + target);
            }
            this.target = target;
        }

        public Duration getMeasurementWindow() {
            return measurementWindow;
        }

        public void setMeasurementWindow(Duration measurementWindow) {
            this.measurementWindow = measurementWindow;
        }

        public Duration getRollingWindow() {
            return rollingWindow;
        }

        public void setRollingWindow(Duration rollingWindow) {
            this.rollingWindow = rollingWindow;
        }

        public Map<String, String> getLabels() {
            return labels;
        }

        /** @param labels new labels; {@code null} clears them */
        public void setLabels(Map<String, String> labels) {
            this.labels = concurrentCopy(labels);
        }

        public BurnRateConfig getBurnRate() {
            return burnRate;
        }

        /** @param burnRate new thresholds; {@code null} restores the defaults */
        public void setBurnRate(BurnRateConfig burnRate) {
            this.burnRate = burnRate != null ? burnRate : new BurnRateConfig();
        }
    }

    /**
     * Alert thresholds, each expressed as the fraction of the error budget consumed
     * (0.5 = 50% consumed), plus the short/long evaluation windows.
     */
    public static class BurnRateConfig {

        private double warningThreshold = 0.5;
        private double criticalThreshold = 0.8;
        private double emergencyThreshold = 0.95;
        private Duration shortWindow = Duration.ofHours(1);
        private Duration longWindow = Duration.ofHours(6);

        public double getWarningThreshold() {
            return warningThreshold;
        }

        public void setWarningThreshold(double warningThreshold) {
            this.warningThreshold = warningThreshold;
        }

        public double getCriticalThreshold() {
            return criticalThreshold;
        }

        public void setCriticalThreshold(double criticalThreshold) {
            this.criticalThreshold = criticalThreshold;
        }

        public double getEmergencyThreshold() {
            return emergencyThreshold;
        }

        public void setEmergencyThreshold(double emergencyThreshold) {
            this.emergencyThreshold = emergencyThreshold;
        }

        public Duration getShortWindow() {
            return shortWindow;
        }

        public void setShortWindow(Duration shortWindow) {
            this.shortWindow = shortWindow;
        }

        public Duration getLongWindow() {
            return longWindow;
        }

        public void setLongWindow(Duration longWindow) {
            this.longWindow = longWindow;
        }
    }

    /** Settings for alert evaluation and webhook delivery. */
    public static class AlertingConfig {

        private boolean enabled = true;
        private String webhookUrl;
        private Duration evaluationInterval = Duration.ofMinutes(1);
        private Map<String, String> alertLabels = new ConcurrentHashMap<>();

        public boolean isEnabled() {
            return enabled;
        }

        public void setEnabled(boolean enabled) {
            this.enabled = enabled;
        }

        public String getWebhookUrl() {
            return webhookUrl;
        }

        public void setWebhookUrl(String webhookUrl) {
            this.webhookUrl = webhookUrl;
        }

        public Duration getEvaluationInterval() {
            return evaluationInterval;
        }

        public void setEvaluationInterval(Duration evaluationInterval) {
            this.evaluationInterval = evaluationInterval;
        }

        public Map<String, String> getAlertLabels() {
            return alertLabels;
        }

        /** @param alertLabels new labels; {@code null} clears them */
        public void setAlertLabels(Map<String, String> alertLabels) {
            this.alertLabels = concurrentCopy(alertLabels);
        }
    }

    /** Settings for metrics export. */
    public static class ExportConfig {

        private boolean prometheusEnabled = true;
        private String prometheusJobName = "slo-monitor";
        private boolean metricsEnabled = true;
        private Duration exportInterval = Duration.ofMinutes(1);

        public boolean isPrometheusEnabled() {
            return prometheusEnabled;
        }

        public void setPrometheusEnabled(boolean prometheusEnabled) {
            this.prometheusEnabled = prometheusEnabled;
        }

        public String getPrometheusJobName() {
            return prometheusJobName;
        }

        public void setPrometheusJobName(String prometheusJobName) {
            this.prometheusJobName = prometheusJobName;
        }

        public boolean isMetricsEnabled() {
            return metricsEnabled;
        }

        public void setMetricsEnabled(boolean metricsEnabled) {
            this.metricsEnabled = metricsEnabled;
        }

        public Duration getExportInterval() {
            return exportInterval;
        }

        public void setExportInterval(Duration exportInterval) {
            this.exportInterval = exportInterval;
        }
    }
}

2. SLO Tracker and Metrics Collector

// SLOTracker.java
package com.example.slo.core;
import com.example.slo.config.SLOConfig;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.time.Instant;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Tracks request outcomes per service against the objectives in {@link SLOConfig}
 * and publishes the results as Micrometer meters.
 *
 * <p>One {@link ServiceTracker} is created for every objective present in the
 * configuration when this bean is constructed; objectives added afterwards are not
 * picked up by this class.
 */
@Component
public class SLOTracker {

    private static final Logger logger = LoggerFactory.getLogger(SLOTracker.class);

    private final SLOConfig sloConfig;
    private final MeterRegistry meterRegistry;
    private final Map<String, ServiceTracker> serviceTrackers = new ConcurrentHashMap<>();

    public SLOTracker(SLOConfig sloConfig, MeterRegistry meterRegistry) {
        this.sloConfig = sloConfig;
        this.meterRegistry = meterRegistry;
        initializeTrackers();
    }

    /** Creates one tracker per configured objective. */
    private void initializeTrackers() {
        sloConfig.getObjectives().forEach((serviceName, objective) -> {
            serviceTrackers.put(serviceName, new ServiceTracker(serviceName, objective, meterRegistry));
            logger.info("Initialized SLO tracker for service: {}", serviceName);
        });
    }

    /** Records one successful request; logs a warning when the service has no tracker. */
    public void recordSuccess(String serviceName) {
        ServiceTracker tracker = serviceTrackers.get(serviceName);
        if (tracker != null) {
            tracker.recordSuccess();
        } else {
            logger.warn("No SLO tracker found for service: {}", serviceName);
        }
    }

    /**
     * Records one failed request; logs a warning when the service has no tracker.
     *
     * @param errorType free-form error category used for the per-type counts
     */
    public void recordError(String serviceName, String errorType) {
        ServiceTracker tracker = serviceTrackers.get(serviceName);
        if (tracker != null) {
            tracker.recordError(errorType);
        } else {
            logger.warn("No SLO tracker found for service: {}", serviceName);
        }
    }

    /** Records a request duration in milliseconds; silently ignored for unknown services. */
    public void recordLatency(String serviceName, long durationMs) {
        ServiceTracker tracker = serviceTrackers.get(serviceName);
        if (tracker != null) {
            tracker.recordLatency(durationMs);
        }
    }

    /** @return the current status of the service, or {@code null} when it has no tracker */
    public SLOStatus getCurrentStatus(String serviceName) {
        ServiceTracker tracker = serviceTrackers.get(serviceName);
        return tracker != null ? tracker.getCurrentStatus() : null;
    }

    /** @return a new map with a status snapshot for every tracked service */
    public Map<String, SLOStatus> getAllStatuses() {
        Map<String, SLOStatus> statuses = new ConcurrentHashMap<>();
        serviceTrackers.forEach((serviceName, tracker) ->
                statuses.put(serviceName, tracker.getCurrentStatus()));
        return statuses;
    }

    // NOTE(review): this placeholder reads "slo.export.interval", while ExportConfig binds
    // "slo.export.export-interval" — confirm which key is meant to drive the schedule.
    @Scheduled(fixedRateString = "${slo.export.interval:60000}")
    public void exportMetrics() {
        if (sloConfig.getExport().isMetricsEnabled()) {
            serviceTrackers.values().forEach(ServiceTracker::exportMetrics);
        }
    }

    /**
     * Counts successes and errors for one service inside a rolling window and derives
     * availability, error-budget and burn-rate figures from those counts.
     *
     * <p>The counters are reset when the window elapses. The reset is not atomic with
     * respect to concurrent recordings, so a few events arriving during a rollover may
     * be attributed to the wrong window.
     */
    public static class ServiceTracker {

        private final String serviceName;
        private final SLOConfig.ServiceLevelObjective objective;
        private final MeterRegistry meterRegistry;
        private final AtomicLong totalRequests = new AtomicLong(0);
        private final AtomicLong successfulRequests = new AtomicLong(0);
        private final AtomicLong errorRequests = new AtomicLong(0);
        private final AtomicReference<Instant> windowStartTime = new AtomicReference<>(Instant.now());
        private final Map<String, AtomicLong> errorCountsByType = new ConcurrentHashMap<>();

        // Micrometer meters
        private final Counter successCounter;
        private final Counter errorCounter;
        private final Timer latencyTimer;
        private final Gauge errorBudgetGauge;
        private final Gauge burnRateGauge;

        public ServiceTracker(String serviceName, SLOConfig.ServiceLevelObjective objective,
                              MeterRegistry meterRegistry) {
            this.serviceName = serviceName;
            this.objective = objective;
            this.meterRegistry = meterRegistry;

            this.successCounter = Counter.builder("slo.requests.total")
                    .tag("service", serviceName)
                    .tag("status", "success")
                    .register(meterRegistry);
            this.errorCounter = Counter.builder("slo.requests.total")
                    .tag("service", serviceName)
                    .tag("status", "error")
                    .register(meterRegistry);
            this.latencyTimer = Timer.builder("slo.request.duration")
                    .tag("service", serviceName)
                    .register(meterRegistry);
            // Gauge.builder takes the observed object and value function up front;
            // register(...) only takes the registry.
            this.errorBudgetGauge = Gauge.builder("slo.error_budget.remaining", this,
                            tracker -> tracker.getCurrentStatus().getRemainingErrorBudget())
                    .tag("service", serviceName)
                    .register(meterRegistry);
            this.burnRateGauge = Gauge.builder("slo.burn_rate", this,
                            tracker -> tracker.getCurrentStatus().getBurnRate())
                    .tag("service", serviceName)
                    .register(meterRegistry);
        }

        /** Counts one successful request in the current window. */
        public void recordSuccess() {
            // Roll the window first so the event that crosses the boundary is counted in
            // the new window instead of being wiped by the reset.
            resetWindowIfNeeded();
            totalRequests.incrementAndGet();
            successfulRequests.incrementAndGet();
            successCounter.increment();
        }

        /**
         * Counts one failed request in the current window.
         *
         * @param errorType key for the per-type error count; must not be {@code null}
         *        because the counts live in a ConcurrentHashMap
         */
        public void recordError(String errorType) {
            resetWindowIfNeeded();
            totalRequests.incrementAndGet();
            errorRequests.incrementAndGet();
            errorCounter.increment();
            errorCountsByType.computeIfAbsent(errorType, k -> new AtomicLong(0)).incrementAndGet();
        }

        /** Records a request duration given in milliseconds. */
        public void recordLatency(long durationMs) {
            latencyTimer.record(java.time.Duration.ofMillis(durationMs));
        }

        /**
         * Builds a status snapshot from the current window's counters.
         *
         * <p>With no requests in the window the service is reported as fully available
         * with its whole error budget remaining. Otherwise the consumed budget equals the
         * window's error rate and the remaining budget is clamped at zero.
         */
        public SLOStatus getCurrentStatus() {
            resetWindowIfNeeded();
            long total = totalRequests.get();
            long errors = errorRequests.get();
            long successes = successfulRequests.get();
            double totalBudget = objective.getErrorBudget();

            if (total == 0) {
                return new SLOStatus(serviceName, objective, 1.0, 0.0, 0.0,
                        totalBudget, totalBudget, 0.0, Instant.now());
            }

            double actualAvailability = (double) successes / total;
            double errorRate = (double) errors / total;
            double consumedBudget = errorRate;
            double remainingBudget = Math.max(0, totalBudget - consumedBudget);
            double burnRate = calculateBurnRate();
            return new SLOStatus(serviceName, objective, actualAvailability, errorRate,
                    consumedBudget, totalBudget, remainingBudget, burnRate, Instant.now());
        }

        /**
         * Ratio of the window's error rate to the allowed error rate (the error budget).
         * A value of 1.0 means errors arrive exactly as fast as the budget allows.
         *
         * @return 0.0 when there is no traffic or no errors against a zero budget;
         *         positive infinity when errors occur although the budget is zero or negative
         */
        private double calculateBurnRate() {
            long totalInWindow = totalRequests.get();
            if (totalInWindow == 0) {
                return 0.0;
            }
            double actualErrorRate = (double) errorRequests.get() / totalInWindow;
            double allowedErrorRate = objective.getErrorBudget();
            if (allowedErrorRate <= 0.0) {
                // A target of 1.0 leaves no budget; avoid 0/0 = NaN for the error-free case.
                return actualErrorRate > 0.0 ? Double.POSITIVE_INFINITY : 0.0;
            }
            return actualErrorRate / allowedErrorRate;
        }

        /** Starts a new window and zeroes the counters once the rolling window has elapsed. */
        private void resetWindowIfNeeded() {
            Instant now = Instant.now();
            Instant windowStart = windowStartTime.get();
            // Fully qualified: this file imports java.time.Instant but not java.time.Duration.
            java.time.Duration windowDuration = objective.getRollingWindow();
            if (windowStart.plus(windowDuration).isBefore(now)) {
                // The compare-and-set lets exactly one thread perform the reset.
                if (windowStartTime.compareAndSet(windowStart, now)) {
                    totalRequests.set(0);
                    successfulRequests.set(0);
                    errorRequests.set(0);
                    errorCountsByType.clear();
                    logger.debug("Reset SLO tracking window for service: {}", serviceName);
                }
            }
        }

        /**
         * Registers the availability and error-rate gauges for this service.
         *
         * <p>The gauges observe this tracker and compute a fresh status on every read, so
         * they never report a stale snapshot. Calling this repeatedly is harmless: the
         * registry is expected to hand back the already-registered meter for an
         * identical name and tag set.
         */
        public void exportMetrics() {
            Gauge.builder("slo.availability.current", this,
                            tracker -> tracker.getCurrentStatus().getActualAvailability())
                    .tag("service", serviceName)
                    .register(meterRegistry);
            Gauge.builder("slo.error_rate.current", this,
                            tracker -> tracker.getCurrentStatus().getErrorRate())
                    .tag("service", serviceName)
                    .register(meterRegistry);
        }
    }

    /** Immutable snapshot of one service's SLO figures at a point in time. */
    public static class SLOStatus {

        private final String serviceName;
        private final SLOConfig.ServiceLevelObjective objective;
        private final double actualAvailability;
        private final double errorRate;
        private final double consumedErrorBudget;
        private final double totalErrorBudget;
        private final double remainingErrorBudget;
        private final double burnRate;
        private final Instant timestamp;

        public SLOStatus(String serviceName, SLOConfig.ServiceLevelObjective objective,
                         double actualAvailability, double errorRate, double consumedErrorBudget,
                         double totalErrorBudget, double remainingErrorBudget, double burnRate,
                         Instant timestamp) {
            this.serviceName = serviceName;
            this.objective = objective;
            this.actualAvailability = actualAvailability;
            this.errorRate = errorRate;
            this.consumedErrorBudget = consumedErrorBudget;
            this.totalErrorBudget = totalErrorBudget;
            this.remainingErrorBudget = remainingErrorBudget;
            this.burnRate = burnRate;
            this.timestamp = timestamp;
        }

        /** @return {@code true} when availability is at or above {@code 1 - totalErrorBudget} */
        public boolean isWithinSLO() {
            return actualAvailability >= (1.0 - totalErrorBudget);
        }

        /**
         * Maps the consumed share of the error budget onto an alert level using the
         * objective's thresholds, checked from most to least severe.
         */
        public AlertLevel getAlertLevel() {
            if (totalErrorBudget <= 0.0) {
                // No budget to divide by: any consumption at all exhausts it.
                return consumedErrorBudget > 0.0 ? AlertLevel.EMERGENCY : AlertLevel.NORMAL;
            }
            SLOConfig.BurnRateConfig thresholds = objective.getBurnRate();
            double budgetConsumptionRatio = consumedErrorBudget / totalErrorBudget;
            if (budgetConsumptionRatio >= thresholds.getEmergencyThreshold()) {
                return AlertLevel.EMERGENCY;
            } else if (budgetConsumptionRatio >= thresholds.getCriticalThreshold()) {
                return AlertLevel.CRITICAL;
            } else if (budgetConsumptionRatio >= thresholds.getWarningThreshold()) {
                return AlertLevel.WARNING;
            } else {
                return AlertLevel.NORMAL;
            }
        }

        public String getServiceName() {
            return serviceName;
        }

        public double getActualAvailability() {
            return actualAvailability;
        }

        public double getErrorRate() {
            return errorRate;
        }

        public double getConsumedErrorBudget() {
            return consumedErrorBudget;
        }

        public double getTotalErrorBudget() {
            return totalErrorBudget;
        }

        public double getRemainingErrorBudget() {
            return remainingErrorBudget;
        }

        public double getBurnRate() {
            return burnRate;
        }

        public Instant getTimestamp() {
            return timestamp;
        }

        public SLOConfig.ServiceLevelObjective getObjective() {
            return objective;
        }

        @Override
        public String toString() {
            return String.format(
                    "SLOStatus{service=%s, availability=%.4f, errorRate=%.4f, remainingBudget=%.4f, burnRate=%.2f}",
                    serviceName, actualAvailability, errorRate, remainingErrorBudget, burnRate);
        }
    }

    /** Alert severities in increasing order. */
    public enum AlertLevel {
        NORMAL, WARNING, CRITICAL, EMERGENCY
    }
}

3. SLO Alert Manager

// SLOAlertManager.java
package com.example.slo.alert;
import com.example.slo.core.SLOTracker;
import com.example.slo.config.SLOConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Periodically evaluates every tracked service's SLO status and notifies on alert-level
 * transitions and on levels that stay elevated.
 */
@Component
public class SLOAlertManager {

    private static final Logger logger = LoggerFactory.getLogger(SLOAlertManager.class);

    private final SLOTracker sloTracker;
    private final SLOConfig sloConfig;
    private final AlertNotifier alertNotifier;
    /** Alert level seen for each service during the previous evaluation. */
    private final Map<String, SLOTracker.AlertLevel> lastAlertLevels = new ConcurrentHashMap<>();

    public SLOAlertManager(SLOTracker sloTracker, SLOConfig sloConfig, AlertNotifier alertNotifier) {
        this.sloTracker = sloTracker;
        this.sloConfig = sloConfig;
        this.alertNotifier = alertNotifier;
    }

    /**
     * Runs one evaluation pass; does nothing when alerting is disabled.
     *
     * <p>Per service, exactly one notification path is taken per pass: a transition alert
     * when the level differs from the previous pass (or the service is seen for the first
     * time), otherwise an ongoing alert while the level remains above NORMAL. Sending both
     * in the same pass would deliver two webhook calls for a single event.
     */
    // NOTE(review): the placeholder is resolved to a raw string. A configured value such as
    // "1m" presumably needs a Spring version that accepts simple duration syntax here —
    // confirm, or configure milliseconds / ISO-8601 (PT1M).
    @Scheduled(fixedRateString = "${slo.alerting.evaluation-interval:60000}")
    public void evaluateSLOAlerts() {
        if (!sloConfig.getAlerting().isEnabled()) {
            return;
        }
        Map<String, SLOTracker.SLOStatus> statuses = sloTracker.getAllStatuses();
        for (Map.Entry<String, SLOTracker.SLOStatus> entry : statuses.entrySet()) {
            String serviceName = entry.getKey();
            SLOTracker.SLOStatus status = entry.getValue();
            SLOTracker.AlertLevel currentLevel = status.getAlertLevel();
            SLOTracker.AlertLevel lastLevel = lastAlertLevels.get(serviceName);

            boolean levelChanged = lastLevel == null || currentLevel != lastLevel;
            if (levelChanged) {
                handleAlertLevelChange(serviceName, status, currentLevel, lastLevel);
                lastAlertLevels.put(serviceName, currentLevel);
            } else if (currentLevel != SLOTracker.AlertLevel.NORMAL) {
                sendOngoingAlert(serviceName, status, currentLevel);
            }
        }
    }

    /**
     * Sends the notification matching the new level and triggers the follow-up actions
     * for CRITICAL and EMERGENCY. A recovery alert is sent only when NORMAL is reached
     * from a known non-NORMAL level.
     *
     * @param lastLevel the previous level, or {@code null} on the first evaluation
     */
    private void handleAlertLevelChange(String serviceName, SLOTracker.SLOStatus status,
                                        SLOTracker.AlertLevel currentLevel,
                                        SLOTracker.AlertLevel lastLevel) {
        String message = buildAlertMessage(serviceName, status, currentLevel, lastLevel);
        switch (currentLevel) {
            case EMERGENCY:
                alertNotifier.sendEmergencyAlert(serviceName, message, status);
                triggerEmergencyActions(serviceName, status);
                break;
            case CRITICAL:
                alertNotifier.sendCriticalAlert(serviceName, message, status);
                triggerCriticalActions(serviceName, status);
                break;
            case WARNING:
                alertNotifier.sendWarningAlert(serviceName, message, status);
                break;
            case NORMAL:
                if (lastLevel != null && lastLevel != SLOTracker.AlertLevel.NORMAL) {
                    alertNotifier.sendRecoveryAlert(serviceName,
                            String.format("SLO recovered for service %s. Current availability: %.4f",
                                    serviceName, status.getActualAvailability()), status);
                }
                break;
        }
        logger.info("SLO Alert Level Changed - Service: {}, From: {}, To: {}, Message: {}",
                serviceName, lastLevel, currentLevel, message);
    }

    /** Sends a reminder for a service whose level is unchanged and still above NORMAL. */
    private void sendOngoingAlert(String serviceName, SLOTracker.SLOStatus status,
                                  SLOTracker.AlertLevel level) {
        String message = String.format(
                "Ongoing SLO alert for service %s. Availability: %.4f, Error Budget Remaining: %.4f, Burn Rate: %.2f",
                serviceName, status.getActualAvailability(), status.getRemainingErrorBudget(), status.getBurnRate());
        alertNotifier.sendOngoingAlert(serviceName, message, status, level);
    }

    /** Formats the transition message; the target shown is {@code 1 - totalErrorBudget}. */
    private String buildAlertMessage(String serviceName, SLOTracker.SLOStatus status,
                                     SLOTracker.AlertLevel currentLevel, SLOTracker.AlertLevel lastLevel) {
        return String.format(
                "SLO Alert for service %s - Level: %s -> %s | " +
                "Availability: %.4f (target: %.4f) | " +
                "Error Budget Remaining: %.4f | Burn Rate: %.2f",
                serviceName, lastLevel, currentLevel, status.getActualAvailability(),
                (1 - status.getTotalErrorBudget()), status.getRemainingErrorBudget(), status.getBurnRate());
    }

    /** Hook for emergency responses; currently only logs. */
    private void triggerEmergencyActions(String serviceName, SLOTracker.SLOStatus status) {
        logger.warn("Triggering emergency actions for service: {}", serviceName);
        // Implement emergency actions:
        // - Page on-call engineer
        // - Scale up resources
        // - Enable circuit breakers
        // - Disable non-critical features
    }

    /** Hook for critical responses; currently only logs. */
    private void triggerCriticalActions(String serviceName, SLOTracker.SLOStatus status) {
        logger.warn("Triggering critical actions for service: {}", serviceName);
        // Implement critical actions:
        // - Notify engineering team
        // - Increase monitoring frequency
        // - Prepare rollback procedures
    }
}

4. Alert Notifier Implementation

// AlertNotifier.java
package com.example.slo.alert;
import com.example.slo.core.SLOTracker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.*;
import org.springframework.stereotype.Component;
import org.springframework.web.client.RestTemplate;
import java.util.HashMap;
import java.util.Map;
@Component
public class AlertNotifier {
private static final Logger logger = LoggerFactory.getLogger(AlertNotifier.class);
private final RestTemplate restTemplate;
private final String webhookUrl;
public AlertNotifier(RestTemplate restTemplate, String webhookUrl) {
this.restTemplate = restTemplate;
this.webhookUrl = webhookUrl;
}
public void sendEmergencyAlert(String serviceName, String message, SLOTracker.SLOStatus status) {
Map<String, Object> alertPayload = createAlertPayload("EMERGENCY", serviceName, message, status);
sendWebhookAlert(alertPayload);
logger.error("EMERGENCY SLO ALERT - Service: {}, Message: {}", serviceName, message);
}
public void sendCriticalAlert(String serviceName, String message, SLOTracker.SLOStatus status) {
Map<String, Object> alertPayload = createAlertPayload("CRITICAL", serviceName, message, status);
sendWebhookAlert(alertPayload);
logger.error("CRITICAL SLO ALERT - Service: {}, Message: {}", serviceName, message);
}
public void sendWarningAlert(String serviceName, String message, SLOTracker.SLOStatus status) {
Map<String, Object> alertPayload = createAlertPayload("WARNING", serviceName, message, status);
sendWebhookAlert(alertPayload);
logger.warn("WARNING SLO ALERT - Service: {}, Message: {}", serviceName, message);
}
public void sendRecoveryAlert(String serviceName, String message, SLOTracker.SLOStatus status) {
Map<String, Object> alertPayload = createAlertPayload("RECOVERY", serviceName, message, status);
sendWebhookAlert(alertPayload);
logger.info("SLO RECOVERY - Service: {}, Message: {}", serviceName, message);
}
public void sendOngoingAlert(String serviceName, String message, SLOTracker.SLOStatus status, 
SLOTracker.AlertLevel level) {
Map<String, Object> alertPayload = createAlertPayload(level.name(), serviceName, message, status);
sendWebhookAlert(alertPayload);
}
private Map<String, Object> createAlertPayload(String severity, String serviceName, 
String message, SLOTracker.SLOStatus status) {
Map<String, Object> payload = new HashMap<>();
payload.put("severity", severity);
payload.put("service", serviceName);
payload.put("message", message);
payload.put("timestamp", status.getTimestamp().toString());
Map<String, Object> metrics = new HashMap<>();
metrics.put("availability", status.getActualAvailability());
metrics.put("error_rate", status.getErrorRate());
metrics.put("error_budget_remaining", status.getRemainingErrorBudget());
metrics.put("burn_rate", status.getBurnRate());
metrics.put("slo_target", 1 - status.getTotalErrorBudget());
payload.put("metrics", metrics);
payload.put("labels", status.getObjective().getLabels());
return payload;
}
private void sendWebhookAlert(Map<String, Object> payload) {
if (webhookUrl == null || webhookUrl.isEmpty()) {
logger.debug("No webhook URL configured, skipping alert notification");
return;
}
try {
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
HttpEntity<Map<String, Object>> request = new HttpEntity<>(payload, headers);
ResponseEntity<String> response = restTemplate.exchange(
webhookUrl, HttpMethod.POST, request, String.class);
if (response.getStatusCode().is2xxSuccessful()) {
logger.debug("Alert notification sent successfully");
} else {
logger.warn("Failed to send alert notification: {}", response.getStatusCode());
}
} catch (Exception e) {
logger.error("Failed to send alert notification to webhook", e);
}
}
}

5. Spring Boot Auto-Configuration

// SLOAutoConfiguration.java
package com.example.slo.config;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.web.client.RestTemplateBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.client.RestTemplate;
@Configuration
public class SLOAutoConfiguration {
@Bean
@ConditionalOnMissingBean
public SLOConfig sloConfig() {
return new SLOConfig();
}
@Bean
@ConditionalOnMissingBean
public RestTemplate restTemplate(RestTemplateBuilder builder) {
return builder.build();
}
}

6. Aspect-Oriented SLO Monitoring

// SLOMonitoringAspect.java
package com.example.slo.aspect;
import com.example.slo.core.SLOTracker;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.reflect.MethodSignature;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.lang.reflect.Method;
@Aspect
@Component
public class SLOMonitoringAspect {
private static final Logger logger = LoggerFactory.getLogger(SLOMonitoringAspect.class);
private final SLOTracker sloTracker;
public SLOMonitoringAspect(SLOTracker sloTracker) {
this.sloTracker = sloTracker;
}
@Around("@annotation(MonitorSLO)")
public Object monitorSLO(ProceedingJoinPoint joinPoint) throws Throwable {
Method method = ((MethodSignature) joinPoint.getSignature()).getMethod();
MonitorSLO annotation = method.getAnnotation(MonitorSLO.class);
String serviceName = annotation.service();
long startTime = System.currentTimeMillis();
boolean success = false;
try {
Object result = joinPoint.proceed();
success = true;
return result;
} catch (Exception e) {
sloTracker.recordError(serviceName, e.getClass().getSimpleName());
logger.debug("Recorded error for service {}: {}", serviceName, e.getMessage());
throw e;
} finally {
long duration = System.currentTimeMillis() - startTime;
if (success) {
sloTracker.recordSuccess(serviceName);
}
sloTracker.recordLatency(serviceName, duration);
logger.debug("SLO monitoring - Service: {}, Success: {}, Duration: {}ms", 
serviceName, success, duration);
}
}
}
// MonitorSLO Annotation
package com.example.slo.aspect;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Marks a method whose invocations are counted against a service's SLO.
 *
 * <p>Retained at runtime so it can be read reflectively, and applicable to methods only.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface MonitorSLO {

    /** Optional operation name; defaults to the empty string. */
    String operation() default "";

    /** Name of the service the annotated method is attributed to (required). */
    String service();
}

7. REST Controller for SLO Management

// SLOController.java
package com.example.slo.controller;
import com.example.slo.core.SLOTracker;
import com.example.slo.config.SLOConfig;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.Map;
/**
 * HTTP endpoints under {@code /api/slo} for reading SLO status and configuration and
 * for recording outcomes manually.
 */
@RestController
@RequestMapping("/api/slo")
public class SLOController {

    private final SLOTracker sloTracker;
    private final SLOConfig sloConfig;

    public SLOController(SLOTracker sloTracker, SLOConfig sloConfig) {
        this.sloTracker = sloTracker;
        this.sloConfig = sloConfig;
    }

    /** Returns the current status of every tracked service, keyed by service name. */
    @GetMapping("/status")
    public ResponseEntity<Map<String, SLOTracker.SLOStatus>> getAllStatuses() {
        Map<String, SLOTracker.SLOStatus> all = sloTracker.getAllStatuses();
        return ResponseEntity.ok(all);
    }

    /** Returns one service's status, or 404 when the tracker has none for that name. */
    @GetMapping("/status/{serviceName}")
    public ResponseEntity<SLOTracker.SLOStatus> getServiceStatus(@PathVariable String serviceName) {
        SLOTracker.SLOStatus found = sloTracker.getCurrentStatus(serviceName);
        return found != null
                ? ResponseEntity.ok(found)
                : ResponseEntity.notFound().build();
    }

    /** Returns the SLO configuration object as the response body. */
    // NOTE(review): SLOConfig exposes alerting.webhookUrl through its getters, so it is
    // presumably part of this response — confirm the endpoint is access-controlled.
    @GetMapping("/config")
    public ResponseEntity<SLOConfig> getConfig() {
        return ResponseEntity.ok(sloConfig);
    }

    /** Records one success for the service and answers 202 Accepted. */
    @PostMapping("/record/success/{serviceName}")
    public ResponseEntity<Void> recordSuccess(@PathVariable String serviceName) {
        sloTracker.recordSuccess(serviceName);
        return ResponseEntity.accepted().build();
    }

    /** Records one error of the given type for the service and answers 202 Accepted. */
    @PostMapping("/record/error/{serviceName}")
    public ResponseEntity<Void> recordError(@PathVariable String serviceName,
                                            @RequestParam String errorType) {
        sloTracker.recordError(serviceName, errorType);
        return ResponseEntity.accepted().build();
    }

    /**
     * Summarises how many tracked services are currently within their SLO.
     *
     * @return a map with {@code totalServices}, {@code healthyServices},
     *         {@code unhealthyServices} and {@code timestamp}
     */
    @GetMapping("/health")
    public ResponseEntity<Map<String, Object>> getHealth() {
        Map<String, SLOTracker.SLOStatus> statuses = sloTracker.getAllStatuses();

        long healthyServices = 0L;
        for (SLOTracker.SLOStatus current : statuses.values()) {
            if (current.isWithinSLO()) {
                healthyServices++;
            }
        }
        int totalServices = statuses.size();
        long unhealthyServices = totalServices - healthyServices;

        Map<String, Object> health = Map.of(
                "totalServices", totalServices,
                "healthyServices", healthyServices,
                "unhealthyServices", unhealthyServices,
                "timestamp", java.time.Instant.now()
        );
        return ResponseEntity.ok(health);
    }
}

8. Application Configuration

# application.yml
# Nesting restored: per-service objectives live under slo.objectives.<service-name>;
# alerting and export are siblings of objectives; management is a separate top-level key.
slo:
  objectives:
    user-service:
      name: "User Service"
      description: "User management service SLO"
      target: 0.999  # 99.9% availability
      measurement-window: 30d
      rolling-window: 1h
      labels:
        team: "backend"
        criticality: "high"
      burn-rate:
        warning-threshold: 0.5
        critical-threshold: 0.8
        emergency-threshold: 0.95
        short-window: 1h
        long-window: 6h
    payment-service:
      name: "Payment Service"
      description: "Payment processing service SLO"
      target: 0.9999  # 99.99% availability
      measurement-window: 30d
      rolling-window: 1h
      labels:
        team: "payments"
        criticality: "critical"
      burn-rate:
        warning-threshold: 0.3
        critical-threshold: 0.6
        emergency-threshold: 0.9
        short-window: 1h
        long-window: 6h
  alerting:
    enabled: true
    webhook-url: "https://hooks.slack.com/services/your-webhook-url"
    evaluation-interval: 1m
    alert-labels:
      environment: "production"
      region: "us-east-1"
  export:
    prometheus-enabled: true
    prometheus-job-name: "slo-monitor"
    metrics-enabled: true
    export-interval: 1m
management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus
  endpoint:
    metrics:
      enabled: true
    prometheus:
      enabled: true

9. Example Service Usage

// ExampleService.java
package com.example.slo.service;
import com.example.slo.aspect.MonitorSLO;
import org.springframework.stereotype.Service;
/**
 * Sample service showing how {@link MonitorSLO} attributes method calls to a service's
 * SLO. The nested types are minimal stand-ins for real domain classes.
 */
@Service
public class ExampleService {

    /** Returns a placeholder user carrying the requested id. */
    @MonitorSLO(service = "user-service", operation = "getUser")
    public User getUser(String userId) {
        // Business logic here
        User placeholder = new User(userId, "John Doe");
        return placeholder;
    }

    /**
     * Returns the given user unchanged after checking that an email is set.
     *
     * @throws IllegalArgumentException if the user has no email
     */
    @MonitorSLO(service = "user-service", operation = "createUser")
    public User createUser(User user) {
        // Business logic here
        boolean hasEmail = user.getEmail() != null;
        if (hasEmail) {
            return user;
        }
        throw new IllegalArgumentException("Email is required");
    }

    /**
     * Returns a success result unless the amount is zero or negative.
     *
     * @throws IllegalArgumentException if {@code request.getAmount() <= 0}
     */
    @MonitorSLO(service = "payment-service", operation = "processPayment")
    public PaymentResult processPayment(PaymentRequest request) {
        // Business logic here
        boolean rejected = request.getAmount() <= 0;
        if (!rejected) {
            return new PaymentResult("success", "Payment processed");
        }
        throw new IllegalArgumentException("Invalid payment amount");
    }

    // Domain classes

    /** User with an id and name fixed at construction and an optional, settable email. */
    public static class User {

        private final String id;
        private final String name;
        private String email;

        public User(String id, String name) {
            this.name = name;
            this.id = id;
        }

        public String getId() {
            return this.id;
        }

        public String getName() {
            return this.name;
        }

        public String getEmail() {
            return this.email;
        }

        public void setEmail(String email) {
            this.email = email;
        }
    }

    /** Mutable payment request holding an amount and a currency. */
    public static class PaymentRequest {

        private String currency;
        private double amount;

        public double getAmount() {
            return this.amount;
        }

        public void setAmount(double amount) {
            this.amount = amount;
        }

        public String getCurrency() {
            return this.currency;
        }

        public void setCurrency(String currency) {
            this.currency = currency;
        }
    }

    /** Outcome of a payment attempt: a status code and a human-readable message. */
    public static class PaymentResult {

        private final String status;
        private final String message;

        public PaymentResult(String status, String message) {
            this.message = message;
            this.status = status;
        }

        public String getStatus() {
            return this.status;
        }

        public String getMessage() {
            return this.message;
        }
    }
}

This comprehensive error-budget and SLO implementation provides:

  • Flexible SLO configuration with multiple service support
  • Real-time metrics collection and tracking
  • Error budget calculation and burn rate monitoring
  • Multi-level alerting system with configurable thresholds
  • Aspect-oriented monitoring for easy integration
  • REST API for management and monitoring
  • Prometheus metrics export for visualization
  • Webhook integrations for alert notifications
  • Spring Boot auto-configuration

The system helps maintain service reliability while providing clear visibility into error budget consumption and SLO compliance.

Leave a Reply

Your email address will not be published. Required fields are marked *


Macro Nepal Helper