Comprehensive Cluster Autoscaler Integration Guide
1. Kubernetes Autoscaling Configuration
Horizontal Pod Autoscaler (HPA) Configuration
// HPAConfiguration.java
package com.example.autoscaling.config;
import io.fabric8.kubernetes.api.model.HorizontalPodAutoscaler;
import io.fabric8.kubernetes.api.model.HorizontalPodAutoscalerBuilder;
import io.fabric8.kubernetes.api.model.MetricSpecBuilder;
import io.fabric8.kubernetes.api.model.ObjectMetaBuilder;
import io.fabric8.kubernetes.api.model.ResourceMetricSourceBuilder;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class HPAConfiguration {

    // NOTE(review): these defaults (max 10 replicas, CPU 80%) differ from the
    // values in the k8s/autoscaling-config.yaml manifest later in this guide
    // (max 20, CPU 75%) — confirm which source is authoritative.

    /** Minimum replica count; property {@code hpa.minReplicas}, default 2. */
    @Value("${hpa.minReplicas:2}")
    private int minReplicas;

    /** Maximum replica count; property {@code hpa.maxReplicas}, default 10. */
    @Value("${hpa.maxReplicas:10}")
    private int maxReplicas;

    /** Target average CPU utilization in percent; property {@code hpa.targetCPU}, default 80. */
    @Value("${hpa.targetCPU:80}")
    private int targetCPU;

    /** Target average memory utilization in percent; property {@code hpa.targetMemory}, default 85. */
    @Value("${hpa.targetMemory:85}")
    private int targetMemory;

    /**
     * Builds the HorizontalPodAutoscaler model targeting the {@code java-app}
     * Deployment, with CPU and memory utilization metrics and explicit
     * scale-up/scale-down behavior.
     *
     * @return the fully populated HPA resource model
     */
    @Bean
    public HorizontalPodAutoscaler horizontalPodAutoscaler() {
        return new HorizontalPodAutoscalerBuilder()
            .withNewMetadata()
                .withName("java-app-hpa")
                .withNamespace("default")
                .withLabels(createLabels())
            .endMetadata()
            .withNewSpec()
                .withScaleTargetRef(createScaleTargetRef())
                .withMinReplicas(minReplicas)
                .withMaxReplicas(maxReplicas)
                .withMetrics(
                    new MetricSpecBuilder()
                        .withType("Resource")
                        .withNewResource()
                            .withName("cpu")
                            .withTarget(createMetricTarget(targetCPU))
                        .endResource()
                        .build(),
                    new MetricSpecBuilder()
                        .withType("Resource")
                        .withNewResource()
                            .withName("memory")
                            .withTarget(createMetricTarget(targetMemory))
                        .endResource()
                        .build())
                .withBehavior(createScalingBehavior())
            .endSpec()
            .build();
    }

    /**
     * Labels attached to the HPA object.
     *
     * <p>Fully qualified {@code java.util.Map} because this file has no
     * java.util imports: the original referenced {@code Map}/{@code HashMap}
     * without importing them, which does not compile.
     */
    private java.util.Map<String, String> createLabels() {
        return java.util.Map.of(
                "app", "java-app",
                "managed-by", "autoscaler");
    }

    /** Reference to the Deployment this HPA scales. */
    private io.fabric8.kubernetes.api.model.autoscaling.v2.CrossVersionObjectReference createScaleTargetRef() {
        return new io.fabric8.kubernetes.api.model.autoscaling.v2.CrossVersionObjectReferenceBuilder()
                .withApiVersion("apps/v1")
                .withKind("Deployment")
                .withName("java-app")
                .build();
    }

    /** Utilization-type metric target at the given average percentage. */
    private io.fabric8.kubernetes.api.model.autoscaling.v2.MetricTarget createMetricTarget(int value) {
        return new io.fabric8.kubernetes.api.model.autoscaling.v2.MetricTargetBuilder()
                .withType("Utilization")
                .withAverageUtilization(value)
                .build();
    }

    /**
     * Scaling behavior: up to 4 pods per minute on scale-up ("Max" policy
     * selection), at most 2 pods per 5 minutes on scale-down ("Min").
     */
    private io.fabric8.kubernetes.api.model.autoscaling.v2.HorizontalPodAutoscalerBehavior createScalingBehavior() {
        return new io.fabric8.kubernetes.api.model.autoscaling.v2.HorizontalPodAutoscalerBehaviorBuilder()
                .withNewScaleUp()
                    .withSelectPolicy("Max")
                    .addNewPolicy()
                        .withType("Pods")
                        .withValue(4)
                        .withPeriodSeconds(60)
                    .endPolicy()
                .endScaleUp()
                .withNewScaleDown()
                    .withSelectPolicy("Min")
                    .addNewPolicy()
                        .withType("Pods")
                        .withValue(2)
                        .withPeriodSeconds(300)
                    .endPolicy()
                .endScaleDown()
                .build();
    }
}
2. Custom Metrics for Autoscaling
Custom Metrics Exporter
// CustomMetricsExporter.java
package com.example.autoscaling.metrics;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tags;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
@Component
public class CustomMetricsExporter {

    private final MeterRegistry meterRegistry;
    private final ApplicationMetricsService metricsService;

    // Live values sampled by the registered gauges. Kept as strong fields so
    // Micrometer's weakly-referenced gauge state is never garbage collected.
    private final AtomicInteger activeUsers = new AtomicInteger(0);
    private final AtomicLong messageQueueSize = new AtomicLong(0);
    // Request rate stored as raw IEEE-754 bits in an AtomicLong: the JDK has
    // no AtomicDouble (the original referenced Guava's without importing it,
    // which does not compile).
    private final AtomicLong requestRateBits = new AtomicLong(Double.doubleToLongBits(0.0));
    private final AtomicInteger errorRate = new AtomicInteger(0);
    // NOTE(review): these two are registered but never updated anywhere
    // visible in this file — confirm whether they should be wired to real data.
    private final AtomicInteger businessTransactions = new AtomicInteger(0);
    private final AtomicInteger databaseConnections = new AtomicInteger(0);

    public CustomMetricsExporter(MeterRegistry meterRegistry,
                                 ApplicationMetricsService metricsService) {
        this.meterRegistry = meterRegistry;
        this.metricsService = metricsService;
        initializeCustomMetrics();
    }

    /**
     * Registers all custom gauges (called once from the constructor).
     * Uses the Micrometer form Gauge.builder(name, obj, fn).register(registry);
     * the original's builder(name)...register(registry, obj) is not an
     * existing overload.
     */
    private void initializeCustomMetrics() {
        Gauge.builder("app.active_users", activeUsers, AtomicInteger::doubleValue)
                .description("Number of active users in the application")
                .register(meterRegistry);
        Gauge.builder("app.message_queue_size", messageQueueSize, AtomicLong::doubleValue)
                .description("Current size of message queue")
                .register(meterRegistry);
        Gauge.builder("app.request_rate", requestRateBits,
                        bits -> Double.longBitsToDouble(bits.get()))
                .description("Requests per second")
                .register(meterRegistry);
        Gauge.builder("app.error_rate", errorRate, AtomicInteger::doubleValue)
                .description("Error rate percentage")
                .register(meterRegistry);
        Gauge.builder("app.business_transactions", businessTransactions, AtomicInteger::doubleValue)
                .description("Business transactions per minute")
                .tags(Tags.of("type", "order"))
                .register(meterRegistry);
        Gauge.builder("app.database_connections", databaseConnections, AtomicInteger::doubleValue)
                .description("Active database connections")
                .register(meterRegistry);
    }

    /**
     * Polls the application metrics service every 10 seconds and pushes the
     * latest values into the gauge-backing fields.
     */
    @Scheduled(fixedRate = 10000)
    public void updateCustomMetrics() {
        try {
            activeUsers.set(metricsService.getActiveUserCount());
            messageQueueSize.set(metricsService.getMessageQueueSize());
            requestRateBits.set(Double.doubleToLongBits(metricsService.getCurrentRequestRate()));
            errorRate.set(metricsService.getErrorRatePercentage());
        } catch (Exception e) {
            // Deliberately swallowed so one bad poll does not kill the
            // scheduler, but log a trace instead of losing it entirely
            // (JUL, fully qualified, since this file does not import SLF4J).
            java.util.logging.Logger.getLogger(CustomMetricsExporter.class.getName())
                    .warning("Failed to update custom metrics: " + e);
        }
    }

    /**
     * Increments the business-transaction counter for the given type.
     *
     * @param transactionType tag value recorded on the counter (e.g. "order")
     */
    public void recordBusinessTransaction(String transactionType) {
        meterRegistry.counter("app.business_transactions_total",
                "type", transactionType).increment();
    }

    /**
     * Records a database connection lifecycle event.
     *
     * @param acquired true when a connection was acquired, false when released
     */
    public void recordDatabaseConnection(boolean acquired) {
        if (acquired) {
            meterRegistry.counter("app.database_connections_acquired").increment();
        } else {
            meterRegistry.counter("app.database_connections_released").increment();
        }
    }
}
3. Vertical Pod Autoscaler (VPA) Integration
VPA Configuration Service
// VPAService.java
package com.example.autoscaling.vpa;
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.api.model.Quantity;
import io.fabric8.kubernetes.api.model.ResourceRequirements;
import io.fabric8.kubernetes.api.model.ResourceRequirementsBuilder;
import io.fabric8.kubernetes.client.KubernetesClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import java.util.HashMap;
import java.util.Map;
@Service
public class VPAService {

    private static final Logger logger = LoggerFactory.getLogger(VPAService.class);

    /** Headroom multipliers applied on top of observed usage. */
    private static final double CPU_BUFFER = 1.2;     // 20% headroom
    private static final double MEMORY_BUFFER = 1.3;  // 30% headroom

    private final KubernetesClient kubernetesClient;
    private final MetricsAnalyzer metricsAnalyzer;
    // Written by the scheduler thread, read by callers of getRecommendation();
    // fully qualified ConcurrentHashMap because java.util.concurrent is not
    // imported in this file (the original plain HashMap was not thread-safe).
    private final Map<String, ResourceRecommendation> recommendations =
            new java.util.concurrent.ConcurrentHashMap<>();

    public VPAService(KubernetesClient kubernetesClient, MetricsAnalyzer metricsAnalyzer) {
        this.kubernetesClient = kubernetesClient;
        this.metricsAnalyzer = metricsAnalyzer;
    }

    /**
     * Every 5 minutes: sample current resource usage for the java-app
     * deployment, derive a sizing recommendation, cache it, and apply it when
     * it passes the safety checks in {@link #shouldApplyRecommendation}.
     */
    @Scheduled(fixedRate = 300000)
    public void analyzeAndRecommendResources() {
        try {
            String namespace = "default";
            String deploymentName = "java-app";
            ResourceUsage currentUsage =
                    metricsAnalyzer.getCurrentResourceUsage(deploymentName, namespace);
            ResourceRecommendation recommendation = calculateRecommendation(currentUsage);
            recommendations.put(deploymentName, recommendation);
            if (shouldApplyRecommendation(recommendation)) {
                applyResourceRecommendation(deploymentName, namespace, recommendation);
            }
            logger.info("VPA recommendation calculated: {}", recommendation);
        } catch (Exception e) {
            logger.error("Error calculating VPA recommendations", e);
        }
    }

    /**
     * Builds a recommendation from observed usage plus a safety buffer.
     * Assumes usage is reported in CPU cores and memory MiB — TODO confirm
     * against MetricsAnalyzer.
     */
    private ResourceRecommendation calculateRecommendation(ResourceUsage currentUsage) {
        String recommendedCpu =
                calculateCpuRecommendation(currentUsage.getCpuUsage(), CPU_BUFFER);
        String recommendedMemory =
                calculateMemoryRecommendation(currentUsage.getMemoryUsage(), MEMORY_BUFFER);
        return new ResourceRecommendation(recommendedCpu, recommendedMemory);
    }

    /** CPU request in millicores, rounded up to the nearest 25m. */
    private String calculateCpuRecommendation(double currentUsage, double buffer) {
        double recommended = currentUsage * buffer;
        // 40 steps per core => each step is 25m; ceil rounds up to the next step.
        double rounded = Math.ceil(recommended * 40) / 40;
        return String.format("%.0fm", rounded * 1000);
    }

    /** Memory request in MiB, rounded up to the nearest 32Mi. */
    private String calculateMemoryRecommendation(double currentUsage, double buffer) {
        double recommended = currentUsage * buffer;
        double rounded = Math.ceil(recommended / 32) * 32;
        return String.format("%.0fMi", rounded);
    }

    /** Gate: only apply valid, high-confidence recommendations within safe limits. */
    private boolean shouldApplyRecommendation(ResourceRecommendation recommendation) {
        return recommendation.isValid()
                && recommendation.getConfidence() > 0.8
                && !recommendation.exceedsSafeLimits();
    }

    /**
     * Patches the deployment's {@code java-app} container with the recommended
     * requests and derived limits. Failures are logged, never rethrown, so the
     * scheduled loop keeps running.
     */
    private void applyResourceRecommendation(String deploymentName, String namespace,
                                             ResourceRecommendation recommendation) {
        try {
            kubernetesClient.apps().deployments()
                    .inNamespace(namespace)
                    .withName(deploymentName)
                    .edit(deployment -> {
                        deployment.getSpec().getTemplate().getSpec().getContainers().forEach(container -> {
                            if (container.getName().equals("java-app")) {
                                ResourceRequirements resources = new ResourceRequirementsBuilder()
                                        .addToRequests("cpu", new Quantity(recommendation.getCpu()))
                                        .addToRequests("memory", new Quantity(recommendation.getMemory()))
                                        .addToLimits("cpu", new Quantity(recommendation.getCpuLimit()))
                                        .addToLimits("memory", new Quantity(recommendation.getMemoryLimit()))
                                        .build();
                                container.setResources(resources);
                            }
                        });
                        return deployment;
                    });
            logger.info("Applied VPA recommendation for deployment: {}", deploymentName);
        } catch (Exception e) {
            logger.error("Failed to apply VPA recommendation for deployment: {}", deploymentName, e);
        }
    }

    /** @return the last cached recommendation for the deployment, or null if none yet */
    public ResourceRecommendation getRecommendation(String deploymentName) {
        return recommendations.get(deploymentName);
    }

    /** Observed resource usage snapshot (assumed CPU cores / memory MiB). */
    public static class ResourceUsage {
        private final double cpuUsage;
        private final double memoryUsage;

        public ResourceUsage(double cpuUsage, double memoryUsage) {
            this.cpuUsage = cpuUsage;
            this.memoryUsage = memoryUsage;
        }

        public double getCpuUsage() { return cpuUsage; }
        public double getMemoryUsage() { return memoryUsage; }
    }

    /** Recommended requests plus derived limits (2x CPU, 1.5x memory). */
    public static class ResourceRecommendation {
        private final String cpu;     // e.g. "250m"
        private final String memory;  // e.g. "512Mi"
        private double confidence = 0.9;

        public ResourceRecommendation(String cpu, String memory) {
            this.cpu = cpu;
            this.memory = memory;
        }

        public String getCpu() { return cpu; }
        public String getMemory() { return memory; }

        /**
         * CPU limit = 2x the request, as a whole number of millicores.
         * (The original concatenated a raw double, producing invalid
         * Kubernetes quantities like "500.0m".)
         */
        public String getCpuLimit() {
            long millicores = Math.round(Double.parseDouble(cpu.replace("m", "")) * 2);
            return millicores + "m";
        }

        /** Memory limit = 1.5x the request, as a whole number of MiB. */
        public String getMemoryLimit() {
            long mebibytes = Math.round(Double.parseDouble(memory.replace("Mi", "")) * 1.5);
            return mebibytes + "Mi";
        }

        public double getConfidence() { return confidence; }

        /** True when both quantities were produced. */
        public boolean isValid() {
            return cpu != null && memory != null;
        }

        /** True when the recommendation exceeds 4 CPU cores or 8Gi memory. */
        public boolean exceedsSafeLimits() {
            double cpuValue = Double.parseDouble(cpu.replace("m", ""));
            double memoryValue = Double.parseDouble(memory.replace("Mi", ""));
            return cpuValue > 4000 || memoryValue > 8192;
        }
    }
}
4. Cluster Autoscaler Aware Application
Node Awareness Service
// NodeAwarenessService.java
package com.example.autoscaling.node;
import io.fabric8.kubernetes.api.model.Node;
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.client.KubernetesClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@Service
public class NodeAwarenessService {
private static final Logger logger = LoggerFactory.getLogger(NodeAwarenessService.class);
private final KubernetesClient kubernetesClient;
private final Map<String, NodeInfo> nodeCache = new ConcurrentHashMap<>();
public NodeAwarenessService(KubernetesClient kubernetesClient) {
this.kubernetesClient = kubernetesClient;
}
@Scheduled(fixedRate = 60000) // Every minute
public void refreshNodeInfo() {
try {
List<Node> nodes = kubernetesClient.nodes().list().getItems();
nodeCache.clear();
for (Node node : nodes) {
NodeInfo nodeInfo = extractNodeInfo(node);
nodeCache.put(node.getMetadata().getName(), nodeInfo);
}
logger.debug("Refreshed node information for {} nodes", nodes.size());
} catch (Exception e) {
logger.error("Error refreshing node information", e);
}
}
private NodeInfo extractNodeInfo(Node node) {
String nodeName = node.getMetadata().getName();
// Extract allocatable resources
Map<String, String> allocatable = node.getStatus().getAllocatable();
double allocatableCPU = parseCPU(allocatable.get("cpu"));
double allocatableMemory = parseMemory(allocatable.get("memory"));
// Extract capacity
Map<String, String> capacity = node.getStatus().getCapacity();
double capacityCPU = parseCPU(capacity.get("cpu"));
double capacityMemory = parseMemory(capacity.get("memory"));
// Get node conditions
boolean ready = node.getStatus().getConditions().stream()
.filter(condition -> "Ready".equals(condition.getType()))
.anyMatch(condition -> "True".equals(condition.getStatus()));
// Get node labels for topology awareness
Map<String, String> labels = node.getMetadata().getLabels();
return new NodeInfo(nodeName, allocatableCPU, allocatableMemory,
capacityCPU, capacityMemory, ready, labels);
}
private double parseCPU(String cpu) {
if (cpu.endsWith("m")) {
return Double.parseDouble(cpu.substring(0, cpu.length() - 1)) / 1000;
}
return Double.parseDouble(cpu);
}
private double parseMemory(String memory) {
if (memory.endsWith("Ki")) {
return Double.parseDouble(memory.substring(0, memory.length() - 2)) / 1024;
} else if (memory.endsWith("Mi")) {
return Double.parseDouble(memory.substring(0, memory.length() - 2));
} else if (memory.endsWith("Gi")) {
return Double.parseDouble(memory.substring(0, memory.length() - 2)) * 1024;
}
return Double.parseDouble(memory) / (1024 * 1024); // Assume bytes
}
public boolean hasSufficientResources(double requiredCPU, double requiredMemory) {
return nodeCache.values().stream()
.filter(NodeInfo::isReady)
.anyMatch(node -> node.hasCapacity(requiredCPU, requiredMemory));
}
public List<String> findSuitableNodes(double requiredCPU, double requiredMemory) {
return nodeCache.values().stream()
.filter(NodeInfo::isReady)
.filter(node -> node.hasCapacity(requiredCPU, requiredMemory))
.map(NodeInfo::getName)
.collect(Collectors.toList());
}
public double getClusterCPUUtilization() {
double totalAllocatable = nodeCache.values().stream()
.mapToDouble(NodeInfo::getAllocatableCPU)
.sum();
double totalUsed = calculateTotalUsedCPU();
return totalUsed / totalAllocatable * 100;
}
public double getClusterMemoryUtilization() {
double totalAllocatable = nodeCache.values().stream()
.mapToDouble(NodeInfo::getAllocatableMemory)
.sum();
double totalUsed = calculateTotalUsedMemory();
return totalUsed / totalAllocatable * 100;
}
private double calculateTotalUsedCPU() {
// Implementation to calculate total CPU used by pods
return kubernetesClient.pods().inAnyNamespace().list().getItems().stream()
.mapToDouble(this::getPodCPURequest)
.sum();
}
private double calculateTotalUsedMemory() {
// Implementation to calculate total memory used by pods
return kubernetesClient.pods().inAnyNamespace().list().getItems().stream()
.mapToDouble(this::getPodMemoryRequest)
.sum();
}
private double getPodCPURequest(Pod pod) {
return pod.getSpec().getContainers().stream()
.mapToDouble(container -> parseCPU(container.getResources().getRequests().get("cpu").getAmount()))
.sum();
}
private double getPodMemoryRequest(Pod pod) {
return pod.getSpec().getContainers().stream()
.mapToDouble(container -> parseMemory(container.getResources().getRequests().get("memory").getAmount()))
.sum();
}
public static class NodeInfo {
private final String name;
private final double allocatableCPU;
private final double allocatableMemory;
private final double capacityCPU;
private final double capacityMemory;
private final boolean ready;
private final Map<String, String> labels;
public NodeInfo(String name, double allocatableCPU, double allocatableMemory,
double capacityCPU, double capacityMemory, boolean ready,
Map<String, String> labels) {
this.name = name;
this.allocatableCPU = allocatableCPU;
this.allocatableMemory = allocatableMemory;
this.capacityCPU = capacityCPU;
this.capacityMemory = capacityMemory;
this.ready = ready;
this.labels = labels;
}
public boolean hasCapacity(double requiredCPU, double requiredMemory) {
return allocatableCPU >= requiredCPU && allocatableMemory >= requiredMemory;
}
// Getters
public String getName() { return name; }
public double getAllocatableCPU() { return allocatableCPU; }
public double getAllocatableMemory() { return allocatableMemory; }
public boolean isReady() { return ready; }
}
}
5. Autoscaling Decision Engine
// AutoscalingDecisionEngine.java
package com.example.autoscaling.engine;
import com.example.autoscaling.metrics.CustomMetricsExporter;
import com.example.autoscaling.node.NodeAwarenessService;
import com.example.autoscaling.vpa.VPAService;
import io.fabric8.kubernetes.client.KubernetesClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@Component
public class AutoscalingDecisionEngine {

    private static final Logger logger = LoggerFactory.getLogger(AutoscalingDecisionEngine.class);

    private final KubernetesClient kubernetesClient;
    private final NodeAwarenessService nodeAwarenessService;
    private final VPAService vpaService;
    // NOTE(review): injected but never referenced below — confirm whether this
    // dependency is still needed.
    private final CustomMetricsExporter metricsExporter;
    // Latest decision stored under the "current" key; concurrent because the
    // scheduler writes while getCurrentDecision() reads.
    private final Map<String, ScalingDecision> decisions = new ConcurrentHashMap<>();

    public AutoscalingDecisionEngine(KubernetesClient kubernetesClient,
                                     NodeAwarenessService nodeAwarenessService,
                                     VPAService vpaService,
                                     CustomMetricsExporter metricsExporter) {
        this.kubernetesClient = kubernetesClient;
        this.nodeAwarenessService = nodeAwarenessService;
        this.vpaService = vpaService;
        this.metricsExporter = metricsExporter;
    }

    /**
     * Every 30 seconds: derive a scaling decision from cluster utilization,
     * cache it, and execute it unless the action is NONE.
     */
    @Scheduled(fixedRate = 30000)
    public void evaluateScalingDecisions() {
        try {
            ScalingDecision decision = analyzeScalingNeeds();
            decisions.put("current", decision);
            if (decision.getAction() != ScalingAction.NONE) {
                executeScalingDecision(decision);
            }
            logger.debug("Scaling decision: {}", decision);
        } catch (Exception e) {
            logger.error("Error evaluating scaling decisions", e);
        }
    }

    /**
     * Decision ladder: scale out above 80% CPU or 85% memory; scale in below
     * 30% CPU and 40% memory; otherwise consider a VPA adjustment; else NONE.
     */
    private ScalingDecision analyzeScalingNeeds() {
        double cpuUtilization = nodeAwarenessService.getClusterCPUUtilization();
        double memoryUtilization = nodeAwarenessService.getClusterMemoryUtilization();
        if (cpuUtilization > 80 || memoryUtilization > 85) {
            return new ScalingDecision(ScalingAction.SCALE_OUT,
                    "High resource utilization: CPU=" + cpuUtilization + "%, Memory=" + memoryUtilization + "%");
        }
        if (cpuUtilization < 30 && memoryUtilization < 40) {
            return new ScalingDecision(ScalingAction.SCALE_IN,
                    "Low resource utilization: CPU=" + cpuUtilization + "%, Memory=" + memoryUtilization + "%");
        }
        if (shouldRecommendVPA()) {
            return new ScalingDecision(ScalingAction.RECOMMEND_VPA,
                    "VPA resource adjustment recommended");
        }
        return new ScalingDecision(ScalingAction.NONE, "No scaling action needed");
    }

    /**
     * A strongly skewed profile (one resource above 90% while the other is
     * below 60%) suggests the pod sizing, not the replica count, is wrong.
     */
    private boolean shouldRecommendVPA() {
        double cpuUtilization = nodeAwarenessService.getClusterCPUUtilization();
        double memoryUtilization = nodeAwarenessService.getClusterMemoryUtilization();
        return (cpuUtilization > 90 && memoryUtilization < 60)
                || (memoryUtilization > 90 && cpuUtilization < 60);
    }

    /** Dispatches the decision to the matching trigger method. */
    private void executeScalingDecision(ScalingDecision decision) {
        switch (decision.getAction()) {
            case SCALE_OUT:
                triggerScaleOut();
                break;
            case SCALE_IN:
                triggerScaleIn();
                break;
            case RECOMMEND_VPA:
                triggerVPARecommendation();
                break;
            default:
                // NONE: nothing to do
                break;
        }
    }

    /**
     * Marks the deployment for scale-out by setting cluster-autoscaler
     * annotations (safe-to-evict, and disabling scale-down while growing).
     */
    private void triggerScaleOut() {
        logger.info("Triggering scale out action");
        kubernetesClient.apps().deployments()
                .inNamespace("default")
                .withName("java-app")
                .edit(deployment -> {
                    Map<String, String> annotations = deployment.getMetadata().getAnnotations();
                    if (annotations == null) {
                        // fully qualified: java.util.HashMap is not imported
                        // in this file (the original reference did not compile)
                        annotations = new java.util.HashMap<>();
                    }
                    annotations.put("cluster-autoscaler.kubernetes.io/safe-to-evict", "true");
                    annotations.put("cluster-autoscaler.kubernetes.io/scale-down-disabled", "true");
                    deployment.getMetadata().setAnnotations(annotations);
                    return deployment;
                });
    }

    /** Re-enables cluster-autoscaler scale-down on the deployment. */
    private void triggerScaleIn() {
        logger.info("Triggering scale in action");
        kubernetesClient.apps().deployments()
                .inNamespace("default")
                .withName("java-app")
                .edit(deployment -> {
                    Map<String, String> annotations = deployment.getMetadata().getAnnotations();
                    if (annotations != null) {
                        annotations.put("cluster-autoscaler.kubernetes.io/scale-down-disabled", "false");
                    }
                    return deployment;
                });
    }

    /** Delegates to the VPA service's analyze/apply cycle. */
    private void triggerVPARecommendation() {
        vpaService.analyzeAndRecommendResources();
    }

    /** @return the most recent decision, or null before the first evaluation */
    public ScalingDecision getCurrentDecision() {
        return decisions.get("current");
    }

    /** Possible outcomes of one evaluation cycle. */
    public enum ScalingAction {
        SCALE_OUT, SCALE_IN, RECOMMEND_VPA, NONE
    }

    /** Immutable record of one decision: action, human-readable reason, timestamp. */
    public static class ScalingDecision {
        private final ScalingAction action;
        private final String reason;
        private final long timestamp;

        public ScalingDecision(ScalingAction action, String reason) {
            this.action = action;
            this.reason = reason;
            this.timestamp = System.currentTimeMillis();
        }

        // Getters
        public ScalingAction getAction() { return action; }
        public String getReason() { return reason; }
        public long getTimestamp() { return timestamp; }

        @Override
        public String toString() {
            return String.format("ScalingDecision{action=%s, reason='%s', timestamp=%d}",
                    action, reason, timestamp);
        }
    }
}
6. Application Metrics Service
// ApplicationMetricsService.java
package com.example.autoscaling.metrics;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import org.springframework.stereotype.Service;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
@Service
public class ApplicationMetricsService {

    private final MeterRegistry meterRegistry;
    // Service start time, used to turn the running request total into an
    // approximate requests-per-second rate.
    private final long startedAtMillis = System.currentTimeMillis();
    private final AtomicInteger activeUserCount = new AtomicInteger(0);
    private final AtomicLong messageQueueSize = new AtomicLong(0);
    private final AtomicLong totalRequests = new AtomicLong(0);
    private final AtomicLong errorCount = new AtomicLong(0);
    // Per-endpoint request tallies, keyed by the derived counter name.
    private final Map<String, AtomicLong> requestCounters = new ConcurrentHashMap<>();
    private final Timer requestTimer;

    public ApplicationMetricsService(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        // Timer for request durations.
        this.requestTimer = Timer.builder("app.request.duration")
                .description("HTTP request duration")
                .register(meterRegistry);
        initializeGauges();
    }

    /**
     * Registers gauges backed by this service's counters.
     * NOTE(review): CustomMetricsExporter registers gauges with the same names
     * ("app.active_users", "app.message_queue_size") — confirm only one of the
     * two classes should own these meters.
     */
    private void initializeGauges() {
        meterRegistry.gauge("app.active_users", activeUserCount);
        meterRegistry.gauge("app.message_queue_size", messageQueueSize);
        // The original also called meterRegistry.more().counter("app.request.rate",
        // new AtomicLong(0)), which is not a valid FunctionCounter overload and
        // would clash with the Counter of the same name incremented in
        // recordRequest(); that counter registers itself on first use instead.
    }

    /** Counts a login and grows the active-user gauge. */
    public void recordUserLogin() {
        activeUserCount.incrementAndGet();
        meterRegistry.counter("app.user.logins").increment();
    }

    /** Shrinks the active-user gauge. */
    public void recordUserLogout() {
        activeUserCount.decrementAndGet();
    }

    /**
     * Records one completed request.
     *
     * @param endpoint request path, e.g. "/api/users"
     * @param duration wall-clock duration in milliseconds
     * @param success  false counts the request toward the error rate
     */
    public void recordRequest(String endpoint, long duration, boolean success) {
        totalRequests.incrementAndGet();
        requestTimer.record(duration, TimeUnit.MILLISECONDS);
        // Per-endpoint tally. The original called .increment() on the double
        // returned by Counter.count(), which does not compile; keep a local
        // AtomicLong tally and bump the registry counter separately.
        String counterName = "app.requests." + endpoint.replace("/", ".");
        requestCounters.computeIfAbsent(counterName, k -> new AtomicLong()).incrementAndGet();
        meterRegistry.counter(counterName).increment();
        if (!success) {
            errorCount.incrementAndGet();
            meterRegistry.counter("app.request.errors", "endpoint", endpoint).increment();
        }
        // Simplified rate tracking via a raw counter.
        meterRegistry.counter("app.request.rate").increment();
    }

    /** Updates the queue-size gauge backing value. */
    public void updateMessageQueueSize(long size) {
        messageQueueSize.set(size);
    }

    public int getActiveUserCount() {
        return activeUserCount.get();
    }

    public long getMessageQueueSize() {
        return messageQueueSize.get();
    }

    /**
     * Average requests per second since service start. (The original divided
     * by seconds-since-epoch, which always yielded roughly zero.) Still a
     * lifetime average, not a sliding window — use a proper rate meter in
     * production.
     */
    public double getCurrentRequestRate() {
        long elapsedSeconds = Math.max(1, (System.currentTimeMillis() - startedAtMillis) / 1000);
        return (double) totalRequests.get() / elapsedSeconds;
    }

    /** Errors as an integer percentage of all requests; 0 when nothing recorded yet. */
    public int getErrorRatePercentage() {
        long total = totalRequests.get(); // single read avoids a racy double-read
        if (total == 0) {
            return 0;
        }
        return (int) ((errorCount.get() * 100) / total);
    }

    /** Point-in-time view of the main metrics; safe for the caller to mutate. */
    public Map<String, Object> getMetricsSnapshot() {
        // fully qualified: java.util.HashMap is not imported in this file
        Map<String, Object> snapshot = new java.util.HashMap<>();
        snapshot.put("activeUsers", activeUserCount.get());
        snapshot.put("messageQueueSize", messageQueueSize.get());
        snapshot.put("totalRequests", totalRequests.get());
        snapshot.put("errorCount", errorCount.get());
        snapshot.put("errorRate", getErrorRatePercentage());
        snapshot.put("requestRate", getCurrentRequestRate());
        return snapshot;
    }
}
7. Kubernetes Configuration
Autoscaling Configuration YAML
# k8s/autoscaling-config.yaml
# HPA (autoscaling/v2) plus VPA for the java-app Deployment.
# Reconstructed with proper indentation — the original was collapsed onto a
# single line and was not valid YAML as written.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: java-app-hpa
  namespace: default
  labels:
    app: java-app
    managed-by: autoscaler
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: java-app
  minReplicas: 2
  maxReplicas: 20
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 75
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 85
    - type: Pods
      pods:
        metric:
          name: app_active_users
        target:
          type: AverageValue
          averageValue: 100
    - type: Object
      object:
        metric:
          name: app_message_queue_size
        describedObject:
          apiVersion: v1
          kind: Service
          name: java-app
        target:
          type: Value
          value: 1000
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - type: Pods
          value: 4
          periodSeconds: 60
        - type: Percent
          value: 100
          periodSeconds: 60
      selectPolicy: Max
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Pods
          value: 2
          periodSeconds: 60
        - type: Percent
          value: 50
          periodSeconds: 60
      selectPolicy: Min
---
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: java-app-vpa
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: java-app
  updatePolicy:
    updateMode: "Auto"
  resourcePolicy:
    containerPolicies:
      - containerName: "*"
        minAllowed:
          cpu: 100m
          memory: 128Mi
        maxAllowed:
          cpu: 2
          memory: 4Gi
        controlledResources: ["cpu", "memory"]
This comprehensive Cluster Autoscaler integration provides:
- Horizontal Pod Autoscaling with custom metrics
- Vertical Pod Autoscaling recommendations
- Node awareness and cluster resource monitoring
- Custom metrics collection and export
- Intelligent scaling decisions based on multiple factors
- Kubernetes-native configuration and operation
- Spring Boot integration for easy deployment
The system automatically scales your Java applications based on both resource utilization and custom business metrics, keeping performance steady while avoiding over-provisioning.