A comprehensive fault injection framework for testing microservices resilience, supporting various fault types, dynamic configuration, and integration with popular frameworks.
Complete Implementation
1. Core Fault Injection Framework
package com.faultinjection.core;
import java.io.IOException;
import java.net.UnknownHostException;
import java.time.DayOfWeek;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalTime;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.*;
/**
 * Main fault injection engine.
 * <p>
 * A process-wide singleton that stores the active {@link FaultRule}s, decides whether
 * one of them applies to an operation and, if so, runs the operation through the
 * {@link Fault} registered for that rule's fault type. A background task removes
 * expired rules once per minute.
 */
public class FaultInjectionEngine {
    // Declared before INSTANCE: static initializers run in textual order, so the
    // singleton constructor can never observe a null logger.
    private static final System.Logger LOG =
            System.getLogger(FaultInjectionEngine.class.getName());
    private static final FaultInjectionEngine INSTANCE = new FaultInjectionEngine();

    private final Map<String, FaultRule> activeRules;
    private final ScheduledExecutorService scheduler;
    private final FaultRegistry faultRegistry;
    private final AtomicBoolean enabled;
    private final FaultMetrics metrics;

    private FaultInjectionEngine() {
        this.activeRules = new ConcurrentHashMap<>();
        // Daemon threads: the monitor is never shut down explicitly, and non-daemon
        // pool threads would keep the JVM alive after the application finishes.
        this.scheduler = Executors.newScheduledThreadPool(2, task -> {
            Thread worker = new Thread(task, "fault-rule-monitor");
            worker.setDaemon(true);
            return worker;
        });
        this.faultRegistry = new FaultRegistry();
        this.enabled = new AtomicBoolean(true);
        this.metrics = new FaultMetrics();
        initializeDefaultFaults();
        startRuleMonitor();
    }

    /**
     * Returns the shared engine instance.
     *
     * @return the singleton engine
     */
    public static FaultInjectionEngine getInstance() {
        return INSTANCE;
    }

    /**
     * Registers a fault rule, replacing any rule previously stored under the same id.
     *
     * @param rule the rule to activate
     * @throws NullPointerException if {@code rule} or its id is {@code null}
     */
    public void registerRule(FaultRule rule) {
        Objects.requireNonNull(rule, "rule");
        activeRules.put(rule.getId(), rule);
        LOG.log(System.Logger.Level.INFO, "Registered fault rule: " + rule.getId());
    }

    /**
     * Removes a fault rule; does nothing when no rule has the given id.
     *
     * @param ruleId id of the rule to remove
     */
    public void removeRule(String ruleId) {
        FaultRule removed = activeRules.remove(ruleId);
        if (removed != null) {
            LOG.log(System.Logger.Level.INFO, "Removed fault rule: " + ruleId);
        }
    }

    /**
     * Checks whether some enabled rule currently matches the given operation.
     * <p>
     * Each call re-evaluates {@link FaultRule#matches}, so the answer describes this
     * call only and is not a promise about a later evaluation.
     *
     * @param faultType the operation name the rules are matched against
     * @param context   request information handed to the rule conditions
     * @return {@code true} if injection is globally enabled and an enabled rule matches
     */
    public boolean shouldInjectFault(String faultType, FaultContext context) {
        if (!enabled.get()) {
            return false;
        }
        for (FaultRule rule : activeRules.values()) {
            // Enabled check first so disabled rules do not advance stateful conditions.
            if (rule.isEnabled() && rule.matches(faultType, context)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Executes an operation with potential fault injection, using an empty context
     * that carries only the operation name.
     *
     * @param operation      operation name matched against the rules
     * @param operationLogic the work to run
     * @param <T>            result type of the operation
     * @return the operation's result, or whatever the injected fault returns
     * @throws RuntimeException wrapping any exception raised by the operation or by
     *                          the injected fault
     */
    public <T> T executeWithFault(String operation, Callable<T> operationLogic) {
        return executeWithFault(operation, FaultContext.create(operation), operationLogic);
    }

    /**
     * Executes an operation with potential fault injection, evaluating rule
     * conditions against a caller-supplied context.
     * <p>
     * The injection decision is made exactly once per call, so a rule's configured
     * probability is its effective probability.
     *
     * @param operation      operation name matched against the rules
     * @param context        request information for the rule conditions; when
     *                       {@code null} an empty context is created
     * @param operationLogic the work to run
     * @param <T>            result type of the operation
     * @return the operation's result, or whatever the injected fault returns
     * @throws RuntimeException wrapping any exception raised by the operation or by
     *                          the injected fault
     */
    public <T> T executeWithFault(String operation, FaultContext context,
                                  Callable<T> operationLogic) {
        FaultContext effectiveContext =
                context != null ? context : FaultContext.create(operation);
        try {
            if (enabled.get()) {
                return injectFault(operation, effectiveContext, operationLogic);
            }
            return operationLogic.call();
        } catch (Exception e) {
            metrics.recordError(operation);
            throw new RuntimeException("Operation failed: " + operation, e);
        }
    }

    /**
     * Executes a void operation with potential fault injection.
     *
     * @param operation      operation name matched against the rules
     * @param operationLogic the work to run
     * @throws RuntimeException wrapping any exception raised by the operation or by
     *                          the injected fault
     */
    public void executeWithFault(String operation, Runnable operationLogic) {
        Callable<Void> adapted = () -> {
            operationLogic.run();
            return null;
        };
        executeWithFault(operation, adapted);
    }

    /**
     * Applies the first enabled, matching rule whose fault type is registered, or
     * runs the operation untouched when there is none.
     * <p>
     * Exceptions from the fault are deliberately propagated: for exception-style
     * faults the exception is the injected fault, and swallowing it would also run a
     * failed operation a second time.
     */
    private <T> T injectFault(String operation, FaultContext context,
                              Callable<T> operationLogic) throws Exception {
        for (FaultRule rule : activeRules.values()) {
            if (!rule.isEnabled() || !rule.matches(operation, context)) {
                continue;
            }
            Fault fault = faultRegistry.getFault(rule.getFaultType());
            if (fault == null) {
                LOG.log(System.Logger.Level.WARNING,
                        "Error injecting fault: no fault registered for type "
                                + rule.getFaultType());
                metrics.recordFaultError(operation);
                continue;
            }
            metrics.recordInjection(operation);
            LOG.log(System.Logger.Level.WARNING, "Injecting fault: " + rule.getFaultType()
                    + " for operation: " + operation);
            // Fault.apply is untyped; the result is whatever the fault chose to return
            // for an operation the caller declared as producing T.
            @SuppressWarnings("unchecked")
            T result = (T) fault.apply(operationLogic, context, rule.getConfiguration());
            return result;
        }
        // No fault applied: execute normally.
        return operationLogic.call();
    }

    /**
     * Enables or disables fault injection globally.
     *
     * @param enabled {@code true} to allow rules to fire
     */
    public void setEnabled(boolean enabled) {
        this.enabled.set(enabled);
        LOG.log(System.Logger.Level.INFO,
                "Fault injection " + (enabled ? "enabled" : "disabled"));
    }

    /**
     * @return {@code true} if fault injection is globally enabled
     */
    public boolean isEnabled() {
        return enabled.get();
    }

    /**
     * @return the live metrics collector used by this engine
     */
    public FaultMetrics getMetrics() {
        return metrics;
    }

    /**
     * @return a snapshot copy of the active rules keyed by rule id
     */
    public Map<String, FaultRule> getActiveRules() {
        return new HashMap<>(activeRules);
    }

    /** Registers the built-in fault implementations under their type names. */
    private void initializeDefaultFaults() {
        faultRegistry.registerFault("latency", new LatencyFault());
        faultRegistry.registerFault("exception", new ExceptionFault());
        faultRegistry.registerFault("null_response", new NullResponseFault());
        faultRegistry.registerFault("partial_response", new PartialResponseFault());
        faultRegistry.registerFault("cpu_spike", new CpuSpikeFault());
        faultRegistry.registerFault("memory_leak", new MemoryLeakFault());
        faultRegistry.registerFault("network_partition", new NetworkPartitionFault());
    }

    /** Schedules the once-per-minute rule maintenance task. */
    private void startRuleMonitor() {
        scheduler.scheduleAtFixedRate(() -> {
            try {
                cleanupExpiredRules();
                updateRuleProbabilities();
            } catch (Exception e) {
                // An escaping exception would cancel all future runs of the task.
                LOG.log(System.Logger.Level.ERROR, "Error in rule monitor: " + e.getMessage(), e);
            }
        }, 1, 1, TimeUnit.MINUTES);
    }

    /** Drops every rule whose time-to-live has elapsed. */
    private void cleanupExpiredRules() {
        activeRules.entrySet().removeIf(entry -> {
            boolean expired = entry.getValue().isExpired();
            if (expired) {
                LOG.log(System.Logger.Level.INFO,
                        "Removed expired fault rule: " + entry.getKey());
            }
            return expired;
        });
    }

    private void updateRuleProbabilities() {
        // Could implement dynamic probability adjustment based on metrics
    }
}
2. Fault Rules and Configuration
/**
 * Fault rule definition.
 * <p>
 * A rule ties a fault type to a target operation (or {@code "*"} for every
 * operation), a firing probability, an optional extra condition and an optional
 * time-to-live measured from construction. Apart from the enabled flag, instances
 * are immutable.
 */
public class FaultRule {
    private static final String WILDCARD = "*";

    private final String id;
    private final String faultType;
    private final String targetOperation;
    private final double probability;
    private final Map<String, Object> configuration;
    private final FaultCondition condition;
    private final Instant createdTime;
    private final Duration ttl;
    private final AtomicBoolean enabled;

    /**
     * Creates a rule.
     *
     * @param id              unique rule id
     * @param faultType       name of the fault to apply
     * @param targetOperation operation name to match, or {@code "*"} for all
     * @param probability     chance in {@code [0, 1]} that a matching call fires
     * @param configuration   fault-specific settings; copied, {@code null} means empty
     * @param condition       extra predicate; {@code null} means always
     * @param ttl             lifetime from now; {@code null} means never expires
     * @throws NullPointerException     if {@code id}, {@code faultType} or
     *                                  {@code targetOperation} is {@code null}
     * @throws IllegalArgumentException if {@code probability} is NaN or outside
     *                                  {@code [0, 1]}
     */
    public FaultRule(String id, String faultType, String targetOperation,
                     double probability, Map<String, Object> configuration,
                     FaultCondition condition, Duration ttl) {
        // Fail fast: these nulls would otherwise surface later as anonymous NPEs
        // inside matches() or the concurrent rule/fault maps.
        this.id = Objects.requireNonNull(id, "id");
        this.faultType = Objects.requireNonNull(faultType, "faultType");
        this.targetOperation = Objects.requireNonNull(targetOperation, "targetOperation");
        if (Double.isNaN(probability) || probability < 0.0 || probability > 1.0) {
            throw new IllegalArgumentException(
                    "probability must be within [0, 1] but was " + probability);
        }
        this.probability = probability;
        this.configuration = configuration != null ? new HashMap<>(configuration) : new HashMap<>();
        this.condition = condition != null ? condition : FaultCondition.ALWAYS;
        this.createdTime = Instant.now();
        this.ttl = ttl;
        this.enabled = new AtomicBoolean(true);
    }

    /**
     * Decides whether this rule fires for one call.
     * <p>
     * The probability is sampled on every invocation, so callers should evaluate a
     * rule once per operation. The enabled flag is not consulted here.
     *
     * @param operation name of the operation being executed
     * @param context   request information handed to the condition
     * @return {@code true} if the operation is targeted, the probability sample
     *         passes and the condition holds
     */
    public boolean matches(String operation, FaultContext context) {
        if (!WILDCARD.equals(targetOperation) && !targetOperation.equals(operation)) {
            return false;
        }
        // nextDouble() is in [0, 1): probability 0 never fires and 1 always fires.
        if (ThreadLocalRandom.current().nextDouble() >= probability) {
            return false;
        }
        return condition.test(context);
    }

    /**
     * @return {@code true} if a TTL was set and has elapsed since construction
     */
    public boolean isExpired() {
        if (ttl == null) {
            return false;
        }
        return Instant.now().isAfter(createdTime.plus(ttl));
    }

    // Getters
    public String getId() { return id; }
    public String getFaultType() { return faultType; }
    public String getTargetOperation() { return targetOperation; }
    public double getProbability() { return probability; }
    /** @return a defensive copy of the fault configuration */
    public Map<String, Object> getConfiguration() { return new HashMap<>(configuration); }
    public FaultCondition getCondition() { return condition; }
    public Instant getCreatedTime() { return createdTime; }
    public Duration getTtl() { return ttl; }
    public boolean isEnabled() { return enabled.get(); }

    public void setEnabled(boolean enabled) {
        this.enabled.set(enabled);
    }

    /**
     * Builder for FaultRule. Defaults: target {@code "*"}, probability {@code 1.0},
     * empty configuration, condition {@link FaultCondition#ALWAYS}, no TTL.
     */
    public static class Builder {
        private String id;
        private String faultType;
        private String targetOperation = WILDCARD;
        private double probability = 1.0;
        private Map<String, Object> configuration = new HashMap<>();
        private FaultCondition condition = FaultCondition.ALWAYS;
        private Duration ttl;

        public Builder id(String id) {
            this.id = id;
            return this;
        }

        public Builder faultType(String faultType) {
            this.faultType = faultType;
            return this;
        }

        public Builder targetOperation(String targetOperation) {
            this.targetOperation = targetOperation;
            return this;
        }

        public Builder probability(double probability) {
            this.probability = probability;
            return this;
        }

        /**
         * Replaces the configuration with a copy of the given map.
         *
         * @param configuration new settings; {@code null} clears the configuration
         * @return this builder
         */
        public Builder configuration(Map<String, Object> configuration) {
            // Copy so later config(...) calls neither mutate the caller's map nor
            // fail on an immutable or null argument.
            this.configuration = configuration != null
                    ? new HashMap<>(configuration)
                    : new HashMap<>();
            return this;
        }

        public Builder config(String key, Object value) {
            this.configuration.put(key, value);
            return this;
        }

        public Builder condition(FaultCondition condition) {
            this.condition = condition;
            return this;
        }

        public Builder ttl(Duration ttl) {
            this.ttl = ttl;
            return this;
        }

        /**
         * Builds the rule, generating a random UUID id when none was set.
         *
         * @return the new rule
         * @throws NullPointerException     if no fault type was set or the target
         *                                  operation was set to {@code null}
         * @throws IllegalArgumentException if the probability is outside {@code [0, 1]}
         */
        public FaultRule build() {
            if (id == null) {
                id = UUID.randomUUID().toString();
            }
            return new FaultRule(id, faultType, targetOperation, probability,
                    configuration, condition, ttl);
        }
    }
}
/**
 * Predicate deciding, from a {@link FaultContext}, whether a fault may be injected.
 * <p>
 * The combinators validate their argument eagerly, the same way
 * {@code java.util.function.Predicate} does, so a {@code null} operand fails at
 * composition time rather than on some later evaluation.
 */
@FunctionalInterface
public interface FaultCondition {
    /** Condition that holds for every context. */
    FaultCondition ALWAYS = context -> true;
    /** Condition that holds for no context. */
    FaultCondition NEVER = context -> false;

    /**
     * Evaluates this condition.
     *
     * @param context request information for the current operation
     * @return {@code true} if the fault may be injected
     */
    boolean test(FaultContext context);

    /**
     * Returns a short-circuiting conjunction of this condition and {@code other}.
     *
     * @param other condition evaluated only when this one holds
     * @return the combined condition
     * @throws NullPointerException if {@code other} is {@code null}
     */
    default FaultCondition and(FaultCondition other) {
        Objects.requireNonNull(other, "other");
        return context -> test(context) && other.test(context);
    }

    /**
     * Returns a short-circuiting disjunction of this condition and {@code other}.
     *
     * @param other condition evaluated only when this one does not hold
     * @return the combined condition
     * @throws NullPointerException if {@code other} is {@code null}
     */
    default FaultCondition or(FaultCondition other) {
        Objects.requireNonNull(other, "other");
        return context -> test(context) || other.test(context);
    }

    /**
     * @return a condition that holds exactly when this one does not
     */
    default FaultCondition not() {
        return context -> !test(context);
    }
}
/**
 * Factory methods for common fault conditions.
 */
public final class FaultConditions {
    private FaultConditions() {}

    /**
     * Condition that holds with the given probability on each evaluation.
     *
     * @param probability chance in {@code [0, 1]} that the condition holds
     * @return the sampling condition
     */
    public static FaultCondition random(double probability) {
        return context -> Math.random() < probability;
    }

    /**
     * Condition that holds while the local wall-clock time is inside the window,
     * bounds included.
     * <p>
     * When {@code start} is after {@code end} the window is taken to cross midnight
     * (for example 22:00 to 02:00); such a window previously never matched.
     *
     * @param start first time of day inside the window
     * @param end   last time of day inside the window
     * @return the time-window condition
     * @throws NullPointerException if either bound is {@code null}
     */
    public static FaultCondition timeWindow(LocalTime start, LocalTime end) {
        Objects.requireNonNull(start, "start");
        Objects.requireNonNull(end, "end");
        boolean crossesMidnight = start.isAfter(end);
        return context -> {
            LocalTime now = LocalTime.now();
            boolean afterStart = !now.isBefore(start);
            boolean beforeEnd = !now.isAfter(end);
            return crossesMidnight ? (afterStart || beforeEnd) : (afterStart && beforeEnd);
        };
    }

    /**
     * Condition that holds on the given days of the week (local date).
     *
     * @param days the days on which the condition holds; duplicates are ignored and
     *             an empty list yields a condition that never holds
     * @return the day-of-week condition
     * @throws NullPointerException if {@code days} or one of its elements is {@code null}
     */
    public static FaultCondition dayOfWeek(DayOfWeek... days) {
        // EnumSet tolerates repeated days, whereas Set.of(...) rejects duplicates
        // with an IllegalArgumentException.
        Set<DayOfWeek> daySet = EnumSet.noneOf(DayOfWeek.class);
        Collections.addAll(daySet, days);
        return context -> daySet.contains(LocalDate.now().getDayOfWeek());
    }

    /**
     * Condition that holds when the named header equals {@code value}.
     *
     * @param header header name to read from the context
     * @param value  expected value; {@code null} matches an absent header
     * @return the header condition
     */
    public static FaultCondition headerEquals(String header, String value) {
        return context -> Objects.equals(value, context.getHeader(header));
    }

    /**
     * Condition that holds when the named parameter is present in the context.
     *
     * @param param parameter name
     * @return the parameter-presence condition
     */
    public static FaultCondition parameterPresent(String param) {
        return context -> context.getParameter(param) != null;
    }

    /**
     * Condition that holds when the context's user roles contain {@code role}.
     *
     * @param role role name to look for
     * @return the role condition
     */
    public static FaultCondition userInRole(String role) {
        return context -> context.getUserRoles().contains(role);
    }

    /**
     * @param threshold request count passed to {@link RequestCountCondition}
     * @return a new request-count condition with its default window
     */
    public static FaultCondition requestCount(int threshold) {
        return new RequestCountCondition(threshold);
    }

    /**
     * @param threshold error rate passed to {@link ErrorRateCondition}
     * @return a new error-rate condition with its default window
     */
    public static FaultCondition errorRate(double threshold) {
        return new ErrorRateCondition(threshold);
    }
}
/**
 * Fault context carrying request information for rule conditions.
 * <p>
 * Instances are immutable snapshots: collections are copied on construction and
 * every collection getter returns a fresh copy.
 */
public class FaultContext {
    private final String operation;
    private final Map<String, String> headers;
    private final Map<String, String> parameters;
    private final Map<String, Object> attributes;
    private final String userId;
    private final Set<String> userRoles;
    private final Instant timestamp;

    private FaultContext(String operation, Map<String, String> headers,
                         Map<String, String> parameters, Map<String, Object> attributes,
                         String userId, Set<String> userRoles) {
        this.operation = operation;
        this.headers = headers != null ? new HashMap<>(headers) : new HashMap<>();
        this.parameters = parameters != null ? new HashMap<>(parameters) : new HashMap<>();
        this.attributes = attributes != null ? new HashMap<>(attributes) : new HashMap<>();
        this.userId = userId;
        this.userRoles = userRoles != null ? new HashSet<>(userRoles) : new HashSet<>();
        this.timestamp = Instant.now();
    }

    /**
     * Creates a context that carries only the operation name.
     *
     * @param operation name of the operation being executed
     * @return a context with empty headers, parameters, attributes and roles
     */
    public static FaultContext create(String operation) {
        return new Builder(operation).build();
    }

    // Getters
    public String getOperation() { return operation; }
    public Map<String, String> getHeaders() { return new HashMap<>(headers); }
    public String getHeader(String name) { return headers.get(name); }
    public Map<String, String> getParameters() { return new HashMap<>(parameters); }
    public String getParameter(String name) { return parameters.get(name); }
    public Map<String, Object> getAttributes() { return new HashMap<>(attributes); }
    public Object getAttribute(String name) { return attributes.get(name); }
    public String getUserId() { return userId; }
    public Set<String> getUserRoles() { return new HashSet<>(userRoles); }
    public Instant getTimestamp() { return timestamp; }

    /**
     * Builder for FaultContext.
     * <p>
     * The bulk setters copy their argument, so later single-entry calls never write
     * into a collection owned by the caller and work even when the caller passed an
     * immutable collection.
     */
    public static class Builder {
        private final String operation;
        private Map<String, String> headers;
        private Map<String, String> parameters;
        private Map<String, Object> attributes;
        private String userId;
        private Set<String> userRoles;

        public Builder(String operation) {
            this.operation = operation;
        }

        /**
         * Replaces all headers with a copy of the given map.
         *
         * @param headers new headers; {@code null} clears them
         * @return this builder
         */
        public Builder headers(Map<String, String> headers) {
            this.headers = headers != null ? new HashMap<>(headers) : null;
            return this;
        }

        public Builder header(String name, String value) {
            if (this.headers == null) {
                this.headers = new HashMap<>();
            }
            this.headers.put(name, value);
            return this;
        }

        /**
         * Replaces all parameters with a copy of the given map.
         *
         * @param parameters new parameters; {@code null} clears them
         * @return this builder
         */
        public Builder parameters(Map<String, String> parameters) {
            this.parameters = parameters != null ? new HashMap<>(parameters) : null;
            return this;
        }

        public Builder parameter(String name, String value) {
            if (this.parameters == null) {
                this.parameters = new HashMap<>();
            }
            this.parameters.put(name, value);
            return this;
        }

        /**
         * Replaces all attributes with a copy of the given map.
         *
         * @param attributes new attributes; {@code null} clears them
         * @return this builder
         */
        public Builder attributes(Map<String, Object> attributes) {
            this.attributes = attributes != null ? new HashMap<>(attributes) : null;
            return this;
        }

        public Builder attribute(String name, Object value) {
            if (this.attributes == null) {
                this.attributes = new HashMap<>();
            }
            this.attributes.put(name, value);
            return this;
        }

        public Builder userId(String userId) {
            this.userId = userId;
            return this;
        }

        /**
         * Replaces all user roles with a copy of the given set.
         *
         * @param userRoles new roles; {@code null} clears them
         * @return this builder
         */
        public Builder userRoles(Set<String> userRoles) {
            this.userRoles = userRoles != null ? new HashSet<>(userRoles) : null;
            return this;
        }

        public Builder userRole(String role) {
            if (this.userRoles == null) {
                this.userRoles = new HashSet<>();
            }
            this.userRoles.add(role);
            return this;
        }

        /**
         * @return a new context holding a snapshot of the values set so far
         */
        public FaultContext build() {
            return new FaultContext(operation, headers, parameters, attributes, userId, userRoles);
        }
    }
}
3. Fault Implementations
/**
 * A fault that can be applied around an operation.
 * <p>
 * An implementation decides whether and when to invoke the wrapped operation: it may
 * run it unchanged after a side effect, alter its result, or skip it entirely by
 * returning a value or throwing.
 */
@FunctionalInterface
public interface Fault {
    /**
     * Applies this fault to one execution of an operation.
     *
     * @param operation the wrapped operation; the fault chooses whether to call it
     * @param context   request information for the current execution
     * @param config    fault-specific settings taken from the matching rule
     * @return the value to hand back to the caller in place of the operation's result
     * @throws Exception if the operation fails or the fault itself injects a failure
     */
    Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception;
}
/**
 * Latency fault - delays the operation before running it.
 * <p>
 * Configuration: {@code delay_ms} for a fixed delay, otherwise a uniformly random
 * delay between {@code min_delay_ms} (default 100) and {@code max_delay_ms}
 * (default 5000), both inclusive. Negative values are treated as zero and a reversed
 * range is swapped instead of failing.
 */
public class LatencyFault implements Fault {
    private static final long DEFAULT_MIN_DELAY_MS = 100;
    private static final long DEFAULT_MAX_DELAY_MS = 5000;

    /**
     * Sleeps for the configured delay, then invokes the operation.
     *
     * @param operation the wrapped operation, called after the delay
     * @param context   the fault context; not read by this fault
     * @param config    delay settings
     * @return the operation's own result
     * @throws RuntimeException if the thread is interrupted while sleeping; the
     *                          interrupt flag is restored first
     * @throws Exception        whatever the operation throws
     */
    @Override
    public Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception {
        long delayMs = getDelay(config);
        if (delayMs > 0) {
            try {
                Thread.sleep(delayMs);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Latency fault interrupted", e);
            }
        }
        return operation.call();
    }

    /** Resolves the delay in milliseconds; the result is never negative. */
    private long getDelay(Map<String, Object> config) {
        Object delayObj = config.get("delay_ms");
        if (delayObj instanceof Number) {
            // Thread.sleep rejects negative values with IllegalArgumentException.
            return Math.max(0L, ((Number) delayObj).longValue());
        }
        // Random delay between min and max
        long min = Math.max(0L, getLongConfig(config, "min_delay_ms", DEFAULT_MIN_DELAY_MS));
        long max = Math.max(0L, getLongConfig(config, "max_delay_ms", DEFAULT_MAX_DELAY_MS));
        if (min > max) {
            long swap = min;
            min = max;
            max = swap;
        }
        if (min == max) {
            return min;
        }
        // nextLong's upper bound is exclusive; guard the +1 against overflow.
        long exclusiveBound = max == Long.MAX_VALUE ? max : max + 1;
        return ThreadLocalRandom.current().nextLong(min, exclusiveBound);
    }

    private long getLongConfig(Map<String, Object> config, String key, long defaultValue) {
        Object value = config.get(key);
        if (value instanceof Number) {
            return ((Number) value).longValue();
        }
        return defaultValue;
    }
}
/**
 * Exception fault - throws an exception instead of running the operation.
 * <p>
 * Configuration: {@code exception_type} selects the exception (case-insensitive:
 * {@code timeout}, {@code io}, {@code illegalargument}, {@code illegalstate},
 * {@code nullpointer}, {@code custom}; anything else yields a
 * {@link RuntimeException}) and {@code message} sets its message.
 */
public class ExceptionFault implements Fault {
    /**
     * Always throws the configured exception; the operation is never invoked.
     *
     * @param operation the wrapped operation; never called
     * @param context   the fault context; not read by this fault
     * @param config    exception settings
     * @return never returns normally
     * @throws Exception the configured exception
     */
    @Override
    public Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception {
        String exceptionType = getStringConfig(config, "exception_type", "RuntimeException");
        String message = getStringConfig(config, "message", "Injected fault exception");
        throw createException(exceptionType, message);
    }

    /** Maps a type keyword to a new exception instance carrying {@code message}. */
    private Exception createException(String type, String message) {
        // Locale.ROOT: under a Turkish default locale "IO".toLowerCase() is "ıo",
        // which would silently fall through to the default branch.
        switch (type.toLowerCase(Locale.ROOT)) {
            case "timeout":
                return new TimeoutException(message);
            case "io":
                return new IOException(message);
            case "illegalargument":
                return new IllegalArgumentException(message);
            case "illegalstate":
                return new IllegalStateException(message);
            case "nullpointer":
                return new NullPointerException(message);
            case "custom":
                return new CustomFaultException(message);
            default:
                return new RuntimeException(message);
        }
    }

    private String getStringConfig(Map<String, Object> config, String key, String defaultValue) {
        Object value = config.get(key);
        return value != null ? value.toString() : defaultValue;
    }
}
/**
* Null response fault - short-circuits the call and yields {@code null}.
* <p>
* The wrapped operation is never invoked, so none of its side effects occur.
*/
public class NullResponseFault implements Fault {
/**
* Returns {@code null} without invoking {@code operation}.
*
* @param operation the wrapped operation; intentionally never called
* @param context the fault context; not read by this fault
* @param config the rule configuration; not read by this fault
* @return always {@code null}
*/
@Override
public Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception {
// Don't call the operation, just return null
return null;
}
}
/**
 * Partial response fault - runs the operation and hands back a damaged copy of
 * its result.
 * <p>
 * Strings get one character replaced, maps and lists lose random entries, byte
 * arrays get random bytes inverted. Any other result type, and {@code null}, is
 * returned unchanged. Rates come from {@code corruption_rate} and {@code remove_rate}.
 */
public class PartialResponseFault implements Fault {
    /**
     * Invokes the operation and corrupts what it returned.
     *
     * @param operation the wrapped operation; always called
     * @param context   the fault context; not read by this fault
     * @param config    corruption settings
     * @return the possibly corrupted result
     * @throws Exception whatever the operation throws
     */
    @Override
    public Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception {
        return corruptData(operation.call(), config);
    }

    /** Dispatches on the runtime type of the result. */
    private Object corruptData(Object data, Map<String, Object> config) {
        if (data == null) {
            return null;
        }
        if (data instanceof String) {
            return corruptString((String) data, config);
        }
        if (data instanceof Map) {
            return corruptMap((Map<?, ?>) data, config);
        }
        if (data instanceof List) {
            return corruptList((List<?>) data, config);
        }
        if (data instanceof byte[]) {
            return corruptBytes((byte[]) data, config);
        }
        // Unsupported type: nothing to corrupt.
        return data;
    }

    /** With probability {@code corruption_rate} (default 0.3), replaces one character by '?'. */
    private String corruptString(String str, Map<String, Object> config) {
        double rate = getDoubleConfig(config, "corruption_rate", 0.3);
        boolean leaveIntact = Math.random() > rate;
        if (leaveIntact || str.length() == 0) {
            return str;
        }
        char[] chars = str.toCharArray();
        int victim = ThreadLocalRandom.current().nextInt(str.length());
        chars[victim] = '?';
        return new String(chars);
    }

    /** Copies the map and drops each entry with probability {@code remove_rate} (default 0.2). */
    private Map<?, ?> corruptMap(Map<?, ?> map, Map<String, Object> config) {
        double dropRate = getDoubleConfig(config, "remove_rate", 0.2);
        Map<Object, Object> survivors = new HashMap<>(map);
        survivors.keySet().removeIf(key -> Math.random() < dropRate);
        return survivors;
    }

    /** Copies the list, skipping each element with probability {@code remove_rate} (default 0.2). */
    private List<?> corruptList(List<?> list, Map<String, Object> config) {
        double dropRate = getDoubleConfig(config, "remove_rate", 0.2);
        List<Object> survivors = new ArrayList<>(list.size());
        for (Object item : list) {
            boolean dropped = Math.random() < dropRate;
            if (!dropped) {
                survivors.add(item);
            }
        }
        return survivors;
    }

    /** Copies the array and inverts each byte with probability {@code corruption_rate} (default 0.1). */
    private byte[] corruptBytes(byte[] bytes, Map<String, Object> config) {
        double rate = getDoubleConfig(config, "corruption_rate", 0.1);
        byte[] damaged = bytes.clone();
        int index = 0;
        while (index < damaged.length) {
            if (Math.random() < rate) {
                // Bitwise complement flips all eight bits of the byte.
                damaged[index] = (byte) ~damaged[index];
            }
            index++;
        }
        return damaged;
    }

    private double getDoubleConfig(Map<String, Object> config, String key, double defaultValue) {
        Object raw = config.get(key);
        return raw instanceof Number ? ((Number) raw).doubleValue() : defaultValue;
    }
}
/**
 * CPU spike fault - burns CPU for a while, then runs the operation.
 * <p>
 * Configuration: {@code duration_ms} (default 5000, negative treated as 0) and
 * {@code threads} (default 2, values below 1 treated as 1).
 */
public class CpuSpikeFault implements Fault {
    /**
     * Occupies the configured number of threads for the configured duration and then
     * invokes the operation on the calling thread.
     *
     * @param operation the wrapped operation, called after the spike
     * @param context   the fault context; not read by this fault
     * @param config    spike settings
     * @return the operation's own result
     * @throws Exception whatever the operation throws, or an
     *                   {@link InterruptedException} if the wait is interrupted
     */
    @Override
    public Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception {
        int durationMs = Math.max(0, getIntConfig(config, "duration_ms", 5000));
        // newFixedThreadPool rejects a size below 1 with IllegalArgumentException.
        int threadCount = Math.max(1, getIntConfig(config, "threads", 2));
        if (durationMs > 0) {
            burnCpu(durationMs, threadCount);
        }
        return operation.call();
    }

    /** Runs busy loops on {@code threadCount} threads until the duration has elapsed. */
    private void burnCpu(int durationMs, int threadCount)
            throws InterruptedException, ExecutionException {
        ExecutorService executor = Executors.newFixedThreadPool(threadCount);
        try {
            // nanoTime is monotonic, so a wall-clock adjustment cannot stretch or
            // cut the spike.
            long deadlineNanos = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(durationMs);
            List<Future<?>> futures = new ArrayList<>(threadCount);
            // Start CPU-intensive tasks
            for (int i = 0; i < threadCount; i++) {
                futures.add(executor.submit(() -> {
                    while (System.nanoTime() - deadlineNanos < 0
                            && !Thread.currentThread().isInterrupted()) {
                        // Burn CPU cycles
                        Math.pow(Math.random(), Math.random());
                    }
                }));
            }
            // Wait for CPU spike to complete; the long literal avoids int overflow.
            for (Future<?> future : futures) {
                try {
                    future.get(durationMs + 1000L, TimeUnit.MILLISECONDS);
                } catch (TimeoutException e) {
                    future.cancel(true);
                }
            }
        } finally {
            executor.shutdownNow();
        }
    }

    private int getIntConfig(Map<String, Object> config, String key, int defaultValue) {
        Object value = config.get(key);
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        return defaultValue;
    }
}
/**
 * Memory leak fault - occupies heap while the operation runs.
 * <p>
 * Configuration: {@code leak_size_mb} (default 100, negative treated as 0) and
 * {@code permanent} (default {@code false}). A permanent leak is kept in a static
 * list for the lifetime of the class; a transient one becomes collectable once the
 * operation has returned.
 */
public class MemoryLeakFault implements Fault {
    private static final List<byte[]> MEMORY_LEAK = new ArrayList<>();
    private static final int BYTES_PER_MB = 1024 * 1024;

    /**
     * Allocates the configured amount of memory and invokes the operation while it
     * is held.
     *
     * @param operation the wrapped operation, called after the allocation
     * @param context   the fault context; not read by this fault
     * @param config    leak settings
     * @return the operation's own result
     * @throws Exception whatever the operation throws
     */
    @Override
    public Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception {
        int leakSizeMb = Math.max(0, getIntConfig(config, "leak_size_mb", 100));
        boolean permanent = getBooleanConfig(config, "permanent", false);

        // One array per megabyte: leakSizeMb * 1024 * 1024 overflows int from 2048 MB
        // upwards and would ask for a negative array size.
        List<byte[]> chunks = new ArrayList<>(leakSizeMb);
        for (int i = 0; i < leakSizeMb; i++) {
            byte[] chunk = new byte[BYTES_PER_MB];
            // Fill with data to ensure memory is actually allocated
            ThreadLocalRandom.current().nextBytes(chunk);
            chunks.add(chunk);
        }
        if (permanent) {
            // Keep references to prevent GC
            synchronized (MEMORY_LEAK) {
                MEMORY_LEAK.addAll(chunks);
            }
        }
        try {
            return operation.call();
        } finally {
            // Keeps a transient leak strongly reachable until the operation is done;
            // without this the chunks are dead after the allocation loop.
            java.lang.ref.Reference.reachabilityFence(chunks);
        }
    }

    private int getIntConfig(Map<String, Object> config, String key, int defaultValue) {
        Object value = config.get(key);
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        return defaultValue;
    }

    private boolean getBooleanConfig(Map<String, Object> config, String key, boolean defaultValue) {
        Object value = config.get(key);
        if (value instanceof Boolean) {
            return (Boolean) value;
        }
        return defaultValue;
    }
}
/**
 * Network partition fault - simulates network issues.
 * <p>
 * Configuration: {@code failure_type} (case-insensitive, default {@code timeout}):
 * <ul>
 *   <li>{@code timeout} - runs the operation on a worker thread and fails with a
 *       {@link TimeoutException} only if it takes longer than {@code timeout_ms}
 *       (default 30000)</li>
 *   <li>{@code connection_reset} - throws an {@link IOException} without running
 *       the operation</li>
 *   <li>{@code dns_failure} - throws an {@link UnknownHostException} without running
 *       the operation</li>
 * </ul>
 * Any other value runs the operation unchanged.
 */
public class NetworkPartitionFault implements Fault {
    /**
     * Applies the configured network failure mode.
     *
     * @param operation the wrapped operation
     * @param context   the fault context; not read by this fault
     * @param config    failure settings
     * @return the operation's result when no failure is raised
     * @throws Exception the simulated failure, or whatever the operation throws
     */
    @Override
    public Object apply(Callable<?> operation, FaultContext context, Map<String, Object> config) throws Exception {
        String failureType = getStringConfig(config, "failure_type", "timeout");
        // Locale.ROOT keeps keyword matching independent of the default locale.
        switch (failureType.toLowerCase(Locale.ROOT)) {
            case "timeout":
                return simulateTimeout(operation, config);
            case "connection_reset":
                return simulateConnectionReset(operation, config);
            case "dns_failure":
                return simulateDnsFailure(operation, config);
            default:
                return operation.call();
        }
    }

    /** Runs the operation under a deadline, cancelling it when the deadline passes. */
    private Object simulateTimeout(Callable<?> operation, Map<String, Object> config) throws Exception {
        long timeoutMs = getLongConfig(config, "timeout_ms", 30000);
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            // Future<?>: submitting a Callable<?> does not yield a Future<Object>.
            Future<?> future = executor.submit(operation);
            try {
                return future.get(timeoutMs, TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                future.cancel(true);
                throw new TimeoutException("Network timeout simulated by fault injection");
            } catch (ExecutionException e) {
                // Surface the operation's own failure, as the other faults do,
                // instead of the executor's wrapper.
                Throwable cause = e.getCause();
                if (cause instanceof Exception) {
                    throw (Exception) cause;
                }
                if (cause instanceof Error) {
                    throw (Error) cause;
                }
                throw e;
            }
        } finally {
            // shutdownNow also interrupts the worker if this thread was interrupted
            // while waiting.
            executor.shutdownNow();
        }
    }

    private Object simulateConnectionReset(Callable<?> operation, Map<String, Object> config) throws Exception {
        throw new IOException("Connection reset by peer - simulated network fault");
    }

    private Object simulateDnsFailure(Callable<?> operation, Map<String, Object> config) throws Exception {
        throw new UnknownHostException("DNS resolution failed - simulated network fault");
    }

    private String getStringConfig(Map<String, Object> config, String key, String defaultValue) {
        Object value = config.get(key);
        return value != null ? value.toString() : defaultValue;
    }

    private long getLongConfig(Map<String, Object> config, String key, long defaultValue) {
        Object value = config.get(key);
        if (value instanceof Number) {
            return ((Number) value).longValue();
        }
        return defaultValue;
    }
}
/**
* Custom fault exception - unchecked exception type available for injected faults.
*/
public class CustomFaultException extends RuntimeException {
/**
* Creates the exception with a detail message.
*
* @param message the detail message
*/
public CustomFaultException(String message) {
super(message);
}
/**
* Creates the exception with a detail message and an underlying cause.
*
* @param message the detail message
* @param cause the exception that led to this one
*/
public CustomFaultException(String message, Throwable cause) {
super(message, cause);
}
}
4. Fault Registry and Metrics
/**
 * Registry mapping fault type names to {@link Fault} implementations.
 * <p>
 * Backed by a {@link ConcurrentHashMap}, which does not accept {@code null} keys;
 * lookups and removals therefore treat a {@code null} type as "not registered"
 * instead of throwing.
 */
public class FaultRegistry {
    private final Map<String, Fault> faults;

    public FaultRegistry() {
        this.faults = new ConcurrentHashMap<>();
    }

    /**
     * Registers a fault under a type name, replacing any previous registration.
     *
     * @param type  fault type name
     * @param fault implementation to use for that type
     * @throws NullPointerException if {@code type} or {@code fault} is {@code null}
     */
    public void registerFault(String type, Fault fault) {
        Objects.requireNonNull(type, "type");
        Objects.requireNonNull(fault, "fault");
        faults.put(type, fault);
    }

    /**
     * Removes the registration for a type; does nothing if there is none.
     *
     * @param type fault type name, may be {@code null}
     */
    public void unregisterFault(String type) {
        if (type != null) {
            faults.remove(type);
        }
    }

    /**
     * Looks up the fault registered for a type.
     *
     * @param type fault type name, may be {@code null}
     * @return the registered fault, or {@code null} if the type is {@code null} or
     *         unknown
     */
    public Fault getFault(String type) {
        return type != null ? faults.get(type) : null;
    }

    /**
     * @return a snapshot copy of the registered type names
     */
    public Set<String> getRegisteredFaults() {
        return new HashSet<>(faults.keySet());
    }
}
/**
 * Collects per-operation and global counters for fault injections and errors.
 * <p>
 * Each counter is an {@link AtomicLong} held in a concurrent map keyed by operation
 * name; {@link #reset()} clears the maps and totals one after another rather than as
 * a single atomic step.
 */
public class FaultMetrics {
    private final ConcurrentMap<String, AtomicLong> injectionCounts = new ConcurrentHashMap<>();
    private final ConcurrentMap<String, AtomicLong> errorCounts = new ConcurrentHashMap<>();
    private final ConcurrentMap<String, AtomicLong> faultErrorCounts = new ConcurrentHashMap<>();
    private final AtomicLong totalInjections = new AtomicLong(0);
    private final AtomicLong totalErrors = new AtomicLong(0);

    public FaultMetrics() {
        // All state is set up by the field initializers above.
    }

    /** Counts one injected fault for the operation and for the global total. */
    public void recordInjection(String operation) {
        bump(injectionCounts, operation);
        totalInjections.incrementAndGet();
    }

    /** Counts one failed execution for the operation and for the global total. */
    public void recordError(String operation) {
        bump(errorCounts, operation);
        totalErrors.incrementAndGet();
    }

    /** Counts one failure of the injection machinery itself for the operation. */
    public void recordFaultError(String operation) {
        bump(faultErrorCounts, operation);
    }

    /** @return injections recorded for the operation, 0 if none */
    public long getInjectionCount(String operation) {
        return read(injectionCounts, operation);
    }

    /** @return errors recorded for the operation, 0 if none */
    public long getErrorCount(String operation) {
        return read(errorCounts, operation);
    }

    /** @return fault errors recorded for the operation, 0 if none */
    public long getFaultErrorCount(String operation) {
        return read(faultErrorCounts, operation);
    }

    public long getTotalInjections() {
        return totalInjections.get();
    }

    public long getTotalErrors() {
        return totalErrors.get();
    }

    /**
     * @return errors divided by injections for the operation, or 0.0 when no
     *         injection has been recorded for it
     */
    public double getErrorRate(String operation) {
        long injected = getInjectionCount(operation);
        long failed = getErrorCount(operation);
        if (injected > 0) {
            return (double) failed / injected;
        }
        return 0.0;
    }

    /** @return a snapshot of injection counts keyed by operation */
    public Map<String, Long> getInjectionCounts() {
        return snapshot(injectionCounts);
    }

    /** @return a snapshot of error counts keyed by operation */
    public Map<String, Long> getErrorCounts() {
        return snapshot(errorCounts);
    }

    /** Clears every per-operation counter and zeroes both totals. */
    public void reset() {
        injectionCounts.clear();
        errorCounts.clear();
        faultErrorCounts.clear();
        totalInjections.set(0);
        totalErrors.set(0);
    }

    /** Increments the counter for {@code key}, creating it on first use. */
    private static void bump(ConcurrentMap<String, AtomicLong> counters, String key) {
        counters.computeIfAbsent(key, unused -> new AtomicLong()).incrementAndGet();
    }

    /** Reads the counter for {@code key}, treating a missing counter as zero. */
    private static long read(ConcurrentMap<String, AtomicLong> counters, String key) {
        AtomicLong counter = counters.get(key);
        if (counter == null) {
            return 0;
        }
        return counter.get();
    }

    /** Copies the current counter values into a plain map. */
    private static Map<String, Long> snapshot(ConcurrentMap<String, AtomicLong> counters) {
        Map<String, Long> copy = new HashMap<>();
        for (Map.Entry<String, AtomicLong> entry : counters.entrySet()) {
            copy.put(entry.getKey(), entry.getValue().get());
        }
        return copy;
    }
}
5. Spring Boot Integration
/**
* Spring Boot Auto-Configuration exposing the fault injection components as beans.
* <p>
* NOTE(review): {@code FaultInjectionProperties} is enabled for binding here, but no
* bean method in this class reads it, so the bound {@code enabled} flag and
* {@code rules} are not applied to the engine by this class - confirm whether that
* wiring lives elsewhere.
* NOTE(review): {@code FaultInjectionAspect} is also annotated with {@code @Component};
* if it is component-scanned as well, the {@code @Bean} method below may register a
* second aspect instance - verify.
*/
@Configuration
@EnableConfigurationProperties(FaultInjectionProperties.class)
public class FaultInjectionAutoConfiguration {
/**
* Exposes the engine singleton as a bean unless the application defines its own.
*/
@Bean
@ConditionalOnMissingBean
public FaultInjectionEngine faultInjectionEngine() {
return FaultInjectionEngine.getInstance();
}
/**
* Creates the management REST controller unless the application defines its own.
*/
@Bean
@ConditionalOnMissingBean
public FaultInjectionController faultInjectionController() {
return new FaultInjectionController();
}
/**
* Creates the aspect that wraps {@code @InjectFault} join points.
* Unlike the other beans this one is not guarded by {@code @ConditionalOnMissingBean}.
*/
@Bean
public FaultInjectionAspect faultInjectionAspect() {
return new FaultInjectionAspect();
}
}
/**
 * Externalized settings bound from the {@code fault.injection} prefix: a global
 * switch, a map of named rule definitions and metrics options.
 */
@ConfigurationProperties(prefix = "fault.injection")
public class FaultInjectionProperties {
    private boolean enabled = true;
    private Map<String, RuleProperties> rules = new HashMap<>();
    private MetricsProperties metrics = new MetricsProperties();

    public boolean isEnabled() {
        return this.enabled;
    }

    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    public Map<String, RuleProperties> getRules() {
        return this.rules;
    }

    public void setRules(Map<String, RuleProperties> rules) {
        this.rules = rules;
    }

    public MetricsProperties getMetrics() {
        return this.metrics;
    }

    public void setMetrics(MetricsProperties metrics) {
        this.metrics = metrics;
    }

    /**
     * Settings for one rule. Defaults: target {@code "*"}, probability {@code 1.0},
     * empty config, no TTL, enabled.
     */
    public static class RuleProperties {
        private String faultType;
        private String targetOperation = "*";
        private double probability = 1.0;
        private Map<String, Object> config = new HashMap<>();
        private Duration ttl;
        private boolean enabled = true;

        public String getFaultType() {
            return this.faultType;
        }

        public void setFaultType(String faultType) {
            this.faultType = faultType;
        }

        public String getTargetOperation() {
            return this.targetOperation;
        }

        public void setTargetOperation(String targetOperation) {
            this.targetOperation = targetOperation;
        }

        public double getProbability() {
            return this.probability;
        }

        public void setProbability(double probability) {
            this.probability = probability;
        }

        public Map<String, Object> getConfig() {
            return this.config;
        }

        public void setConfig(Map<String, Object> config) {
            this.config = config;
        }

        public Duration getTtl() {
            return this.ttl;
        }

        public void setTtl(Duration ttl) {
            this.ttl = ttl;
        }

        public boolean isEnabled() {
            return this.enabled;
        }

        public void setEnabled(boolean enabled) {
            this.enabled = enabled;
        }
    }

    /**
     * Metrics settings. Defaults: enabled, export interval of one minute.
     */
    public static class MetricsProperties {
        private boolean enabled = true;
        private Duration exportInterval = Duration.ofMinutes(1);

        public boolean isEnabled() {
            return this.enabled;
        }

        public void setEnabled(boolean enabled) {
            this.enabled = enabled;
        }

        public Duration getExportInterval() {
            return this.exportInterval;
        }

        public void setExportInterval(Duration exportInterval) {
            this.exportInterval = exportInterval;
        }
    }
}
/**
 * Spring AOP aspect that routes {@code @InjectFault} join points through the
 * {@link FaultInjectionEngine}.
 * <p>
 * Method-level annotations are handled by {@link #injectFault}; class-level
 * annotations by {@link #injectFaultClass}. A method that is annotated itself and
 * also lives in an annotated class is still advised by both.
 */
@Aspect
@Component
public class FaultInjectionAspect {
    private final FaultInjectionEngine faultEngine;

    public FaultInjectionAspect() {
        this.faultEngine = FaultInjectionEngine.getInstance();
    }

    /**
     * Wraps a method annotated with {@code @InjectFault}.
     *
     * @param joinPoint   the intercepted invocation
     * @param injectFault the method's annotation; its {@code value} names the
     *                    operation, falling back to the short method signature
     * @return the result of the invocation or of the injected fault
     * @throws Throwable declared for the advice contract; failures reach the caller
     *                   as the {@link RuntimeException} raised by the engine
     */
    @Around("@annotation(injectFault)")
    public Object injectFault(ProceedingJoinPoint joinPoint, InjectFault injectFault) throws Throwable {
        String operation = !injectFault.value().isEmpty() ?
                injectFault.value() :
                joinPoint.getSignature().toShortString();
        return faultEngine.executeWithFault(operation, proceedAsCallable(joinPoint));
    }

    /**
     * Wraps every method of a class annotated with {@code @InjectFault}, using the
     * short method signature as the operation name.
     * <p>
     * The pointcut is limited to {@code @within}: binding the same formal on both
     * sides of {@code ||} is ambiguous, and the former {@code @annotation} branch
     * advised every method-annotated method a second time.
     *
     * @param joinPoint        the intercepted invocation
     * @param injectFaultClass the class-level annotation
     * @return the result of the invocation or of the injected fault
     * @throws Throwable declared for the advice contract; failures reach the caller
     *                   as the {@link RuntimeException} raised by the engine
     */
    @Around("@within(injectFaultClass)")
    public Object injectFaultClass(ProceedingJoinPoint joinPoint, InjectFault injectFaultClass) throws Throwable {
        String operation = joinPoint.getSignature().toShortString();
        return faultEngine.executeWithFault(operation, proceedAsCallable(joinPoint));
    }

    /**
     * Adapts {@code joinPoint.proceed()} to a {@link Callable}.
     * <p>
     * {@code proceed()} is declared to throw {@link Throwable}, which
     * {@link Callable#call()} cannot declare, so a bare method reference does not
     * compile. Exceptions and errors are rethrown unchanged; any other throwable is
     * wrapped.
     */
    private static Callable<Object> proceedAsCallable(ProceedingJoinPoint joinPoint) {
        return () -> {
            try {
                return joinPoint.proceed();
            } catch (Exception | Error e) {
                throw e;
            } catch (Throwable t) {
                throw new java.lang.reflect.UndeclaredThrowableException(t);
            }
        };
    }
}
/**
* Marks a method or type whose invocations should go through fault injection.
* Retained at runtime so it can be read reflectively.
* <p>
* NOTE(review): within this file only {@code value()} is read (by
* {@code FaultInjectionAspect}); {@code faultType()} and {@code probability()} have no
* visible consumer - confirm whether they are used elsewhere.
*/
@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
public @interface InjectFault {
// Operation name; empty by default.
String value() default "";
// Fault type name; empty by default.
String faultType() default "";
// Injection probability; 1.0 by default.
double probability() default 1.0;
}
/**
 * REST endpoints under {@code /faults} for managing rules, the global switch and
 * metrics of the shared {@link FaultInjectionEngine}.
 * <p>
 * NOTE(review): no access control is visible on this class; these endpoints can
 * change runtime behaviour, so confirm they are protected elsewhere.
 */
@RestController
@RequestMapping("/faults")
public class FaultInjectionController {
    private final FaultInjectionEngine faultEngine;

    public FaultInjectionController() {
        this.faultEngine = FaultInjectionEngine.getInstance();
    }

    /** Registers the posted rule and echoes its id. */
    @PostMapping("/rules")
    public ResponseEntity<String> addRule(@RequestBody FaultRule rule) {
        faultEngine.registerRule(rule);
        String confirmation = "Rule added: " + rule.getId();
        return ResponseEntity.ok(confirmation);
    }

    /** Removes the rule with the given id; answers OK whether or not it existed. */
    @DeleteMapping("/rules/{ruleId}")
    public ResponseEntity<String> removeRule(@PathVariable String ruleId) {
        faultEngine.removeRule(ruleId);
        String confirmation = "Rule removed: " + ruleId;
        return ResponseEntity.ok(confirmation);
    }

    /** Returns a snapshot of the active rules keyed by id. */
    @GetMapping("/rules")
    public ResponseEntity<Map<String, FaultRule>> getRules() {
        Map<String, FaultRule> snapshot = faultEngine.getActiveRules();
        return ResponseEntity.ok(snapshot);
    }

    /** Switches fault injection on or off globally. */
    @PostMapping("/enable")
    public ResponseEntity<String> enableFaults(@RequestParam boolean enabled) {
        faultEngine.setEnabled(enabled);
        String state = enabled ? "enabled" : "disabled";
        return ResponseEntity.ok("Fault injection " + state);
    }

    /** Returns the engine's metrics collector. */
    @GetMapping("/metrics")
    public ResponseEntity<FaultMetrics> getMetrics() {
        FaultMetrics current = faultEngine.getMetrics();
        return ResponseEntity.ok(current);
    }

    /** Clears all collected metrics. */
    @PostMapping("/metrics/reset")
    public ResponseEntity<String> resetMetrics() {
        FaultMetrics current = faultEngine.getMetrics();
        current.reset();
        return ResponseEntity.ok("Metrics reset");
    }
}
6. Advanced Condition Implementations
/**
 * Condition that starts holding once the number of evaluations inside the current
 * time window reaches a threshold. The counter restarts whenever a window has
 * elapsed since the last restart.
 */
public class RequestCountCondition implements FaultCondition {
    private final int threshold;
    private final AtomicInteger requestCount;
    private final AtomicLong lastReset;
    private final Duration window;

    /**
     * Uses a one-minute window.
     *
     * @param threshold number of evaluations per window from which the condition holds
     */
    public RequestCountCondition(int threshold) {
        this(threshold, Duration.ofMinutes(1));
    }

    /**
     * @param threshold number of evaluations per window from which the condition holds
     * @param window    length of the counting window
     */
    public RequestCountCondition(int threshold, Duration window) {
        this.threshold = threshold;
        this.requestCount = new AtomicInteger(0);
        this.lastReset = new AtomicLong(System.currentTimeMillis());
        this.window = window;
    }

    /**
     * Counts this evaluation and compares the running count with the threshold.
     *
     * @param context not read by this condition
     * @return {@code true} once the count in the current window has reached the threshold
     */
    @Override
    public boolean test(FaultContext context) {
        resetIfNeeded();
        int seenInWindow = requestCount.incrementAndGet();
        return seenInWindow >= threshold;
    }

    /** Restarts the counter when the window has elapsed; only one thread wins the swap. */
    private void resetIfNeeded() {
        long currentMillis = System.currentTimeMillis();
        long previousReset = lastReset.get();
        boolean windowElapsed = currentMillis - previousReset > window.toMillis();
        if (windowElapsed && lastReset.compareAndSet(previousReset, currentMillis)) {
            requestCount.set(0);
        }
    }
}
/**
 * Condition that holds while the tracked error rate inside the current time window
 * is at or above a threshold.
 * <p>
 * Errors are not observed from real traffic: each evaluation is counted as an error
 * with a fixed 10% chance, as a stand-in for real error tracking.
 */
public class ErrorRateCondition implements FaultCondition {
    private final double threshold;
    private final AtomicInteger totalRequests;
    private final AtomicInteger errorRequests;
    private final AtomicLong lastReset;
    private final Duration window;

    /**
     * Uses a five-minute window.
     *
     * @param threshold error rate from which the condition holds
     */
    public ErrorRateCondition(double threshold) {
        this(threshold, Duration.ofMinutes(5));
    }

    /**
     * @param threshold error rate from which the condition holds
     * @param window    length of the measuring window
     */
    public ErrorRateCondition(double threshold, Duration window) {
        this.threshold = threshold;
        this.totalRequests = new AtomicInteger(0);
        this.errorRequests = new AtomicInteger(0);
        this.lastReset = new AtomicLong(System.currentTimeMillis());
        this.window = window;
    }

    /**
     * Counts this evaluation, possibly counts a simulated error, and compares the
     * resulting rate with the threshold.
     *
     * @param context not read by this condition
     * @return {@code true} if errors divided by requests is at least the threshold
     */
    @Override
    public boolean test(FaultContext context) {
        resetIfNeeded();
        totalRequests.incrementAndGet();
        // Placeholder for real error tracking: simulate a 10% error rate.
        boolean simulatedError = Math.random() < 0.1;
        if (simulatedError) {
            errorRequests.incrementAndGet();
        }
        double observedRate = (double) errorRequests.get() / totalRequests.get();
        return observedRate >= threshold;
    }

    /** Restarts both counters when the window has elapsed; only one thread wins the swap. */
    private void resetIfNeeded() {
        long currentMillis = System.currentTimeMillis();
        long previousReset = lastReset.get();
        boolean windowElapsed = currentMillis - previousReset > window.toMillis();
        if (windowElapsed && lastReset.compareAndSet(previousReset, currentMillis)) {
            totalRequests.set(0);
            errorRequests.set(0);
        }
    }
}
7. Usage Examples
/**
 * Demo service showing fault injection usage.
 *
 * <p>Offers the same order pipeline through three entry points: an
 * {@code @InjectFault}-annotated method, a call wrapped explicitly in
 * {@code executeWithFault}, and a variant that also assembles a
 * {@code FaultContext}.
 */
@Service
public class OrderService {
    private final FaultInjectionEngine faultEngine = FaultInjectionEngine.getInstance();

    /**
     * Annotation-driven entry point.
     *
     * @param request order to process
     * @return the created order
     */
    @InjectFault("order.process")
    public Order processOrder(OrderRequest request) {
        // This method is automatically wrapped by AOP
        return doProcessOrder(request);
    }

    /**
     * Programmatic entry point: runs the pipeline inside
     * {@code executeWithFault("order.process", ...)}.
     *
     * @param request order to process
     * @return the created order
     */
    public Order processOrderManual(OrderRequest request) {
        return faultEngine.executeWithFault("order.process", () -> doProcessOrder(request));
    }

    /**
     * Programmatic entry point that also builds a request-specific context.
     *
     * @param request order to process
     * @return the created order
     */
    public Order processOrderWithContext(OrderRequest request) {
        // NOTE(review): this context is assembled but never handed to the
        // engine; executeWithFault(String, Callable) is called with only the
        // operation name. Confirm whether a context-accepting overload exists
        // and pass it there.
        FaultContext context = FaultContext.create("order.process")
            .header("user-agent", request.getUserAgent())
            .parameter("priority", request.getPriority())
            .userId(request.getUserId())
            .userRole("customer")
            .build();
        return faultEngine.executeWithFault("order.process", () -> {
            // Business logic
            return createOrder(request);
        });
    }

    /**
     * Shared order pipeline: validate, take payment, update inventory, create
     * the order. Previously referenced by {@link #processOrder} without being
     * defined.
     */
    private Order doProcessOrder(OrderRequest request) {
        validateOrder(request);
        processPayment(request);
        updateInventory(request);
        return createOrder(request);
    }

    private void validateOrder(OrderRequest request) {
        // Validation logic
    }

    /** Payment step, run under the "payment.process" operation name. */
    private void processPayment(OrderRequest request) {
        faultEngine.executeWithFault("payment.process", () -> {
            // Payment processing logic
            return null;
        });
    }

    /** Inventory step, run under the "inventory.update" operation name. */
    private void updateInventory(OrderRequest request) {
        faultEngine.executeWithFault("inventory.update", () -> {
            // Inventory update logic
            return null;
        });
    }

    /** Builds an order with a random UUID as id and the request's items. */
    private Order createOrder(OrderRequest request) {
        return new Order(UUID.randomUUID().toString(), request.getItems());
    }
}
/**
 * Demo application.
 *
 * <p>Starts the Spring application, registers one latency rule and one
 * exception rule with the engine, submits the same order ten times through
 * {@code OrderService.processOrderManual}, and prints the engine's metrics.
 */
@SpringBootApplication
@EnableFaultInjection
public class FaultInjectionDemo {
    public static void main(String[] args) {
        SpringApplication.run(FaultInjectionDemo.class, args);
        demonstrateFaultInjection();
    }

    private static void demonstrateFaultInjection() {
        FaultInjectionEngine engine = FaultInjectionEngine.getInstance();

        // Add some fault rules
        FaultRule latencyRule = buildLatencyRule();
        FaultRule exceptionRule = buildExceptionRule();
        engine.registerRule(latencyRule);
        engine.registerRule(exceptionRule);

        // Simulate service calls
        OrderService service = new OrderService();
        OrderRequest request = new OrderRequest("user123", List.of("item1", "item2"));
        int attempt = 0;
        while (attempt < 10) {
            submitOrder(service, request);
            attempt++;
        }

        // Print metrics
        printMetrics(engine.getMetrics());
    }

    /** Latency rule for "order.process", expiring after one hour. */
    private static FaultRule buildLatencyRule() {
        return new FaultRule.Builder()
            .id("api-latency")
            .faultType("latency")
            .targetOperation("order.process")
            .probability(0.3)
            .config("min_delay_ms", 1000)
            .config("max_delay_ms", 5000)
            .condition(FaultConditions.random(0.5))
            .ttl(Duration.ofHours(1))
            .build();
    }

    /** Exception rule for "payment.process", limited to the 14:00-16:00 time window. */
    private static FaultRule buildExceptionRule() {
        return new FaultRule.Builder()
            .id("api-exception")
            .faultType("exception")
            .targetOperation("payment.process")
            .probability(0.2)
            .config("exception_type", "IOException")
            .config("message", "Payment service unavailable")
            .condition(FaultConditions.timeWindow(
                LocalTime.of(14, 0), LocalTime.of(16, 0)))
            .build();
    }

    /** Runs one order and prints either its id or the failure message. */
    private static void submitOrder(OrderService service, OrderRequest request) {
        try {
            Order order = service.processOrderManual(request);
            System.out.println("Order processed: " + order.getId());
        } catch (Exception e) {
            System.out.println("Order failed: " + e.getMessage());
        }
    }

    /** Prints the injection and error totals. */
    private static void printMetrics(FaultMetrics metrics) {
        System.out.println("Total injections: " + metrics.getTotalInjections());
        System.out.println("Total errors: " + metrics.getTotalErrors());
    }
}
// Example configuration in application.yml
/*
fault:
  injection:
    enabled: true
    rules:
      api-latency:
        fault-type: latency
        target-operation: "order.process"
        probability: 0.3
        config:
          min_delay_ms: 1000
          max_delay_ms: 5000
        ttl: 1h
      api-exception:
        fault-type: exception
        target-operation: "payment.process"
        probability: 0.2
        config:
          exception_type: "IOException"
          message: "Payment service unavailable"
*/
8. Resilience4j Integration
/**
 * Integration with Resilience4j Circuit Breaker.
 *
 * <p>Runs the fault-injected "order.process" operation through a circuit
 * breaker created with default settings under the name "orderService".
 */
@Component
public class FaultTolerantService {
    private final CircuitBreaker circuitBreaker;
    private final FaultInjectionEngine faultEngine;

    public FaultTolerantService() {
        this.circuitBreaker = CircuitBreaker.ofDefaults("orderService");
        this.faultEngine = FaultInjectionEngine.getInstance();
    }

    /**
     * Processes an order with fault injection applied inside the circuit breaker.
     *
     * @param request order to process
     * @return the created order
     */
    public Order processOrderResilient(OrderRequest request) {
        return CircuitBreaker
            .decorateSupplier(circuitBreaker, () -> runWithFaults(request))
            .get();
    }

    /** Business logic with fault injection. */
    private Order runWithFaults(OrderRequest request) {
        return faultEngine.executeWithFault("order.process", () -> processOrderInternal(request));
    }

    /** Actual business logic: a new order with a random UUID id and the request's items. */
    private Order processOrderInternal(OrderRequest request) {
        String orderId = UUID.randomUUID().toString();
        return new Order(orderId, request.getItems());
    }
}
Key Features
- Multiple Fault Types: Latency, exceptions, null responses, partial data, CPU/memory spikes, network issues
- Dynamic Configuration: Runtime rule management via REST API
- Conditional Injection: Based on time, headers, users, error rates, etc.
- Metrics Collection: Track injection counts, error rates, and fault effectiveness
- Spring Boot Integration: Auto-configuration and AOP support
- Resilience Testing: Test circuit breakers, retries, and fallbacks
- Safety Controls: TTL, probability, and global enable/disable
Usage Examples
Basic Fault Injection
/** Looks up a user by id; the method is annotated for the "database.query" operation. */
@InjectFault("database.query")
public User findUser(String userId) {
    User found = userRepository.findById(userId);
    return found;
}
Programmatic Usage
faultEngine.executeWithFault("external.api.call", () -> {
return externalService.callApi(request);
});
REST API Management
# Add fault rule (replace localhost:8080 with the host/port the service listens on)
curl -X POST 'http://localhost:8080/faults/rules' \
  -H 'Content-Type: application/json' \
  -d '{
    "id": "high-latency",
    "faultType": "latency",
    "targetOperation": "order.process",
    "probability": 0.5,
    "config": {"delay_ms": 2000}
  }'
# Enable/disable (URL is quoted so the shell does not treat "?" as a glob)
curl -X POST 'http://localhost:8080/faults/enable?enabled=false'
This comprehensive fault injection framework enables thorough resilience testing of microservices by simulating real-world failure scenarios in a controlled manner.