Introduction
Fault Injection Testing is a technique for improving software reliability by deliberately introducing faults to test system behavior under failure conditions. This approach helps identify weaknesses, validate error handling, and ensure system resilience.
Architecture Overview
Fault Injection Framework
/**
 * High-level sketch of how the fault injection components collaborate.
 *
 * <p>NOTE(review): this is an overview class; the collaborator fields are never assigned in
 * the code shown here, and the calls below use simplified signatures that differ from the
 * concrete {@code MonitoringService} / {@code InjectionEngine} listings later in the file.
 */
public class FaultInjectionArchitecture {
    /**
     * Fault Injection Components:
     * 1. Fault Registry - Manage fault definitions
     * 2. Injection Engine - Apply faults at runtime
     * 3. Monitoring - Track fault injections and system behavior
     * 4. Recovery - Handle fault scenarios and cleanup
     */
    private FaultRegistry faultRegistry;
    private InjectionEngine injectionEngine;
    private MonitoringService monitoringService;
    private RecoveryManager recoveryManager;

    /**
     * Looks up the fault registered under {@code faultId}, records the injection and applies
     * the fault to {@code target}.
     *
     * @param faultId id of a registered fault
     * @param target  object the fault is applied to
     * @throws IllegalArgumentException if no fault is registered under {@code faultId}
     */
    public void injectFault(String faultId, Object target) {
        FaultDefinition fault = faultRegistry.getFault(faultId);
        if (fault == null) {
            // Fail before anything is recorded: an unknown id must not show up in the
            // injection metrics, and the engine must not be handed a null definition.
            throw new IllegalArgumentException("Unknown fault id: " + faultId);
        }
        monitoringService.recordInjection(faultId);
        injectionEngine.inject(fault, target);
    }
}
Core Framework Implementation
Fault Definition and Registry
/**
 * Categories of fault a {@code FaultDefinition} can describe.
 *
 * <p>{@code InjectionEngine.initializeHandlers()} maps a type to the handler that applies it.
 * In the listing shown, no handler is registered for {@link #NULL_POINTER} or {@link #CUSTOM},
 * so faults of those types fall through to the original operation.
 */
public enum FaultType {
// Delay before the operation runs (see LatencyFaultHandler).
LATENCY,
// Throw a configured exception instead of running the operation (see ExceptionFaultHandler).
EXCEPTION,
// Retain heap memory on every injection (see MemoryLeakFaultHandler).
MEMORY_LEAK,
// NOTE(review): handler class (CpuSpikeFaultHandler) is referenced but not shown in this file.
CPU_SPIKE,
// NOTE(review): handler class (NetworkFailureFaultHandler) is referenced but not shown in this file.
NETWORK_FAILURE,
// NOTE(review): handler class (DatabaseFailureFaultHandler) is referenced but not shown in this file.
DATABASE_FAILURE,
// No handler is registered for this type in InjectionEngine.initializeHandlers().
NULL_POINTER,
// Run the operation under a deadline (see TimeoutFaultHandler).
TIMEOUT,
// No handler is registered for this type in InjectionEngine.initializeHandlers().
CUSTOM
}
/**
 * Immutable description of a single injectable fault: what kind it is, how likely it is to
 * fire, its handler-specific parameters and an optional predicate restricting where it applies.
 *
 * <p>Instances are normally created through {@link Builder}.
 */
public class FaultDefinition {
    private final String id;
    private final String name;
    private final FaultType type;
    private final double probability; // 0.0 to 1.0
    private final Map<String, Object> parameters;
    private final Duration duration;
    private final Predicate<FaultContext> condition;

    /**
     * @param id          registry key of the fault
     * @param name        human-readable name
     * @param type        fault category, used to select the handler
     * @param probability chance in {@code [0.0, 1.0]} that a call is affected
     * @param parameters  handler-specific settings; copied, {@code null} means none
     * @param duration    intended lifetime of the fault
     * @param condition   predicate a call must satisfy; {@code null} means "always"
     * @throws IllegalArgumentException if {@code probability} is NaN or outside {@code [0.0, 1.0]}
     */
    public FaultDefinition(String id, String name, FaultType type,
                           double probability, Map<String, Object> parameters,
                           Duration duration, Predicate<FaultContext> condition) {
        if (Double.isNaN(probability) || probability < 0.0 || probability > 1.0) {
            throw new IllegalArgumentException(
                    "probability must be between 0.0 and 1.0, was: " + probability);
        }
        this.id = id;
        this.name = name;
        this.type = type;
        this.probability = probability;
        // Defensive copy: the builder keeps mutating its own map, and a reused builder must
        // not change definitions it has already produced.
        Map<String, Object> copy = new HashMap<>();
        if (parameters != null) {
            copy.putAll(parameters);
        }
        this.parameters = Collections.unmodifiableMap(copy);
        this.duration = duration;
        this.condition = condition != null ? condition : ctx -> true;
    }

    /** Fluent builder; defaults are probability 1.0, no parameters and a 30 second duration. */
    public static class Builder {
        private String id;
        private String name;
        private FaultType type;
        private double probability = 1.0;
        private Map<String, Object> parameters = new HashMap<>();
        private Duration duration = Duration.ofSeconds(30);
        private Predicate<FaultContext> condition;

        public Builder id(String id) { this.id = id; return this; }
        public Builder name(String name) { this.name = name; return this; }
        public Builder type(FaultType type) { this.type = type; return this; }
        public Builder probability(double probability) { this.probability = probability; return this; }
        public Builder parameter(String key, Object value) { this.parameters.put(key, value); return this; }
        public Builder duration(Duration duration) { this.duration = duration; return this; }
        public Builder condition(Predicate<FaultContext> condition) { this.condition = condition; return this; }

        /**
         * @return a new definition holding a snapshot of the builder's current values
         * @throws IllegalArgumentException if the configured probability is invalid
         */
        public FaultDefinition build() {
            return new FaultDefinition(id, name, type, probability, parameters, duration, condition);
        }
    }

    // Getters
    public String getId() { return id; }
    public String getName() { return name; }
    public FaultType getType() { return type; }
    public double getProbability() { return probability; }
    /** @return an unmodifiable view of the handler parameters, never {@code null} */
    public Map<String, Object> getParameters() { return parameters; }
    public Duration getDuration() { return duration; }
    /** @return the applicability predicate, never {@code null} */
    public Predicate<FaultContext> getCondition() { return condition; }
}
/**
 * Snapshot of the call a fault may be injected into: the method name, its arguments, the
 * target object, and the time and thread at which the context was created.
 */
public class FaultContext {
    private static final Object[] NO_ARGUMENTS = new Object[0];

    private final String methodName;
    private final Object[] methodArguments;
    private final Object target;
    private final long timestamp;
    private final Thread currentThread;

    /**
     * @param methodName      name of the intercepted method
     * @param methodArguments call arguments; copied, {@code null} is treated as no arguments
     * @param target          object the method is invoked on
     */
    public FaultContext(String methodName, Object[] methodArguments, Object target) {
        this.methodName = methodName;
        // Copy so that later changes to the caller's array cannot alter what fault
        // conditions observe.
        this.methodArguments = methodArguments != null ? methodArguments.clone() : NO_ARGUMENTS;
        this.target = target;
        this.timestamp = System.currentTimeMillis();
        this.currentThread = Thread.currentThread();
    }

    // Getters
    public String getMethodName() { return methodName; }

    /** @return a copy of the call arguments, never {@code null} */
    public Object[] getMethodArguments() { return methodArguments.clone(); }

    public Object getTarget() { return target; }

    /** @return creation time of this context in epoch milliseconds */
    public long getTimestamp() { return timestamp; }

    /** @return the thread that created this context */
    public Thread getCurrentThread() { return currentThread; }
}
/**
 * Thread-safe store of fault definitions and their runtime state (active flag, injection
 * count), keyed by fault id.
 */
@Component
public class FaultRegistry {
    private final Map<String, FaultDefinition> faults = new ConcurrentHashMap<>();
    private final Map<String, FaultState> faultStates = new ConcurrentHashMap<>();

    /**
     * Registers (or replaces) a fault. Re-registering an id resets its state to inactive.
     *
     * @param fault definition to register
     */
    public void registerFault(FaultDefinition fault) {
        // Publish the state first so a definition visible in `faults` normally has one.
        faultStates.put(fault.getId(), new FaultState(fault.getId()));
        faults.put(fault.getId(), fault);
    }

    /** Removes the definition and state registered under {@code faultId}, if any. */
    public void unregisterFault(String faultId) {
        faults.remove(faultId);
        faultStates.remove(faultId);
    }

    /** @return the definition registered under {@code faultId}, or {@code null} */
    public FaultDefinition getFault(String faultId) {
        return faults.get(faultId);
    }

    /** @return the runtime state registered under {@code faultId}, or {@code null} */
    public FaultState getFaultState(String faultId) {
        return faultStates.get(faultId);
    }

    /** @return a read-only live view of all registered definitions */
    public Collection<FaultDefinition> getAllFaults() {
        // Read-only: removing through the raw values() view would leave faultStates behind.
        return Collections.unmodifiableCollection(faults.values());
    }

    /**
     * Decides whether the fault should fire for this call.
     *
     * <p>An activated fault always fires. Otherwise it fires when a random draw falls below
     * its probability and its condition accepts {@code context}.
     *
     * @return {@code false} for an unknown fault id
     */
    public boolean shouldInject(String faultId, FaultContext context) {
        FaultDefinition fault = faults.get(faultId);
        if (fault == null) return false;
        // The state can be missing if the fault is being (un)registered concurrently.
        FaultState state = faultStates.get(faultId);
        if (state != null && state.isActive()) return true;
        // Check probability and conditions
        return Math.random() < fault.getProbability() &&
                fault.getCondition().test(context);
    }

    /** Forces the fault on; unknown ids are ignored. */
    public void activateFault(String faultId) {
        FaultState state = faultStates.get(faultId);
        if (state != null) {
            state.activate();
        }
    }

    /** Clears the forced-on flag; unknown ids are ignored. */
    public void deactivateFault(String faultId) {
        FaultState state = faultStates.get(faultId);
        if (state != null) {
            state.deactivate();
        }
    }
}
/**
 * Mutable runtime state of one registered fault: whether it is forced on, when it was last
 * activated, and how many times it has been injected.
 */
class FaultState {
    private final String faultId;
    private volatile boolean active;
    private volatile long activatedAt;
    private final AtomicInteger injectionCount = new AtomicInteger();

    public FaultState(String faultId) {
        this.faultId = faultId;
    }

    /** Marks the fault as forced on and stamps the activation time. */
    public void activate() {
        // Write the timestamp before the flag so a reader that sees active == true also
        // sees the matching activation time.
        this.activatedAt = System.currentTimeMillis();
        this.active = true;
    }

    public void deactivate() {
        this.active = false;
    }

    public boolean isActive() {
        return active;
    }

    /** @return id of the fault this state belongs to */
    public String getFaultId() {
        return faultId;
    }

    /** @return epoch milliseconds of the last activation, or 0 if never activated */
    public long getActivatedAt() {
        return activatedAt;
    }

    public void recordInjection() {
        injectionCount.incrementAndGet();
    }

    public int getInjectionCount() {
        return injectionCount.get();
    }
}
Injection Engine
/**
 * Runs an operation, first asking the {@code FaultRegistry} whether a fault should fire and,
 * if so, delegating to the {@code FaultHandler} registered for the fault's type.
 */
@Component
public class InjectionEngine {
    private static final Logger logger = LoggerFactory.getLogger(InjectionEngine.class);
    private final FaultRegistry faultRegistry;
    private final MonitoringService monitoringService;
    private final Map<FaultType, FaultHandler> faultHandlers;

    public InjectionEngine(FaultRegistry faultRegistry, MonitoringService monitoringService) {
        this.faultRegistry = faultRegistry;
        this.monitoringService = monitoringService;
        this.faultHandlers = initializeHandlers();
    }

    /**
     * Executes {@code originalOperation}, possibly under the fault registered as {@code faultId}.
     *
     * @return whatever the handler (or the untouched operation) returns
     * @throws RuntimeException any exception raised by the handler or the operation; it is
     *                          recorded with the monitoring service before being rethrown
     */
    public <T> T inject(String faultId, FaultContext context, Supplier<T> originalOperation) {
        FaultDefinition fault = selectFault(faultId, context);
        FaultHandler handler = fault != null ? handlerFor(fault) : null;
        if (handler == null) {
            return originalOperation.get();
        }
        recordInjection(faultId, context);
        try {
            return handler.handle(fault, context, originalOperation);
        } catch (RuntimeException e) {
            monitoringService.recordError(faultId, context, e);
            throw e;
        }
    }

    /**
     * Same as {@link #inject} for operations without a result.
     *
     * @throws RuntimeException any exception raised by the handler or the operation; it is
     *                          recorded with the monitoring service before being rethrown
     */
    public void injectVoid(String faultId, FaultContext context, Runnable originalOperation) {
        FaultDefinition fault = selectFault(faultId, context);
        FaultHandler handler = fault != null ? handlerFor(fault) : null;
        if (handler == null) {
            originalOperation.run();
            return;
        }
        recordInjection(faultId, context);
        try {
            handler.handleVoid(fault, context, originalOperation);
        } catch (RuntimeException e) {
            monitoringService.recordError(faultId, context, e);
            throw e;
        }
    }

    /** Returns the fault to apply to this call, or {@code null} when none should fire. */
    private FaultDefinition selectFault(String faultId, FaultContext context) {
        if (!faultRegistry.shouldInject(faultId, context)) {
            return null;
        }
        // May be null if the fault was unregistered after shouldInject() answered.
        return faultRegistry.getFault(faultId);
    }

    /** Looks up the handler for the fault's type, logging when none is registered. */
    private FaultHandler handlerFor(FaultDefinition fault) {
        FaultHandler handler = faultHandlers.get(fault.getType());
        if (handler == null) {
            logger.warn("No handler registered for fault type {} (fault {}); running operation unchanged",
                    fault.getType(), fault.getId());
        }
        return handler;
    }

    /** Records the injection in the metrics and in the fault's own counter. */
    private void recordInjection(String faultId, FaultContext context) {
        monitoringService.recordInjection(faultId, context);
        FaultState state = faultRegistry.getFaultState(faultId);
        if (state != null) {
            state.recordInjection();
        }
    }

    // NULL_POINTER and CUSTOM have no handler; faults of those types run the operation as-is.
    private Map<FaultType, FaultHandler> initializeHandlers() {
        Map<FaultType, FaultHandler> handlers = new EnumMap<>(FaultType.class);
        handlers.put(FaultType.LATENCY, new LatencyFaultHandler());
        handlers.put(FaultType.EXCEPTION, new ExceptionFaultHandler());
        handlers.put(FaultType.MEMORY_LEAK, new MemoryLeakFaultHandler());
        handlers.put(FaultType.CPU_SPIKE, new CpuSpikeFaultHandler());
        handlers.put(FaultType.NETWORK_FAILURE, new NetworkFailureFaultHandler());
        handlers.put(FaultType.DATABASE_FAILURE, new DatabaseFailureFaultHandler());
        handlers.put(FaultType.TIMEOUT, new TimeoutFaultHandler());
        return handlers;
    }
}
/**
 * Strategy that applies one kind of fault around an operation. {@code InjectionEngine} picks
 * the implementation by {@code FaultType} and calls it in place of the operation.
 */
interface FaultHandler {
/**
 * Applies the fault to a value-returning operation.
 *
 * @param fault     definition carrying the handler parameters
 * @param context   description of the intercepted call
 * @param operation the original operation; the handler decides whether and when to run it
 * @return the result the caller should see
 */
<T> T handle(FaultDefinition fault, FaultContext context, Supplier<T> operation);
/**
 * Applies the fault to an operation without a result.
 *
 * @param fault     definition carrying the handler parameters
 * @param context   description of the intercepted call
 * @param operation the original operation; the handler decides whether and when to run it
 */
void handleVoid(FaultDefinition fault, FaultContext context, Runnable operation);
}
/**
 * Delays the calling thread by the fault's {@code latencyMs} parameter (1000 ms when the
 * parameter is absent) and then runs the original operation.
 */
@Component
class LatencyFaultHandler implements FaultHandler {

    @Override
    public <T> T handle(FaultDefinition fault, FaultContext context, Supplier<T> operation) {
        pause(fault);
        return operation.get();
    }

    @Override
    public void handleVoid(FaultDefinition fault, FaultContext context, Runnable operation) {
        pause(fault);
        operation.run();
    }

    /**
     * Sleeps for the configured latency.
     *
     * @throws RuntimeException if the thread is interrupted while sleeping; the interrupt
     *                          flag is restored first
     */
    private static void pause(FaultDefinition fault) {
        Object configured = fault.getParameters().getOrDefault("latencyMs", 1000L);
        long millis = ((Number) configured).longValue();
        try {
            Thread.sleep(millis);
        } catch (InterruptedException interrupted) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("Latency injection interrupted", interrupted);
        }
    }
}
/**
 * Throws an exception instead of running the operation.
 *
 * <p>Parameters: {@code exceptionClass} (fully qualified class name with a public
 * {@code (String)} constructor, default {@code java.lang.RuntimeException}) and
 * {@code message} (default {@code "Injected fault"}).
 */
@Component
class ExceptionFaultHandler implements FaultHandler {
    private static final String CREATION_FAILURE = "Failed to create exception for fault injection";

    /**
     * Never returns; the operation is not executed.
     *
     * @throws RuntimeException the configured exception when it is unchecked; otherwise a
     *                          {@code RuntimeException} whose cause is the configured
     *                          throwable or the reflection failure
     */
    @Override
    public <T> T handle(FaultDefinition fault, FaultContext context, Supplier<T> operation) {
        throw buildException(fault);
    }

    /** Never returns normally; see {@link #handle}. */
    @Override
    public void handleVoid(FaultDefinition fault, FaultContext context, Runnable operation) {
        throw buildException(fault);
    }

    /** Instantiates the configured exception, or a wrapper explaining why that was impossible. */
    private static RuntimeException buildException(FaultDefinition fault) {
        String exceptionClass = (String) fault.getParameters()
                .getOrDefault("exceptionClass", "java.lang.RuntimeException");
        String message = (String) fault.getParameters()
                .getOrDefault("message", "Injected fault");
        Object instance;
        try {
            Class<?> clazz = Class.forName(exceptionClass);
            Constructor<?> constructor = clazz.getConstructor(String.class);
            instance = constructor.newInstance(message);
        } catch (ReflectiveOperationException | RuntimeException e) {
            return new RuntimeException(CREATION_FAILURE, e);
        }
        // Only reflection sits in the try block above, so the injected exception itself is
        // thrown to the caller and is not caught and re-wrapped as a creation failure.
        if (instance instanceof RuntimeException) {
            return (RuntimeException) instance;
        }
        if (instance instanceof Throwable) {
            // Checked exceptions and errors cannot pass through Supplier/Runnable signatures.
            return new RuntimeException(message, (Throwable) instance);
        }
        return new RuntimeException(CREATION_FAILURE + ": " + exceptionClass + " is not a Throwable");
    }
}
/**
 * Retains {@code leakSizeMb} megabytes (default 10) of heap on every injection and then runs
 * the original operation. Retained memory is only released by {@link #cleanup()}.
 */
@Component
class MemoryLeakFaultHandler implements FaultHandler {
    private static final long BYTES_PER_MB = 1024L * 1024L;

    // Synchronized: one handler instance is shared by every thread that goes through the engine.
    private final List<byte[]> memoryLeaks = Collections.synchronizedList(new ArrayList<>());

    /**
     * @throws IllegalArgumentException if {@code leakSizeMb} is negative or too large for a
     *                                  single byte array
     */
    @Override
    public <T> T handle(FaultDefinition fault, FaultContext context, Supplier<T> operation) {
        int leakSizeMb = ((Number) fault.getParameters()
                .getOrDefault("leakSizeMb", 10)).intValue();
        // Compute in long: leakSizeMb * 1024 * 1024 overflows int from 2048 MB upwards.
        long leakBytes = leakSizeMb * BYTES_PER_MB;
        if (leakSizeMb < 0 || leakBytes > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("leakSizeMb must be between 0 and "
                    + (Integer.MAX_VALUE / BYTES_PER_MB) + ", was: " + leakSizeMb);
        }
        // Create memory leak
        memoryLeaks.add(new byte[(int) leakBytes]);
        return operation.get();
    }

    @Override
    public void handleVoid(FaultDefinition fault, FaultContext context, Runnable operation) {
        handle(fault, context, () -> {
            operation.run();
            return null;
        });
    }

    /** Drops every retained buffer and suggests a garbage collection. */
    public void cleanup() {
        memoryLeaks.clear();
        System.gc();
    }
}
/**
 * Runs the operation on a separate thread and gives up after {@code timeoutMs} milliseconds
 * (default 5000). No delay is added; only a deadline is enforced.
 */
@Component
class TimeoutFaultHandler implements FaultHandler {

    /**
     * @return the operation's result when it finishes within the deadline
     * @throws RuntimeException on timeout, on interruption of the waiting thread, or the
     *                          operation's own unchecked exception
     */
    @Override
    public <T> T handle(FaultDefinition fault, FaultContext context, Supplier<T> operation) {
        long timeoutMs = ((Number) fault.getParameters()
                .getOrDefault("timeoutMs", 5000L)).longValue();
        ExecutorService executor = Executors.newSingleThreadExecutor();
        Future<T> future = executor.submit(operation::get);
        try {
            return future.get(timeoutMs, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            future.cancel(true);
            throw new RuntimeException("Operation timed out due to fault injection", e);
        } catch (InterruptedException e) {
            future.cancel(true);
            // Restore the flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Operation failed", e);
        } catch (java.util.concurrent.ExecutionException e) {
            // Surface the operation's own failure instead of hiding it behind the executor's
            // wrapper, so callers see the same exception type as without the fault.
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            }
            if (cause instanceof Error) {
                throw (Error) cause;
            }
            throw new RuntimeException("Operation failed", cause != null ? cause : e);
        } finally {
            executor.shutdown();
        }
    }

    @Override
    public void handleVoid(FaultDefinition fault, FaultContext context, Runnable operation) {
        handle(fault, context, () -> {
            operation.run();
            return null;
        });
    }
}
Aspect-Oriented Fault Injection
Method-Level Fault Injection
/**
 * Aspect that applies fault injection to methods annotated with {@code @InjectFault} and
 * retries methods annotated with {@code @Retryable}.
 */
@Aspect
@Component
public class FaultInjectionAspect {
    private final InjectionEngine injectionEngine;
    private final FaultRegistry faultRegistry;

    public FaultInjectionAspect(InjectionEngine injectionEngine, FaultRegistry faultRegistry) {
        this.injectionEngine = injectionEngine;
        this.faultRegistry = faultRegistry;
    }

    /**
     * Routes the annotated call through the injection engine using the annotation's fault id.
     * The annotation's {@code probability()} is not consulted here; the registered
     * definition's probability applies.
     *
     * @throws Throwable unchecked exceptions and errors from the method as-is; checked
     *                   exceptions wrapped in a {@code RuntimeException}
     */
    @Around("@annotation(injectFault)")
    public Object injectFault(ProceedingJoinPoint joinPoint, InjectFault injectFault) throws Throwable {
        String faultId = injectFault.value();
        FaultContext context = new FaultContext(
                joinPoint.getSignature().getName(),
                joinPoint.getArgs(),
                joinPoint.getTarget()
        );
        return injectionEngine.inject(faultId, context, () -> {
            try {
                return joinPoint.proceed();
            } catch (RuntimeException | Error e) {
                // Keep the original type: callers, retry logic and circuit breakers key on it.
                throw e;
            } catch (Throwable t) {
                // Supplier cannot declare checked exceptions.
                throw new RuntimeException(t);
            }
        });
    }

    /**
     * Invokes the method up to {@code maxAttempts} times (at least once), sleeping
     * {@code backoffMs * attempt} between attempts.
     *
     * @throws Throwable the exception of the last attempt, or of the current attempt if the
     *                   thread is interrupted while backing off
     */
    @Around("execution(* com.example.service.*.*(..)) && @annotation(retryable)")
    public Object withRetry(ProceedingJoinPoint joinPoint, Retryable retryable) throws Throwable {
        // A non-positive maxAttempts must still run the method once.
        int maxAttempts = Math.max(1, retryable.maxAttempts());
        // Thread.sleep rejects negative values, which would mask the original failure.
        long backoffMs = Math.max(0L, retryable.backoffMs());
        for (int attempt = 1; ; attempt++) {
            try {
                return joinPoint.proceed();
            } catch (Exception e) {
                if (attempt >= maxAttempts) {
                    throw e;
                }
                try {
                    Thread.sleep(backoffMs * attempt); // Linear backoff: 1x, 2x, 3x, ...
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw e;
                }
            }
        }
    }
}
/**
 * Marks a method whose calls {@code FaultInjectionAspect} routes through the injection engine.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface InjectFault {
/** Id of the fault to consult; passed to {@code InjectionEngine.inject} by the aspect. */
String value();
/**
 * Injection probability, default 1.0.
 * NOTE(review): FaultInjectionAspect in this file does not read this attribute; the
 * probability of the registered FaultDefinition is what takes effect.
 */
double probability() default 1.0;
}
/**
 * Marks a method that {@code FaultInjectionAspect.withRetry} re-invokes when it throws an
 * {@code Exception}. The aspect's pointcut only matches methods in {@code com.example.service}.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface Retryable {
/** Total number of invocations allowed, including the first one; default 3. */
int maxAttempts() default 3;
/** Base delay in milliseconds; the aspect sleeps this value times the attempt number. */
long backoffMs() default 1000;
}
Service Integration Examples
Database Service with Fault Injection
/**
 * Example service that wraps its JDBC calls in explicit {@code InjectionEngine} calls.
 *
 * <p>The methods inject programmatically and therefore do not carry {@code @InjectFault}:
 * with both in place {@code FaultInjectionAspect} would apply the same fault a second time
 * around the call.
 */
@Service
public class DatabaseService {
    private static final Logger logger = LoggerFactory.getLogger(DatabaseService.class);
    private final JdbcTemplate jdbcTemplate;
    private final InjectionEngine injectionEngine;
    private final FaultRegistry faultRegistry;

    public DatabaseService(JdbcTemplate jdbcTemplate, InjectionEngine injectionEngine,
                           FaultRegistry faultRegistry) {
        this.jdbcTemplate = jdbcTemplate;
        this.injectionEngine = injectionEngine;
        this.faultRegistry = faultRegistry;
    }

    /**
     * Loads one user by primary key under the {@code database-latency} fault.
     *
     * @param userId primary key of the user
     * @return the matching user
     */
    public User findUserById(Long userId) {
        FaultContext context = new FaultContext("findUserById", new Object[]{userId}, this);
        return injectionEngine.inject("database-latency", context, () -> {
            String sql = "SELECT id, name, email FROM users WHERE id = ?";
            return jdbcTemplate.queryForObject(sql, DatabaseService::mapUser, userId);
        });
    }

    /**
     * Inserts a user under the {@code database-exception} fault; retried by the
     * {@code @Retryable} aspect.
     *
     * @param user user whose name and email are stored
     */
    @Retryable(maxAttempts = 3, backoffMs = 1000)
    public void saveUser(User user) {
        FaultContext context = new FaultContext("saveUser", new Object[]{user}, this);
        injectionEngine.injectVoid("database-exception", context, () -> {
            String sql = "INSERT INTO users (name, email) VALUES (?, ?)";
            jdbcTemplate.update(sql, user.getName(), user.getEmail());
        });
    }

    /**
     * Searches users by name fragment under the {@code database-timeout} fault and degrades
     * to an empty list when the query (or the injected fault) fails.
     *
     * @param name fragment matched with {@code LIKE %name%}
     * @return matching users, or an empty list on any failure
     */
    public List<User> findUsersWithFaultTolerance(String name) {
        // NOTE(review): the context reports "findUsersByName" rather than this method's
        // name; kept as-is because the value is used as a metric tag.
        FaultContext context = new FaultContext("findUsersByName", new Object[]{name}, this);
        try {
            return injectionEngine.inject("database-timeout", context, () -> {
                String sql = "SELECT id, name, email FROM users WHERE name LIKE ?";
                return jdbcTemplate.query(sql, DatabaseService::mapUser, "%" + name + "%");
            });
        } catch (Exception e) {
            logger.warn("Database query failed, returning empty result", e);
            return Collections.emptyList();
        }
    }

    /** Maps the current row of a {@code users} result set to a {@code User}. */
    private static User mapUser(java.sql.ResultSet rs, int rowNum) throws java.sql.SQLException {
        User user = new User();
        user.setId(rs.getLong("id"));
        user.setName(rs.getString("name"));
        user.setEmail(rs.getString("email"));
        return user;
    }
}
External Service Client with Circuit Breaker
/**
 * Example HTTP client that combines programmatic fault injection with a circuit breaker
 * and the {@code @Retryable} aspect.
 */
@Service
public class ExternalServiceClient {
    private final RestTemplate restTemplate;
    private final InjectionEngine injectionEngine;
    private final CircuitBreaker circuitBreaker;

    public ExternalServiceClient(RestTemplate restTemplate, InjectionEngine injectionEngine) {
        this.restTemplate = restTemplate;
        this.injectionEngine = injectionEngine;
        this.circuitBreaker = CircuitBreaker.ofDefaults("external-service");
    }

    /**
     * Calls {@code endpoint} through the circuit breaker under the {@code http-latency} fault.
     *
     * <p>Not annotated with {@code @InjectFault}: the fault is injected explicitly inside
     * the circuit breaker, and the aspect would apply it a second time outside it, where
     * the breaker never sees the failure.
     *
     * @param endpoint URL to GET
     * @return the response body
     */
    public String callExternalService(String endpoint) {
        FaultContext context = new FaultContext("callExternalService", new Object[]{endpoint}, this);
        return circuitBreaker.executeSupplier(() ->
                injectionEngine.inject("http-latency", context, () -> {
                    ResponseEntity<String> response = restTemplate.getForEntity(endpoint, String.class);
                    return response.getBody();
                })
        );
    }

    /**
     * Calls {@code endpoint} under the {@code http-exception} fault; failures are retried by
     * the {@code @Retryable} aspect.
     *
     * @param endpoint URL to GET
     * @return the response body
     * @throws RuntimeException if the response status is not 2xx
     */
    @Retryable(maxAttempts = 5, backoffMs = 2000)
    public String callExternalServiceWithRetry(String endpoint) {
        FaultContext context = new FaultContext("callExternalServiceWithRetry",
                new Object[]{endpoint}, this);
        return injectionEngine.inject("http-exception", context, () -> {
            ResponseEntity<String> response = restTemplate.getForEntity(endpoint, String.class);
            if (!response.getStatusCode().is2xxSuccessful()) {
                throw new RuntimeException("HTTP error: " + response.getStatusCode());
            }
            return response.getBody();
        });
    }
}
Monitoring and Metrics
Comprehensive Monitoring Service
/**
 * Publishes fault injection metrics to a Micrometer {@code MeterRegistry} and logs each
 * injection and error.
 */
@Component
public class MonitoringService {
    private static final Logger logger = LoggerFactory.getLogger(MonitoringService.class);

    private final MeterRegistry meterRegistry;
    // Keyed by the full tag combination (fault id first), not just the fault id: a counter's
    // tags are fixed at registration, so one counter per fault would keep reporting the
    // method and error type of whichever call happened to come first.
    private final Map<List<String>, Counter> injectionCounters = new ConcurrentHashMap<>();
    private final Map<List<String>, Counter> errorCounters = new ConcurrentHashMap<>();
    private final Map<String, Timer> executionTimers = new ConcurrentHashMap<>();

    public MonitoringService(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    /** Counts one injection of {@code faultId} for the method named in {@code context}. */
    public void recordInjection(String faultId, FaultContext context) {
        String method = context.getMethodName();
        Counter counter = injectionCounters.computeIfAbsent(Arrays.asList(faultId, method),
                key -> Counter.builder("fault.injections")
                        .tag("fault_id", faultId)
                        .tag("method", method)
                        .register(meterRegistry));
        counter.increment();
        logger.info("Fault injected: {} in method {}", faultId, method);
    }

    /** Counts one error raised while {@code faultId} was being applied. */
    public void recordError(String faultId, FaultContext context, Exception error) {
        String method = context.getMethodName();
        String errorType = error.getClass().getSimpleName();
        Counter counter = errorCounters.computeIfAbsent(Arrays.asList(faultId, method, errorType),
                key -> Counter.builder("fault.errors")
                        .tag("fault_id", faultId)
                        .tag("method", method)
                        .tag("error_type", errorType)
                        .register(meterRegistry));
        counter.increment();
        logger.error("Fault injection error: {} in method {}", faultId, method, error);
    }

    /** Records a duration in milliseconds for the named operation. */
    public void recordExecutionTime(String operation, long durationMs) {
        Timer timer = executionTimers.computeIfAbsent(operation,
                op -> Timer.builder("operation.duration")
                        .tag("operation", op)
                        .register(meterRegistry));
        timer.record(durationMs, TimeUnit.MILLISECONDS);
    }

    /**
     * @return for every fault that has been injected at least once, a map with its total
     *         {@code "injections"} and {@code "errors"} counts summed over all methods
     */
    public Map<String, Object> getFaultStatistics() {
        Map<String, Double> injections = totalsByFault(injectionCounters);
        Map<String, Double> errors = totalsByFault(errorCounters);
        Map<String, Object> stats = new HashMap<>();
        injections.forEach((faultId, count) -> {
            Map<String, Object> faultStats = new HashMap<>();
            faultStats.put("injections", count);
            faultStats.put("errors", errors.getOrDefault(faultId, 0.0));
            stats.put(faultId, faultStats);
        });
        return stats;
    }

    /** Sums counter values per fault id (the first element of each key). */
    private static Map<String, Double> totalsByFault(Map<List<String>, Counter> counters) {
        Map<String, Double> totals = new HashMap<>();
        counters.forEach((key, counter) -> totals.merge(key.get(0), counter.count(), Double::sum));
        return totals;
    }
}
/**
 * REST endpoints under {@code /faults} for listing faults with their statistics and for
 * switching individual faults on and off.
 */
@RestController
@RequestMapping("/faults")
public class FaultMonitoringController {
    private final FaultRegistry faultRegistry;
    private final MonitoringService monitoringService;

    public FaultMonitoringController(FaultRegistry faultRegistry, MonitoringService monitoringService) {
        this.faultRegistry = faultRegistry;
        this.monitoringService = monitoringService;
    }

    /** @return all registered faults under {@code "faults"} and their counts under {@code "statistics"} */
    @GetMapping
    public Map<String, Object> getFaults() {
        Map<String, Object> response = new HashMap<>();
        response.put("faults", faultRegistry.getAllFaults());
        response.put("statistics", monitoringService.getFaultStatistics());
        return response;
    }

    /** Forces the fault on. @return 200, or 404 when no such fault is registered */
    @PostMapping("/{faultId}/activate")
    public ResponseEntity<Void> activateFault(@PathVariable String faultId) {
        if (faultRegistry.getFault(faultId) == null) {
            // The registry silently ignores unknown ids; report them to the client instead.
            return ResponseEntity.notFound().build();
        }
        faultRegistry.activateFault(faultId);
        return ResponseEntity.ok().build();
    }

    /** Clears the forced-on flag. @return 200, or 404 when no such fault is registered */
    @PostMapping("/{faultId}/deactivate")
    public ResponseEntity<Void> deactivateFault(@PathVariable String faultId) {
        if (faultRegistry.getFault(faultId) == null) {
            return ResponseEntity.notFound().build();
        }
        faultRegistry.deactivateFault(faultId);
        return ResponseEntity.ok().build();
    }

    /**
     * Manual fault injection endpoint for testing. As written it only echoes the fault id
     * and a timestamp; it does not apply the fault to anything.
     *
     * @return 200 with the echo payload, or 404 when no such fault is registered
     */
    @PostMapping("/{faultId}/inject")
    public ResponseEntity<Map<String, Object>> injectFault(@PathVariable String faultId) {
        if (faultRegistry.getFault(faultId) == null) {
            return ResponseEntity.notFound().build();
        }
        Map<String, Object> response = new HashMap<>();
        response.put("faultId", faultId);
        response.put("timestamp", System.currentTimeMillis());
        return ResponseEntity.ok(response);
    }
}
Chaos Engineering Integration
Chaos Monkey Implementation
/**
 * Periodically forces randomly chosen registered faults on for a short, random time.
 *
 * <p>Disabled until {@link #enable()} is called. {@link #enable()}, {@link #disable()} and
 * {@link #shutdown()} are synchronized so they can be called from different threads.
 */
@Component
public class ChaosMonkey {
    private static final Logger logger = LoggerFactory.getLogger(ChaosMonkey.class);

    private final FaultRegistry faultRegistry;
    private final InjectionEngine injectionEngine;
    private final ScheduledExecutorService scheduler;
    private final Random random = new Random();
    private volatile boolean enabled = false;
    // Guarded by `this`.
    private final List<ScheduledFuture<?>> scheduledTasks = new ArrayList<>();

    public ChaosMonkey(FaultRegistry faultRegistry, InjectionEngine injectionEngine) {
        this.faultRegistry = faultRegistry;
        this.injectionEngine = injectionEngine;
        this.scheduler = Executors.newScheduledThreadPool(3);
    }

    /** Starts the periodic tasks; does nothing when already enabled. */
    public synchronized void enable() {
        if (enabled) {
            // A second enable() would otherwise schedule a duplicate set of tasks.
            return;
        }
        this.enabled = true;
        scheduleRandomFaults();
        logger.info("Chaos Monkey enabled");
    }

    /** Cancels the periodic tasks. Pending one-shot deactivations still run. */
    public synchronized void disable() {
        this.enabled = false;
        scheduledTasks.forEach(task -> task.cancel(false));
        scheduledTasks.clear();
        logger.info("Chaos Monkey disabled");
    }

    /**
     * Disables the monkey and stops its scheduler threads.
     * Spring's inferred destroy-method lookup for {@code @Bean} definitions picks up a public
     * no-arg {@code shutdown()}, so this normally runs when the context closes.
     */
    public synchronized void shutdown() {
        disable();
        scheduler.shutdownNow();
    }

    public boolean isEnabled() {
        return enabled;
    }

    private void scheduleRandomFaults() {
        // Schedule random fault injections
        ScheduledFuture<?> latencyTask = scheduler.scheduleAtFixedRate(
                this::injectRandomLatency, 30, 60, TimeUnit.SECONDS);
        ScheduledFuture<?> exceptionTask = scheduler.scheduleAtFixedRate(
                this::injectRandomException, 45, 90, TimeUnit.SECONDS);
        ScheduledFuture<?> memoryTask = scheduler.scheduleAtFixedRate(
                this::injectMemoryPressure, 120, 300, TimeUnit.SECONDS);
        scheduledTasks.addAll(Arrays.asList(latencyTask, exceptionTask, memoryTask));
    }

    /** With 30% chance, forces a random LATENCY fault on for 10-39 seconds. */
    private void injectRandomLatency() {
        if (!enabled || random.nextDouble() > 0.3) return;
        activateRandomFault(FaultType.LATENCY, "latency", 10, 30);
    }

    /** With 20% chance, forces a random EXCEPTION fault on for 30-89 seconds. */
    private void injectRandomException() {
        if (!enabled || random.nextDouble() > 0.2) return;
        activateRandomFault(FaultType.EXCEPTION, "exception", 30, 60);
    }

    /**
     * Forces one randomly chosen registered fault of {@code type} on and schedules its
     * deactivation after {@code minSeconds + [0, spreadSeconds)} seconds. Failures are logged,
     * never thrown: an exception escaping a fixed-rate task would stop future runs.
     */
    private void activateRandomFault(FaultType type, String label, int minSeconds, int spreadSeconds) {
        try {
            List<FaultDefinition> candidates = faultRegistry.getAllFaults().stream()
                    .filter(f -> f.getType() == type)
                    .collect(Collectors.toList());
            if (candidates.isEmpty()) {
                return;
            }
            FaultDefinition fault = candidates.get(random.nextInt(candidates.size()));
            faultRegistry.activateFault(fault.getId());
            logger.info("Chaos Monkey injected " + label + ": {}", fault.getId());
            // Deactivate after random time
            scheduler.schedule(() ->
                            faultRegistry.deactivateFault(fault.getId()),
                    random.nextInt(spreadSeconds) + minSeconds, TimeUnit.SECONDS);
        } catch (Exception e) {
            logger.error("Chaos Monkey failed to inject " + label, e);
        }
    }

    /**
     * With 10% chance, registers and activates a temporary MEMORY_LEAK fault of 10-59 MB and
     * removes it after two minutes.
     *
     * <p>NOTE(review): the fault id is generated here, and the engine only consults ids
     * passed by call sites; none of the call sites shown in this file uses this id.
     */
    private void injectMemoryPressure() {
        if (!enabled || random.nextDouble() > 0.1) return;
        try {
            FaultDefinition memoryFault = new FaultDefinition.Builder()
                    .id("chaos-memory-" + System.currentTimeMillis())
                    .name("Chaos Memory Pressure")
                    .type(FaultType.MEMORY_LEAK)
                    .probability(1.0)
                    .parameter("leakSizeMb", random.nextInt(50) + 10)
                    .duration(Duration.ofMinutes(2))
                    .build();
            faultRegistry.registerFault(memoryFault);
            faultRegistry.activateFault(memoryFault.getId());
            logger.info("Chaos Monkey injected memory pressure: {}MB",
                    memoryFault.getParameters().get("leakSizeMb"));
            scheduler.schedule(() -> {
                faultRegistry.deactivateFault(memoryFault.getId());
                faultRegistry.unregisterFault(memoryFault.getId());
            }, 2, TimeUnit.MINUTES);
        } catch (Exception e) {
            logger.error("Chaos Monkey failed to inject memory pressure", e);
        }
    }
}
Testing Framework
Comprehensive Test Suite
/**
 * Integration tests for the fault injection framework.
 *
 * <p>The services consult fixed fault ids ({@code database-latency},
 * {@code database-exception}, {@code http-latency}). Tests that go through a service
 * therefore re-register the fault under that id; tests with their own id drive the
 * {@code InjectionEngine} directly.
 */
@SpringBootTest
@TestPropertySource(properties = {
        "fault.injection.enabled=true",
        "chaos.monkey.enabled=false"
})
public class FaultInjectionTest {
    @Autowired
    private DatabaseService databaseService;
    @Autowired
    private ExternalServiceClient externalServiceClient;
    @Autowired
    private FaultRegistry faultRegistry;
    @Autowired
    private InjectionEngine injectionEngine;

    @BeforeEach
    void setUp() {
        // Register test faults; re-registering also resets any activation left by a prior test.
        registerTestFaults();
    }

    @Test
    void testLatencyInjection() {
        // findUserById consults "database-latency", so the test overrides that id.
        FaultDefinition latencyFault = new FaultDefinition.Builder()
                .id("database-latency")
                .name("Test Latency")
                .type(FaultType.LATENCY)
                .probability(1.0)
                .parameter("latencyMs", 1000L)
                .build();
        faultRegistry.registerFault(latencyFault);
        faultRegistry.activateFault("database-latency");
        long startTime = System.currentTimeMillis();
        databaseService.findUserById(1L);
        long duration = System.currentTimeMillis() - startTime;
        assertTrue(duration >= 1000, "Operation should have taken at least 1 second");
    }

    @Test
    void testExceptionInjection() {
        // saveUser consults "database-exception", so the test overrides that id.
        FaultDefinition exceptionFault = new FaultDefinition.Builder()
                .id("database-exception")
                .name("Test Exception")
                .type(FaultType.EXCEPTION)
                .probability(1.0)
                .parameter("exceptionClass", "java.lang.RuntimeException")
                .parameter("message", "Injected test exception")
                .build();
        faultRegistry.registerFault(exceptionFault);
        faultRegistry.activateFault("database-exception");
        assertThrows(RuntimeException.class, () ->
                databaseService.saveUser(new User("Test", "test@example.com")));
    }

    @Test
    void testFaultRecovery() {
        FaultDefinition transientFault = new FaultDefinition.Builder()
                .id("transient-fault")
                .name("Transient Fault")
                .type(FaultType.EXCEPTION)
                .probability(1.0)
                .condition(ctx -> ((Integer) ctx.getMethodArguments()[0]) < 3)
                .build();
        // Deliberately not activated: an activated fault fires regardless of its condition,
        // while probability 1.0 alone makes the condition the only deciding factor.
        faultRegistry.registerFault(transientFault);
        // First call should fail
        assertThrows(RuntimeException.class, () -> callWithCounter(1));
        // Subsequent calls should succeed after counter reaches threshold
        assertDoesNotThrow(() -> callWithCounter(3));
    }

    @Test
    void testCircuitBreakerWithFaults() {
        // callExternalService consults "http-latency"; override it with a failing fault.
        FaultDefinition persistentFault = new FaultDefinition.Builder()
                .id("http-latency")
                .name("Persistent Fault")
                .type(FaultType.EXCEPTION)
                .probability(1.0)
                .duration(Duration.ofMinutes(1))
                .build();
        faultRegistry.registerFault(persistentFault);
        faultRegistry.activateFault("http-latency");
        // A default Resilience4j circuit breaker evaluates the failure rate only after its
        // minimum number of calls (100) has been recorded.
        int callsToOpen = 100;
        for (int i = 0; i < callsToOpen; i++) {
            assertThrows(RuntimeException.class, () ->
                    externalServiceClient.callExternalService("http://test.com/api"));
        }
        // Circuit should open, subsequent calls should fail fast
        assertThrows(CallNotPermittedException.class, () ->
                externalServiceClient.callExternalService("http://test.com/api"));
    }

    @Test
    void testProbabilisticFaultInjection() {
        // An EXCEPTION fault makes every injection observable without sleeping, so 1000
        // calls finish immediately and the count does not depend on timing.
        FaultDefinition probabilisticFault = new FaultDefinition.Builder()
                .id("probabilistic-fault")
                .name("Probabilistic Fault")
                .type(FaultType.EXCEPTION)
                .probability(0.5)
                .build();
        faultRegistry.registerFault(probabilisticFault);
        int injections = 0;
        int totalCalls = 1000;
        for (int i = 0; i < totalCalls; i++) {
            FaultContext context = new FaultContext("testMethod", new Object[]{i}, this);
            try {
                injectionEngine.inject("probabilistic-fault", context, () -> "success");
            } catch (RuntimeException injected) {
                injections++;
            }
        }
        // Should have approximately 50% injection rate
        double injectionRate = (double) injections / totalCalls;
        assertTrue(injectionRate > 0.4 && injectionRate < 0.6,
                "Injection rate should be around 50%, was: " + injectionRate);
    }

    private void registerTestFaults() {
        // Register common test faults
        FaultDefinition[] testFaults = {
                new FaultDefinition.Builder()
                        .id("database-latency")
                        .name("Database Latency")
                        .type(FaultType.LATENCY)
                        .probability(0.1)
                        .parameter("latencyMs", 2000L)
                        .build(),
                new FaultDefinition.Builder()
                        .id("database-exception")
                        .name("Database Exception")
                        .type(FaultType.EXCEPTION)
                        .probability(0.05)
                        // DataAccessException itself is abstract and cannot be instantiated.
                        .parameter("exceptionClass",
                                "org.springframework.dao.DataAccessResourceFailureException")
                        .build(),
                new FaultDefinition.Builder()
                        .id("http-latency")
                        .name("HTTP Latency")
                        .type(FaultType.LATENCY)
                        .probability(0.2)
                        .parameter("latencyMs", 3000L)
                        .build()
        };
        for (FaultDefinition fault : testFaults) {
            faultRegistry.registerFault(fault);
        }
    }

    private void callWithCounter(int counter) {
        FaultContext context = new FaultContext("testMethod", new Object[]{counter}, this);
        injectionEngine.inject("transient-fault", context, () -> "success");
    }
}
Performance Testing with Faults
/**
 * Load test: ten threads each perform one hundred lookups while a latency fault is
 * registered, and at least 80% of the calls must succeed.
 */
@SpringBootTest
public class PerformanceUnderFaultTest {
    private static final Logger logger = LoggerFactory.getLogger(PerformanceUnderFaultTest.class);

    @Autowired
    private DatabaseService databaseService;
    @Autowired
    private FaultRegistry faultRegistry;

    @Test
    void testPerformanceUnderLoadWithFaults() throws InterruptedException {
        // Register high-probability faults
        registerStressFaults();
        int threadCount = 10;
        int operationsPerThread = 100;
        ExecutorService executor = Executors.newFixedThreadPool(threadCount);
        try {
            CountDownLatch latch = new CountDownLatch(threadCount);
            List<Future<TestResult>> futures = new ArrayList<>();
            for (int i = 0; i < threadCount; i++) {
                futures.add(executor.submit(() -> {
                    try {
                        return executeOperations(operationsPerThread);
                    } finally {
                        latch.countDown();
                    }
                }));
            }
            // Fail here rather than block indefinitely in Future.get() below.
            assertTrue(latch.await(2, TimeUnit.MINUTES), "Workers did not finish within 2 minutes");
            // Analyze results
            List<TestResult> results = futures.stream()
                    .map(f -> {
                        try {
                            return f.get();
                        } catch (Exception e) {
                            return new TestResult(0, 0, operationsPerThread);
                        }
                    })
                    .collect(Collectors.toList());
            int totalSuccess = results.stream().mapToInt(TestResult::getSuccessCount).sum();
            int totalErrors = results.stream().mapToInt(TestResult::getErrorCount).sum();
            double successRate = (double) totalSuccess / (threadCount * operationsPerThread);
            assertTrue(successRate > 0.8, "Success rate should be above 80% under fault conditions");
            logger.info("Performance test completed: {}/{} successful ({}% success rate)",
                    totalSuccess, threadCount * operationsPerThread, successRate * 100);
            logger.info("Performance test errors: {}", totalErrors);
        } finally {
            // Always release the worker threads, including when an assertion fails.
            executor.shutdownNow();
        }
    }

    private TestResult executeOperations(int operationCount) {
        int success = 0;
        int errors = 0;
        for (int i = 0; i < operationCount; i++) {
            try {
                databaseService.findUserById((long) i);
                success++;
            } catch (Exception e) {
                errors++;
            }
        }
        return new TestResult(success, errors, operationCount);
    }

    private void registerStressFaults() {
        FaultDefinition[] stressFaults = {
                new FaultDefinition.Builder()
                        // findUserById consults "database-latency"; a fault registered under
                        // any other id would never be applied to it.
                        .id("database-latency")
                        .name("Stress Latency")
                        .type(FaultType.LATENCY)
                        .probability(0.3)
                        .parameter("latencyMs", 100L)
                        .build(),
                // NOTE(review): findUserById does not consult this id; it is registered
                // but has no effect on the calls made by this test.
                new FaultDefinition.Builder()
                        .id("stress-exception")
                        .name("Stress Exception")
                        .type(FaultType.EXCEPTION)
                        .probability(0.1)
                        .build()
        };
        for (FaultDefinition fault : stressFaults) {
            faultRegistry.registerFault(fault);
        }
    }

    /** Per-thread tally of successful and failed operations. */
    private static class TestResult {
        private final int successCount;
        private final int errorCount;
        private final int totalCount;

        public TestResult(int successCount, int errorCount, int totalCount) {
            this.successCount = successCount;
            this.errorCount = errorCount;
            this.totalCount = totalCount;
        }

        public int getSuccessCount() { return successCount; }
        public int getErrorCount() { return errorCount; }
    }
}
Configuration and Setup
Spring Boot Configuration
/**
 * Spring configuration that creates the fault injection beans.
 *
 * <p>The registry, engine and aspect are only created when
 * {@code fault.injection.enabled=true}; the chaos monkey only when
 * {@code chaos.monkey.enabled=true}; the monitoring service is always created.
 *
 * <p>NOTE(review): the bean classes in this file are also annotated {@code @Component}.
 * If they are covered by component scanning they would be registered independently of the
 * conditions below; confirm which registration path is intended.
 */
@Configuration
@EnableAspectJAutoProxy
public class FaultInjectionConfig {
// Created only when fault.injection.enabled=true.
@Bean
@ConditionalOnProperty(name = "fault.injection.enabled", havingValue = "true")
public FaultRegistry faultRegistry() {
return new FaultRegistry();
}
// Created only when fault.injection.enabled=true.
@Bean
@ConditionalOnProperty(name = "fault.injection.enabled", havingValue = "true")
public InjectionEngine injectionEngine(FaultRegistry faultRegistry,
MonitoringService monitoringService) {
return new InjectionEngine(faultRegistry, monitoringService);
}
// Created only when fault.injection.enabled=true.
@Bean
@ConditionalOnProperty(name = "fault.injection.enabled", havingValue = "true")
public FaultInjectionAspect faultInjectionAspect(InjectionEngine injectionEngine,
FaultRegistry faultRegistry) {
return new FaultInjectionAspect(injectionEngine, faultRegistry);
}
// Created only when chaos.monkey.enabled=true. It takes the registry and engine as
// parameters, which are themselves conditional on fault.injection.enabled.
@Bean
@ConditionalOnProperty(name = "chaos.monkey.enabled", havingValue = "true")
public ChaosMonkey chaosMonkey(FaultRegistry faultRegistry,
InjectionEngine injectionEngine) {
return new ChaosMonkey(faultRegistry, injectionEngine);
}
// Unconditional: created regardless of the two feature flags.
@Bean
public MonitoringService monitoringService(MeterRegistry meterRegistry) {
return new MonitoringService(meterRegistry);
}
}
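// Application properties (e.g. application.properties). Only the two *.enabled flags are read by FaultInjectionConfig above; the probability keys are illustrative and are not bound to any code shown here.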
/*
fault.injection.enabled=true
chaos.monkey.enabled=false
# Fault probabilities
fault.probability.database-latency=0.1
fault.probability.database-exception=0.05
fault.probability.http-latency=0.2
fault.probability.http-exception=0.1
# Chaos Monkey settings
chaos.monkey.latency-probability=0.3
chaos.monkey.exception-probability=0.2
chaos.monkey.memory-probability=0.1
*/
Conclusion
This comprehensive Fault Injection Testing framework provides:
- Flexible Fault Definitions - Configurable faults with probabilities and conditions
- Multiple Injection Strategies - Latency, exceptions, memory leaks, timeouts
- Aspect-Oriented Integration - Non-invasive fault injection using annotations
- Chaos Engineering - Automated random fault injection for resilience testing
- Comprehensive Monitoring - Metrics, logging, and a REST endpoint for inspecting and toggling faults
- Circuit Breaker Integration - Fault tolerance patterns for production systems
- Testing Framework - Unit and performance tests under fault conditions
This approach helps build more resilient systems by proactively testing failure scenarios and ensuring proper error handling and recovery mechanisms.