Flaky Test Detective

Diagnose and eliminate flaky tests systematically.

Common Flaky Test Patterns

1. Timing Issues

// ❌ Flaky: Race condition
test("should load user data", async () => {
  render();

  // Race condition - might pass or fail
  expect(screen.getByText("John Doe")).toBeInTheDocument();
});

// ✅ Fixed: Wait for element test("should load user data", async () => { render();

await waitFor(() => { expect(screen.getByText("John Doe")).toBeInTheDocument(); }); });

// ❌ Flaky: Fixed timeout test("should complete animation", async () => { render(); await new Promise((resolve) => setTimeout(resolve, 500)); // Brittle! expect(element).toHaveClass("animated"); });

// ✅ Fixed: Wait for condition test("should complete animation", async () => { render(); await waitFor( () => { expect(element).toHaveClass("animated"); }, { timeout: 2000 } ); });

2. Shared State

// ❌ Flaky: Global state pollution
let userId = "123";

test("test A", () => { userId = "456"; // Modifies global // ... });

test("test B", () => { expect(userId).toBe("123"); // Fails if test A runs first! });

// ✅ Fixed: Isolated state test("test A", () => { const userId = "456"; // Local variable // ... });

test("test B", () => { const userId = "123"; expect(userId).toBe("123"); });

// ❌ Flaky: Database not cleaned test("should create user", async () => { await db.user.create({ email: "test@example.com" }); // No cleanup! });

test("should create another user", async () => { await db.user.create({ email: "test@example.com" }); // Fails! Duplicate });

// ✅ Fixed: Proper cleanup afterEach(async () => { await db.user.deleteMany(); });

3. Randomness

// ❌ Flaky: Random data
test("should sort users", () => {
  const users = generateRandomUsers(10); // Different each time!
  const sorted = sortUsers(users);
  expect(sorted[0].name).toBe("Alice"); // Might not be Alice
});

// ✅ Fixed: Deterministic data test("should sort users", () => { const users = [ { name: "Charlie", age: 30 }, { name: "Alice", age: 25 }, { name: "Bob", age: 35 }, ]; const sorted = sortUsers(users); expect(sorted[0].name).toBe("Alice"); });

// ✅ Fixed: Seeded randomness import { faker } from "@faker-js/faker";

beforeEach(() => { faker.seed(12345); // Same data every time });

4. Network Dependencies

// ❌ Flaky: Real API call
test("should fetch users", async () => {
  const users = await fetchUsers(); // External API!
  expect(users).toHaveLength(10); // Might fail if API down
});

// ✅ Fixed: Mocked API test("should fetch users", async () => { server.use( http.get("/api/users", () => { return HttpResponse.json([ { id: "1", name: "User 1" }, { id: "2", name: "User 2" }, ]); }) );

const users = await fetchUsers(); expect(users).toHaveLength(2); });

Flaky Test Detection Script

// scripts/detect-flaky-tests.ts
import { execSync } from "child_process";

/**
 * Runs the test suite `iterations` times and reports tests whose outcome
 * varied between runs (at least one pass and at least one failure).
 *
 * Each run shells out to `npm test -- --reporter=json` and reads a
 * Jest-style JSON report (`testResults[].assertionResults[]`) from stdout.
 * Terminates the process with exit code 1 when flaky tests are found, or
 * when no run produced a usable report.
 *
 * @param iterations Number of times to run the suite (default 10).
 */
async function detectFlakyTests(iterations: number = 10) {
  type Tally = { passed: number; failed: number };
  type AssertionResult = { fullName: string; status: string };
  type FileResult = { name: string; assertionResults?: AssertionResult[] };

  const results = new Map<string, Tally>();
  let parsedRuns = 0;

  for (let i = 0; i < iterations; i++) {
    console.log(`\nRun ${i + 1}/${iterations}`);

    let output: string;
    try {
      output = execSync("npm test -- --reporter=json", {
        encoding: "utf-8",
        // Full-suite JSON reports can be large; avoid ENOBUFS from a small
        // output buffer.
        maxBuffer: 64 * 1024 * 1024,
      });
    } catch (error) {
      // execSync throws when the command exits non-zero, which is exactly
      // what a run containing a failing test does. The report is still on
      // the error's captured stdout, so recover it instead of dropping the
      // run -- otherwise failures would never be recorded.
      const stdout =
        typeof error === "object" && error !== null
          ? (error as { stdout?: unknown }).stdout
          : undefined;
      if (typeof stdout !== "string" || stdout.trim() === "") {
        console.error("Test run failed:", error);
        continue;
      }
      output = stdout;
    }

    let files: FileResult[];
    try {
      // npm may print a script banner before the report, so parse only the
      // span from the first "{" to the last "}".
      const start = output.indexOf("{");
      const end = output.lastIndexOf("}");
      if (start === -1 || end < start) {
        throw new Error("No JSON report found in test output");
      }
      const report = JSON.parse(output.slice(start, end + 1)) as {
        testResults?: FileResult[];
      };
      if (!Array.isArray(report.testResults)) {
        throw new Error("JSON report has no testResults array");
      }
      files = report.testResults;
    } catch (error) {
      console.error("Test run failed:", error);
      continue;
    }

    parsedRuns++;

    for (const file of files) {
      for (const test of file.assertionResults ?? []) {
        // Only real outcomes count; skipped/pending/todo tests are neither
        // a pass nor a failure.
        if (test.status !== "passed" && test.status !== "failed") {
          continue;
        }

        const key = `${file.name}::${test.fullName}`;
        const stats = results.get(key) ?? { passed: 0, failed: 0 };

        if (test.status === "passed") {
          stats.passed++;
        } else {
          stats.failed++;
        }

        results.set(key, stats);
      }
    }
  }

  if (parsedRuns === 0) {
    // Reporting "no flaky tests" would be misleading when nothing was observed.
    console.error(
      "No test run produced a usable JSON report; cannot assess flakiness."
    );
    process.exit(1);
  }

  // Analyze results
  console.log("\n🔍 Flaky Test Report\n");

  const flakyTests: string[] = [];

  for (const [testName, stats] of results) {
    if (stats.failed > 0 && stats.passed > 0) {
      // Use the runs actually recorded for this test, not the requested
      // iteration count, so unparseable runs do not skew the rate.
      const recordedRuns = stats.passed + stats.failed;
      const failureRate = (stats.failed / recordedRuns) * 100;
      console.log(`❌ FLAKY: ${testName}`);
      console.log(`   Passed: ${stats.passed}/${recordedRuns}`);
      console.log(`   Failed: ${stats.failed}/${recordedRuns}`);
      console.log(`   Failure rate: ${failureRate.toFixed(1)}%\n`);
      flakyTests.push(testName);
    }
  }

  if (flakyTests.length === 0) {
    console.log("✅ No flaky tests detected!");
  } else {
    console.log(`\n🚨 Found ${flakyTests.length} flaky tests`);
    process.exit(1);
  }
}

// Run tests 20 times. The call returns a promise, so attach a handler:
// an unexpected rejection is logged and turned into a non-zero exit
// instead of being left as a floating promise.
detectFlakyTests(20).catch((error: unknown) => {
  console.error("Flaky test detection crashed:", error);
  process.exit(1);
});

Root Cause Analysis

// Framework for analyzing flaky tests
interface FlakyTestAnalysis {
  testName: string;
  failureRate: number;
  symptoms: string[];
  rootCause: "timing" | "state" | "randomness" | "network" | "unknown";
  recommendation: string;
}

/**
 * Classifies a flaky test by scanning its error messages for known phrases.
 *
 * Matching is case-insensitive, so messages such as "Unique constraint
 * failed" or "Network request failed" are recognised as well as their
 * lowercase forms.
 *
 * Every heuristic that matches appends to `symptoms`. `rootCause` and
 * `recommendation` reflect the LAST matching heuristic, in the order
 * timing -> state -> randomness -> network.
 *
 * @param testName Name of the test being analysed; copied into the result.
 * @param errorMessages Error messages collected from the test's failures.
 * @returns The analysis. `failureRate` is always 0 here; this function does
 *   not compute it. `rootCause` is "unknown" when nothing matches.
 */
function analyzeTest(
  testName: string,
  errorMessages: string[]
): FlakyTestAnalysis {
  const analysis: FlakyTestAnalysis = {
    testName,
    failureRate: 0,
    symptoms: [],
    rootCause: "unknown",
    recommendation: "",
  };

  // Lowercase once so every check below is case-insensitive.
  const messages = errorMessages.map((msg) => msg.toLowerCase());

  // Detect timing issues
  if (
    messages.some((msg) => msg.includes("timeout") || msg.includes("not found"))
  ) {
    analysis.symptoms.push("Timeout or element not found");
    analysis.rootCause = "timing";
    analysis.recommendation =
      "Add explicit waits using waitFor() or findBy* queries";
  }

  // Detect shared state
  if (
    messages.some(
      (msg) =>
        msg.includes("already exists") || msg.includes("unique constraint")
    )
  ) {
    analysis.symptoms.push("Duplicate or existing data");
    analysis.rootCause = "state";
    analysis.recommendation =
      "Add beforeEach/afterEach cleanup or use unique test data";
  }

  // Detect randomness
  if (
    messages.some((msg) => msg.includes("expected") && msg.includes("received"))
  ) {
    analysis.symptoms.push("Inconsistent values");
    analysis.rootCause = "randomness";
    analysis.recommendation =
      "Use deterministic test data or seed random generators";
  }

  // Detect network issues ("econnrefused" because messages are lowercased)
  if (
    messages.some(
      (msg) => msg.includes("network") || msg.includes("econnrefused")
    )
  ) {
    analysis.symptoms.push("Network or connection errors");
    analysis.rootCause = "network";
    analysis.recommendation = "Mock all network requests using MSW or similar";
  }

  return analysis;
}

Stabilization Guidelines

// Test stability checklist
const stabilityChecklist = {
  timing: [
    "Use waitFor() instead of fixed timeouts",
    "Use findBy* queries (built-in waiting)",
    "Set appropriate timeout values",
    "Wait for loading states to disappear",
  ],
  state: [
    "Clear database before each test",
    "Reset mocks after each test",
    "Use test-specific data (unique IDs)",
    "Avoid global variables",
  ],
  randomness: [
    "Use fixed seed for random generators",
    "Use deterministic test data",
    "Avoid Date.now() - mock time instead",
    "Generate IDs deterministically",
  ],
  network: [
    "Mock all API calls",
    "Use MSW for HTTP mocking",
    "Avoid real external services",
    "Test network errors explicitly",
  ],
  parallelism: [
    "Use isolated databases per test worker",
    "Avoid port conflicts (random ports)",
    "Don't share file system state",
    "Use test.concurrent cautiously",
  ],
};

Auto-Fix Patterns

// Automated fixes for common issues

// Fix 1: Add waitFor to assertions
/**
 * Wraps synchronous `expect(screen.getBy*(...)).toBeInTheDocument()`
 * assertions in `await waitFor(() => ...)`.
 *
 * The whole assertion is matched as one unit, so assertions using any other
 * matcher are left untouched rather than being half-rewritten. Assertions
 * already preceded by `waitFor(() => ` are skipped, which makes the
 * transform safe to apply more than once. Query arguments that themselves
 * contain parentheses are not matched and are left unchanged.
 *
 * @param code Test source text to transform.
 * @returns The source with matching assertions wrapped in `waitFor`.
 */
function addWaitFor(code: string): string {
  // Replace: expect(screen.getByText('...')).toBeInTheDocument()
  // With:    await waitFor(() => expect(screen.getByText('...')).toBeInTheDocument())
  const assertion =
    /(?<!waitFor\(\(\) => )expect\((screen\.getBy\w*\([^()]*\))\)\.toBeInTheDocument\(\)/g;

  return code.replace(
    assertion,
    "await waitFor(() => expect($1).toBeInTheDocument())"
  );
}

// Fix 2: Replace getBy with findBy
/**
 * Rewrites `screen.getBy*` / `screen.getAllBy*` queries to their awaited
 * `findBy*` / `findAllBy*` counterparts.
 *
 * The dot is matched literally, and an `await` already in front of the query
 * is reused so the output never contains `await await`.
 *
 * @param code Test source text to transform.
 * @returns The source with the queries rewritten.
 */
function replaceGetByWithFindBy(code: string): string {
  return code.replace(
    /(?:await\s+)?screen\.get(All)?By/g,
    (_match: string, all: string | undefined) =>
      `await screen.find${all ?? ""}By`
  );
}

// Fix 3: Add cleanup
/**
 * Inserts an `afterEach` cleanup hook immediately before the first
 * `test(` / `it(` call, unless the source already mentions `afterEach`.
 *
 * Source without any test call is returned unchanged; splicing at a
 * "not found" index would otherwise corrupt the text.
 *
 * @param code Test source text to transform.
 * @returns The source with the hook inserted, or the input unchanged.
 */
function addCleanup(code: string): string {
  if (code.includes("afterEach")) {
    return code;
  }

  // \b keeps identifiers that merely end in "test"/"it" (e.g. latest(),
  // submit()) from being mistaken for a test call.
  const insertPoint = code.search(/\b(?:test|it)\(/);
  if (insertPoint === -1) {
    return code;
  }

  return (
    code.slice(0, insertPoint) +
    "afterEach(async () => {\n await cleanup();\n});\n\n" +
    code.slice(insertPoint)
  );
}

Monitoring Flaky Tests in CI

.github/workflows/test-stability.yml

# Nightly workflow that runs the test suite repeatedly to surface flaky tests.
name: Test Stability

on:
  schedule:
    - cron: "0 2 * * *" # Run nightly

jobs:
  stability-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: "20"

      - run: npm ci

      # `|| echo` keeps the loop going after a failing run so that all 20
      # iterations execute; a failed run is only noted in the log.
      - name: Run tests 20 times
        run: |
          for i in {1..20}; do
            echo "Run $i/20"
            npm test || echo "FAILED: Run $i"
          done

      # NOTE(review): presumably an npm script that runs
      # scripts/detect-flaky-tests.ts -- confirm it is defined in package.json.
      - name: Analyze results
        run: npm run detect-flaky-tests

Best Practices

- Explicit waits: Never use sleep/timeout
- Clean state: Reset between tests
- Deterministic data: No randomness
- Mock external deps: APIs, time, randomness
- Run tests multiple times: Catch intermittent failures
- Isolate tests: No shared state
- Monitor CI: Track flaky test trends

Output Checklist

- Common patterns identified
- Root cause analysis performed
- Timing issues fixed (waitFor)
- Shared state eliminated (cleanup)
- Randomness removed (fixed seeds)
- Network mocked (MSW)
- Detection script implemented
- Stabilization guidelines documented
- CI monitoring configured

flaky-test-detective

安装

.github/workflows/test-stability.yml