test: add triage resolver unit tests, plan-eng coverage audit E2E, and triage E2E

- TEST_FAILURE_TRIAGE resolver: 6 unit tests verifying all triage steps (T1-T4), REPO_MODE branching, and safety default for ambiguous failures - plan-eng-coverage-audit E2E: tests /plan-eng-review coverage audit codepath (gap identified during eng review — existed on neither branch) - ship-triage E2E: planted-bug fixture with in-branch (truncate null) and pre-existing (divide-by-zero) failures; verifies correct classification - Touchfile entries for diff-based test selection Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-02 03:35:09 +02:00 · 2026-03-21 09:31:29 -07:00
parent 0394b8052d
commit fe3b5a30dc
3 changed files with 285 additions and 0 deletions
@@ -520,6 +520,46 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
  });
 });

+// --- {{TEST_FAILURE_TRIAGE}} resolver tests ---
+
+describe('TEST_FAILURE_TRIAGE resolver', () => {
+  const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
+
+  test('contains all 4 triage steps', () => {
+    expect(shipSkill).toContain('Step T1: Classify each failure');
+    expect(shipSkill).toContain('Step T2: Handle in-branch failures');
+    expect(shipSkill).toContain('Step T3: Handle pre-existing failures');
+    expect(shipSkill).toContain('Step T4: Execute the chosen action');
+  });
+
+  test('T1 includes classification criteria (in-branch vs pre-existing)', () => {
+    expect(shipSkill).toContain('In-branch');
+    expect(shipSkill).toContain('Likely pre-existing');
+    expect(shipSkill).toContain('git diff origin/');
+  });
+
+  test('T3 branches on REPO_MODE (solo vs collaborative)', () => {
+    expect(shipSkill).toContain('REPO_MODE');
+    expect(shipSkill).toContain('solo');
+    expect(shipSkill).toContain('collaborative');
+  });
+
+  test('solo mode offers fix-now, TODO, and skip options', () => {
+    expect(shipSkill).toContain('Investigate and fix now');
+    expect(shipSkill).toContain('Add as P0 TODO');
+    expect(shipSkill).toContain('Skip');
+  });
+
+  test('collaborative mode offers blame + assign option', () => {
+    expect(shipSkill).toContain('Blame + assign GitHub issue');
+    expect(shipSkill).toContain('gh issue create');
+  });
+
+  test('defaults ambiguous failures to in-branch (safety)', () => {
+    expect(shipSkill).toContain('When ambiguous, default to in-branch');
+  });
+});
+
 // --- {{SPEC_REVIEW_LOOP}} resolver tests ---

 describe('SPEC_REVIEW_LOOP resolver', () => {
@@ -94,6 +94,8 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  // Coverage audit (shared fixture) + triage
  'ship-coverage-audit': ['ship/**', 'test/fixtures/coverage-audit-fixture.ts', 'bin/gstack-repo-mode'],
  'review-coverage-audit': ['review/**', 'test/fixtures/coverage-audit-fixture.ts'],
+  'plan-eng-coverage-audit': ['plan-eng-review/**', 'test/fixtures/coverage-audit-fixture.ts'],
+  'ship-triage': ['ship/**', 'bin/gstack-repo-mode'],

  // Design
  'design-consultation-core':     ['design-consultation/**'],
@@ -2858,6 +2858,249 @@ Output the diagram directly.`,
  }, 180_000);
 });

+// --- Plan Eng Review Coverage Audit E2E ---
+
+describeIfSelected('Plan Eng Review Coverage Audit E2E', ['plan-eng-coverage-audit'], () => {
+  let planCoverageDir: string;
+
+  beforeAll(() => {
+    planCoverageDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-coverage-'));
+
+    // Copy plan-eng-review skill files
+    copyDirSync(path.join(ROOT, 'plan-eng-review'), path.join(planCoverageDir, 'plan-eng-review'));
+
+    // Use shared fixture for billing project with coverage gaps
+    const { createCoverageAuditFixture } = require('./fixtures/coverage-audit-fixture');
+    createCoverageAuditFixture(planCoverageDir);
+  });
+
+  afterAll(() => {
+    try { fs.rmSync(planCoverageDir, { recursive: true, force: true }); } catch {}
+  });
+
+  test('/plan-eng-review coverage audit traces plan codepaths', async () => {
+    const result = await runSkillTest({
+      prompt: `Read the file plan-eng-review/SKILL.md for the plan review workflow instructions.
+
+You are on the feature/billing branch. The base branch is main.
+This is a test project — there is no remote, no PR to create.
+
+ONLY run the Test Coverage Audit section from the plan review workflow.
+Skip all other steps (architecture, code quality, performance, etc.).
+
+The source code is in ${planCoverageDir}/src/billing.ts.
+Existing tests are in ${planCoverageDir}/test/billing.test.ts.
+
+Produce the ASCII coverage diagram showing which code paths are tested and which have gaps.
+Output the diagram directly.`,
+      workingDirectory: planCoverageDir,
+      maxTurns: 15,
+      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep'],
+      timeout: 120_000,
+      testName: 'plan-eng-coverage-audit',
+      runId,
+    });
+
+    logCost('/plan-eng-review coverage audit', result);
+    recordE2E('/plan-eng-review coverage audit', 'Plan Eng Review Coverage Audit E2E', result, {
+      passed: result.exitReason === 'success',
+    });
+
+    expect(result.exitReason).toBe('success');
+
+    // Check output contains coverage diagram elements
+    const output = result.output || '';
+    const outputLower = output.toLowerCase();
+    const hasGap = outputLower.includes('gap') || outputLower.includes('no test');
+    const hasTested = outputLower.includes('tested') || output.includes('✓') || output.includes('★');
+    const hasCoverage = outputLower.includes('coverage') || outputLower.includes('paths tested');
+
+    console.log(`Output has GAP markers: ${hasGap}`);
+    console.log(`Output has TESTED markers: ${hasTested}`);
+    console.log(`Output has coverage summary: ${hasCoverage}`);
+
+    // The agent MUST produce a coverage diagram with gap and tested markers
+    expect(hasGap || hasTested).toBe(true);
+
+    // At minimum, the agent should have read the source and test files
+    const readCalls = result.toolCalls.filter(tc => tc.tool === 'Read');
+    expect(readCalls.length).toBeGreaterThan(0);
+  }, 180_000);
+});
+
+// --- Triage E2E ---
+
+describeIfSelected('Test Failure Triage E2E', ['ship-triage'], () => {
+  let triageDir: string;
+
+  beforeAll(() => {
+    triageDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-triage-'));
+
+    // Copy ship skill files
+    copyDirSync(path.join(ROOT, 'ship'), path.join(triageDir, 'ship'));
+
+    const run = (cmd: string, args: string[]) =>
+      spawnSync(cmd, args, { cwd: triageDir, stdio: 'pipe', timeout: 5000 });
+
+    // Init git repo
+    run('git', ['init', '-b', 'main']);
+    run('git', ['config', 'user.email', 'test@test.com']);
+    run('git', ['config', 'user.name', 'Test']);
+
+    // Create a project with a pre-existing test failure on main
+    fs.writeFileSync(path.join(triageDir, 'package.json'), JSON.stringify({
+      name: 'triage-test-app',
+      version: '1.0.0',
+      scripts: { test: 'node test/run.js' },
+    }, null, 2));
+
+    fs.mkdirSync(path.join(triageDir, 'src'), { recursive: true });
+    fs.mkdirSync(path.join(triageDir, 'test'), { recursive: true });
+
+    // Source with a bug that exists on main (pre-existing)
+    fs.writeFileSync(path.join(triageDir, 'src', 'math.js'), `
+module.exports = {
+  add: (a, b) => a + b,
+  divide: (a, b) => a / b,  // BUG: no zero-division check (pre-existing)
+};
+`);
+
+    // Test file that catches the pre-existing bug
+    fs.writeFileSync(path.join(triageDir, 'test', 'math.test.js'), `
+const { add, divide } = require('../src/math');
+
+// This test passes
+if (add(2, 3) !== 5) { console.error('FAIL: add(2,3) should be 5'); process.exit(1); }
+console.log('PASS: add');
+
+// This test FAILS — pre-existing bug (divide by zero returns Infinity, not an error)
+try {
+  const result = divide(10, 0);
+  if (result === Infinity) { console.error('FAIL: divide(10,0) should throw, got Infinity'); process.exit(1); }
+} catch(e) {
+  console.log('PASS: divide zero check');
+}
+`);
+
+    // Test runner
+    fs.writeFileSync(path.join(triageDir, 'test', 'run.js'), `
+require('./math.test.js');
+require('./string.test.js');
+`);
+
+    // Commit on main with the pre-existing bug
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'initial: math utils with tests']);
+
+    // Create feature branch
+    run('git', ['checkout', '-b', 'feature/string-utils']);
+
+    // Add new code with a new bug (in-branch)
+    fs.writeFileSync(path.join(triageDir, 'src', 'string.js'), `
+module.exports = {
+  capitalize: (s) => s.charAt(0).toUpperCase() + s.slice(1),
+  reverse: (s) => s.split('').reverse().join(''),
+  truncate: (s, len) => s.substring(0, len),  // BUG: no null check (in-branch)
+};
+`);
+
+    // Add test that catches the in-branch bug
+    fs.writeFileSync(path.join(triageDir, 'test', 'string.test.js'), `
+const { capitalize, reverse, truncate } = require('../src/string');
+
+if (capitalize('hello') !== 'Hello') { console.error('FAIL: capitalize'); process.exit(1); }
+console.log('PASS: capitalize');
+
+if (reverse('abc') !== 'cba') { console.error('FAIL: reverse'); process.exit(1); }
+console.log('PASS: reverse');
+
+// This test FAILS — in-branch bug (null input causes TypeError)
+try {
+  truncate(null, 5);
+  console.log('PASS: truncate null');
+} catch(e) {
+  console.error('FAIL: truncate(null, 5) threw: ' + e.message);
+  process.exit(1);
+}
+`);
+
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'feat: add string utilities']);
+  });
+
+  afterAll(() => {
+    try { fs.rmSync(triageDir, { recursive: true, force: true }); } catch {}
+  });
+
+  test('/ship triage correctly classifies in-branch vs pre-existing failures', async () => {
+    const result = await runSkillTest({
+      prompt: `Read the file ship/SKILL.md for the ship workflow instructions.
+
+You are on the feature/string-utils branch. The base branch is main.
+This is a test project — there is no remote, no PR to create.
+
+Run the tests first:
+\`\`\`bash
+cd ${triageDir} && node test/run.js
+\`\`\`
+
+The tests will fail. Now run ONLY the Test Failure Ownership Triage (Steps T1-T4) from the ship workflow.
+
+For each failing test, classify it as:
+- **In-branch**: caused by changes on this branch (feature/string-utils)
+- **Pre-existing**: existed before this branch (present on main)
+
+Use git diff origin/main...HEAD (or git diff main...HEAD since there's no remote) to determine which files changed on this branch.
+
+Output your classification for each failure clearly, labeling each as "IN-BRANCH" or "PRE-EXISTING" with your reasoning.
+
+This is a solo repo (REPO_MODE=solo). For pre-existing failures, recommend fixing now.`,
+      workingDirectory: triageDir,
+      maxTurns: 20,
+      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep'],
+      timeout: 180_000,
+      testName: 'ship-triage',
+      runId,
+    });
+
+    logCost('/ship triage', result);
+
+    const output = result.output || '';
+    const outputLower = output.toLowerCase();
+
+    // The triage should identify the string/truncate failure as in-branch
+    const hasInBranch = outputLower.includes('in-branch') || outputLower.includes('in branch') || outputLower.includes('introduced');
+    // The triage should identify the math/divide failure as pre-existing
+    const hasPreExisting = outputLower.includes('pre-existing') || outputLower.includes('pre existing') || outputLower.includes('existed before');
+
+    console.log(`Output identifies IN-BRANCH failures: ${hasInBranch}`);
+    console.log(`Output identifies PRE-EXISTING failures: ${hasPreExisting}`);
+
+    // Check that the string/truncate bug is classified as in-branch
+    const mentionsTruncate = outputLower.includes('truncate') || outputLower.includes('string');
+    const mentionsDivide = outputLower.includes('divide') || outputLower.includes('math');
+
+    console.log(`Mentions truncate/string (in-branch bug): ${mentionsTruncate}`);
+    console.log(`Mentions divide/math (pre-existing bug): ${mentionsDivide}`);
+
+    recordE2E('/ship triage', 'Test Failure Triage E2E', result, {
+      passed: result.exitReason === 'success' && hasInBranch && hasPreExisting,
+      has_in_branch_classification: hasInBranch,
+      has_pre_existing_classification: hasPreExisting,
+      mentions_truncate: mentionsTruncate,
+      mentions_divide: mentionsDivide,
+    });
+
+    expect(result.exitReason).toBe('success');
+    // Must classify at least one failure as in-branch AND one as pre-existing
+    expect(hasInBranch).toBe(true);
+    expect(hasPreExisting).toBe(true);
+    // Must mention the specific bugs
+    expect(mentionsTruncate).toBe(true);
+    expect(mentionsDivide).toBe(true);
+  }, 240_000);
+});
+
 // --- Codex skill E2E ---

 describeIfSelected('Codex skill E2E', ['codex-review'], () => {