test: add multi-finding batching regression test (periodic tier)

Adds a periodic-tier E2E test that catches the May 2026 transcript bug shape, which the existing single-finding gate-tier floor test cannot detect: a model that fires one AskUserQuestion (AUQ) and then batches the remaining findings into a single "## Decisions to confirm" plan write + ExitPlanMode.

Why a separate test from skill-e2e-plan-eng-finding-floor: the gate-tier floor (runPlanSkillFloorCheck) exits on the first AUQ render and returns success, so a once-then-batch model would pass it trivially. This test uses runPlanSkillCounting at periodic tier with N-AUQ tracking and asserts >= 3 distinct review-phase AUQs on a 4-finding seeded plan.

- test/fixtures/forcing-finding-seeds.ts: FORCING_BATCHING_ENG fixture (4 distinct non-trivial findings spread across Architecture, Code Quality, Tests, Performance — mirrors the D1-D4 transcript shape)
- test/skill-e2e-plan-eng-multi-finding-batching.test.ts: new test
- test/helpers/touchfiles.ts: registered in BOTH E2E_TOUCHFILES and E2E_TIERS (touchfiles.test.ts asserts exact equality)

The test will fail on baseline today because today's model uses the preamble fallback to batch findings; it passes after the architectural fix lands in a follow-up commit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-19 00:00:13 +02:00 · 2026-05-08 15:25:17 -07:00
parent 443bde054c
commit 4c2bcf5c17
3 changed files with 143 additions and 0 deletions
@@ -143,6 +143,13 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  'plan-ceo-finding-floor':      ['plan-ceo-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-ceo-finding-floor.test.ts'],
  'plan-design-finding-floor':   ['plan-design-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-design-finding-floor.test.ts'],
  'plan-devex-finding-floor':    ['plan-devex-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-devex-finding-floor.test.ts'],
+
+  // Multi-finding batching regression — periodic tier complement to the
+  // gate-tier finding-floor. Catches the May 2026 transcript shape where
+  // a model fires one AUQ then batches the rest into a "## Decisions to
+  // confirm" plan write. runPlanSkillFloorCheck cannot detect that shape
+  // (it exits on first AUQ); runPlanSkillCounting can.
+  'plan-eng-multi-finding-batching': ['plan-eng-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-eng-multi-finding-batching.test.ts'],
  'brain-privacy-gate':           ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-artifacts-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'],

  // /setup-gbrain Path 4 (Remote MCP) — happy + bad-token end-to-end via
@@ -443,6 +450,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
  'plan-ceo-finding-floor':    'gate',
  'plan-design-finding-floor': 'gate',
  'plan-devex-finding-floor':  'gate',
+  'plan-eng-multi-finding-batching': 'periodic',

  // Privacy gate for gstack-brain-sync — periodic (non-deterministic LLM call,
  // costs ~$0.30-$0.50 per run, not needed on every commit)