test(e2e): split-overflow regression for /plan-ceo-review

Periodic-tier E2E test that catches the original failure mode the user reported: when ONE decision has 5+ options, it must be split into N sequential AskUserQuestion (AUQ) calls rather than dropping an option to fit Conductor's 4-option cap.

Fixture: 5 independent chat-platform integration candidates (Slack/Discord/Teams/Telegram/Mattermost), each carrying its own include/defer/cut decision. Floor = 4 review-phase AUQs (the standard [N-1] tolerance band, with N = 5). The pre-fix behavior ("drop to 4 + 1 dropped") fails this floor.

Wired into test/helpers/touchfiles.ts: tier periodic; depends on plan-ceo-review/**, the new preamble subsection, the question-pref binary (for the carve-out), and the runner helper. The expected count in touchfiles.test.ts is bumped 21 → 22 to account for the new entry.

Cost: ~$0.30/run when EVALS_TIER=periodic. Skips silently otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-19 00:00:13 +02:00 · 2026-05-26 22:27:51 -07:00
parent 975312ef3f
commit d0d8cb2db6
4 changed files with 172 additions and 2 deletions
@@ -149,6 +149,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  // confirm" plan write. runPlanSkillFloorCheck cannot detect that shape
  // (it exits on first AUQ); runPlanSkillCounting can.
  'plan-eng-multi-finding-batching': ['plan-eng-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/review.ts', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-eng-multi-finding-batching.test.ts'],
+  'plan-ceo-split-overflow': ['plan-ceo-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'bin/gstack-question-preference', 'test/helpers/claude-pty-runner.ts', 'test/fixtures/forcing-finding-seeds.ts', 'test/skill-e2e-plan-ceo-split-overflow.test.ts'],
  'brain-privacy-gate':           ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-artifacts-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'],

  // /setup-gbrain Path 4 (Remote MCP) — happy + bad-token end-to-end via
@@ -475,6 +476,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
  'plan-design-finding-floor': 'gate',
  'plan-devex-finding-floor':  'gate',
  'plan-eng-multi-finding-batching': 'periodic',
+  'plan-ceo-split-overflow': 'periodic',

  // Privacy gate for gstack-brain-sync — periodic (non-deterministic LLM call,
  // costs ~$0.30-$0.50 per run, not needed on every commit)