From 0b04ca8486fb01710c640c851f9773f76d0ba6f9 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Tue, 28 Apr 2026 20:09:02 -0700 Subject: [PATCH] test: register four finding-count tests in touchfiles + tier map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each new test depends on its skill template, the runner, and three preamble resolvers (preamble.ts, generate-ask-user-format.ts, generate-completion-status.ts) — those affect question cadence and completion rendering, which is exactly what the test asserts on. All four classified periodic. Sequential execution during calibration; opt-in to concurrent only after measured comparison agrees (plan §D15). Updated touchfiles.test.ts: plan-ceo-review/** now selects 19 tests (was 18) because plan-ceo-finding-count joins the family. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/helpers/touchfiles.ts | 18 ++++++++++++++++++ test/touchfiles.test.ts | 6 ++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index 8e57e8e5..294867c6 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -103,6 +103,15 @@ export const E2E_TOUCHFILES: Record = { 'ship-idempotency-pty': ['ship/**', 'bin/gstack-next-version', 'lib/worktree.ts', 'test/helpers/claude-pty-runner.ts'], 'autoplan-chain-pty': ['autoplan/**', 'plan-ceo-review/**', 'plan-design-review/**', 'plan-eng-review/**', 'plan-devex-review/**', 'test/fixtures/plans/ui-heavy-feature.md', 'test/helpers/claude-pty-runner.ts'], 'e2e-harness-audit': ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/claude-pty-runner.ts'], + + // Per-finding AskUserQuestion count + review-report-at-bottom assertion. + // Each test drives its skill end-to-end; touchfiles include preamble + + // completion-status resolvers because they affect question cadence and + // terminal output (the regression surface this test catches). + 'plan-ceo-finding-count': ['plan-ceo-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/claude-pty-runner.ts', 'test/skill-e2e-plan-ceo-finding-count.test.ts'], + 'plan-eng-finding-count': ['plan-eng-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/claude-pty-runner.ts', 'test/skill-e2e-plan-eng-finding-count.test.ts'], + 'plan-design-finding-count': ['plan-design-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/claude-pty-runner.ts', 'test/skill-e2e-plan-design-finding-count.test.ts'], + 'plan-devex-finding-count': ['plan-devex-review/**', 'scripts/resolvers/preamble.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/claude-pty-runner.ts', 'test/skill-e2e-plan-devex-finding-count.test.ts'], 'brain-privacy-gate': ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-brain-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'], // AskUserQuestion format regression (RECOMMENDATION + Completeness: N/10) @@ -358,6 +367,15 @@ export const E2E_TIERS: Record = { 'ship-idempotency-pty': 'periodic', // ~$3/run, real /ship in plan mode 'autoplan-chain-pty': 'periodic', // ~$8/run, all 3 phases sequential + // Per-finding count + review-report-at-bottom — periodic because each + // run drives a full skill end-to-end (~25 min, ~$5/run). Sequential + // execution during calibration; concurrent opt-in only after measured + // comparison agrees (plan §D15). + 'plan-ceo-finding-count': 'periodic', + 'plan-eng-finding-count': 'periodic', + 'plan-design-finding-count': 'periodic', + 'plan-devex-finding-count': 'periodic', + // Privacy gate for gstack-brain-sync — periodic (non-deterministic LLM call, // costs ~$0.30-$0.50 per run, not needed on every commit) 'brain-privacy-gate': 'periodic', diff --git a/test/touchfiles.test.ts b/test/touchfiles.test.ts index 0d9ada4b..7f6ad22b 100644 --- a/test/touchfiles.test.ts +++ b/test/touchfiles.test.ts @@ -97,8 +97,10 @@ describe('selectTests', () => { expect(result.selected).toContain('ask-user-question-format-pty'); expect(result.selected).toContain('plan-ceo-mode-routing'); expect(result.selected).toContain('autoplan-chain-pty'); - expect(result.selected.length).toBe(18); - expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 18); + // Per-finding count + review-report-at-bottom (v1.21.x) + expect(result.selected).toContain('plan-ceo-finding-count'); + expect(result.selected.length).toBe(19); + expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 19); }); test('global touchfile triggers ALL tests', () => {