Files
gstack/test/touchfiles.test.ts
T
Garry Tan 7b4738bca0 v1.27.1.0 fix: anti-shortcut clause + gate-tier AskUserQuestion floor tests for all plan-* skills (#1354)
* feat(test/helpers): runPlanSkillFloorCheck — minimal AskUserQuestion-floor observer

Adds a focused PTY observer that exits at the first non-permission
numbered-option render. Catches the May 2026 transcript-bug class
(model wrote plan + ExitPlanMode without firing any AUQ) without
needing to fingerprint or navigate past the AUQ.

Why separate from runPlanSkillCounting: plan-mode AUQs render every
option on a single logical line via cursor-positioning escapes that
stripAnsi can't simulate, so parseNumberedOptions returns < 2 options
and never records a fingerprint. Counting tests work on 25-min budgets
because eventually one frame parses cleanly; gate-tier floor tests
need to exit early on the first observation. Trades fingerprint
precision for early-exit reliability.

Also drops COMPLETION_SUMMARY_RE check from this helper — it matches
"GSTACK REVIEW REPORT" anywhere in the buffer including when the
agent does recon by reading existing plan files. plan_ready
(claude's actual "Ready to execute" confirmation) is the reliable
terminal signal for "agent finished without asking."

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* feat(resolvers): generateAntiShortcutClause shared resolver

Adds {{ANTI_SHORTCUT_CLAUSE}} placeholder backed by a single resolver
function in scripts/resolvers/review.ts. Plan-* review skills can now
include the clause via one placeholder line in their .tmpl rather than
cloning the paragraph four times. Future tightening edits one resolver,
all four skills update on next gen-skill-docs.

Wired into the existing RESOLVERS map alongside generateReviewDashboard
and generatePlanFileReviewReport — no gen-skill-docs.ts change needed
because the generator already does generic placeholder substitution
against that map.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* feat(plan-*-review): anti-shortcut clause in all four review skills

Inserts {{ANTI_SHORTCUT_CLAUSE}} placeholder immediately after the
**Anti-skip rule:** paragraph in plan-{eng,ceo,design,devex}-review
SKILL.md.tmpl. The four templates use different surrounding section
headers (eng "Review Sections (after scope is agreed)" vs ceo/design/devex
variants), so anchoring on the paragraph rather than the heading works
across all four.

Closes the May 2026 transcript-bug loophole: existing STOP gates name
forbidden actions only AFTER a per-section finding is identified. The
anti-shortcut clause adds the pre-emptive rule — "the plan file is the
OUTPUT of the interactive review, not a substitute for it" — covering
the case the transcript exhibited (skip per-section walk, dump every
finding into one plan write, call ExitPlanMode).

Regenerated SKILL.md for all hosts via bun run gen:skill-docs --host all.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* test: gate-tier AskUserQuestion floor tests for all plan-* review skills

Adds 4 finding-floor tests (one per plan-* skill) that catch the May
2026 transcript-bug class — model wrote a plan and called ExitPlanMode
without firing any review-phase AskUserQuestion. Asserts via
runPlanSkillFloorCheck that ANY non-permission AUQ render fires before
the agent reaches plan_ready.

Verified:
- Eng floor: passed in 59s
- CEO floor: passed in 197s
- Design floor: passed
- Devex floor: passed
- Total ~$2-6 per CI run; only triggers on diff against the 4 plan-*
  templates, the shared resolver review.ts, the seeds fixture, or the
  PTY runner helper.

Fixtures live in test/fixtures/forcing-finding-seeds.ts, one constant
per skill. Each seed is engineered to force at least one obvious
finding under that skill's review focus (architectural smell for eng,
scope-creep for ceo, UI-slop for design, painful onboarding for devex).

Touchfiles wiring:
- E2E_TOUCHFILES: 4 plan-*-finding-floor entries with deps on the
  matching skill template, the shared resolver, the seeds fixture,
  and the PTY runner helper
- E2E_TIERS: all 4 entries marked 'gate'
- touchfiles.test.ts: count assertion bumped 21→22 with explicit
  plan-ceo-finding-floor containment check

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* chore: bump version and changelog (v1.27.1.0)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 20:27:20 -07:00

327 lines
13 KiB
TypeScript

/**
* Unit tests for diff-based test selection.
* Free (no API calls), runs with `bun test`.
*/
import { describe, test, expect } from 'bun:test';
import { spawnSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import {
matchGlob,
selectTests,
detectBaseBranch,
E2E_TOUCHFILES,
E2E_TIERS,
LLM_JUDGE_TOUCHFILES,
GLOBAL_TOUCHFILES,
} from './helpers/touchfiles';
// Parent of the directory containing this test file; the completeness tests
// below look for the `test/` directory underneath it.
// NOTE(review): `import.meta.dir` is a Bun-specific extension (presumably the
// absolute directory of the current module) — this file is not expected to
// run under plain Node.
const ROOT = path.resolve(import.meta.dir, '..');
// --- matchGlob ---
describe('matchGlob', () => {
  /** A single expectation: whether `file` matches `pattern`. */
  type GlobCase = readonly [file: string, pattern: string, expected: boolean];

  /** Runs every case in order; the first mismatch fails the enclosing test. */
  const checkAll = (cases: readonly GlobCase[]): void => {
    for (const [file, pattern, expected] of cases) {
      expect(matchGlob(file, pattern)).toBe(expected);
    }
  };

  test('** matches any depth of path segments', () => {
    checkAll([
      ['browse/src/commands.ts', 'browse/src/**', true],
      ['browse/src/deep/nested/file.ts', 'browse/src/**', true],
      ['browse/src/cli.ts', 'browse/src/**', true],
    ]);
  });

  test('** does not match unrelated paths', () => {
    checkAll([
      ['browse/src/commands.ts', 'qa/**', false],
      ['review/SKILL.md', 'qa/**', false],
    ]);
  });

  test('exact match works', () => {
    checkAll([
      ['SKILL.md', 'SKILL.md', true],
      // Neither a longer filename nor a nested path counts as an exact match.
      ['SKILL.md.tmpl', 'SKILL.md', false],
      ['qa/SKILL.md', 'SKILL.md', false],
    ]);
  });

  test('* matches within a single segment', () => {
    checkAll([
      ['test/fixtures/review-eval-enum.rb', 'test/fixtures/review-eval-enum*.rb', true],
      ['test/fixtures/review-eval-enum-diff.rb', 'test/fixtures/review-eval-enum*.rb', true],
      ['test/fixtures/review-eval-vuln.rb', 'test/fixtures/review-eval-enum*.rb', false],
    ]);
  });

  test('dots in patterns are escaped correctly', () => {
    checkAll([
      ['SKILL.md', 'SKILL.md', true],
      // A literal '.' in the pattern must not behave like a regex wildcard.
      ['SKILLxmd', 'SKILL.md', false],
    ]);
  });

  test('** at end matches files in the directory', () => {
    checkAll([
      ['qa/SKILL.md', 'qa/**', true],
      ['qa/SKILL.md.tmpl', 'qa/**', true],
      ['qa/templates/report.md', 'qa/**', true],
    ]);
  });
});
// --- selectTests ---
describe('selectTests', () => {
  /** Number of E2E tests registered in the touchfile map. */
  const totalE2E = Object.keys(E2E_TOUCHFILES).length;

  test('browse/src change selects browse and qa tests', () => {
    const result = selectTests(['browse/src/commands.ts'], E2E_TOUCHFILES);
    expect(result.selected).toContain('browse-basic');
    expect(result.selected).toContain('browse-snapshot');
    expect(result.selected).toContain('qa-quick');
    expect(result.selected).toContain('qa-fix-loop');
    expect(result.selected).toContain('design-review-fix');
    expect(result.reason).toBe('diff');
    // Should NOT include unrelated tests
    expect(result.selected).not.toContain('plan-ceo-review');
    expect(result.selected).not.toContain('retro');
    expect(result.selected).not.toContain('document-release');
  });

  test('skill-specific change selects only that skill and related tests', () => {
    // Single source of truth for the expected selection. The count assertions
    // at the end are derived from this list, so adding a dependent test means
    // adding one name here rather than also bumping a separate magic number.
    const expectedSelection = [
      'plan-ceo-review',
      'plan-ceo-review-selective',
      'plan-ceo-review-benefits',
      'plan-ceo-review-expansion-energy',
      'autoplan-core',
      'codex-offered-ceo-review',
      'plan-ceo-review-format-mode',
      'plan-ceo-review-format-approach',
      // v1.10.2.0 plan-mode handshake entries also depend on plan-ceo-review/**
      'plan-ceo-review-plan-mode',
      'plan-mode-no-op',
      'e2e-harness-audit',
      'plan-ceo-review-prosons-cadence',
      'plan-review-prosons-format',
      'plan-review-prosons-hardstop-neg',
      'plan-review-prosons-neutral-neg',
      // v1.13.x real-PTY E2E batch entries that also depend on plan-ceo-review/**
      'ask-user-question-format-pty',
      'plan-ceo-mode-routing',
      'autoplan-chain-pty',
      // Per-finding count + review-report-at-bottom (v1.21.x)
      'plan-ceo-finding-count',
      // v1.22+ AskUserQuestion-blocked regression: autoplan-auto-mode +
      // auto-decide-preserved also depend on plan-ceo-review/**
      'autoplan-auto-mode',
      'auto-decide-preserved',
      // v1.27+ gate-tier reviewCount-floor regression for transcript bug
      'plan-ceo-finding-floor',
    ];
    // A copy-pasted duplicate would silently inflate the derived count.
    expect(new Set(expectedSelection).size).toBe(expectedSelection.length);

    const result = selectTests(['plan-ceo-review/SKILL.md'], E2E_TOUCHFILES);
    for (const name of expectedSelection) {
      expect(result.selected).toContain(name);
    }
    // Containment plus equal length means nothing beyond the list was selected.
    expect(result.selected.length).toBe(expectedSelection.length);
    expect(result.skipped.length).toBe(totalE2E - expectedSelection.length);
  });

  test('global touchfile triggers ALL tests', () => {
    const result = selectTests(['test/helpers/session-runner.ts'], E2E_TOUCHFILES);
    expect(result.selected.length).toBe(totalE2E);
    expect(result.skipped.length).toBe(0);
    expect(result.reason).toContain('global');
  });

  test('gen-skill-docs.ts is a scoped touchfile, not global', () => {
    const result = selectTests(['scripts/gen-skill-docs.ts'], E2E_TOUCHFILES);
    // Should select tests that list gen-skill-docs.ts in their touchfiles, not ALL tests
    expect(result.selected.length).toBeGreaterThan(0);
    expect(result.selected.length).toBeLessThan(totalE2E);
    expect(result.reason).toBe('diff');
    // Should include tests that depend on gen-skill-docs.ts
    expect(result.selected).toContain('skillmd-setup-discovery');
    expect(result.selected).toContain('session-awareness');
    expect(result.selected).toContain('journey-ideation');
    // Should NOT include tests that don't depend on it
    expect(result.selected).not.toContain('retro');
    expect(result.selected).not.toContain('cso-full-audit');
  });

  test('unrelated file selects nothing', () => {
    const result = selectTests(['README.md'], E2E_TOUCHFILES);
    expect(result.selected).toEqual([]);
    expect(result.skipped.length).toBe(totalE2E);
  });

  test('empty changed files selects nothing', () => {
    const result = selectTests([], E2E_TOUCHFILES);
    expect(result.selected).toEqual([]);
  });

  test('multiple changed files union their selections', () => {
    const result = selectTests(
      ['plan-ceo-review/SKILL.md', 'retro/SKILL.md.tmpl'],
      E2E_TOUCHFILES,
    );
    expect(result.selected).toContain('plan-ceo-review');
    expect(result.selected).toContain('plan-ceo-review-selective');
    expect(result.selected).toContain('retro');
    expect(result.selected).toContain('retro-base-branch');
    // Also selects journey routing tests (*/SKILL.md.tmpl matches retro/SKILL.md.tmpl)
    expect(result.selected.length).toBeGreaterThanOrEqual(4);
  });

  test('works with LLM_JUDGE_TOUCHFILES', () => {
    const result = selectTests(['qa/SKILL.md'], LLM_JUDGE_TOUCHFILES);
    expect(result.selected).toContain('qa/SKILL.md workflow');
    expect(result.selected).toContain('qa/SKILL.md health rubric');
    expect(result.selected).toContain('qa/SKILL.md anti-refusal');
    expect(result.selected.length).toBe(3);
  });

  test('SKILL.md.tmpl root template selects root-dependent tests and routing tests', () => {
    const result = selectTests(['SKILL.md.tmpl'], E2E_TOUCHFILES);
    // Should select tests that depend on root SKILL.md (spot-checked, not counted)
    expect(result.selected).toContain('skillmd-setup-discovery');
    expect(result.selected).toContain('session-awareness');
    // Also selects journey routing tests (SKILL.md.tmpl in their touchfiles)
    expect(result.selected).toContain('journey-ideation');
    // Should NOT select unrelated non-routing tests
    expect(result.selected).not.toContain('plan-ceo-review');
    expect(result.selected).not.toContain('retro');
  });

  test('global touchfiles work for LLM-judge tests too', () => {
    const result = selectTests(['test/helpers/session-runner.ts'], LLM_JUDGE_TOUCHFILES);
    expect(result.selected.length).toBe(Object.keys(LLM_JUDGE_TOUCHFILES).length);
  });
});
// --- detectBaseBranch ---
describe('detectBaseBranch', () => {
  /**
   * Creates a fresh scratch directory, passes it to `body`, and removes it
   * afterwards even when `body` throws (for example on a failed expectation),
   * so failing runs do not leave directories behind in the OS temp dir.
   */
  const withTempDir = (body: (dir: string) => void): void => {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'touchfiles-test-'));
    try {
      body(dir);
    } finally {
      // Cleanup is best-effort: a removal failure must not mask the test result.
      try { fs.rmSync(dir, { recursive: true, force: true }); } catch {}
    }
  };

  /** Runs `git <args>` inside `dir` with piped stdio and a 5 s timeout. */
  const git = (dir: string, args: string[]) =>
    spawnSync('git', args, { cwd: dir, stdio: 'pipe', timeout: 5000 });

  test('detects local main branch', () => {
    withTempDir(dir => {
      git(dir, ['init']);
      git(dir, ['config', 'user.email', 'test@test.com']);
      git(dir, ['config', 'user.name', 'Test']);
      fs.writeFileSync(path.join(dir, 'test.txt'), 'hello\n');
      git(dir, ['add', '.']);
      git(dir, ['commit', '-m', 'init']);
      const result = detectBaseBranch(dir);
      // Should find 'main' (or 'master' depending on git default)
      expect(result).toMatch(/^(main|master)$/);
    });
  });

  test('returns null for empty repo with no branches', () => {
    withTempDir(dir => {
      git(dir, ['init']);
      // No commits = no branches
      const result = detectBaseBranch(dir);
      expect(result).toBeNull();
    });
  });

  test('returns null for non-git directory', () => {
    withTempDir(dir => {
      const result = detectBaseBranch(dir);
      expect(result).toBeNull();
    });
  });
});
// --- Completeness: every testName in skill-e2e-*.test.ts has a TOUCHFILES entry ---
describe('TOUCHFILES completeness', () => {
  test('every E2E testName has a TOUCHFILES entry', () => {
    // Concatenate the contents of every split E2E test file under <ROOT>/test.
    const testDir = path.join(ROOT, 'test');
    const e2eContent = fs
      .readdirSync(testDir)
      .filter(file => file.startsWith('skill-e2e-') && file.endsWith('.test.ts'))
      .map(file => fs.readFileSync(path.join(testDir, file), 'utf-8') + '\n')
      .join('');

    // Literal `testName: '...'` values. Names still containing `${` are
    // template literals (e.g. `qa-${label}`); their expanded forms are
    // collected separately from the runPlantedBugEval calls below.
    const literalNames = [...e2eContent.matchAll(/testName:\s*['"`]([^'"`]+)['"`]/g)]
      .map(hit => hit[1])
      .filter(name => !name.includes('${'));

    // The third argument of each runPlantedBugEval(...) call is the label,
    // which is prefixed with `qa-` to form the test name.
    const plantedNames = [
      ...e2eContent.matchAll(/runPlantedBugEval\([^,]+,\s*[^,]+,\s*['"`]([^'"`]+)['"`]\)/g),
    ].map(hit => `qa-${hit[1]}`);

    const testNames: string[] = [...literalNames, ...plantedNames];
    expect(testNames.length).toBeGreaterThan(0);

    const missing = testNames.filter(name => !(name in E2E_TOUCHFILES));
    if (missing.length > 0) {
      throw new Error(
        `E2E tests missing TOUCHFILES entries: ${missing.join(', ')}\n` +
        `Add these to E2E_TOUCHFILES in test/helpers/touchfiles.ts`,
      );
    }
  });

  test('E2E_TIERS covers exactly the same tests as E2E_TOUCHFILES', () => {
    const touchfileNames = Object.keys(E2E_TOUCHFILES);
    const tierNames = Object.keys(E2E_TIERS);
    const inTouchfiles = new Set(touchfileNames);
    const inTiers = new Set(tierNames);

    // Missing tier entries are reported before extra ones.
    const missingFromTiers = touchfileNames.filter(name => !inTiers.has(name));
    if (missingFromTiers.length > 0) {
      throw new Error(
        `E2E tests missing TIER entries: ${missingFromTiers.join(', ')}\n` +
        `Add these to E2E_TIERS in test/helpers/touchfiles.ts`,
      );
    }

    const extraInTiers = tierNames.filter(name => !inTouchfiles.has(name));
    if (extraInTiers.length > 0) {
      throw new Error(
        `E2E_TIERS has extra entries not in E2E_TOUCHFILES: ${extraInTiers.join(', ')}\n` +
        `Remove these from E2E_TIERS or add to E2E_TOUCHFILES`,
      );
    }
  });

  test('E2E_TIERS only contains valid tier values', () => {
    const validTiers = ['gate', 'periodic'];
    // Report only the first offending entry, in object iteration order.
    const offender = Object.entries(E2E_TIERS).find(([, tier]) => !validTiers.includes(tier));
    if (offender !== undefined) {
      const [name, tier] = offender;
      throw new Error(`E2E_TIERS['${name}'] has invalid tier '${tier}'. Valid: ${validTiers.join(', ')}`);
    }
  });

  test('every LLM-judge test has a TOUCHFILES entry', () => {
    const llmContent = fs.readFileSync(
      path.join(ROOT, 'test', 'skill-llm-eval.test.ts'),
      'utf-8',
    );

    // Collect every `name: '...'` value, then de-duplicate, since the same
    // name can appear more than once in the eval file.
    const declaredNames = [...llmContent.matchAll(/name:\s*['"`]([^'"`]+)['"`]/g)].map(hit => hit[1]);
    const unique = [...new Set(declaredNames)];
    expect(unique.length).toBeGreaterThan(0);

    const missing = unique.filter(name => !(name in LLM_JUDGE_TOUCHFILES));
    if (missing.length > 0) {
      throw new Error(
        `LLM-judge tests missing TOUCHFILES entries: ${missing.join(', ')}\n` +
        `Add these to LLM_JUDGE_TOUCHFILES in test/helpers/touchfiles.ts`,
      );
    }
  });
});