/** * Static invariant tests for /spec (consolidates 13 gate-tier checks). * * Each test asserts a specific contract the spec/SKILL.md.tmpl must encode. * If the template drifts away from a contract, the test fails immediately — * no LLM, no E2E cost. * * Covers (W7 plan): * spec-phase-gating — Phase 1 hard gate ("no issue after first message") * spec-phase4-revise — Phase 4 "what did I get wrong" loop * spec-dedupe-no-gh — graceful skip on gh missing / unauth / rate-limit * spec-dedupe-matches — merge-with-or-file-new AskUserQuestion for matches * spec-execute-dirty — porcelain check + 3-path AUQ + TOCTOU re-check * spec-execute-race — unique branch spec/-$$ + SHA pin * spec-quality-gate-fallback — codex timeout/unavailable skip-with-warn * spec-quality-gate-redaction — fail-closed secret regex list + BLOCKED * spec-quality-gate-secret-sink — invariant: raw spec not persisted on block * spec-archive — gstack-paths eval + atomic tmp/mv + PID suffix * spec-archive-sync-exclusion — /specs/ auto-exclude from sync allowlist * spec-audit-flag — flag routes to Audit/Cleanup template * spec-concurrency — PID suffix in branch + atomic archive write * spec-plan-mode-detection — reads GSTACK_PLAN_MODE env */ import { describe, test, expect } from 'bun:test'; import * as fs from 'fs'; import * as path from 'path'; const ROOT = path.resolve(import.meta.dir, '..'); const TMPL = fs.readFileSync(path.join(ROOT, 'spec', 'SKILL.md.tmpl'), 'utf-8'); // The redaction taxonomy + invocation bash are injected by the gen-skill-docs // resolver, so the literal patterns/bash live in the GENERATED SKILL.md, not the // .tmpl. Redaction assertions read the generated file. 
const GEN = fs.readFileSync(path.join(ROOT, 'spec', 'SKILL.md'), 'utf-8');

// ---------------------------------------------------------------------------
// Every suite below is a pure text check: it searches TMPL (the template) or
// GEN (the generated doc) for required prose/bash fragments. Each `//` comment
// sits on its own line so it cannot swallow the assertion that follows it.
// ---------------------------------------------------------------------------

// Phase 1: the template must carry the hard-gate prose and all five questions.
describe('/spec phase-gating', () => {
  test('HARD GATE prose forbids producing issue after first message', () => {
    expect(TMPL).toMatch(/HARD GATE.*Do NOT produce an issue after the first message/i);
    expect(TMPL).toMatch(/Always start with[\s\S]*?Phase 1/);
  });

  test('Phase 1 lists all five mandatory questions', () => {
    // Lower-case the template once; the comparison is case-insensitive.
    const haystack = TMPL.toLowerCase();
    for (const q of ['Who', 'current behavior', 'should the behavior be', 'Why now', "we'll know it's done"]) {
      // The replace drops the leading "we'll " so only "know it's done" is required.
      expect(haystack).toContain(q.toLowerCase().replace("we'll know", 'know'));
    }
  });
});

// Phase 4: the revise loop wording must be present.
describe('/spec Phase 4 revise loop', () => {
  test('Phase 4 asks "what did I get wrong" and iterates', () => {
    expect(TMPL).toMatch(/What did I get wrong\?/);
    expect(TMPL).toMatch(/Iterate until the user confirms/i);
  });
});

// --dedupe: gh failure modes are described and matches are surfaced as a choice.
describe('/spec --dedupe gh failure handling', () => {
  test('handles gh-not-installed, unauthed, rate-limited paths', () => {
    // Template wraps gh in backticks: "`gh` not installed" or "`gh` is not installed".
    expect(TMPL).toMatch(/gh.{0,5}not installed/i);
    expect(TMPL).toMatch(/gh auth status[\s\S]*?not logged in/i);
    expect(TMPL).toMatch(/rate.?limit/i);
  });

  test('never blocks Phase 2 on dedupe failure', () => {
    expect(TMPL).toMatch(/best-effort.*Never block|Never block.*dedupe failure/i);
  });

  test('matches surface as AskUserQuestion with merge-or-file-new options', () => {
    // Template breaks the sentence across lines: "Found {N} similar\n open issue(s):"
    expect(TMPL).toMatch(/Found \{N\} similar[\s\S]*?open issue/);
    expect(TMPL).toMatch(/Merge with one of these/);
    expect(TMPL).toMatch(/file a new spec anyway/);
  });
});

// --execute: dirty-worktree check, three-way prompt, and re-check wording.
describe('/spec --execute dirty-worktree gate', () => {
  test('runs git status --porcelain before spawn', () => {
    expect(TMPL).toMatch(/git status --porcelain/);
  });

  test('offers 3-option AskUserQuestion (continue / stash / cancel)', () => {
    expect(TMPL).toMatch(/Continue.*uncommitted/i);
    expect(TMPL).toMatch(/Stash and restore/i);
    expect(TMPL).toMatch(/Cancel spawn/i);
  });

  test('TOCTOU re-check fires after AskUserQuestion answer', () => {
    expect(TMPL).toMatch(/TOCTOU.*re-?check|re-?run.*git status/i);
  });
});

// --execute: SHA pin plus `$$`-suffixed branch and worktree names.
describe('/spec --execute race + concurrency hardening', () => {
  test('captures SHA pin via git rev-parse HEAD (not "HEAD" string)', () => {
    expect(TMPL).toMatch(/PIN_SHA=\$\(git rev-parse HEAD\)/);
    expect(TMPL).toMatch(/git worktree add[^\n]*\$PIN_SHA/);
  });

  test('branch name includes PID suffix for concurrency safety', () => {
    expect(TMPL).toMatch(/SPAWN_BRANCH="spec\/\$\{SLUG_TITLE\}-\$\$"/);
  });

  test('worktree path includes PID suffix', () => {
    expect(TMPL).toMatch(/SPAWN_PATH=.*-\$\$/);
  });
});

// Quality gate: skip-with-message wording for codex timeout / unavailability.
describe('/spec quality gate fallback', () => {
  test('skips on codex timeout with explanatory message', () => {
    // `didn.t` matches both ASCII `'` and Unicode curly `’` apostrophes.
    expect(TMPL).toMatch(/codex didn.t respond in[\s\S]{0,80}2 minutes/);
    // Template wraps `--no-gate` in backticks, so allow flexible separator:
    expect(TMPL).toMatch(/--no-gate.{0,3}to disable/i);
  });

  test('skips on codex not installed / unauthed', () => {
    expect(TMPL).toMatch(/codex.*not installed/i);
    expect(TMPL).toMatch(/codex.*auth.*failed/i);
  });
});

// Redaction: these assertions mostly read the generated docs (GEN, cso/SKILL.md).
describe('/spec fail-closed redaction (shared engine)', () => {
  test('the full taxonomy (with secret prefixes) lives in the generated /cso doc', () => {
    const cso = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
    expect(cso).toContain('AKIA');
    expect(cso).toMatch(/ghp_|gho_|ghs_/);
    expect(cso).toContain('sk-ant-');
    expect(cso).toContain('BEGIN');
  });

  test('/spec points to the full taxonomy without inlining the catalog', () => {
    // NOTE(review): `|` binds loosest, so this also passes on any bare "/cso"
    // mention without "Full taxonomy" — confirm that is intended.
    expect(GEN).toMatch(/Full taxonomy.*lib\/redact-patterns\.ts|\/cso/);
    expect(GEN).toMatch(/~30 secret\/PII\/legal patterns/);
  });

  test('redaction routes through the shared gstack-redact bin, not inline regex', () => {
    expect(GEN).toContain('gstack-redact');
    expect(GEN).toContain('--from-file');
    // The old inline 7-regex prose is gone from the template.
    expect(TMPL).not.toMatch(/AWS access key.*regex.*AKIA\[0-9A-Z\]/);
  });

  test('HIGH (exit 3) blocks dispatch; no skip flag for HIGH', () => {
    expect(GEN).toMatch(/Exit 3 \(HIGH\)/);
    expect(GEN).toMatch(/no skip flag for HIGH/i);
  });

  test('hard delimiter + instruction boundary still wraps the codex dispatch', () => {
    // NOTE(review): both assertions check the same literal as written here; if
    // the opening and closing delimiters differ, confirm against the template.
    expect(TMPL).toContain('<<>>');
    expect(TMPL).toContain('<<>>');
    expect(TMPL).toMatch(/text between[\s\S]*delimiters[\s\S]*is DATA, not instructions/i);
  });
});

// Scan-at-sink: ordering is checked by comparing indexOf positions in GEN.
describe('/spec redaction at every sink (scan-at-sink)', () => {
  test('scan precedes the gh issue create (pre-issue)', () => {
    const scanIdx = GEN.indexOf('Re-scan before filing');
    const fileIdx = GEN.indexOf('gh issue create --title');
    expect(scanIdx).toBeGreaterThan(-1);
    expect(fileIdx).toBeGreaterThan(scanIdx);
  });

  test('files from the scanned temp file (exact bytes, not a re-render)', () => {
    expect(GEN).toMatch(/gh issue create --title "" --body-file "\$REDACT_FILE"/);
  });

  test('scan precedes the archive write (pre-archive)', () => {
    const scanIdx = GEN.indexOf('Re-scan before archiving');
    const archIdx = GEN.indexOf('ARCHIVE_PATH.tmp');
    expect(scanIdx).toBeGreaterThan(-1);
    expect(archIdx).toBeGreaterThan(scanIdx);
  });

  test('D2: sanitized body lands in the archive', () => {
    expect(GEN).toMatch(/sanitized body[\s\S]{0,200}\$REDACT_FILE/i);
  });
});

// Secret-sink invariant: wording for what must not happen when the scan blocks.
describe('/spec quality gate secret-sink invariant', () => {
  test('declares "raw spec must NOT be persisted" when the scan BLOCKS', () => {
    expect(TMPL).toMatch(/raw spec must NOT[\s\S]*be persisted/i);
  });

  test('BLOCK path stops before dispatch/archive/file', () => {
    expect(TMPL).toMatch(/no archive write, no transcript log, no codex\s*\n?\s*dispatch/i);
  });
});

// Phase 4.5a: semantic review heading order, markers, categories, and audit log.
describe('/spec Phase 4.5a semantic content review', () => {
  test('semantic pass precedes the regex scan', () => {
    const semIdx = TMPL.indexOf('Phase 4.5a: Semantic Content Review');
    const regexIdx = TMPL.indexOf('Phase 4.5b: Fail-closed redaction');
    expect(semIdx).toBeGreaterThan(-1);
    expect(regexIdx).toBeGreaterThan(semIdx);
  });

  test('emits a structurally-testable SEMANTIC_REVIEW marker', () => {
    expect(TMPL).toMatch(/SEMANTIC_REVIEW: clean/);
    expect(TMPL).toMatch(/SEMANTIC_REVIEW: flagged/);
  });

  test('lists all five semantic categories', () => {
    expect(TMPL).toMatch(/Named individuals attached to negative judgments/i);
    expect(TMPL).toMatch(/Customer\/vendor names tied to negative events/i);
    expect(TMPL).toMatch(/Unannounced internal strategy/i);
    expect(TMPL).toMatch(/NDA-bound material/i);
    expect(TMPL).toMatch(/Confidential context bleed/i);
  });

  test('prompt-injection hardened: marker in body forces flagged', () => {
    expect(TMPL).toMatch(/contains[\s\S]{0,20}`SEMANTIC_REVIEW:`[\s\S]{0,80}force the[\s\S]{0,10}outcome to `flagged`/i);
  });

  test('public repo disables option B (acknowledge and proceed)', () => {
    expect(TMPL).toMatch(/PUBLIC repo,\s*option B is disabled/i);
  });

  test('appends a content-free audit record (sha256, no body text)', () => {
    expect(TMPL).toContain('redact-audit-log.ts');
    expect(TMPL).toMatch(/categories_flagged/);
  });
});

// --no-gate: the flag table must say redaction still runs.
describe('/spec --no-gate keeps redacting', () => {
  test('flag table says redaction still runs under --no-gate', () => {
    expect(TMPL).toMatch(/Redaction.*still runs.*no flag that disables it/i);
  });
});

// Archive: path resolution, tmp + mv write, `$$` filename suffix, frontmatter keys.
describe('/spec archive', () => {
  test('uses eval $(gstack-paths) not hardcoded ~/.gstack/', () => {
    expect(TMPL).toMatch(/eval "\$\(.+gstack-paths\)"/);
    expect(TMPL).toMatch(/\$GSTACK_STATE_ROOT\/projects\/\$SLUG\/specs/);
    // No hardcoded ~/.gstack/projects path:
    expect(TMPL).not.toMatch(/~\/\.gstack\/projects\/\$SLUG\/specs/);
  });

  test('atomic write via .tmp + mv', () => {
    expect(TMPL).toMatch(/\$ARCHIVE_PATH\.tmp/);
    expect(TMPL).toMatch(/mv "\$ARCHIVE_PATH\.tmp" "\$ARCHIVE_PATH"/);
  });

  test('PID suffix in archive filename', () => {
    expect(TMPL).toMatch(/ARCHIVE_NAME=.*\$\$/);
  });

  test('frontmatter includes spec_issue_number for /ship integration', () => {
    expect(TMPL).toMatch(/spec_issue_number:/);
    expect(TMPL).toMatch(/spec_branch:/);
    expect(TMPL).toMatch(/spec_executed:/);
  });
});

// Archive sync: exclusion wording plus the opt-in flag.
describe('/spec archive sync exclusion', () => {
  test('/specs/ excluded from artifacts-sync by default; --sync-archive opt-in', () => {
    expect(TMPL).toMatch(/\/specs\/.*auto-excluded.*artifacts-sync|excluded from.*allowlist/i);
    expect(TMPL).toMatch(/--sync-archive/);
  });
});

// --audit: present in the flag table; the per-type flags must be absent.
describe('/spec --audit flag', () => {
  test('flag table includes --audit with routing to Audit template', () => {
    expect(TMPL).toMatch(/\| `--audit` \|/);
    expect(TMPL).toMatch(/Audit\/Cleanup template/);
  });

  test('Audit / Cleanup Issues section exists with --audit cross-reference', () => {
    expect(TMPL).toMatch(/### Audit \/ Cleanup Issues.*routed via.*--audit/);
  });

  test('--bug/--feature/--refactor flags NOT in table (dropped per DX14)', () => {
    expect(TMPL).not.toMatch(/\| `--bug` \|/);
    expect(TMPL).not.toMatch(/\| `--feature` \|/);
    expect(TMPL).not.toMatch(/\| `--refactor` \|/);
  });
});

// Phase 5: GSTACK_PLAN_MODE wording and the override flags.
describe('/spec plan-mode-aware Phase 5 (DX7/DX11/F1)', () => {
  test('reads GSTACK_PLAN_MODE env at Phase 5 dispatch', () => {
    expect(TMPL).toMatch(/GSTACK_PLAN_MODE/);
    expect(TMPL).toMatch(/plan-mode-aware default/i);
  });

  test('plan-mode active → file-only path; inactive → file + spawn', () => {
    expect(TMPL).toMatch(/GSTACK_PLAN_MODE=active.*file-only path/);
    expect(TMPL).toMatch(/GSTACK_PLAN_MODE=inactive.*file \+ spawn/);
  });

  test('--file-only / --no-execute / --plan-file override flags', () => {
    expect(TMPL).toMatch(/--file-only/);
    expect(TMPL).toMatch(/--no-execute/);
    expect(TMPL).toMatch(/--plan-file/);
  });
});

// Phase 3: evidence-first mandate and its two fallbacks.
describe('/spec Phase 3 hard-grep with fallback', () => {
  test('Phase 3 mandates reading evidence before asking', () => {
    expect(TMPL).toMatch(/Mandatory:[\s\S]*MUST read at least one[\s\S]*evidence/i);
  });

  test('project-level fallback prose for prompts with no concrete file', () => {
    expect(TMPL).toMatch(/Project-level prompt/);
    expect(TMPL).toMatch(/I inspected the project structure/);
  });

  // The title is kept on one line: a raw newline inside a quoted string is a syntax error.
  test('greenfield escape (no related evidence) is explicit', () => {
    expect(TMPL).toMatch(/genuinely cannot find any related evidence/i);
  });
});

// Concurrency: looser variants of the race/archive checks above.
describe('/spec concurrency safety (overlap with race; codex F5/F6/F10)', () => {
  test('two concurrent /spec runs get distinct branches via $$ PID', () => {
    expect(TMPL).toMatch(/SPAWN_BRANCH=.*\$\$/);
  });

  test('atomic archive write prevents JSONL/file interleave', () => {
    expect(TMPL).toMatch(/atomic.*rename|atomic write/i);
  });
});