Files
gstack/test/helpers/carve-guards.ts
T
Garry Tan da9f6852ea test: canonical CARVE_GUARDS registry; derive parity + size-budget from it
Single source of truth for the carved-skill set + per-skill invariants
(EQ1). parity-harness.ts sectioned entries and skill-size-budget.ts
SECTIONS_EXTRACTED now derive from it instead of hand-maintained lists.
Closes a pre-existing drift: plan-devex-review was in SECTIONS_EXTRACTED
but had no sectioned parity invariant; now generated. carve-guards.ts is
a pure leaf data module (import type only) to avoid an import cycle.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 17:52:02 -07:00

201 lines
8.6 KiB
TypeScript

/**
* Canonical carved-skill guard registry — the single source of truth for which
* skills are carved (skeleton SKILL.md + on-demand sections/*.md) and what each
* carve must guarantee.
*
* PURE LEAF DATA MODULE (codex outside-voice #1, refined-plan pass): this file
* has NO runtime imports — `import type` only. parity-harness.ts and
* skill-size-budget.test.ts derive their carved-skill lists FROM here (no
* parallel hand-maintained lists), so a runtime import back into either of them
* would create a cycle. Keep it data.
*
* Consumers:
* - test/carve-section-ordering.test.ts (E2, gate) → staticInvariants
* - test/carve-section-loading.test.ts (T2, periodic) → requiredReads + scenario
* - test/carve-guard-completeness.test.ts (E1, gate) → the set must equal the
* filesystem carved set
* - test/carve-guards-negative.test.ts (ET1, gate) → injects a broken fixture
* - test/helpers/parity-harness.ts → sectioned/maxSkeletonBytes/minBytes/mustContain
* - test/skill-size-budget.test.ts → SECTIONS_EXTRACTED = CARVED_SKILLS
*
* Adding a carve = add one entry here (atomically, in the same commit as the
* skeleton + manifest + sections — codex #4 — so E1's bidirectional parity never
* false-positives mid-commit).
*/
/** Static (skeleton-shape) invariants the per-PR ordering guard (E2) asserts. */
export interface CarveStaticInvariants {
/**
* Substrings that MUST remain in the always-loaded skeleton. Empty = skip
* (the skill has no distinctive pre-STOP anchor worth pinning beyond the
* universal STOP/section-index checks E2 already runs).
*/
mustStayInSkeleton: string[];
/**
* Substrings that MUST be in the union (skeleton + sections) but MUST NOT be in
* the skeleton — i.e. the heavy body that the carve relocated. Empty = skip.
*/
mustMoveToSection: string[];
/**
* If set, this marker must appear in the skeleton AFTER the last STOP-Read
* directive (e.g. the EXIT PLAN MODE GATE that fires once section work returns).
* Undefined = the skill has no post-STOP gate (operational/conversational carve).
*/
gateAfterStop?: string;
}
export interface CarveGuard {
skill: string;
/** Section .md filenames the manifest lists and the skeleton must STOP-Read. */
expectedSections: string[];
/**
* Sections the behavioral test (T2) asserts the agent actually Read when driven
* by `scenario`. A non-empty subset of expectedSections — the ones the scenario
* is built to require. The registry owns this so "registered ⇒ asserted" is
* structural (codex #2), not policed.
*/
requiredReads: string[];
/**
* Fixture prompt that drives a real `claude -p` run down the STOP-Read path for
* this skill (codex #7). The behavioral test asserts the run reached the STOP
* (read requiredReads), not merely that nothing was read.
*/
scenario: string;
staticInvariants: CarveStaticInvariants;
/**
* How the behavioral guard (T2) exercises this skill:
* - 'plan' → write a PLAN.md fixture, run the review against it
* - 'prompt' → no fixture file; the scenario prompt alone drives the run
* - 'external' → covered by a dedicated bespoke test (complex fixtures, e.g.
* ship's git/VERSION/CHANGELOG state). The data-driven loop
* skips it; E1 asserts `externalTest` exists instead.
*/
behavioral: 'plan' | 'prompt' | 'external';
/** Required when behavioral === 'external': path (repo-relative) to the dedicated test. */
externalTest?: string;
/** Parity: max bytes for the always-loaded skeleton (asserts the carve shrank it). */
maxSkeletonBytes: number;
/** Parity: min bytes for the skeleton+sections union (total behavior preserved). */
minUnionBytes: number;
/** Parity: content phrases the union must preserve. */
mustContain: string[];
}
export const CARVE_GUARDS: Record<string, CarveGuard> = {
ship: {
skill: 'ship',
expectedSections: [
'tests.md',
'test-coverage.md',
'plan-completion.md',
'review-army.md',
'greptile.md',
'adversarial.md',
'changelog.md',
'pr-body.md',
],
requiredReads: ['review-army.md', 'changelog.md'],
scenario:
'This is a FRESH version-changing ship: the branch has a real code change, VERSION still equals the base version (needs a bump), and CHANGELOG.md needs a new entry. Follow the skill flow for a version-changing ship: run the pre-landing review and prepare the CHANGELOG entry. Produce the ship plan / review report. Do NOT actually commit, push, or open a PR.',
staticInvariants: {
mustStayInSkeleton: [],
mustMoveToSection: [],
// ship is operational (multi-STOP, not a plan review); no single post-STOP gate.
gateAfterStop: undefined,
},
behavioral: 'external',
externalTest: 'test/skill-e2e-ship-section-loading.test.ts',
maxSkeletonBytes: 90_000,
minUnionBytes: 120_000,
mustContain: ['VERSION', 'CHANGELOG', 'review', 'merge', 'PR'],
},
'plan-ceo-review': {
skill: 'plan-ceo-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md. Hold the current scope (HOLD SCOPE mode) — do not challenge or expand scope. Run the full CEO review and produce the review report.',
staticInvariants: {
mustStayInSkeleton: ['## Step 0: Nuclear Scope Challenge'],
mustMoveToSection: ['### Section 1: Architecture Review', '## Mode Quick Reference'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'external',
externalTest: 'test/skill-e2e-plan-ceo-review-section-loading.test.ts',
maxSkeletonBytes: 90_000,
minUnionBytes: 80_000,
mustContain: ['SCOPE EXPANSION', 'SELECTIVE EXPANSION', 'HOLD SCOPE', 'SCOPE REDUCTION'],
},
'plan-eng-review': {
skill: 'plan-eng-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md. Accept the current scope. Run the full engineering review (architecture, code quality, tests, performance) and produce the review report.',
staticInvariants: {
mustStayInSkeleton: ['### Step 0: Scope Challenge'],
mustMoveToSection: ['### 1. Architecture review'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'plan',
maxSkeletonBytes: 62_000,
minUnionBytes: 70_000,
mustContain: ['Architecture', 'Code Quality', 'Test', 'Performance'],
},
'plan-design-review': {
skill: 'plan-design-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md for design and UX. Accept the current scope. Run the full design review passes and produce the review report.',
staticInvariants: {
mustStayInSkeleton: [],
mustMoveToSection: ['### Pass 1: Information Architecture'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'plan',
maxSkeletonBytes: 82_000,
minUnionBytes: 70_000,
mustContain: ['design', 'visual'],
},
'plan-devex-review': {
skill: 'plan-devex-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md for developer experience. Accept the current scope. Run the full DX review passes and produce the review report.',
staticInvariants: {
mustStayInSkeleton: [],
mustMoveToSection: ['### Pass 1: Getting Started Experience'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'plan',
maxSkeletonBytes: 76_000,
minUnionBytes: 70_000,
mustContain: ['developer experience', 'Getting Started'],
},
'office-hours': {
skill: 'office-hours',
expectedSections: ['design-and-handoff.md'],
requiredReads: ['design-and-handoff.md'],
scenario:
'Run office hours for this product idea through to the end: have the diagnostic conversation, explore alternatives, then write the design doc and run the relationship handoff (Phases 5-6).',
staticInvariants: {
mustStayInSkeleton: [],
mustMoveToSection: [],
// office-hours is conversational; the design-doc/handoff section has no
// post-STOP review gate in the skeleton.
gateAfterStop: undefined,
},
behavioral: 'prompt',
maxSkeletonBytes: 96_000,
minUnionBytes: 70_000,
mustContain: ['design doc', 'problem statement'],
},
};
/** Sorted carved-skill names. Consumers derive their lists from this — no parallel lists. */
export const CARVED_SKILLS: readonly string[] = Object.freeze(
Object.keys(CARVE_GUARDS).sort(),
);