test(ship): manifest-consistency + context-parity + requiredReads helper (T9)

Free deterministic guards for the carve:
- required-reads.ts + unit test: assertRequiredReads(run, requiredFiles) — the
  mechanical layer-5 check that the agent Read the sections its situation needs
  (required set comes from the fixture, not the passive manifest)
- section-manifest-consistency: 3-tier orphan classification (generated orphan +
  hand-edited generated file → FAIL; manifest orphan → WARN per v2_PLAN.md) and
  pins the PASSIVE-manifest contract (no applies_when/required_for)
- template-context-parity: generated sections have zero unresolved placeholders
  and gated resolvers (ADVERSARIAL_STEP/CONFIDENCE_CALIBRATION/CHANGELOG_WORKFLOW)
  rendered — proving sections resolve with the parent skillName, not 'sections'

16 tests, all green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-05-30 10:11:59 -07:00
parent 261d57a8e3
commit 924025a59c
4 changed files with 216 additions and 0 deletions
+40
View File
@@ -0,0 +1,40 @@
/**
* requiredReads enforcement (v2 plan T9, mitigation layer 5 — the only CI-failing
* layer against silent section-skip).
*
* Given a /ship run's tool calls and the set of section files the run's SITUATION
* required, assert the agent actually Read each one. The required set comes from
* the TEST FIXTURE (which situation it set up), NOT from the manifest — the
* manifest is passive (CM2). This keeps "when is a section required" in exactly
* one machine-checkable place: the eval fixtures.
*
* Builds on extractSectionReads from transcript-section-logger so section-path
* matching (the `/sections/<file>.md` segment, host-layout agnostic) lives in one
* place.
*/
import { extractSectionReads, type TranscriptResultLike } from './transcript-section-logger';
export interface RequiredReadsResult {
required: string[];
read: string[];
missing: string[];
ok: boolean;
}
/**
* @param result the skill run (anything with toolCalls)
* @param requiredFiles section basenames the situation required, e.g.
* ['version-bump.md','changelog.md'] (or with a sections/
* prefix — normalized to basename here)
*/
export function assertRequiredReads(
result: TranscriptResultLike,
requiredFiles: string[],
): RequiredReadsResult {
const read = extractSectionReads(result);
const readSet = new Set(read);
const required = requiredFiles.map(f => f.replace(/^.*\//, '')); // tolerate sections/<f>
const missing = required.filter(f => !readSet.has(f));
return { required, read, missing, ok: missing.length === 0 };
}
+41
View File
@@ -0,0 +1,41 @@
/**
* Unit tests for assertRequiredReads (v2 plan T9 mitigation layer 5). Pure logic
* over synthetic tool-call transcripts — the section-loading E2E (paid) drives
* this against real /ship runs.
*/
import { describe, test, expect } from 'bun:test';
import { assertRequiredReads } from './helpers/required-reads';
import type { ToolCallLike } from './helpers/transcript-section-logger';
const read = (fp: string): ToolCallLike => ({ tool: 'Read', input: { file_path: fp }, output: '' });
describe('assertRequiredReads', () => {
test('passes when every required section was Read', () => {
const result = {
toolCalls: [
read('/Users/x/.claude/skills/gstack/ship/sections/version-bump.md'),
read('ship/sections/changelog.md'),
],
};
const r = assertRequiredReads(result, ['version-bump.md', 'changelog.md']);
expect(r.ok).toBe(true);
expect(r.missing).toEqual([]);
});
test('flags a required section the agent never opened', () => {
const result = { toolCalls: [read('ship/sections/changelog.md')] };
const r = assertRequiredReads(result, ['version-bump.md', 'changelog.md']);
expect(r.ok).toBe(false);
expect(r.missing).toEqual(['version-bump.md']);
});
test('tolerates a sections/ prefix in the required list', () => {
const result = { toolCalls: [read('/abs/gstack/ship/sections/review-army.md')] };
expect(assertRequiredReads(result, ['sections/review-army.md']).ok).toBe(true);
});
test('empty required set always passes', () => {
expect(assertRequiredReads({ toolCalls: [] }, []).ok).toBe(true);
});
});
+77
View File
@@ -0,0 +1,77 @@
/**
* Section manifest ↔ filesystem consistency (v2 plan T9 / Phase C orphan check).
*
* Implements the 3-tier orphan classification from v2_PLAN.md:
* - generated orphan (sections/X.md with no sections/X.md.tmpl) → FAIL
* - hand-edited generated file (X.md missing the AUTO-GENERATED header) → FAIL
* - manifest orphan (sections/X.md.tmpl not listed in manifest) → WARN (v2.0)
*
* Also pins the PASSIVE-manifest contract (CM2 / v2_PLAN.md:663): manifest entries
* carry only id/file/title/trigger — no machine predicate (applies_when/required_for).
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
const ROOT = path.resolve(import.meta.dir, '..');
const SHIP_SECTIONS = path.join(ROOT, 'ship', 'sections');
const manifest = JSON.parse(fs.readFileSync(path.join(SHIP_SECTIONS, 'manifest.json'), 'utf-8'));
const sectionTmpls = fs.readdirSync(SHIP_SECTIONS).filter(f => f.endsWith('.md.tmpl'));
const sectionMds = fs.readdirSync(SHIP_SECTIONS).filter(f => f.endsWith('.md') && !f.endsWith('.md.tmpl'));
describe('section manifest ↔ filesystem consistency', () => {
test('manifest parses with skill + sections array', () => {
expect(manifest.skill).toBe('ship');
expect(Array.isArray(manifest.sections)).toBe(true);
expect(manifest.sections.length).toBeGreaterThan(0);
});
test('every manifest entry has a .md.tmpl source AND a generated .md', () => {
for (const s of manifest.sections) {
expect(fs.existsSync(path.join(SHIP_SECTIONS, `${s.file}.tmpl`))).toBe(true);
expect(fs.existsSync(path.join(SHIP_SECTIONS, s.file))).toBe(true);
}
});
test('manifest is PASSIVE — no applies_when / required_for predicate (CM2)', () => {
for (const s of manifest.sections) {
expect(s).not.toHaveProperty('applies_when');
expect(s).not.toHaveProperty('required_for');
// The allowed passive shape:
expect(typeof s.id).toBe('string');
expect(typeof s.file).toBe('string');
expect(typeof s.title).toBe('string');
expect(typeof s.trigger).toBe('string');
}
});
test('no generated orphan: every sections/X.md has a sections/X.md.tmpl → FAIL', () => {
const orphans = sectionMds.filter(md => !sectionTmpls.includes(`${md}.tmpl`));
expect(orphans).toEqual([]);
});
test('no hand-edited generated file: every sections/X.md has the AUTO-GENERATED header → FAIL', () => {
for (const md of sectionMds) {
const head = fs.readFileSync(path.join(SHIP_SECTIONS, md), 'utf-8').slice(0, 120);
expect(head).toContain('AUTO-GENERATED');
}
});
test('manifest orphan check (WARN in v2.0): every .md.tmpl is listed', () => {
const listed = new Set(manifest.sections.map((s: { file: string }) => `${s.file}.tmpl`));
const unlisted = sectionTmpls.filter(t => !listed.has(t));
if (unlisted.length > 0) {
// v2_PLAN.md: WARN now, FAIL in v2.1. Surface, don't fail the build yet.
// eslint-disable-next-line no-console
console.warn(`[section-manifest] manifest orphan(s) (not in manifest.json): ${unlisted.join(', ')}`);
}
expect(unlisted.length).toBeLessThanOrEqual(unlisted.length); // always passes; WARN only
});
test('section ids are unique', () => {
const ids = manifest.sections.map((s: { id: string }) => s.id);
expect(new Set(ids).size).toBe(ids.length);
});
});
+58
View File
@@ -0,0 +1,58 @@
/**
* Section TemplateContext parity (v2 plan T9 / Codex consult absorbed-refinement #1).
*
* Section generation must use the SAME TemplateContext as the parent skill —
* crucially the same skillName, so resolver `appliesTo` gating + tier behave
* identically. If a section resolved with skillName "sections" (the bug
* processSectionTemplate guards against), gated resolvers like ADVERSARIAL_STEP /
* CONFIDENCE_CALIBRATION would render empty.
*
* We assert on the GENERATED section output: gated resolver content is present and
* no placeholder is left unresolved. That can only be true if the parent ctx
* (skillName=ship) drove the resolve.
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
const ROOT = path.resolve(import.meta.dir, '..');
const SHIP_SECTIONS = path.join(ROOT, 'ship', 'sections');
function readSection(file: string): string {
return fs.readFileSync(path.join(SHIP_SECTIONS, file), 'utf-8');
}
describe('section TemplateContext parity (skillName pinned to parent)', () => {
test('no generated section has unresolved {{PLACEHOLDER}} tokens', () => {
for (const md of fs.readdirSync(SHIP_SECTIONS).filter(f => f.endsWith('.md') && !f.endsWith('.md.tmpl'))) {
const content = readSection(md);
const unresolved = content.match(/\{\{[A-Z_]+(?::[^}]+)?\}\}/g);
expect({ md, unresolved }).toEqual({ md, unresolved: null });
}
});
test('adversarial section rendered the ADVERSARIAL_STEP resolver (proves ship ctx)', () => {
const content = readSection('adversarial.md');
// The codex filesystem-boundary line only appears when ADVERSARIAL_STEP resolves.
expect(content).toContain('Do NOT read or execute any files under');
expect(content.length).toBeGreaterThan(500);
});
test('review-army section rendered CONFIDENCE_CALIBRATION + REVIEW_ARMY (gated resolvers)', () => {
const content = readSection('review-army.md');
expect(content).toContain('Confidence Calibration');
expect(content).toContain('confidence score');
});
test('tests section rendered TEST_BOOTSTRAP + TEST_FAILURE_TRIAGE', () => {
const content = readSection('tests.md');
expect(content).toContain('Test Failure Ownership Triage');
});
test('changelog section rendered CHANGELOG_WORKFLOW', () => {
const content = readSection('changelog.md');
expect(content).toContain('CHANGELOG');
expect(content.length).toBeGreaterThan(300);
});
});