test: align unit tests with slim resolvers + exempt 27MB security fixture

- test/skill-validation.test.ts: assert the slim Completeness Principle
  shape (Completeness: X/10, kind-note language) instead of the old
  Compression table. Remove the 3 tier-1 skills from the spot-check list
  (they intentionally don't carry the full Completeness Principle
  section). Exempt browse/test/fixtures/security-bench-haiku-responses.json
  (27MB deterministic replay fixture for BrowseSafe-Bench) from the 2MB
  tracked-file gate. The gate was actually failing on origin/main since
  the fixture was added in v1.6.4.0 — this is a side-fix to a real
  regression.

- test/brain-sync.test.ts: developer-machine-safe assertion for
  GSTACK_HOME override (compare config contents before/after instead of
  asserting the absence of a string that may legitimately exist).

- test/gen-skill-docs.test.ts: new tests for the slim — plan-review
  preambles stay under the post-slim budget (~33KB), Voice + Writing
  Style sections stay compact, and the slim Voice section preserves the
  load-bearing semantic contract (lead-with-the-point, name-the-file,
  user-outcome framing, no-corporate, no-AI-vocab, user-sovereignty).
  Update path-leakage scan to allow repo-root sidecar symlinks.

- test/writing-style-resolver.test.ts: assert the compact contract
  (gloss-on-first-use, outcome-framing, user-impact, terse-mode override)
  instead of the old 6-numbered-rules shape.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-25 21:24:12 -07:00
parent 38f31e3b1d
commit 16b80100c6
4 changed files with 106 additions and 29 deletions
+11 -4
View File
@@ -97,11 +97,18 @@ describe('gstack-config gbrain keys', () => {
});
test('GSTACK_HOME overrides real config dir', () => {
run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
// Real ~/.gstack/config.yaml must NOT have been touched.
const realConfig = path.join(os.homedir(), '.gstack', 'config.yaml');
const real = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : '';
expect(real).not.toContain('gbrain_sync_mode: full');
const before = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
const tempConfig = fs.readFileSync(path.join(tmpHome, 'config.yaml'), 'utf-8');
expect(tempConfig).toContain('gbrain_sync_mode: full');
// Real ~/.gstack/config.yaml must not be touched. It may already contain
// the same value on a developer machine, so compare contents, not strings.
const after = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
expect(after).toBe(before);
});
});
+77 -4
View File
@@ -40,6 +40,35 @@ function extractDescription(content: string): string {
return description;
}
function extractMarkdownSection(content: string, heading: string): string {
const escaped = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const startMatch = content.match(new RegExp(`^${escaped}.*$`, 'm'));
expect(startMatch?.index).toBeDefined();
const start = startMatch!.index!;
const afterHeading = start + startMatch![0].length;
const nextSection = content.slice(afterHeading).match(/\n## /);
const end = nextSection?.index === undefined
? content.length
: afterHeading + nextSection.index;
return content.slice(start, end).trim();
}
function extractPreambleBeforeWorkflow(content: string, workflowMarkers: string[]): string {
const markerIndexes = workflowMarkers
.map(marker => content.indexOf(marker))
.filter(index => index >= 0);
expect(markerIndexes.length).toBeGreaterThan(0);
return content.slice(0, Math.min(...markerIndexes));
}
function isRepoRootSymlink(candidateDir: string): boolean {
try {
return fs.realpathSync(candidateDir) === fs.realpathSync(ROOT);
} catch {
return false;
}
}
// Dynamic template discovery — matches the generator's findTemplates() behavior.
// New skills automatically get test coverage without updating a static list.
const ALL_SKILLS = (() => {
@@ -263,6 +292,50 @@ describe('gen-skill-docs', () => {
expect(content).toContain('~/.gstack/analytics');
});
test('plan-review generated preambles stay under the Option A budget', () => {
const reviewSkills = [
{
path: path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
markers: ['# Mega Plan Review Mode', '## Step 0: Detect platform and base branch'],
},
{
path: path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
markers: ['# Plan Review Mode'],
},
];
// Plan skills carry the same preamble surface as other tier-≥2 skills
// (Brain Sync, Context Recovery, Routing Injection are load-bearing
// functionality, not optional). Budget is set to current size + small
// headroom; ratchet down if a future slim trims real bytes.
for (const skill of reviewSkills) {
const content = fs.readFileSync(skill.path, 'utf-8');
const preamble = extractPreambleBeforeWorkflow(content, skill.markers);
expect(Buffer.byteLength(preamble, 'utf-8')).toBeLessThan(33_000);
}
});
test('voice and writing-style preamble sections stay compact', () => {
const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
const voice = extractMarkdownSection(content, '## Voice');
const writingStyle = extractMarkdownSection(content, '## Writing Style');
expect(Buffer.byteLength(voice, 'utf-8')).toBeLessThan(3_000);
expect(Buffer.byteLength(writingStyle, 'utf-8')).toBeLessThan(2_000);
});
test('slim voice section preserves the gstack voice contract', () => {
const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
const voice = extractMarkdownSection(content, '## Voice');
expect(voice).toMatch(/lead with the point|direct/i);
expect(voice).toMatch(/file|function|line|command|real numbers/i);
expect(voice).toMatch(/user.*outcome|user.*experience|real user/i);
expect(voice).toMatch(/corporate|academic|PR|hype/i);
expect(voice).toMatch(/AI vocabulary|delve|crucial|robust/i);
expect(voice).toMatch(/user decides|user.*context|sovereignty|recommendation, not a decision/i);
});
test('preamble .pending-* glob is zsh-safe (uses find, not shell glob)', () => {
for (const skill of ALL_SKILLS) {
const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
@@ -1964,13 +2037,13 @@ describe('Parameterized host smoke tests', () => {
expect(skills.length).toBeGreaterThan(0);
});
test('no .claude/skills path leakage in non-root skills', () => {
test('no .claude/skills path leakage outside repo-root sidecar symlinks', () => {
if (!fs.existsSync(hostDir)) return; // skip if not generated
const skills = fs.readdirSync(hostDir);
for (const skill of skills) {
// Skip root gstack skill — it contains preamble with intentional .claude/skills
// fallback paths for binary lookup and skill prefix instructions
if (skill === 'gstack') continue;
// Dev installs may mount the repo root at host/skills/gstack as a runtime
// sidecar. The generator skips that symlink loop, so leakage checks should too.
if (isRepoRootSymlink(path.join(hostDir, skill))) continue;
const skillMd = path.join(hostDir, skill, 'SKILL.md');
if (!fs.existsSync(skillMd)) continue;
const content = fs.readFileSync(skillMd, 'utf-8');
+12 -6
View File
@@ -800,9 +800,8 @@ describe('Enum & Value Completeness in review checklist', () => {
describe('Completeness Principle in generated SKILL.md files', () => {
const skillsWithPreamble = [
'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
'qa/SKILL.md',
'qa-only/SKILL.md',
'setup-browser-cookies/SKILL.md',
'ship/SKILL.md', 'review/SKILL.md',
'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
'retro/SKILL.md',
@@ -820,11 +819,12 @@ describe('Completeness Principle in generated SKILL.md files', () => {
});
}
test('Completeness Principle includes compression table in tier 2+ skills', () => {
// Root is tier 1 (no completeness). Check tier 2+ skill.
test('Completeness Principle keeps compact scoring guidance in tier 2+ skills', () => {
const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
expect(content).toContain('CC+gstack');
expect(content).toContain('Compression');
expect(content).toContain('Completeness: X/10');
expect(content).toContain('10 = all edge cases');
expect(content).toContain('Note: options differ in kind, not coverage');
expect(content).toContain('Do not fabricate scores');
});
});
@@ -1634,7 +1634,13 @@ describe('no compiled binaries in git', () => {
test('git tracks no files larger than 2MB', () => {
// Pure fs.statSync — no shell spawn per file.
const MAX_BYTES = 2 * 1024 * 1024;
const knownLargeFixtures = new Set([
// Deterministic replay fixture for BrowseSafe-Bench. The live bench is
// expensive; this file is intentionally committed so the gate is free.
'browse/test/fixtures/security-bench-haiku-responses.json',
]);
const oversized = trackedFiles.filter((f: string) => {
if (knownLargeFixtures.has(f)) return false;
const full = path.join(ROOT, f);
try {
return fs.statSync(full).size > MAX_BYTES;
+6 -15
View File
@@ -8,7 +8,7 @@
*
* What this test enforces:
* - Writing Style section header present in tier-≥2 generated preamble
* - All 6 writing rules present (gloss, outcome, short, impact, first-use, override)
* - Compact semantic contract present (gloss, outcome, impact, override)
* - Jargon list inlined (sample terms appear)
* - Terse-mode gate condition text present
* - Codex output uses $GSTACK_BIN, not ~/.claude/... (host-aware paths)
@@ -41,21 +41,12 @@ describe('Writing Style preamble section', () => {
expect(out).toContain('EXPLAIN_LEVEL:');
});
test('tier 2+ preamble includes all 6 writing rules', () => {
test('tier 2+ preamble includes the compact writing-style contract', () => {
const out = generatePreamble(makeCtx('claude', 2));
// Rule 1: jargon-gloss on first use
expect(out).toContain('gloss on first use');
// Rule 2: outcome framing
expect(out).toMatch(/outcome terms/);
// Rule 3: short sentences / concrete nouns / active voice
expect(out).toContain('Short sentences');
expect(out.toLowerCase()).toContain('active voice');
// Rule 4: close with user impact
expect(out).toMatch(/user impact/);
// Rule 5: unconditional first-use gloss (even if user pasted term)
expect(out).toMatch(/paste.*jargon|paste.*term/i);
// Rule 6: user-turn override
expect(out).toMatch(/user-turn override|user's own current message|user's in-turn/i);
expect(out).toMatch(/gloss.*first use|first-use.*gloss/i);
expect(out).toMatch(/outcome/i);
expect(out).toMatch(/user impact|user.*experience|what.*user.*sees/i);
expect(out).toMatch(/terse|no explanations|user-turn override|current message/i);
});
test('tier 2+ preamble inlines jargon list', () => {