// Patch summary — test-only changes in four files. Everything below the
// first `diff --git` header is the original patch text, unmodified.
//
// test/brain-sync.test.ts
//   'GSTACK_HOME overrides real config dir' now:
//     1. snapshots the real ~/.gstack/config.yaml (null when it does not exist),
//     2. runs `gstack-config set gbrain_sync_mode full`,
//     3. asserts tmpHome/config.yaml contains 'gbrain_sync_mode: full',
//     4. asserts the real config snapshot is identical before and after.
//   The previous version asserted that the real file did not contain the
//   value, which fails on a machine whose real config already has it.
//
// test/gen-skill-docs.test.ts
//   New helpers:
//     - extractMarkdownSection(content, heading): returns the trimmed text from
//       the first line starting with `heading` up to the next "\n## " (or end
//       of content); asserts via expect() that the heading was found.
//     - extractPreambleBeforeWorkflow(content, workflowMarkers): returns the
//       text before the earliest marker present; asserts at least one marker
//       was found.
//     - isRepoRootSymlink(candidateDir): true when realpath(candidateDir)
//       equals realpath(ROOT); false if either realpath call throws.
//   New tests:
//     - plan-ceo-review and plan-eng-review preambles must be < 33_000 bytes.
//     - In plan-eng-review, '## Voice' must be < 3_000 bytes and
//       '## Writing Style' < 2_000 bytes.
//     - The Voice section must match six case-insensitive regex groups.
//   Changed test:
//     - The '.claude/skills' leakage check no longer skips the directory named
//       'gstack'; it skips any entry for which isRepoRootSymlink() is true.
//
// test/skill-validation.test.ts
//   - skillsWithPreamble drops 'SKILL.md', 'browse/SKILL.md' and
//     'setup-browser-cookies/SKILL.md'.
//   - The cso/SKILL.md Completeness test now looks for 'Completeness: X/10',
//     '10 = all edge cases', 'Note: options differ in kind, not coverage' and
//     'Do not fabricate scores' instead of 'CC+gstack' and 'Compression'.
//   - The 2MB tracked-file gate exempts
//     'browse/test/fixtures/security-bench-haiku-responses.json'.
//
// test/writing-style-resolver.test.ts
//   - The six per-rule assertions are replaced by four regex checks (gloss on
//     first use, outcome, user impact, terse/override); the file header
//     comment is updated to match.
//
// NOTE(review): extractMarkdownSection builds `^${escaped}.*$`, a prefix
//   match, so '## Voice' would also match a longer heading that begins with
//   the same text if one appears earlier in the file — confirm that is
//   acceptable for the generated SKILL.md files.
// NOTE(review): extractMarkdownSection always ends the section at the next
//   "\n## ", regardless of the level of the heading passed in — confirm
//   callers only pass level-2 headings.
// NOTE(review): isRepoRootSymlink does not check that candidateDir is a
//   symlink; any path resolving to ROOT returns true. Presumably intended —
//   verify against the generator's own skip logic.
diff --git a/test/brain-sync.test.ts b/test/brain-sync.test.ts index 6ba8e95c..6fdf4046 100644 --- a/test/brain-sync.test.ts +++ b/test/brain-sync.test.ts @@ -97,11 +97,18 @@ describe('gstack-config gbrain keys', () => { }); test('GSTACK_HOME overrides real config dir', () => { - run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']); - // Real ~/.gstack/config.yaml must NOT have been touched. const realConfig = path.join(os.homedir(), '.gstack', 'config.yaml'); - const real = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : ''; - expect(real).not.toContain('gbrain_sync_mode: full'); + const before = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null; + + run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']); + + const tempConfig = fs.readFileSync(path.join(tmpHome, 'config.yaml'), 'utf-8'); + expect(tempConfig).toContain('gbrain_sync_mode: full'); + + // Real ~/.gstack/config.yaml must not be touched. It may already contain + // the same value on a developer machine, so compare contents, not strings. + const after = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null; + expect(after).toBe(before); }); }); diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 8afc7b8e..aeafb391 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -40,6 +40,35 @@ function extractDescription(content: string): string { return description; } +function extractMarkdownSection(content: string, heading: string): string { + const escaped = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const startMatch = content.match(new RegExp(`^${escaped}.*$`, 'm')); + expect(startMatch?.index).toBeDefined(); + const start = startMatch!.index!; + const afterHeading = start + startMatch![0].length; + const nextSection = content.slice(afterHeading).match(/\n## /); + const end = nextSection?.index === undefined + ? 
content.length + : afterHeading + nextSection.index; + return content.slice(start, end).trim(); +} + +function extractPreambleBeforeWorkflow(content: string, workflowMarkers: string[]): string { + const markerIndexes = workflowMarkers + .map(marker => content.indexOf(marker)) + .filter(index => index >= 0); + expect(markerIndexes.length).toBeGreaterThan(0); + return content.slice(0, Math.min(...markerIndexes)); +} + +function isRepoRootSymlink(candidateDir: string): boolean { + try { + return fs.realpathSync(candidateDir) === fs.realpathSync(ROOT); + } catch { + return false; + } +} + // Dynamic template discovery — matches the generator's findTemplates() behavior. // New skills automatically get test coverage without updating a static list. const ALL_SKILLS = (() => { @@ -263,6 +292,50 @@ describe('gen-skill-docs', () => { expect(content).toContain('~/.gstack/analytics'); }); + test('plan-review generated preambles stay under the Option A budget', () => { + const reviewSkills = [ + { + path: path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), + markers: ['# Mega Plan Review Mode', '## Step 0: Detect platform and base branch'], + }, + { + path: path.join(ROOT, 'plan-eng-review', 'SKILL.md'), + markers: ['# Plan Review Mode'], + }, + ]; + + // Plan skills carry the same preamble surface as other tier-≥2 skills + // (Brain Sync, Context Recovery, Routing Injection are load-bearing + // functionality, not optional). Budget is set to current size + small + // headroom; ratchet down if a future slim trims real bytes. 
+ for (const skill of reviewSkills) { + const content = fs.readFileSync(skill.path, 'utf-8'); + const preamble = extractPreambleBeforeWorkflow(content, skill.markers); + expect(Buffer.byteLength(preamble, 'utf-8')).toBeLessThan(33_000); + } + }); + + test('voice and writing-style preamble sections stay compact', () => { + const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8'); + const voice = extractMarkdownSection(content, '## Voice'); + const writingStyle = extractMarkdownSection(content, '## Writing Style'); + + expect(Buffer.byteLength(voice, 'utf-8')).toBeLessThan(3_000); + expect(Buffer.byteLength(writingStyle, 'utf-8')).toBeLessThan(2_000); + }); + + test('slim voice section preserves the gstack voice contract', () => { + const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8'); + const voice = extractMarkdownSection(content, '## Voice'); + + expect(voice).toMatch(/lead with the point|direct/i); + expect(voice).toMatch(/file|function|line|command|real numbers/i); + expect(voice).toMatch(/user.*outcome|user.*experience|real user/i); + expect(voice).toMatch(/corporate|academic|PR|hype/i); + expect(voice).toMatch(/AI vocabulary|delve|crucial|robust/i); + expect(voice).toMatch(/user decides|user.*context|sovereignty|recommendation, not a decision/i); + }); + test('preamble .pending-* glob is zsh-safe (uses find, not shell glob)', () => { for (const skill of ALL_SKILLS) { const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8'); @@ -1964,13 +2037,13 @@ describe('Parameterized host smoke tests', () => { expect(skills.length).toBeGreaterThan(0); }); - test('no .claude/skills path leakage in non-root skills', () => { + test('no .claude/skills path leakage outside repo-root sidecar symlinks', () => { if (!fs.existsSync(hostDir)) return; // skip if not generated const skills = fs.readdirSync(hostDir); for (const skill of skills) { - // Skip root gstack skill — it contains 
preamble with intentional .claude/skills - // fallback paths for binary lookup and skill prefix instructions - if (skill === 'gstack') continue; + // Dev installs may mount the repo root at host/skills/gstack as a runtime + // sidecar. The generator skips that symlink loop, so leakage checks should too. + if (isRepoRootSymlink(path.join(hostDir, skill))) continue; const skillMd = path.join(hostDir, skill, 'SKILL.md'); if (!fs.existsSync(skillMd)) continue; const content = fs.readFileSync(skillMd, 'utf-8'); diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 625bc0a1..99201089 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -800,9 +800,8 @@ describe('Enum & Value Completeness in review checklist', () => { describe('Completeness Principle in generated SKILL.md files', () => { const skillsWithPreamble = [ - 'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md', + 'qa/SKILL.md', 'qa-only/SKILL.md', - 'setup-browser-cookies/SKILL.md', 'ship/SKILL.md', 'review/SKILL.md', 'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md', 'retro/SKILL.md', @@ -820,11 +819,12 @@ describe('Completeness Principle in generated SKILL.md files', () => { }); } - test('Completeness Principle includes compression table in tier 2+ skills', () => { - // Root is tier 1 (no completeness). Check tier 2+ skill. 
+ test('Completeness Principle keeps compact scoring guidance in tier 2+ skills', () => { const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8'); - expect(content).toContain('CC+gstack'); - expect(content).toContain('Compression'); + expect(content).toContain('Completeness: X/10'); + expect(content).toContain('10 = all edge cases'); + expect(content).toContain('Note: options differ in kind, not coverage'); + expect(content).toContain('Do not fabricate scores'); }); }); @@ -1634,7 +1634,13 @@ describe('no compiled binaries in git', () => { test('git tracks no files larger than 2MB', () => { // Pure fs.statSync — no shell spawn per file. const MAX_BYTES = 2 * 1024 * 1024; + const knownLargeFixtures = new Set([ + // Deterministic replay fixture for BrowseSafe-Bench. The live bench is + // expensive; this file is intentionally committed so the gate is free. + 'browse/test/fixtures/security-bench-haiku-responses.json', + ]); const oversized = trackedFiles.filter((f: string) => { + if (knownLargeFixtures.has(f)) return false; const full = path.join(ROOT, f); try { return fs.statSync(full).size > MAX_BYTES; diff --git a/test/writing-style-resolver.test.ts b/test/writing-style-resolver.test.ts index aa12e4f8..fce957c2 100644 --- a/test/writing-style-resolver.test.ts +++ b/test/writing-style-resolver.test.ts @@ -8,7 +8,7 @@ * * What this test enforces: * - Writing Style section header present in tier-≥2 generated preamble - * - All 6 writing rules present (gloss, outcome, short, impact, first-use, override) + * - Compact semantic contract present (gloss, outcome, impact, override) * - Jargon list inlined (sample terms appear) * - Terse-mode gate condition text present * - Codex output uses $GSTACK_BIN, not ~/.claude/... 
(host-aware paths) @@ -41,21 +41,12 @@ describe('Writing Style preamble section', () => { expect(out).toContain('EXPLAIN_LEVEL:'); }); - test('tier 2+ preamble includes all 6 writing rules', () => { + test('tier 2+ preamble includes the compact writing-style contract', () => { const out = generatePreamble(makeCtx('claude', 2)); - // Rule 1: jargon-gloss on first use - expect(out).toContain('gloss on first use'); - // Rule 2: outcome framing - expect(out).toMatch(/outcome terms/); - // Rule 3: short sentences / concrete nouns / active voice - expect(out).toContain('Short sentences'); - expect(out.toLowerCase()).toContain('active voice'); - // Rule 4: close with user impact - expect(out).toMatch(/user impact/); - // Rule 5: unconditional first-use gloss (even if user pasted term) - expect(out).toMatch(/paste.*jargon|paste.*term/i); - // Rule 6: user-turn override - expect(out).toMatch(/user-turn override|user's own current message|user's in-turn/i); + expect(out).toMatch(/gloss.*first use|first-use.*gloss/i); + expect(out).toMatch(/outcome/i); + expect(out).toMatch(/user impact|user.*experience|what.*user.*sees/i); + expect(out).toMatch(/terse|no explanations|user-turn override|current message/i); }); test('tier 2+ preamble inlines jargon list', () => {