test: align unit tests with slim resolvers + exempt 27MB security fixture

- test/skill-validation.test.ts: assert the slim Completeness Principle shape (Completeness: X/10, kind-note language) instead of the old Compression table. Remove the 3 tier-1 skills from the spot-check list (they intentionally don't carry the full Completeness Principle section). Exempt browse/test/fixtures/security-bench-haiku-responses.json (27MB deterministic replay fixture for BrowseSafe-Bench) from the 2MB tracked-file gate. The gate was actually failing on origin/main since the fixture was added in v1.6.4.0 — this is a side-fix to a real regression.
- test/brain-sync.test.ts: developer-machine-safe assertion for GSTACK_HOME override (compare config contents before/after instead of asserting the absence of a string that may legitimately exist).
- test/gen-skill-docs.test.ts: new tests for the slim — plan-review preambles stay under the post-slim budget (~33KB), Voice + Writing Style sections stay compact, and the slim Voice section preserves the load-bearing semantic contract (lead-with-the-point, name-the-file, user-outcome framing, no-corporate, no-AI-vocab, user-sovereignty). Update path-leakage scan to allow repo-root sidecar symlinks.
- test/writing-style-resolver.test.ts: assert the compact contract (gloss-on-first-use, outcome-framing, user-impact, terse-mode override) instead of the old 6-numbered-rules shape.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 21:46:40 +02:00 · 2026-04-25 21:24:12 -07:00
parent 38f31e3b1d
commit 16b80100c6
4 changed files with 106 additions and 29 deletions
@@ -97,11 +97,18 @@ describe('gstack-config gbrain keys', () => {
  });

  test('GSTACK_HOME overrides real config dir', () => {
-    run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
-    // Real ~/.gstack/config.yaml must NOT have been touched.
    const realConfig = path.join(os.homedir(), '.gstack', 'config.yaml');
-    const real = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : '';
-    expect(real).not.toContain('gbrain_sync_mode: full');
+    const before = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
+
+    run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
+
+    const tempConfig = fs.readFileSync(path.join(tmpHome, 'config.yaml'), 'utf-8');
+    expect(tempConfig).toContain('gbrain_sync_mode: full');
+
+    // Real ~/.gstack/config.yaml must not be touched. It may already contain
+    // the same value on a developer machine, so compare the file's contents
+    // before and after the run instead of asserting that a string is absent.
+    const after = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
+    expect(after).toBe(before);
  });
 });

@@ -40,6 +40,35 @@ function extractDescription(content: string): string {
  return description;
 }

+/**
+ * Returns one markdown section of `content`: the first line that starts with
+ * `heading`, plus everything up to (but not including) the next `## ` heading,
+ * or up to the end of the document when no later `## ` heading exists. The
+ * result is trimmed.
+ *
+ * The heading text must not be followed directly by a word character, so
+ * `## Voice` does not match `## Voiceover`; trailing text after a space or
+ * punctuation is still accepted.
+ *
+ * @param content Full markdown document to search.
+ * @param heading Literal heading prefix, including its `#` markers.
+ * @returns The heading line and its body, trimmed.
+ * @throws Error when no line starts with `heading`, so a missing section fails
+ *   the calling test with the heading named in the message.
+ */
+function extractMarkdownSection(content: string, heading: string): string {
+  // Escape regex metacharacters so the heading is matched literally.
+  const escaped = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const startMatch = new RegExp(`^${escaped}(?!\\w).*$`, 'm').exec(content);
+  if (!startMatch) {
+    throw new Error(`Markdown section not found: ${heading}`);
+  }
+  const start = startMatch.index;
+  const afterHeading = start + startMatch[0].length;
+  // Search only past the heading line so the section's own heading is skipped.
+  const nextSection = /\n## /.exec(content.slice(afterHeading));
+  const end = nextSection ? afterHeading + nextSection.index : content.length;
+  return content.slice(start, end).trim();
+}
+
+/**
+ * Returns the part of `content` that precedes the earliest occurrence of any
+ * of `workflowMarkers`. Markers that do not occur are ignored; the helper
+ * asserts that at least one of them was found.
+ */
+function extractPreambleBeforeWorkflow(content: string, workflowMarkers: string[]): string {
+  let foundCount = 0;
+  let cutoff = Infinity;
+  for (const marker of workflowMarkers) {
+    const position = content.indexOf(marker);
+    if (position < 0) continue;
+    foundCount += 1;
+    if (position < cutoff) cutoff = position;
+  }
+  expect(foundCount).toBeGreaterThan(0);
+  return content.slice(0, cutoff);
+}
+
+/**
+ * Reports whether `candidateDir` resolves to the same real path as ROOT.
+ * Any error raised while resolving either path is treated as "not the repo
+ * root" and yields false.
+ */
+function isRepoRootSymlink(candidateDir: string): boolean {
+  let resolvedCandidate: string;
+  let resolvedRoot: string;
+  try {
+    resolvedCandidate = fs.realpathSync(candidateDir);
+    resolvedRoot = fs.realpathSync(ROOT);
+  } catch {
+    return false;
+  }
+  return resolvedCandidate === resolvedRoot;
+}
+
 // Dynamic template discovery — matches the generator's findTemplates() behavior.
 // New skills automatically get test coverage without updating a static list.
 const ALL_SKILLS = (() => {
@@ -263,6 +292,50 @@ describe('gen-skill-docs', () => {
    expect(content).toContain('~/.gstack/analytics');
  });

+  test('plan-review generated preambles stay under the Option A budget', () => {
+    // Byte ceiling for everything that precedes the workflow body. Plan skills
+    // carry the same preamble surface as other tier-≥2 skills (Brain Sync,
+    // Context Recovery, Routing Injection are load-bearing functionality, not
+    // optional), so the budget is current size + small headroom; ratchet it
+    // down if a future slim trims real bytes.
+    const maxPreambleBytes = 33_000;
+
+    // Each entry pairs a generated SKILL.md with the headings that open its
+    // workflow; the preamble is whatever comes before the earliest one found.
+    const planReviewDocs: Array<{ path: string; markers: string[] }> = [
+      {
+        path: path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
+        markers: ['# Mega Plan Review Mode', '## Step 0: Detect platform and base branch'],
+      },
+      {
+        path: path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
+        markers: ['# Plan Review Mode'],
+      },
+    ];
+
+    for (const { path: skillPath, markers } of planReviewDocs) {
+      const markdown = fs.readFileSync(skillPath, 'utf-8');
+      const preamble = extractPreambleBeforeWorkflow(markdown, markers);
+      const preambleBytes = Buffer.byteLength(preamble, 'utf-8');
+      expect(preambleBytes).toBeLessThan(maxPreambleBytes);
+    }
+  });
+
+  test('voice and writing-style preamble sections stay compact', () => {
+    const skillDoc = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
+
+    // Extract both sections first (Voice, then Writing Style) and measure each
+    // in UTF-8 bytes before asserting on either.
+    const [voiceBytes, writingStyleBytes] = ['## Voice', '## Writing Style'].map(
+      heading => Buffer.byteLength(extractMarkdownSection(skillDoc, heading), 'utf-8'),
+    );
+
+    expect(voiceBytes).toBeLessThan(3_000);
+    expect(writingStyleBytes).toBeLessThan(2_000);
+  });
+
+  test('slim voice section preserves the gstack voice contract', () => {
+    const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
+    const voice = extractMarkdownSection(content, '## Voice');
+
+    // Each pattern stands for one clause of the voice contract. They are
+    // deliberately loose (several acceptable phrasings per clause) so the
+    // wording can change without breaking the test.
+    expect(voice).toMatch(/lead with the point|direct/i);
+    expect(voice).toMatch(/file|function|line|command|real numbers/i);
+    expect(voice).toMatch(/user.*outcome|user.*experience|real user/i);
+    // `PR` is anchored with \b: under the `i` flag a bare `PR` matches any
+    // "pr" substring ("preserve", "prompt", ...), which would make this
+    // assertion pass on almost any English text.
+    expect(voice).toMatch(/corporate|academic|\bPR\b|hype/i);
+    expect(voice).toMatch(/AI vocabulary|delve|crucial|robust/i);
+    expect(voice).toMatch(/user decides|user.*context|sovereignty|recommendation, not a decision/i);
+  });
+
  test('preamble .pending-* glob is zsh-safe (uses find, not shell glob)', () => {
    for (const skill of ALL_SKILLS) {
      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
@@ -1964,13 +2037,13 @@ describe('Parameterized host smoke tests', () => {
        expect(skills.length).toBeGreaterThan(0);
      });

-      test('no .claude/skills path leakage in non-root skills', () => {
+      test('no .claude/skills path leakage outside repo-root sidecar symlinks', () => {
        if (!fs.existsSync(hostDir)) return; // skip if not generated
        const skills = fs.readdirSync(hostDir);
        for (const skill of skills) {
-          // Skip root gstack skill — it contains preamble with intentional .claude/skills
-          // fallback paths for binary lookup and skill prefix instructions
-          if (skill === 'gstack') continue;
+          // Dev installs may mount the repo root at host/skills/gstack as a runtime
+          // sidecar. The generator skips that symlink loop, so leakage checks should too.
+          if (isRepoRootSymlink(path.join(hostDir, skill))) continue;
          const skillMd = path.join(hostDir, skill, 'SKILL.md');
          if (!fs.existsSync(skillMd)) continue;
          const content = fs.readFileSync(skillMd, 'utf-8');
@@ -800,9 +800,8 @@ describe('Enum & Value Completeness in review checklist', () => {

 describe('Completeness Principle in generated SKILL.md files', () => {
  const skillsWithPreamble = [
-    'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
+    'qa/SKILL.md',
    'qa-only/SKILL.md',
-    'setup-browser-cookies/SKILL.md',
    'ship/SKILL.md', 'review/SKILL.md',
    'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
    'retro/SKILL.md',
@@ -820,11 +819,12 @@ describe('Completeness Principle in generated SKILL.md files', () => {
    });
  }

-  test('Completeness Principle includes compression table in tier 2+ skills', () => {
-    // Root is tier 1 (no completeness). Check tier 2+ skill.
+  test('Completeness Principle keeps compact scoring guidance in tier 2+ skills', () => {
    const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
-    expect(content).toContain('CC+gstack');
-    expect(content).toContain('Compression');
+    expect(content).toContain('Completeness: X/10');
+    expect(content).toContain('10 = all edge cases');
+    expect(content).toContain('Note: options differ in kind, not coverage');
+    expect(content).toContain('Do not fabricate scores');
  });
 });

@@ -1634,7 +1634,13 @@ describe('no compiled binaries in git', () => {
  test('git tracks no files larger than 2MB', () => {
    // Pure fs.statSync — no shell spawn per file.
    const MAX_BYTES = 2 * 1024 * 1024;
+    const knownLargeFixtures = new Set([
+      // Deterministic replay fixture for BrowseSafe-Bench. The live bench is
+      // expensive; this file is intentionally committed so the gate is free.
+      'browse/test/fixtures/security-bench-haiku-responses.json',
+    ]);
    const oversized = trackedFiles.filter((f: string) => {
+      if (knownLargeFixtures.has(f)) return false;
      const full = path.join(ROOT, f);
      try {
        return fs.statSync(full).size > MAX_BYTES;
@@ -8,7 +8,7 @@
 *
 * What this test enforces:
 * - Writing Style section header present in tier-≥2 generated preamble
- * - All 6 writing rules present (gloss, outcome, short, impact, first-use, override)
+ * - Compact semantic contract present (gloss, outcome, impact, override)
 * - Jargon list inlined (sample terms appear)
 * - Terse-mode gate condition text present
 * - Codex output uses $GSTACK_BIN, not ~/.claude/... (host-aware paths)
@@ -41,21 +41,12 @@ describe('Writing Style preamble section', () => {
    expect(out).toContain('EXPLAIN_LEVEL:');
  });

-  test('tier 2+ preamble includes all 6 writing rules', () => {
+  test('tier 2+ preamble includes the compact writing-style contract', () => {
    const out = generatePreamble(makeCtx('claude', 2));
-    // Rule 1: jargon-gloss on first use
-    expect(out).toContain('gloss on first use');
-    // Rule 2: outcome framing
-    expect(out).toMatch(/outcome terms/);
-    // Rule 3: short sentences / concrete nouns / active voice
-    expect(out).toContain('Short sentences');
-    expect(out.toLowerCase()).toContain('active voice');
-    // Rule 4: close with user impact
-    expect(out).toMatch(/user impact/);
-    // Rule 5: unconditional first-use gloss (even if user pasted term)
-    expect(out).toMatch(/paste.*jargon|paste.*term/i);
-    // Rule 6: user-turn override
-    expect(out).toMatch(/user-turn override|user's own current message|user's in-turn/i);
+    expect(out).toMatch(/gloss.*first use|first-use.*gloss/i);
+    expect(out).toMatch(/outcome/i);
+    expect(out).toMatch(/user impact|user.*experience|what.*user.*sees/i);
+    expect(out).toMatch(/terse|no explanations|user-turn override|current message/i);
  });

  test('tier 2+ preamble inlines jargon list', () => {