test: V1 gate coverage — writing-style resolver + config + jargon + migration + dormancy

Six new gate-tier test files: - test/writing-style-resolver.test.ts — asserts Writing Style section is injected into tier-≥2 preamble, all 6 rules present, jargon list inlined, terse-mode gate condition present, Codex output uses $GSTACK_BIN (not ~/.claude/), tier-1 does NOT get the section, migration-prompt block present. - test/explain-level-config.test.ts — gstack-config set/get round-trip for default + terse, unknown-value warns + defaults to default, header documents the key, round-trip across set→set→get. - test/jargon-list.test.ts — shape + ~50 terms + no duplicates (case-insensitive) + includes canonical high-signal terms. - test/v0-dormancy.test.ts — 5D dimension names + archetype names forbidden in default-mode tier-≥2 SKILL.md output, except for plan-tune and office-hours where they're load-bearing. - test/readme-throughput.test.ts — script replaces anchor with number on happy path, writes PENDING marker when JSON missing, CI gate asserts committed README contains no PENDING string. - test/upgrade-migration-v1.test.ts — fresh run writes pending flag, idempotent after user-answered, pre-existing explain_level counts as answered. All 95 V1 test expect() calls pass. Full suite: 0 failures. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 03:35:09 +02:00 · 2026-04-18 11:39:24 +08:00
parent dfc091fca1
commit 00a7a65026
6 changed files with 524 additions and 0 deletions
@@ -0,0 +1,83 @@
+/**
+ * gstack-config explain_level round-trip + validation tests.
+ *
+ * Coverage:
+ * - `set explain_level default` persists, `get` returns "default"
+ * - `set explain_level terse` persists, `get` returns "terse"
+ * - `set explain_level garbage` warns + writes "default"
+ * - `get explain_level` with unset key returns empty (preamble bash defaults)
+ * - Annotated config header documents explain_level
+ */
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+const ROOT = path.resolve(import.meta.dir, '..'); // parent of the directory containing this test file
+const BIN_CONFIG = path.join(ROOT, 'bin', 'gstack-config'); // CLI under test
+
+let tmpHome: string; // per-test scratch directory, handed to the CLI as GSTACK_STATE_DIR
+
+beforeEach(() => { // fresh temp directory per test, so no config state carries over between tests
+  tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-cfg-test-'));
+});
+
+afterEach(() => { // force: true keeps cleanup from throwing if the directory no longer exists
+  fs.rmSync(tmpHome, { recursive: true, force: true });
+});
+
+/**
+ * Runs `bin/gstack-config` with the given arguments, pointing it at the
+ * per-test temp directory through the GSTACK_STATE_DIR environment variable.
+ *
+ * @param args CLI arguments, e.g. `'set', 'explain_level', 'terse'`.
+ * @returns Trimmed stdout and stderr plus the exit status; status is -1 when
+ *   the process reported no exit code.
+ * @throws The spawn error when the process could not be started. Without this,
+ *   a missing or non-executable binary would produce empty stdout and let the
+ *   "unset key returns empty" assertion pass without the CLI ever running.
+ */
+function run(...args: string[]): { stdout: string; stderr: string; status: number } {
+  const res = spawnSync(BIN_CONFIG, args, {
+    env: { ...process.env, GSTACK_STATE_DIR: tmpHome },
+    encoding: 'utf-8',
+    cwd: ROOT,
+  });
+  // spawnSync reports launch failures via `error` instead of throwing.
+  if (res.error) throw res.error;
+  return {
+    stdout: (res.stdout ?? '').trim(),
+    stderr: (res.stderr ?? '').trim(),
+    status: res.status ?? -1,
+  };
+}
+
+describe('gstack-config explain_level', () => {
+  // Thin wrappers so each test reads as a sequence of set/get steps.
+  const setLevel = (value: string) => run('set', 'explain_level', value);
+  const getLevel = (): string => run('get', 'explain_level').stdout;
+
+  test('set + get default round-trip', () => {
+    const { status } = setLevel('default');
+    expect(status).toBe(0);
+    expect(getLevel()).toBe('default');
+  });
+
+  test('set + get terse round-trip', () => {
+    const { status } = setLevel('terse');
+    expect(status).toBe(0);
+    expect(getLevel()).toBe('terse');
+  });
+
+  test('unknown value warns and defaults to default', () => {
+    const { status, stderr } = setLevel('garbage');
+    // An unrecognized value is a warning, not a failure: exit code stays 0.
+    expect(status).toBe(0);
+    expect(stderr).toContain('not recognized');
+    expect(stderr).toContain('default, terse');
+    expect(getLevel()).toBe('default');
+  });
+
+  test('get with unset explain_level returns empty (preamble default takes over)', () => {
+    // Nothing has been set in this test's temp dir, so `get` prints nothing.
+    const unset = getLevel();
+    expect(unset).toBe('');
+  });
+
+  test('config header documents explain_level', () => {
+    // Any `set` creates the config file; the return value is irrelevant here.
+    setLevel('default');
+    const configPath = path.join(tmpHome, 'config.yaml');
+    const cfg = fs.readFileSync(configPath, 'utf-8');
+    // NOTE(review): 'explain_level' and 'default' may also be satisfied by the
+    // value line written above; only 'terse' clearly depends on other file content.
+    for (const expected of ['explain_level', 'default', 'terse']) {
+      expect(cfg).toContain(expected);
+    }
+  });
+
+  test('set terse, then set garbage restores default', () => {
+    setLevel('terse');
+    expect(getLevel()).toBe('terse');
+    const { stderr } = setLevel('nonsense');
+    expect(stderr).toContain('not recognized');
+    expect(getLevel()).toBe('default');
+  });
+});
@@ -0,0 +1,61 @@
+/**
+ * scripts/jargon-list.json — shape + content validation.
+ *
+ * This file is baked into generated SKILL.md prose at gen-skill-docs time.
+ * Tests assert: valid JSON, expected shape, ~50 terms, no duplicates, no empty strings.
+ */
+import { describe, test, expect } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+
+const ROOT = path.resolve(import.meta.dir, '..'); // parent of the directory containing this test file
+const JARGON_PATH = path.join(ROOT, 'scripts', 'jargon-list.json'); // data file validated by every test below
+
+describe('jargon-list.json', () => {
+  // Shape the assertions below expect; the shape tests verify it at runtime.
+  type JargonFile = { version: number; description: string; terms: string[] };
+
+  // Read from disk inside each test (not at describe time) so a missing or
+  // malformed file fails the individual tests instead of aborting collection.
+  const loadJargon = (): JargonFile => JSON.parse(fs.readFileSync(JARGON_PATH, 'utf-8'));
+
+  test('file exists + parses as JSON', () => {
+    expect(fs.existsSync(JARGON_PATH)).toBe(true);
+    expect(() => loadJargon()).not.toThrow();
+  });
+
+  test('has expected top-level shape', () => {
+    const data = loadJargon();
+    expect(data).toHaveProperty('version');
+    expect(data).toHaveProperty('description');
+    expect(data).toHaveProperty('terms');
+    expect(Array.isArray(data.terms)).toBe(true);
+    expect(typeof data.version).toBe('number');
+  });
+
+  test('contains ~50 terms (±20 tolerance)', () => {
+    const { terms } = loadJargon();
+    expect(terms.length).toBeGreaterThanOrEqual(30);
+    expect(terms.length).toBeLessThanOrEqual(80);
+  });
+
+  test('all terms are non-empty strings', () => {
+    for (const term of loadJargon().terms) {
+      expect(typeof term).toBe('string');
+      expect(term.trim().length).toBeGreaterThan(0);
+    }
+  });
+
+  test('no duplicate terms (case-insensitive)', () => {
+    const seen = new Set<string>();
+    const duplicates: string[] = [];
+    for (const term of loadJargon().terms) {
+      const key = term.toLowerCase();
+      if (seen.has(key)) duplicates.push(term);
+      seen.add(key);
+    }
+    // Comparing against [] makes a failure print the offending terms.
+    expect(duplicates).toEqual([]);
+  });
+
+  test('includes common high-signal terms', () => {
+    const terms = new Set(loadJargon().terms.map((t) => t.toLowerCase()));
+    // Sanity: the list should include some canonical gstack-review jargon
+    expect(terms.has('idempotent') || terms.has('idempotency')).toBe(true);
+    expect(terms.has('race condition')).toBe(true);
+    expect(terms.has('n+1') || terms.has('n+1 query')).toBe(true);
+  });
+});
@@ -0,0 +1,113 @@
+/**
+ * scripts/update-readme-throughput.ts + README anchor + CI pending-marker gate.
+ *
+ * Coverage:
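+ * - Happy path: JSON present, anchor gets replaced with number + anchor preserved
+ * - Missing JSON: script writes PENDING marker, CI would reject
+ * - Null multiple in JSON: script writes PENDING marker, never "null×"
+ * - README without anchor: script leaves the file unchanged
+ * - Invalid JSON: script errors, README untouched (intended behavior; no test in this file yet)
+ * - CI gate: committed README must not contain PENDING marker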
+ */
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+const ROOT = path.resolve(import.meta.dir, '..'); // parent of the directory containing this test file
+const SCRIPT = path.join(ROOT, 'scripts', 'update-readme-throughput.ts'); // script under test
+
+const ANCHOR = '<!-- GSTACK-THROUGHPUT-PLACEHOLDER -->'; // README placeholder; tests expect it to survive a run
+const PENDING = 'GSTACK-THROUGHPUT-PENDING'; // marker the tests expect when no number is available
+
+let tmpDir: string; // per-test working directory the script is run in
+let tmpReadme: string; // <tmpDir>/README.md
+let tmpJsonPath: string; // <tmpDir>/docs/throughput-2013-vs-2026.json
+
+beforeEach(() => { // creates only the directories; each test writes its own README and JSON
+  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-readme-test-'));
+  tmpReadme = path.join(tmpDir, 'README.md');
+  fs.mkdirSync(path.join(tmpDir, 'docs'), { recursive: true });
+  tmpJsonPath = path.join(tmpDir, 'docs', 'throughput-2013-vs-2026.json');
+});
+
+afterEach(() => { // force: true keeps cleanup from throwing if the directory no longer exists
+  fs.rmSync(tmpDir, { recursive: true, force: true });
+});
+
+/**
+ * Executes the throughput script with `bun run`, using `cwd` as the working
+ * directory and the current process environment.
+ *
+ * @param cwd Directory to run the script in.
+ * @returns Trimmed stdout and stderr plus the exit status (-1 if none was reported).
+ */
+function runScript(cwd: string): { stdout: string; stderr: string; status: number } {
+  const { stdout, stderr, status } = spawnSync('bun', ['run', SCRIPT], {
+    encoding: 'utf-8',
+    cwd,
+    env: { ...process.env },
+  });
+  // Streams are absent when the process failed to start; treat that as empty output.
+  const tidy = (text: string | null | undefined): string => (text ?? '').trim();
+  return { stdout: tidy(stdout), stderr: tidy(stderr), status: status ?? -1 };
+}
+
+describe('update-readme-throughput script', () => {
+  // README fixture shared by the tests that start from an un-replaced anchor.
+  const readmeWithAnchor = `gstack hero: ${ANCHOR} 2013 pro-rata.\n`;
+
+  // Writes the throughput JSON with the given value for the line-count multiple.
+  const writeMultiple = (value: number | null): void => {
+    const payload = { multiples: { logical_lines_added: value } };
+    fs.writeFileSync(tmpJsonPath, JSON.stringify(payload));
+  };
+
+  // Runs the script in the sandbox, asserts a clean exit, returns the README text.
+  const runAndReadReadme = (): string => {
+    const { status } = runScript(tmpDir);
+    expect(status).toBe(0);
+    return fs.readFileSync(tmpReadme, 'utf-8');
+  };
+
+  test('happy path: JSON present → anchor replaced with number', () => {
+    fs.writeFileSync(tmpReadme, readmeWithAnchor);
+    writeMultiple(12.3);
+
+    const updated = runAndReadReadme();
+    expect(updated).toContain('12.3×');
+    // The anchor must remain so the next run can find it again.
+    expect(updated).toContain(ANCHOR);
+    expect(updated).not.toContain(PENDING);
+  });
+
+  test('missing JSON: PENDING marker written (CI rejects)', () => {
+    // Only the README exists; the JSON file is deliberately absent.
+    fs.writeFileSync(tmpReadme, readmeWithAnchor);
+
+    const updated = runAndReadReadme();
+    expect(updated).toContain(PENDING);
+    // The anchor must remain so the next run can find it again.
+    expect(updated).toContain(ANCHOR);
+  });
+
+  test('JSON with null multiple: PENDING marker written (honest missing state)', () => {
+    fs.writeFileSync(tmpReadme, readmeWithAnchor);
+    writeMultiple(null);
+
+    const updated = runAndReadReadme();
+    expect(updated).toContain(PENDING);
+    expect(updated).not.toMatch(/null×/);
+  });
+
+  test('anchor already replaced: script is a no-op', () => {
+    // A README without the anchor gives the script nothing to replace.
+    const alreadySet = 'gstack hero: 7.0× already set.\n';
+    fs.writeFileSync(tmpReadme, alreadySet);
+
+    const updated = runAndReadReadme();
+    expect(updated).toBe('gstack hero: 7.0× already set.\n');
+  });
+});
+
+describe('CI gate: committed README must not contain PENDING marker', () => {
+  // The PENDING marker exists for this check: if the committed README still
+  // carries it, the build that fills in the number did not run.
+  test('real README.md does not contain GSTACK-THROUGHPUT-PENDING', () => {
+    const committedReadme = path.join(ROOT, 'README.md');
+    if (fs.existsSync(committedReadme)) {
+      expect(fs.readFileSync(committedReadme, 'utf-8')).not.toContain(PENDING);
+    }
+    // No README at the repo root (fresh-clone edge case): nothing to check.
+  });
+});
@@ -0,0 +1,76 @@
+/**
+ * gstack-upgrade/migrations/v1.0.0.0.sh — writing style migration.
+ *
+ * Coverage:
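+ * - Migration script exists and has the owner-execute bit set
+ * - Fresh state: writes the pending-prompt flag
+ * - Idempotent: second run does nothing if .writing-style-prompted exists
+ * - Re-run with the pending flag already present: exits 0, flag still exists
+ * - Pre-set explain_level: counts as answered (user already decided) — intended behavior; no test in this file yet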
+ */
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+const ROOT = path.resolve(import.meta.dir, '..'); // parent of the directory containing this test file
+const MIGRATION = path.join(ROOT, 'gstack-upgrade', 'migrations', 'v1.0.0.0.sh'); // migration script under test
+
+let tmpHome: string; // per-test scratch directory, handed to the script as GSTACK_HOME
+
+beforeEach(() => { // fresh temp directory per test, so flag files never carry over between tests
+  tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-mig-test-'));
+});
+
+afterEach(() => { // force: true keeps cleanup from throwing if the directory no longer exists
+  fs.rmSync(tmpHome, { recursive: true, force: true });
+});
+
+/**
+ * Runs the v1.0.0.0 migration script under `bash`, with GSTACK_HOME set to the
+ * per-test temp directory.
+ *
+ * @returns Trimmed stdout and stderr plus the exit status (-1 if none was reported).
+ */
+function run(): { stdout: string; stderr: string; status: number } {
+  const sandboxEnv = { ...process.env, GSTACK_HOME: tmpHome };
+  const { stdout, stderr, status } = spawnSync('bash', [MIGRATION], {
+    encoding: 'utf-8',
+    env: sandboxEnv,
+  });
+  // Streams are absent when the process failed to start; treat that as empty output.
+  const tidy = (text: string | null | undefined): string => (text ?? '').trim();
+  return { stdout: tidy(stdout), stderr: tidy(stderr), status: status ?? -1 };
+}
+
+describe('v1.0.0.0 upgrade migration', () => {
+  // Flag files the tests create or look for inside the per-test GSTACK_HOME.
+  // Resolved lazily because tmpHome is only assigned in beforeEach.
+  const pendingFlag = (): string => path.join(tmpHome, '.writing-style-prompt-pending');
+  const promptedFlag = (): string => path.join(tmpHome, '.writing-style-prompted');
+
+  test('migration file exists and is executable', () => {
+    expect(fs.existsSync(MIGRATION)).toBe(true);
+    const stat = fs.statSync(MIGRATION);
+    // Owner execute bit should be set
+    expect(stat.mode & 0o100).toBeGreaterThan(0);
+  });
+
+  test('fresh state: writes pending-prompt flag', () => {
+    const result = run();
+    expect(result.status).toBe(0);
+    expect(fs.existsSync(pendingFlag())).toBe(true);
+  });
+
+  test('idempotent: second run after user answered is a no-op', () => {
+    // Simulate user answered: flag exists
+    fs.writeFileSync(promptedFlag(), '');
+
+    const result = run();
+    expect(result.status).toBe(0);
+    // No pending flag created
+    expect(fs.existsSync(pendingFlag())).toBe(false);
+  });
+
+  test('idempotent: pre-existing pending flag is not duplicated', () => {
+    // First run must succeed and create the flag. The original relied on
+    // statSync throwing for this and never used the stat it took.
+    expect(run().status).toBe(0);
+    expect(fs.existsSync(pendingFlag())).toBe(true);
+
+    // Second run — flag stays, no error. mtime may update, so only existence
+    // is asserted.
+    expect(run().status).toBe(0);
+    expect(fs.existsSync(pendingFlag())).toBe(true);
+  });
+});
@@ -0,0 +1,90 @@
+/**
+ * V0 dormancy — negative tests.
+ *
+ * V1 keeps V0's psychographic machinery (5D dimensions + 8 archetypes + signal map)
+ * in code but explicitly does not surface it in default-mode skill output. This test
+ * enforces the maintenance boundary: if these strings ever appear in a generated
+ * tier-≥2 SKILL.md's normal (default-mode) content, V0 machinery has leaked.
+ *
+ * Exceptions (explicitly allowed): SKILL.md files for skills that legitimately discuss
+ * V0 machinery:
+ *   - plan-tune/ — the conversational inspection skill for /plan-tune
+ *   - office-hours/ — sets the declared profile
+ * For these, V0 vocabulary is load-bearing and must appear.
+ *
+ * All other tier-≥2 skills: 5D dim names + archetype names must NOT appear.
+ */
+import { describe, test, expect } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+
+const ROOT = path.resolve(import.meta.dir, '..'); // directory whose subdirectories are scanned for SKILL.md
+
+const FORBIDDEN_5D_DIMS = [ // dimension identifiers that must not appear in non-exempt SKILL.md files
+  'scope_appetite',
+  'risk_tolerance',
+  'detail_preference',
+  'architecture_care',
+  // `autonomy` is too common a word to forbid in arbitrary skill output.
+];
+
+const FORBIDDEN_ARCHETYPE_NAMES = [ // archetype display names, matched as exact case-sensitive substrings
+  'Cathedral Builder',
+  'Ship-It Pragmatist',
+  'Deep Craft',
+  'Taste Maker',
+  'Solo Operator',
+  // `Consultant`, `Wedge Hunter`, `Builder-Coach` — some may appear in prose
+  // naturally; check the strictly-V0-unique phrases first.
+];
+
+// Skills that legitimately reference V0 psychographic vocabulary.
+const ALLOWED_SKILLS_WITH_V0_VOCAB = new Set([ // compared against the skill's directory name
+  'plan-tune',
+  'office-hours',
+]);
+
+/**
+ * Finds every skill directory directly under ROOT whose preamble tier is 2 or higher.
+ *
+ * A directory counts as a skill when it contains both `SKILL.md` and
+ * `SKILL.md.tmpl`. The tier is the first `preamble-tier: <n>` found in the
+ * template; templates without one are treated as tier 4. Hidden directories,
+ * `node_modules`, and `test` are skipped.
+ *
+ * @returns One entry per qualifying skill: its directory name and the path to
+ *   its generated SKILL.md.
+ */
+function discoverTier2PlusSkillMds(): Array<{ skillName: string; mdPath: string }> {
+  const skippedDirs = new Set(['node_modules', 'test']);
+  return fs
+    .readdirSync(ROOT, { withFileTypes: true })
+    .filter((entry) => entry.isDirectory())
+    .filter((entry) => !entry.name.startsWith('.') && !skippedDirs.has(entry.name))
+    .flatMap((entry) => {
+      const mdPath = path.join(ROOT, entry.name, 'SKILL.md');
+      const tmplPath = path.join(ROOT, entry.name, 'SKILL.md.tmpl');
+      if (!(fs.existsSync(mdPath) && fs.existsSync(tmplPath))) return [];
+      // Check tier via frontmatter
+      const declared = fs.readFileSync(tmplPath, 'utf-8').match(/preamble-tier:\s*(\d+)/);
+      const tier = declared ? parseInt(declared[1], 10) : 4;
+      return tier >= 2 ? [{ skillName: entry.name, mdPath }] : [];
+    });
+}
+
+describe('V0 dormancy in default-mode skill output', () => {
+  const skills = discoverTier2PlusSkillMds();
+  // Skills where V0 vocabulary is allowed get no generated tests.
+  const gatedSkills = skills.filter(({ skillName }) => !ALLOWED_SKILLS_WITH_V0_VOCAB.has(skillName));
+
+  // Two tests per gated skill: dimension names first, then archetype names.
+  gatedSkills.forEach(({ skillName, mdPath }) => {
+    const readSkillMd = (): string => fs.readFileSync(mdPath, 'utf-8');
+
+    test(`${skillName}/SKILL.md contains no V0 psychographic dimension names`, () => {
+      const content = readSkillMd();
+      FORBIDDEN_5D_DIMS.forEach((dim) => {
+        expect(content).not.toContain(dim);
+      });
+    });
+
+    test(`${skillName}/SKILL.md contains no V0 archetype names`, () => {
+      const content = readSkillMd();
+      FORBIDDEN_ARCHETYPE_NAMES.forEach((archetype) => {
+        expect(content).not.toContain(archetype);
+      });
+    });
+  });
+
+  // Guards against discovery silently finding nothing. The count includes the
+  // exempt skills.
+  test('at least 5 tier-≥2 skills were checked (sanity)', () => {
+    expect(skills.length).toBeGreaterThanOrEqual(5);
+  });
+});
@@ -0,0 +1,101 @@
+/**
+ * Writing Style preamble section — gate-tier assertions on generated prose.
+ *
+ * These tests assert the V1 Writing Style section is properly composed into
+ * tier-≥2 preamble output, in both Claude and Codex host outputs. Since the
+ * block itself is prose the agent obeys at runtime, we can't test the agent's
+ * compliance here — that's the periodic LLM-judge E2E test (to-be-added).
+ *
+ * What this test enforces:
+ * - Writing Style section header present in tier-≥2 generated preamble
+ * - All 6 writing rules present (gloss, outcome, short, impact, first-use, override)
+ * - Jargon list inlined (sample terms appear)
+ * - Terse-mode gate condition text present
+ * - Codex output uses $GSTACK_BIN, not ~/.claude/... (host-aware paths)
+ * - Tier-1 preamble does NOT include Writing Style section
+ */
+import { describe, test, expect } from 'bun:test';
+import type { TemplateContext } from '../scripts/resolvers/types';
+import { HOST_PATHS } from '../scripts/resolvers/types';
+import { generatePreamble } from '../scripts/resolvers/preamble';
+
+/**
+ * Builds a TemplateContext for a placeholder skill, for passing to generatePreamble.
+ *
+ * @param host Host whose entry in HOST_PATHS is used for `paths`.
+ * @param tier Value assigned to `preambleTier`.
+ * @returns A context with fixed skill name and template path.
+ */
+function makeCtx(host: 'claude' | 'codex', tier: 1 | 2 | 3 | 4): TemplateContext {
+  const ctx: TemplateContext = {
+    skillName: 'test-skill',
+    tmplPath: 'test.tmpl',
+    host,
+    paths: HOST_PATHS[host],
+    preambleTier: tier,
+  };
+  return ctx;
+}
+
+describe('Writing Style preamble section', () => {
+  // Most tests inspect the same output: the tier-2 preamble for the Claude host.
+  // It is generated inside each test, not shared across tests.
+  const claudeTier2 = () => generatePreamble(makeCtx('claude', 2));
+
+  test('tier 2+ Claude preamble includes Writing Style header', () => {
+    expect(claudeTier2()).toContain('## Writing Style');
+  });
+
+  test('tier 2+ preamble includes EXPLAIN_LEVEL echo in bash', () => {
+    const preamble = claudeTier2();
+    expect(preamble).toContain('_EXPLAIN_LEVEL');
+    expect(preamble).toContain('EXPLAIN_LEVEL:');
+  });
+
+  test('tier 2+ preamble includes all 6 writing rules', () => {
+    const preamble = claudeTier2();
+    // Rule 1 — gloss jargon on first use.
+    expect(preamble).toContain('gloss on first use');
+    // Rule 2 — frame things in outcome terms.
+    expect(preamble).toMatch(/outcome terms/);
+    // Rule 3 — short sentences, concrete nouns, active voice.
+    expect(preamble).toContain('Short sentences');
+    expect(preamble.toLowerCase()).toContain('active voice');
+    // Rule 4 — close with user impact.
+    expect(preamble).toMatch(/user impact/);
+    // Rule 5 — gloss on first use even when the user pasted the term.
+    expect(preamble).toMatch(/paste.*jargon|paste.*term/i);
+    // Rule 6 — the user's current turn can override the style.
+    expect(preamble).toMatch(/user-turn override|user's own current message|user's in-turn/i);
+  });
+
+  test('tier 2+ preamble inlines jargon list', () => {
+    const preamble = claudeTier2();
+    // Spot-check two terms expected from scripts/jargon-list.json.
+    for (const term of ['idempotent', 'race condition']) {
+      expect(preamble).toContain(term);
+    }
+  });
+
+  test('tier 2+ preamble includes terse-mode gate condition', () => {
+    const preamble = claudeTier2();
+    expect(preamble).toContain('EXPLAIN_LEVEL: terse');
+    expect(preamble).toMatch(/skip.*terse|Terse mode.*skip/is);
+  });
+
+  test('Codex tier-2 preamble uses host-aware path (no .claude/)', () => {
+    const codexLines = generatePreamble(makeCtx('codex', 2)).split('\n');
+    // Only the bash line that assigns _EXPLAIN_LEVEL is inspected: it must not
+    // use a Claude-specific path and must go through $GSTACK_BIN.
+    const explainLine = codexLines.find((line) => line.includes('_EXPLAIN_LEVEL='));
+    expect(explainLine).toBeDefined();
+    expect(explainLine).not.toMatch(/~\/\.claude\//);
+    expect(explainLine).toContain('$GSTACK_BIN');
+  });
+
+  test('tier 1 preamble does NOT include Writing Style section', () => {
+    const tier1 = generatePreamble(makeCtx('claude', 1));
+    expect(tier1).not.toContain('## Writing Style');
+  });
+
+  test('tier 2+ preamble composition note references AskUserQuestion Format', () => {
+    // The Writing Style section is expected to name the existing Format section.
+    expect(claudeTier2()).toContain('AskUserQuestion Format');
+  });
+
+  test('tier 2+ preamble migration-prompt block appears', () => {
+    const preamble = claudeTier2();
+    expect(preamble).toContain('WRITING_STYLE_PENDING');
+    expect(preamble).toMatch(/writing-style-prompt-pending/);
+  });
+});