Files
gstack/test/writing-style-resolver.test.ts
T
Garry Tan 2aff29e956 fix(catalog): preserve routing prose when first sentence exceeds 200 chars
splitCatalogDescription truncated the lead BEFORE computing routing
extraction, which meant skills whose first sentence was over 200 chars
(design-consultation: 207 chars) had their entire routing prose silently
dropped — the "## When to invoke" body section came out empty.

Root cause: routing was extracted via `collapsed.indexOf(lead)` after lead
was suffixed with "...". The "..." never appeared in the original string,
so indexOf returned -1 and routingProse fell back to empty.

Fix: compute routing from sentenceLead (the untruncated first sentence)
BEFORE truncating the displayed lead. The displayed lead still gets "..."
when over 200 chars, but the routing extraction uses the real boundary.

Also: refresh golden snapshots for claude/codex/factory ship and update
two unit tests that asserted v1.44 behavior:
- skill-validation.test.ts: trigger-phrase + proactive-routing tests now
  search whole content, not just frontmatter (T4 moved them to a body
  "## When to invoke" section)
- writing-style-resolver.test.ts: jargon-list assertion now expects the
  T3 reference pointer, not the inline list

Test plan:
- bun test test/skill-validation.test.ts test/writing-style-resolver.test.ts
  test/host-config.test.ts test/skill-size-budget.test.ts
  test/parity-suite.test.ts test/skill-coverage-matrix.test.ts
  test/skill-coverage-floor.test.ts test/cso-preserved.test.ts
  test/resolver-entry.test.ts test/helpers/capture-parity-baseline.test.ts
  test/gen-skill-docs.test.ts: 1134 pass, 0 fail
- Manual verify: design-consultation/SKILL.md "## When to invoke this skill"
  body section now contains "Use when asked to..." + "Proactively suggest..."

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 21:09:59 -07:00

99 lines
4.2 KiB
TypeScript

/**
* Writing Style preamble section — gate-tier assertions on generated prose.
*
* These tests assert the V1 Writing Style section is properly composed into
* tier-≥2 preamble output, in both Claude and Codex host outputs. Since the
* block itself is prose the agent obeys at runtime, we can't test the agent's
* compliance here — that's the periodic LLM-judge E2E test (to-be-added).
*
* What this test enforces:
* - Writing Style section header present in tier-≥2 generated preamble
* - Compact semantic contract present (gloss, outcome, impact, override)
* - Jargon list referenced by path (pointer to jargon-list.json), NOT inlined
* - Terse-mode gate condition text present
* - Codex output uses $GSTACK_BIN, not ~/.claude/... (host-aware paths)
* - Tier-1 preamble does NOT include Writing Style section
*/
import { describe, test, expect } from 'bun:test';
import type { TemplateContext } from '../scripts/resolvers/types';
import { HOST_PATHS } from '../scripts/resolvers/types';
import { generatePreamble } from '../scripts/resolvers/preamble';
/**
 * Builds the TemplateContext handed to generatePreamble in these tests.
 *
 * The skill name and template path are fixed placeholder strings; only the
 * host and the preamble tier vary between tests.
 *
 * @param host - Host whose entry in HOST_PATHS supplies `paths`.
 * @param tier - Value stored as `preambleTier`.
 * @returns A context object for the requested host/tier combination.
 */
function makeCtx(host: 'claude' | 'codex', tier: 1 | 2 | 3 | 4): TemplateContext {
  const ctx: TemplateContext = {
    skillName: 'test-skill',
    tmplPath: 'test.tmpl',
    host,
    paths: HOST_PATHS[host],
    preambleTier: tier,
  };
  return ctx;
}
describe('Writing Style preamble section', () => {
  // Renders a fresh preamble for the given host/tier. Called from inside each
  // test body so a generation failure is attributed to that test.
  const render = (host: 'claude' | 'codex', tier: 1 | 2 | 3 | 4) =>
    generatePreamble(makeCtx(host, tier));

  test('tier 2+ Claude preamble includes Writing Style header', () => {
    expect(render('claude', 2)).toContain('## Writing Style');
  });

  test('tier 2+ preamble includes EXPLAIN_LEVEL echo in bash', () => {
    const preamble = render('claude', 2);
    // Both the shell variable name and the echoed "EXPLAIN_LEVEL:" label must appear.
    for (const needle of ['_EXPLAIN_LEVEL', 'EXPLAIN_LEVEL:']) {
      expect(preamble).toContain(needle);
    }
  });

  test('tier 2+ preamble includes the compact writing-style contract', () => {
    const preamble = render('claude', 2);
    // One pattern per contract element: gloss, outcome, impact, override.
    const contractPatterns = [
      /gloss.*first use|first-use.*gloss/i,
      /outcome/i,
      /user impact|user.*experience|what.*user.*sees/i,
      /terse|no explanations|user-turn override|current message/i,
    ];
    for (const pattern of contractPatterns) {
      expect(preamble).toMatch(pattern);
    }
  });

  test('tier 2+ preamble references jargon list by path (v1.45.0.0 T3 — pointer, not inline)', () => {
    const preamble = render('claude', 2);
    // T3 dedup: the 80-term jargon list lives in scripts/jargon-list.json and
    // the Writing Style section points at that file instead of inlining it
    // (saving ~70 KB across the corpus). Agents Read the JSON on the first
    // jargon term encountered per session.
    expect(preamble).toContain('jargon-list.json');
    expect(preamble).toContain('Curated jargon list');
    // Negative check: literal term lines must no longer be inlined.
    const inlinedTermLines = [/^- idempotent$/m, /^- race condition$/m];
    for (const termLine of inlinedTermLines) {
      expect(preamble).not.toMatch(termLine);
    }
  });

  test('tier 2+ preamble includes terse-mode gate condition', () => {
    const preamble = render('claude', 2);
    expect(preamble).toContain('EXPLAIN_LEVEL: terse');
    expect(preamble).toMatch(/skip.*terse|Terse mode.*skip/is);
  });

  test('Codex tier-2 preamble uses host-aware path (no .claude/)', () => {
    const lines = render('codex', 2).split('\n');
    // Inspect the bash line that assigns _EXPLAIN_LEVEL: it must not point at
    // a Claude-specific bin path.
    const explainLine = lines.find((line) => line.includes('_EXPLAIN_LEVEL='));
    expect(explainLine).toBeDefined();
    expect(explainLine).not.toMatch(/~\/\.claude\//);
    // Codex uses $GSTACK_BIN instead.
    expect(explainLine).toContain('$GSTACK_BIN');
  });

  test('tier 1 preamble does NOT include Writing Style section', () => {
    expect(render('claude', 1)).not.toContain('## Writing Style');
  });

  test('tier 2+ preamble composition note references AskUserQuestion Format', () => {
    // The Writing Style section is expected to compose explicitly with the
    // existing Format section, so its name must appear in the output.
    expect(render('claude', 2)).toContain('AskUserQuestion Format');
  });

  test('tier 2+ preamble migration-prompt block appears', () => {
    const preamble = render('claude', 2);
    expect(preamble).toContain('WRITING_STYLE_PENDING');
    expect(preamble).toMatch(/writing-style-prompt-pending/);
  });
});