mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-29 05:00:01 +02:00
8b94e6d993
Three untested surfaces from the v1.46.0.0 work. All three would have caught real bugs we shipped (and fixed) on this branch. 1. test/helpers/budget-override.test.ts — 7 tests pin the audit-trail contract for EVALS_BUDGET_OVERRIDE_REASON and GSTACK_SIZE_BUDGET_OVERRIDE_REASON. Without this, the audit logger could silently drop events and overrides become invisible. Tests cover: required fields per JSONL line, CI provenance capture (CI/GITHUB_ACTIONS/branch/commit), local-runner defaults, append-only behavior, missing-directory recovery, and unwritable-path resilience (logs warning instead of throwing). 2. test/terse-build.test.ts — 16 tests pin --explain-level=terse behavior across the 4 gated resolvers and the composed preamble. Default vs terse vs undefined-ctx all asserted. Without this, a refactor that breaks the explainLevel threading silently regresses the opt-in compression path; the runtime EXPLAIN_LEVEL: terse gate still works so users wouldn't notice. Tier-1 invariant pinned (terse-only-affects-tier-2+). 3. test/gen-skill-docs-idempotency.test.ts — 2 tests catch the class of bug behind the v1.45.0.0 timestamp flap. Two consecutive gen-skill-docs runs must produce byte-identical outputs across STABLE_OUTPUTS (proactive-suggestions.json, SKILL.md, ship/SKILL.md, plan-ceo-review/SKILL.md, office-hours/SKILL.md, gstack/llms.txt). --dry-run reports zero stale files after a fresh gen. CI freshness regressions surface as test failures BEFORE a PR is opened. Test plan: - bun test test/helpers/budget-override.test.ts: 7 pass - bun test test/terse-build.test.ts: 16 pass - bun test test/gen-skill-docs-idempotency.test.ts: 2 pass - Full focused suite (15 test files): 1179 pass, 0 fail (+45 new tests vs the pre-fill baseline of 1134) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
152 lines
6.1 KiB
TypeScript
152 lines
6.1 KiB
TypeScript
/**
 * Unit tests for the terse-build flag (v1.46.0.0 T3).
 *
 * `--explain-level=terse` makes the gen-skill-docs pipeline shrink 4 preamble
 * sections at gen time: three are dropped entirely and Writing Style collapses
 * to a one-line directive. Default builds keep the full sections. Without
 * these tests, a refactor that breaks the explainLevel threading silently
 * regresses one of the opt-in compression paths — the runtime
 * EXPLAIN_LEVEL: terse gate still works, so users wouldn't notice immediately.
 *
 * Pure-function tests against the resolvers — fast, free, no subprocess.
 */
|
|
|
|
import { describe, test, expect } from 'bun:test';
|
|
import type { TemplateContext } from '../scripts/resolvers/types';
|
|
import { generateWritingStyle } from '../scripts/resolvers/preamble/generate-writing-style';
|
|
import { generateCompletenessSection } from '../scripts/resolvers/preamble/generate-completeness-section';
|
|
import { generateConfusionProtocol } from '../scripts/resolvers/preamble/generate-confusion-protocol';
|
|
import { generateContextHealth } from '../scripts/resolvers/preamble/generate-context-health';
|
|
import { generatePreamble } from '../scripts/resolvers/preamble';
|
|
|
|
/**
 * Builds a minimal `TemplateContext` fixture for the preamble resolver tests.
 *
 * Every field except `explainLevel` and `preambleTier` is a fixed literal, so
 * two contexts built with the same arguments are structurally equal.
 *
 * @param explainLevel - Stored verbatim on the context; stays `undefined` when omitted.
 * @param tier - Stored as `preambleTier`; defaults to 4.
 * @returns A context for the `claude` host with hard-coded skill name, template path, and install paths.
 */
function makeCtx(explainLevel?: 'default' | 'terse', tier: number = 4): TemplateContext {
  return {
    skillName: 'test-skill',
    tmplPath: '/tmp/test/SKILL.md.tmpl',
    host: 'claude',
    paths: {
      skillRoot: '~/.claude/skills/gstack',
      localSkillRoot: '.claude/skills',
      binDir: '~/.claude/skills/gstack/bin',
      browseDir: '~/.claude/skills/gstack/browse/dist',
      designDir: '~/.claude/skills/gstack/design/dist',
      makePdfDir: '~/.claude/skills/gstack/make-pdf/dist',
    },
    preambleTier: tier,
    explainLevel,
  };
}
|
|
|
|
// Per-resolver checks: each gated resolver is called directly with a fixture
// context so a failure points at the single resolver whose gate broke.
describe('terse build — per-resolver behavior', () => {
  // Writing Style keeps its heading in terse mode but swaps the body for a
  // short directive.
  describe('generateWritingStyle', () => {
    test('default: emits full section with jargon-list pointer', () => {
      const out = generateWritingStyle(makeCtx('default'));
      expect(out).toContain('## Writing Style');
      expect(out).toContain('jargon-list.json');
      expect(out).toContain('Curated jargon list');
      expect(out).toContain('outcome');
    });

    test('terse: emits one-line terse directive only', () => {
      const out = generateWritingStyle(makeCtx('terse'));
      expect(out).toContain('## Writing Style');
      expect(out).toContain('Terse mode (build-time)');
      // Negative: NONE of the default-mode prose
      expect(out).not.toContain('jargon-list.json');
      expect(out).not.toContain('Curated jargon list');
      expect(out).not.toContain('Frame questions in outcome terms');
    });

    test('terse is meaningfully shorter than default', () => {
      const fullLen = generateWritingStyle(makeCtx('default')).length;
      const terseLen = generateWritingStyle(makeCtx('terse')).length;
      // Terse output must be under a third of the default output's length.
      expect(terseLen).toBeLessThan(fullLen / 3);
    });
  });

  // Completeness Principle is expected to vanish entirely in terse mode.
  describe('generateCompletenessSection', () => {
    test('default: emits full section with Boil-the-Lake prose', () => {
      const out = generateCompletenessSection(makeCtx('default'));
      expect(out).toContain('## Completeness Principle');
      expect(out).toContain('Boil the Lake');
    });

    test('terse: returns empty string', () => {
      expect(generateCompletenessSection(makeCtx('terse'))).toBe('');
    });

    test('no ctx arg: defaults to non-terse (back-compat with old callers)', () => {
      const out = generateCompletenessSection();
      expect(out).toContain('## Completeness Principle');
    });
  });

  // Confusion Protocol follows the same drop-in-terse contract, including the
  // zero-argument call form.
  describe('generateConfusionProtocol', () => {
    test('default: emits full section', () => {
      const out = generateConfusionProtocol(makeCtx('default'));
      expect(out).toContain('## Confusion Protocol');
      expect(out).toContain('high-stakes ambiguity');
    });

    test('terse: returns empty string', () => {
      expect(generateConfusionProtocol(makeCtx('terse'))).toBe('');
    });

    test('no ctx arg: defaults to non-terse', () => {
      expect(generateConfusionProtocol()).toContain('## Confusion Protocol');
    });
  });

  // Context Health is exercised only with an explicit context here.
  describe('generateContextHealth', () => {
    test('default: emits full section', () => {
      const out = generateContextHealth(makeCtx('default'));
      expect(out).toContain('## Context Health');
      expect(out).toContain('PROGRESS');
    });

    test('terse: returns empty string', () => {
      expect(generateContextHealth(makeCtx('terse'))).toBe('');
    });
  });
});
|
|
|
|
// Integration checks: run the composed preamble so a break in how
// explainLevel reaches the individual resolvers shows up even when each
// resolver passes its own tests.
describe('terse build — generatePreamble integration', () => {
  test('default tier-2 preamble includes all 4 terse-gated sections', () => {
    const out = generatePreamble(makeCtx('default', 2));
    expect(out).toContain('## Writing Style');
    expect(out).toContain('## Completeness Principle');
    expect(out).toContain('## Confusion Protocol');
    expect(out).toContain('## Context Health');
  });

  test('terse tier-2 preamble drops 3 of 4 sections + collapses Writing Style', () => {
    const out = generatePreamble(makeCtx('terse', 2));
    // Writing Style heading still present (collapsed to one line)
    expect(out).toContain('## Writing Style');
    expect(out).toContain('Terse mode (build-time)');
    // Three sections dropped entirely
    expect(out).not.toContain('## Completeness Principle');
    expect(out).not.toContain('## Confusion Protocol');
    expect(out).not.toContain('## Context Health');
  });

  test('terse preamble is measurably smaller', () => {
    const defaultLen = generatePreamble(makeCtx('default', 2)).length;
    const terseLen = generatePreamble(makeCtx('terse', 2)).length;
    // Saving roughly 2-4 KB across the 4 sections; assert at least 1 KB saved.
    expect(defaultLen - terseLen).toBeGreaterThan(1024);
  });

  test('terse preamble at tier 1 is identical to default (terse only affects tier-2+ sections)', () => {
    // Tier 1 doesn't include the 4 terse-gated sections in the first place.
    const defaultT1 = generatePreamble(makeCtx('default', 1));
    const terseT1 = generatePreamble(makeCtx('terse', 1));
    expect(terseT1).toBe(defaultT1);
  });

  test('explainLevel undefined behaves as default', () => {
    // An unset explainLevel must yield exactly the output of an explicit 'default'.
    const undefinedOut = generatePreamble(makeCtx(undefined, 2));
    const defaultOut = generatePreamble(makeCtx('default', 2));
    expect(undefinedOut).toBe(defaultOut);
  });
});
|