mirror of
https://github.com/garrytan/gstack.git
synced 2026-07-02 22:36:00 +02:00
8b94e6d993
Three untested surfaces from the v1.46.0.0 work. All three would have caught real bugs we shipped (and fixed) on this branch. 1. test/helpers/budget-override.test.ts — 7 tests pin the audit-trail contract for EVALS_BUDGET_OVERRIDE_REASON and GSTACK_SIZE_BUDGET_OVERRIDE_REASON. Without this, the audit logger could silently drop events and overrides become invisible. Tests cover: required fields per JSONL line, CI provenance capture (CI/GITHUB_ACTIONS/branch/commit), local-runner defaults, append-only behavior, missing-directory recovery, and unwritable-path resilience (logs warning instead of throwing). 2. test/terse-build.test.ts — 16 tests pin --explain-level=terse behavior across the 4 gated resolvers and the composed preamble. Default vs terse vs undefined-ctx all asserted. Without this, a refactor that breaks the explainLevel threading silently regresses the opt-in compression path; the runtime EXPLAIN_LEVEL: terse gate still works so users wouldn't notice. Tier-1 invariant pinned (terse-only-affects-tier-2+). 3. test/gen-skill-docs-idempotency.test.ts — 2 tests catch the class of bug behind the v1.45.0.0 timestamp flap. Two consecutive gen-skill-docs runs must produce byte-identical outputs across STABLE_OUTPUTS (proactive-suggestions.json, SKILL.md, ship/SKILL.md, plan-ceo-review/SKILL.md, office-hours/SKILL.md, gstack/llms.txt). --dry-run reports zero stale files after a fresh gen. CI freshness regressions surface as test failures BEFORE a PR is opened. Test plan: - bun test test/helpers/budget-override.test.ts: 7 pass - bun test test/terse-build.test.ts: 16 pass - bun test test/gen-skill-docs-idempotency.test.ts: 2 pass - Full focused suite (15 test files): 1179 pass, 0 fail (+45 new tests vs the pre-fill baseline of 1134) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
111 lines
3.9 KiB
TypeScript
111 lines
3.9 KiB
TypeScript
/**
 * Idempotency test for gen-skill-docs (regression for v1.45.0.0 timestamp flap).
 *
 * Running `bun run gen:skill-docs` twice in a row must make the second run a
 * no-op: every output file is byte-identical to what the first run wrote.
 * Without this gate, CI freshness checks flap whenever someone introduces a
 * timestamp, a random seed, or any other non-deterministic field into a
 * generated artifact.
 *
 * v1.45.0.0 shipped with a `generated_at` ISO timestamp in
 * scripts/proactive-suggestions.json that updated every run. CI freshness
 * checks failed because the committed file's timestamp never matched the
 * latest gen. Fixed in 43e18af4 — this test pins the contract going forward.
 *
 * The test pays a small cost (2 gen-skill-docs invocations plus one
 * `--dry-run`, ~3s total) but catches a class of bugs that's invisible until
 * CI fails.
 */
|
|
|
|
import { describe, test, expect } from 'bun:test';
|
|
import { spawnSync } from 'child_process';
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
|
|
// Absolute path of the directory one level above this module's own directory
// (`import.meta.dir`). Used below as the working directory for gen-skill-docs
// and as the base for resolving the generated output paths.
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
|
|
|
/**
 * Repo-relative paths of generated artifacts whose contents must not change
 * between two back-to-back gen-skill-docs runs.
 */
const STABLE_OUTPUTS: readonly string[] = [
  'scripts/proactive-suggestions.json',
  'SKILL.md',
  'ship/SKILL.md',
  'plan-ceo-review/SKILL.md',
  'office-hours/SKILL.md',
  'gstack/llms.txt',
];
|
|
|
|
/**
 * Runs `bun run gen:skill-docs` synchronously with REPO_ROOT as the working
 * directory and reports how it ended.
 *
 * @returns
 *   - `exitCode`: the child's exit status, or -1 when no status is available
 *     (the process could not be spawned, or it was killed by a signal such as
 *     the one sent when the 60s timeout expires).
 *   - `stderr`: everything the child wrote to stderr. When the spawn itself
 *     failed or the child was terminated by a signal, that reason is appended
 *     so a -1 exit code is diagnosable instead of arriving with empty output.
 */
function runGen(): { exitCode: number; stderr: string } {
  const result = spawnSync('bun', ['run', 'gen:skill-docs'], {
    cwd: REPO_ROOT,
    stdio: ['ignore', 'pipe', 'pipe'],
    timeout: 60_000,
  });

  const diagnostics: string[] = [];

  const captured = result.stderr?.toString() ?? '';
  if (captured) diagnostics.push(captured);

  // `status` is null when the child never started or was killed; in those
  // cases the only explanation lives in `error` / `signal`, not in stderr.
  if (result.error) diagnostics.push(`spawn error: ${result.error.message}`);
  if (result.signal) diagnostics.push(`terminated by signal ${result.signal}`);

  return {
    exitCode: result.status ?? -1,
    stderr: diagnostics.join('\n'),
  };
}
|
|
|
|
/**
 * Reads the current contents of every STABLE_OUTPUTS file that exists on disk.
 *
 * @returns A map from repo-relative path to the file's text decoded as UTF-8.
 *   Paths that do not exist under REPO_ROOT are left out of the map.
 */
function snapshot(): Map<string, string> {
  const contentsByPath = new Map<string, string>();

  STABLE_OUTPUTS.forEach((relativePath) => {
    const absolutePath = path.join(REPO_ROOT, relativePath);
    // Missing outputs are skipped rather than treated as an error.
    if (!fs.existsSync(absolutePath)) return;
    contentsByPath.set(relativePath, fs.readFileSync(absolutePath, 'utf-8'));
  });

  return contentsByPath;
}
|
|
|
|
describe('gen-skill-docs idempotency', () => {
  /**
   * Fails the current test when a gen run did not exit 0, including whatever
   * stderr the run captured so the failure explains itself.
   */
  const expectCleanExit = (label: string, run: { exitCode: number; stderr: string }): void => {
    if (run.exitCode !== 0) {
      throw new Error(
        `${label} gen-skill-docs run exited with code ${run.exitCode}` +
          (run.stderr ? `:\n${run.stderr}` : ' (no stderr captured)'),
      );
    }
  };

  test('two consecutive runs produce byte-identical outputs (no flapping fields)', () => {
    expectCleanExit('first', runGen());

    const after1 = snapshot();
    // Guard against a vacuous pass: at least one stable output must exist.
    expect(after1.size).toBeGreaterThan(0);

    expectCleanExit('second', runGen());

    const after2 = snapshot();

    // Compare each stable output across both snapshots. Iterating the union
    // of keys also flags a file that only appears (or disappears) on the
    // second run, which iterating the first snapshot alone would miss.
    const files = new Set([...after1.keys(), ...after2.keys()]);
    const flapping = [...files].filter(file => after1.get(file) !== after2.get(file));

    if (flapping.length > 0) {
      throw new Error(
        `${flapping.length} file(s) changed between two consecutive gen-skill-docs runs (flapping):\n` +
          flapping.map(f => `  - ${f}`).join('\n') +
          `\nLikely cause: a non-deterministic field (timestamp, random ID, ` +
          `filesystem-iteration order) leaked into the generated output. CI freshness ` +
          `checks (git diff --exit-code) will fail unpredictably until this is fixed.`,
      );
    }
  }, 180_000); // 3 min budget for two gen runs

  test('--dry-run after a fresh gen reports zero stale files', () => {
    // Pre-condition: working tree gen must be fresh (idempotency test above ran first).
    // If a contributor introduces a non-deterministic field, this dry-run reports STALE.
    const result = spawnSync('bun', ['run', 'gen:skill-docs', '--dry-run'], {
      cwd: REPO_ROOT,
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 60_000,
    });

    const stdout = result.stdout?.toString() ?? '';
    // STALE: prefix means a file would change. Count them.
    const staleLines = stdout.split('\n').filter(l => l.startsWith('STALE:'));

    // Report stale files before looking at the exit status, so the list of
    // offending files is shown even when the dry-run also exits non-zero.
    if (staleLines.length > 0) {
      throw new Error(
        `--dry-run reports ${staleLines.length} stale file(s) after a fresh gen:\n` +
          staleLines.map(l => `  ${l}`).join('\n') +
          `\nRun \`bun run gen:skill-docs\` and commit the result.`,
      );
    }

    if (result.status !== 0) {
      const details = [
        result.stderr?.toString() ?? '',
        result.error ? `spawn error: ${result.error.message}` : '',
        result.signal ? `terminated by signal ${result.signal}` : '',
      ].filter(part => part.length > 0);
      throw new Error(
        `gen-skill-docs --dry-run exited with ${result.status ?? 'no exit status'}` +
          (details.length > 0 ? `:\n${details.join('\n')}` : ' (no stderr captured)'),
      );
    }
  }, 90_000);
});
|