From 85ac5c2b64b3492069485387ae0a0b003232b912 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Sun, 7 Jun 2026 17:56:01 -0700
Subject: [PATCH] fix(test): make external-host freshness checks deterministic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The parameterized host smoke + --host all freshness tests assumed an external
`gen:skill-docs --host all` had run first (it never does in `bun test`), so
which host reported STALE varied by sibling-test timing — flaky.

Regenerate the gitignored external host dirs in a beforeAll so the --dry-run
check is deterministic. It still catches non-deterministic generation (the
real bug class for regenerated outputs); the tracked-claude freshness test runs
earlier and is unaffected.

The regeneration lives in one shared helper that throws when the generator
exits non-zero, so a broken generator fails in the hook with its stderr
instead of being silently ignored and resurfacing as a confusing STALE report.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 test/gen-skill-docs.test.ts | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts
index 3554094ca..24f337f3d 100644
--- a/test/gen-skill-docs.test.ts
+++ b/test/gen-skill-docs.test.ts
@@ -1,4 +1,4 @@
-import { describe, test, expect } from 'bun:test';
+import { describe, test, expect, beforeAll } from 'bun:test';
 import { COMMAND_DESCRIPTIONS } from '../browse/src/commands';
 import { SNAPSHOT_FLAGS } from '../browse/src/snapshot';
 import * as fs from 'fs';
@@ -2125,6 +2125,37 @@ describe('Factory generation (--host factory)', () => {
 import { ALL_HOST_CONFIGS, getExternalHosts } from '../hosts/index';
 
+/**
+ * Regenerate the skill docs of every external host so the per-host and
+ * `--host all` `--dry-run` freshness checks start from fresh on-disk output.
+ *
+ * These host dirs (.agents/.factory/.cursor/...) are gitignored regenerated
+ * artifacts, so the freshness check is really an idempotency/determinism
+ * check — it still catches non-deterministic gen, but no longer flakes on
+ * stale-on-disk state left by a missing `gen --host all` prestep (the
+ * canonical `bun test` does not run one). The tracked-claude freshness test
+ * (`generated files are fresh`) runs earlier and is unaffected.
+ *
+ * @throws {Error} if the generator exits non-zero for any host, so a broken
+ *   regeneration fails here with its stderr instead of surfacing later as a
+ *   misleading STALE report.
+ */
+function regenerateExternalHosts(): void {
+  for (const host of getExternalHosts()) {
+    const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', host.name], {
+      cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
+    });
+    if (result.exitCode !== 0) {
+      throw new Error(
+        `gen-skill-docs --host ${host.name} failed (exit ${result.exitCode}):\n${result.stderr.toString()}`,
+      );
+    }
+  }
+}
+
 describe('Parameterized host smoke tests', () => {
+  // Make every external host fresh on disk before the per-host `--dry-run` checks.
+  beforeAll(regenerateExternalHosts);
+
   for (const hostConfig of getExternalHosts()) {
     describe(`${hostConfig.displayName} (--host ${hostConfig.name})`, () => {
       const hostDir = path.join(ROOT, hostConfig.hostSubdir, 'skills');
@@ -2208,6 +2239,10 @@ describe('Parameterized host smoke tests', () => {
 // ─── --host all tests ────────────────────────────────────────
 
 describe('--host all', () => {
+  // Same determinism guard as the parameterized block: make external hosts fresh on
+  // disk so `--host all --dry-run` reports FRESH regardless of prior state.
+  beforeAll(regenerateExternalHosts);
+
   test('--host all generates for all registered hosts', () => {
     const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', 'all', '--dry-run'], {
       cwd: ROOT, stdout: 'pipe', stderr: 'pipe',