From 66887b2f05275f226818616ced998f11d74c009e Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Thu, 23 Apr 2026 09:13:37 -0700 Subject: [PATCH] feat(preflight): sanity check for agent-sdk + overlay resolver Verifies: SDK loads, claude-opus-4-7 is a live API model, SDKMessage event shape matches assumptions, readOverlay resolves INHERIT directives and includes expected content. Run with `bun run scripts/preflight-agent-sdk.ts`. PREFLIGHT OK on first run, $0.013 API spend. --- scripts/preflight-agent-sdk.ts | 133 +++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 scripts/preflight-agent-sdk.ts diff --git a/scripts/preflight-agent-sdk.ts b/scripts/preflight-agent-sdk.ts new file mode 100644 index 00000000..9902306c --- /dev/null +++ b/scripts/preflight-agent-sdk.ts @@ -0,0 +1,133 @@ +/** + * Preflight for the overlay efficacy harness. + * + * Confirms, before any paid eval runs: + * 1. `@anthropic-ai/claude-agent-sdk` loads and `query()` is the expected shape. + * 2. `claude-opus-4-7` is a live API model ID (not a Claude Code alias). + * 3. The SDK event stream contains the types we assume (system init, assistant, + * result) with the fields we destructure. + * 4. `scripts/resolvers/model-overlay.ts` resolves `{{INHERIT:claude}}` against + * `opus-4-7.md` AND the resolved text contains the "Fan out explicitly" nudge. + * 5. A local `claude` binary exists at `which claude` so binary pinning is possible. + * + * Run: bun run scripts/preflight-agent-sdk.ts + * + * Exit 0 on success. Exit non-zero with a clear message on any failure. No + * side effects beyond stdout and a ~15 token API call. 
+ */
+
+import { query, type SDKMessage } from '@anthropic-ai/claude-agent-sdk';
+import { readOverlay } from './resolvers/model-overlay';
+import { execSync } from 'child_process';
+
+/**
+ * Runs every preflight check in order and reports the outcome on stdout.
+ *
+ * Each assertion prints one ` ok` or ` FAIL` line. A failure is recorded and
+ * the remaining checks still run, so a single invocation lists everything that
+ * is wrong. After the last check the process exits with status 1 if any
+ * failure was recorded; otherwise it prints `PREFLIGHT OK`, followed by the
+ * names of any checks that were skipped rather than verified.
+ */
+async function main() {
+  const failures: string[] = [];
+  // Checks that could not run at all; surfaced in the final line so a partial
+  // run is not mistaken for a full pass.
+  const skipped: string[] = [];
+  const pass = (msg: string) => console.log(` ok ${msg}`);
+  const fail = (msg: string) => {
+    console.log(` FAIL ${msg}`);
+    failures.push(msg);
+  };
+
+  // 1. Overlay resolver + fanout nudge text
+  console.log('1. Overlay resolver');
+  const resolved = readOverlay('opus-4-7');
+  if (!resolved) {
+    fail("readOverlay('opus-4-7') returned empty");
+  } else {
+    pass(`resolved overlay length: ${resolved.length} chars`);
+    // A surviving directive means the resolver returned the raw template.
+    if (resolved.includes('{{INHERIT:')) {
+      fail('resolved overlay still contains {{INHERIT:...}} directive');
+    } else {
+      pass('no unresolved INHERIT directives');
+    }
+    if (!/Fan out explicitly/i.test(resolved)) {
+      fail('resolved overlay does not contain "Fan out explicitly" text');
+    } else {
+      pass('fanout nudge text present in resolved overlay');
+    }
+  }
+
+  // 2. Local claude binary exists
+  console.log('\n2. Binary pinning');
+  let claudePath: string | null = null;
+  try {
+    // execSync throws when `which` exits non-zero, i.e. when nothing is found.
+    const located = execSync('which claude', { encoding: 'utf-8' }).trim();
+    if (located) {
+      claudePath = located;
+      pass(`local claude binary: ${claudePath}`);
+    } else {
+      // Exit status 0 with no output would otherwise pin an empty path.
+      fail('`which claude` printed no path — cannot pin binary');
+    }
+  } catch {
+    fail('`which claude` failed — cannot pin binary');
+  }
+
+  // 3. SDK query end-to-end
+  console.log('\n3. SDK query end-to-end');
+  if (!process.env.ANTHROPIC_API_KEY) {
+    console.log(' skip ANTHROPIC_API_KEY not set — cannot test live query');
+    skipped.push('live SDK query');
+  } else {
+    try {
+      const events: SDKMessage[] = [];
+      const q = query({
+        prompt: 'say pong',
+        options: {
+          model: 'claude-opus-4-7',
+          systemPrompt: '',
+          tools: [],
+          permissionMode: 'bypassPermissions',
+          allowDangerouslySkipPermissions: true,
+          settingSources: [],
+          maxTurns: 1,
+          pathToClaudeCodeExecutable: claudePath ?? undefined,
+          // NOTE(review): only the API key is forwarded, presumably to keep the
+          // run isolated from the caller's environment — confirm this is intended.
+          env: { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY },
+        },
+      });
+      // Drain the whole stream first; the checks below inspect it as a batch.
+      for await (const ev of q) events.push(ev);
+      pass(`received ${events.length} events`);
+
+      const init = events.find(
+        (e) => e.type === 'system' && (e as { subtype?: string }).subtype === 'init',
+      ) as { claude_code_version?: string; model?: string } | undefined;
+      if (!init) {
+        fail('no system/init event received');
+      } else {
+        pass(`system init: claude_code_version=${init.claude_code_version}, model=${init.model}`);
+      }
+
+      const assistantEvents = events.filter((e) => e.type === 'assistant');
+      if (assistantEvents.length === 0) {
+        fail('no assistant events received — model ID may be rejected');
+      } else {
+        pass(`received ${assistantEvents.length} assistant event(s)`);
+        const first = assistantEvents[0] as { message?: { content?: unknown[] } };
+        const content = first.message?.content;
+        if (!Array.isArray(content)) {
+          fail('first assistant event has no content[] array');
+        } else {
+          pass(`first assistant content[] has ${content.length} block(s)`);
+        }
+      }
+
+      const result = events.find((e) => e.type === 'result') as
+        | { subtype?: string; is_error?: boolean; total_cost_usd?: number; num_turns?: number }
+        | undefined;
+      if (!result) {
+        fail('no result event received');
+      } else if (result.subtype !== 'success' || result.is_error === true) {
+        // A result event can itself describe a failed run; its presence alone
+        // does not show that the query succeeded.
+        fail(`result reported an error: subtype=${result.subtype}, is_error=${result.is_error}`);
+      } else {
+        const cost =
+          result.total_cost_usd === undefined ? 'unknown' : `$${result.total_cost_usd.toFixed(4)}`;
+        pass(`result: subtype=${result.subtype}, cost=${cost}, turns=${result.num_turns}`);
+      }
+    } catch (err) {
+      fail(`SDK query threw: ${err instanceof Error ? err.message : String(err)}`);
+    }
+  }
+
+  console.log();
+  if (failures.length > 0) {
+    console.log(`PREFLIGHT FAILED: ${failures.length} check(s) failed`);
+    process.exit(1);
+  }
+  console.log(
+    skipped.length > 0 ? `PREFLIGHT OK (skipped: ${skipped.join(', ')})` : 'PREFLIGHT OK',
+  );
+}
+
+// Anything that escapes main() is an unexpected crash: print it and exit non-zero.
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});