From 5f316e0eb4bdf4914f6751f03736df44d55cd80d Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 19 Apr 2026 06:08:29 +0800 Subject: [PATCH] test harness: runSkillTest accepts per-test env vars Adds an optional env: param that Bun.spawn merges into the spawned claude -p process environment. Backwards-compatible: omitting the param keeps the prior behavior (inherit parent env only). Motivation: E2E tests were stuffing environment setup into the prompt itself ("Use GSTACK_HOME=X and the bin scripts at ./bin/"), which made the agent interpret the prompt as bash-run instructions and bypass the Skill tool. Slash-command routing tests failed because the routing assertion (skillCalls includes "context-save") never fired. With env: support, a test can pass GSTACK_HOME via process env and leave the prompt as a minimal slash-command invocation. The agent sees "/context-save wintermute" and the skill handles env lookup in its own preamble. Routing assertion can now actually observe the Skill tool being called. Two lines of code. No behavioral change for existing tests that don't pass env:. --- test/helpers/session-runner.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/helpers/session-runner.ts b/test/helpers/session-runner.ts index c0f2ac00..ae045433 100644 --- a/test/helpers/session-runner.ts +++ b/test/helpers/session-runner.ts @@ -126,6 +126,10 @@ export async function runSkillTest(options: { runId?: string; /** Model to use. Defaults to claude-sonnet-4-6 (overridable via EVALS_MODEL env). */ model?: string; + /** Extra env vars merged into the spawned claude -p process. Useful for + * per-test GSTACK_HOME overrides so the test doesn't have to spell out + * env setup in the prompt itself. */ + env?: Record<string, string>; }): Promise { const { prompt, @@ -135,6 +139,7 @@ export async function runSkillTest(options: { timeout = 120_000, testName, runId, + env: extraEnv, } = options; const model = options.model ?? process.env.EVALS_MODEL ?? 
'claude-sonnet-4-6'; @@ -171,6 +176,7 @@ export async function runSkillTest(options: { const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], { cwd: workingDirectory, + env: extraEnv ? { ...process.env, ...extraEnv } : undefined, stdout: 'pipe', stderr: 'pipe', });