diff --git a/test/helpers/agent-sdk-runner.ts b/test/helpers/agent-sdk-runner.ts index ce4512bfe..c35585438 100644 --- a/test/helpers/agent-sdk-runner.ts +++ b/test/helpers/agent-sdk-runner.ts @@ -300,6 +300,14 @@ export async function runAgentSdkTest( const queryImpl: QueryProvider = opts.queryProvider ?? query; const model = opts.model ?? 'claude-opus-4-7'; + // Default GSTACK_HEADLESS=1 so SDK-driven eval/E2E runs classify as headless: an + // AskUserQuestion failure BLOCKs instead of emitting a prose question no human can + // answer. Set ambiently (the SDK child inherits process.env) rather than via + // sdkOpts.env — passing an env object to the SDK breaks its auth pipeline (see + // CLAUDE.md). A suite testing the interactive prose-fallback path sets + // process.env.GSTACK_HEADLESS='' before calling. + if (process.env.GSTACK_HEADLESS === undefined) process.env.GSTACK_HEADLESS = '1'; + let attempt = 0; let lastErr: unknown = null; diff --git a/test/helpers/providers/claude.ts b/test/helpers/providers/claude.ts index 5e3c1acb1..ce77767c2 100644 --- a/test/helpers/providers/claude.ts +++ b/test/helpers/providers/claude.ts @@ -52,6 +52,9 @@ export class ClaudeAdapter implements ProviderAdapter { timeout: opts.timeoutMs, encoding: 'utf-8', maxBuffer: 32 * 1024 * 1024, + // Force GSTACK_HEADLESS=1 (it follows the process.env spread, so it overrides any ambient value): a benchmark run + // always classifies as headless, so an AskUserQuestion failure BLOCKs rather than emitting unanswerable prose. + env: { ...process.env, GSTACK_HEADLESS: '1' }, }); const parsed = this.parseOutput(out); return { diff --git a/test/helpers/session-runner.ts b/test/helpers/session-runner.ts index ae0454335..675255cf2 100644 --- a/test/helpers/session-runner.ts +++ b/test/helpers/session-runner.ts @@ -176,7 +176,11 @@ export async function runSkillTest(options: { const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], { cwd: workingDirectory, - env: extraEnv ? 
{ ...process.env, ...extraEnv } : undefined, + // Default GSTACK_HEADLESS=1 so eval/E2E runs classify as headless (BLOCK on an + // AskUserQuestion failure rather than emit a prose question no human reads). A + // suite exercising the INTERACTIVE prose-fallback path opts out by passing + // `env: { GSTACK_HEADLESS: '' }` — extraEnv wins because it spreads last. + env: { ...process.env, GSTACK_HEADLESS: '1', ...extraEnv }, stdout: 'pipe', stderr: 'pipe', });