mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-17 07:10:12 +02:00
test(auq): default GSTACK_HEADLESS=1 in eval/E2E runners
Runs launched by the eval/E2E harnesses are now classified as headless: an AskUserQuestion (AUQ) failure BLOCKs rather than emitting a prose question no one reads. The SDK runner sets the variable by mutating the ambient process.env instead of passing the Options.env object, to avoid breaking the SDK auth pipeline. Interactive-path suites opt out by overriding the env per run. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -300,6 +300,14 @@ export async function runAgentSdkTest(
|
||||
const queryImpl: QueryProvider = opts.queryProvider ?? query;
|
||||
const model = opts.model ?? 'claude-opus-4-7';
|
||||
|
||||
// Default GSTACK_HEADLESS=1 so SDK-driven eval/E2E runs classify as headless: an
|
||||
// AskUserQuestion failure BLOCKs instead of emitting a prose question no human can
|
||||
// answer. Set ambiently (the SDK child inherits process.env) rather than via
|
||||
// sdkOpts.env — passing an env object to the SDK breaks its auth pipeline (see
|
||||
// CLAUDE.md). A suite testing the interactive prose-fallback path sets
|
||||
// process.env.GSTACK_HEADLESS='' before calling.
|
||||
if (process.env.GSTACK_HEADLESS === undefined) process.env.GSTACK_HEADLESS = '1';
|
||||
|
||||
let attempt = 0;
|
||||
let lastErr: unknown = null;
|
||||
|
||||
|
||||
@@ -52,6 +52,9 @@ export class ClaudeAdapter implements ProviderAdapter {
|
||||
timeout: opts.timeoutMs,
|
||||
encoding: 'utf-8',
|
||||
maxBuffer: 32 * 1024 * 1024,
|
||||
// Force GSTACK_HEADLESS=1 (overrides any inherited value) so a benchmark run classifies as headless (an
|
||||
// AskUserQuestion failure BLOCKs rather than emitting unanswerable prose).
|
||||
env: { ...process.env, GSTACK_HEADLESS: '1' },
|
||||
});
|
||||
const parsed = this.parseOutput(out);
|
||||
return {
|
||||
|
||||
@@ -176,7 +176,11 @@ export async function runSkillTest(options: {
|
||||
|
||||
const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], {
|
||||
cwd: workingDirectory,
|
||||
env: extraEnv ? { ...process.env, ...extraEnv } : undefined,
|
||||
// Default GSTACK_HEADLESS=1 so eval/E2E runs classify as headless (BLOCK on an
|
||||
// AskUserQuestion failure rather than emit a prose question no human reads). A
|
||||
// suite exercising the INTERACTIVE prose-fallback path opts out by passing
|
||||
// `env: { GSTACK_HEADLESS: '' }` — extraEnv wins because it spreads last.
|
||||
env: { ...process.env, GSTACK_HEADLESS: '1', ...extraEnv },
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user