feat: session-runner spawns hermetic children + isolation canaries

claude -p children now get the allowlist-scrubbed env and a gated
--strict-mcp-config (EVALS_HERMETIC=0 restores operator env AND args).
Two gate-tier canaries make the clean room falsifiable: hermetic-canary
asserts env redirect + scrub + zero MCP servers + nonzero API-key cost
from the Bash tool_result (never model prose); hermetic-sentinel plants a
poisoned operator config (user CLAUDE.md + MCP server) and proves the
child cannot see it. Empirically verified on claude 2.1.175: print mode
needs no seed config (the seed serves the PTY path); the child CLI sets
CLAUDECODE for its own tools, so that scrub is pinned in unit tests, not
E2E. hermetic-env.ts joins GLOBAL_TOUCHFILES.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-12 11:13:50 -07:00
parent 2d56961636
commit c3e65b1634
4 changed files with 219 additions and 6 deletions
+9 -1
View File
@@ -10,6 +10,7 @@ import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { getProjectEvalDir } from './eval-store';
import { hermeticChildEnv, isHermeticEnabled } from './hermetic-env';
const GSTACK_DEV_DIR = path.join(os.homedir(), '.gstack-dev');
const HEARTBEAT_PATH = path.join(GSTACK_DEV_DIR, 'e2e-live.json'); // heartbeat stays global
@@ -167,6 +168,10 @@ export async function runSkillTest(options: {
'--max-turns', String(maxTurns),
'--allowed-tools', ...allowedTools,
];
// Hermetic children get zero MCP servers (no --mcp-config is passed).
// Gated on the same call-time check as the env scrub so EVALS_HERMETIC=0
// restores operator MCP along with the operator env.
if (isHermeticEnabled()) args.push('--strict-mcp-config');
// Write prompt to a temp file OUTSIDE workingDirectory to avoid race conditions
// where afterAll cleanup deletes the dir before cat reads the file (especially
@@ -176,11 +181,14 @@ export async function runSkillTest(options: {
const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], {
cwd: workingDirectory,
// Hermetic by default (see test/helpers/hermetic-env.ts): operator
// session context (CONDUCTOR_*, CLAUDECODE, ~/.claude config, ~/.gstack)
// never reaches the child; EVALS_HERMETIC=0 restores the legacy env.
// Default GSTACK_HEADLESS=1 so eval/E2E runs classify as headless (BLOCK on an
// AskUserQuestion failure rather than emit a prose question no human reads). A
// suite exercising the INTERACTIVE prose-fallback path opts out by passing
// `env: { GSTACK_HEADLESS: '' }` — extraEnv wins because it spreads last.
env: { ...process.env, GSTACK_HEADLESS: '1', ...extraEnv },
env: hermeticChildEnv({ GSTACK_HEADLESS: '1', ...extraEnv }),
stdout: 'pipe',
stderr: 'pipe',
});