mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-23 10:10:03 +02:00
feat: session-runner spawns hermetic children + isolation canaries
claude -p children now get the allowlist-scrubbed env and a gated --strict-mcp-config (EVALS_HERMETIC=0 restores operator env AND args).

Two gate-tier canaries make the clean room falsifiable:
- hermetic-canary asserts env redirect + scrub + zero MCP servers + nonzero API-key cost from the Bash tool_result (never model prose);
- hermetic-sentinel plants a poisoned operator config (user CLAUDE.md + MCP server) and proves the child cannot see it.

Empirically verified on claude 2.1.175: print mode needs no seed config (the seed serves the PTY path); the child CLI sets CLAUDECODE for its own tools, so that scrub is pinned in unit tests, not E2E.

hermetic-env.ts joins GLOBAL_TOUCHFILES.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -131,14 +131,18 @@ export interface SeedConfigOpts {
|
||||
}
|
||||
|
||||
/**
|
||||
* Minimal $CLAUDE_CONFIG_DIR/.claude.json that gets a fresh-config child past
|
||||
* first-run prompts non-interactively. Every key here was empirically
|
||||
* verified against a real ~/.claude.json (2026-06-12, claude 2.1.175):
|
||||
* Minimal $CLAUDE_CONFIG_DIR/.claude.json for fresh-config children.
|
||||
*
|
||||
* Empirically verified 2026-06-12 on claude 2.1.175: PRINT MODE (`claude -p`)
|
||||
* with ANTHROPIC_API_KEY needs NO seed at all — a fresh empty config dir ran
|
||||
* non-interactively (exit 0, real cost billed to the key). The seed exists
|
||||
* for the PTY path, where first-run TUI prompts DO appear:
|
||||
* - hasCompletedOnboarding: suppresses the onboarding flow
|
||||
* - customApiKeyResponses.approved: suppresses the "use this API key?"
|
||||
* prompt; entries are the key's LAST 20 CHARS
|
||||
* prompt; entries are the key's LAST 20 CHARS (shape verified against a
|
||||
* real ~/.claude.json)
|
||||
* - projects[dir].hasTrustDialogAccepted: pre-trusts repo-cwd PTY sessions
|
||||
* (print mode skips the dialog; PTY plan-mode tests don't)
|
||||
* (the pty-runner's 15s trust-watcher remains as fallback for temp cwds)
|
||||
* bypassPermissionsModeAccepted was considered and dropped: absent from a
|
||||
* real config even though --dangerously-skip-permissions is in daily use.
|
||||
*/
|
||||
|
||||
@@ -10,6 +10,7 @@ import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { getProjectEvalDir } from './eval-store';
|
||||
import { hermeticChildEnv, isHermeticEnabled } from './hermetic-env';
|
||||
|
||||
const GSTACK_DEV_DIR = path.join(os.homedir(), '.gstack-dev');
|
||||
const HEARTBEAT_PATH = path.join(GSTACK_DEV_DIR, 'e2e-live.json'); // heartbeat stays global
|
||||
@@ -167,6 +168,10 @@ export async function runSkillTest(options: {
|
||||
'--max-turns', String(maxTurns),
|
||||
'--allowed-tools', ...allowedTools,
|
||||
];
|
||||
// Hermetic children get zero MCP servers (no --mcp-config is passed).
|
||||
// Gated on the same call-time check as the env scrub so EVALS_HERMETIC=0
|
||||
// restores operator MCP along with the operator env.
|
||||
if (isHermeticEnabled()) args.push('--strict-mcp-config');
|
||||
|
||||
// Write prompt to a temp file OUTSIDE workingDirectory to avoid race conditions
|
||||
// where afterAll cleanup deletes the dir before cat reads the file (especially
|
||||
@@ -176,11 +181,14 @@ export async function runSkillTest(options: {
|
||||
|
||||
const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], {
|
||||
cwd: workingDirectory,
|
||||
// Hermetic by default (see test/helpers/hermetic-env.ts): operator
|
||||
// session context (CONDUCTOR_*, CLAUDECODE, ~/.claude config, ~/.gstack)
|
||||
// never reaches the child; EVALS_HERMETIC=0 restores the legacy env.
|
||||
// Default GSTACK_HEADLESS=1 so eval/E2E runs classify as headless (BLOCK on an
|
||||
// AskUserQuestion failure rather than emit a prose question no human reads). A
|
||||
// suite exercising the INTERACTIVE prose-fallback path opts out by passing
|
||||
// `env: { GSTACK_HEADLESS: '' }` — extraEnv wins because it spreads last.
|
||||
env: { ...process.env, GSTACK_HEADLESS: '1', ...extraEnv },
|
||||
env: hermeticChildEnv({ GSTACK_HEADLESS: '1', ...extraEnv }),
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
});
|
||||
|
||||
@@ -36,6 +36,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'browse-basic': ['browse/src/**', 'browse/test/test-server.ts'],
|
||||
'browse-snapshot': ['browse/src/**', 'browse/test/test-server.ts'],
|
||||
|
||||
// Hermetic isolation canaries (hermetic-env.ts is also a GLOBAL touchfile;
|
||||
// these entries exist so the canaries themselves stay tier-classified)
|
||||
'hermetic-canary': ['test/helpers/hermetic-env.ts', 'test/helpers/session-runner.ts', 'test/skill-e2e-hermetic-canary.test.ts', 'lib/conductor-env-shim.ts'],
|
||||
'hermetic-sentinel': ['test/helpers/hermetic-env.ts', 'test/helpers/session-runner.ts', 'test/skill-e2e-hermetic-canary.test.ts', 'lib/conductor-env-shim.ts'],
|
||||
|
||||
// SKILL.md setup + preamble (depend on ROOT SKILL.md + gen-skill-docs)
|
||||
'skillmd-setup-discovery': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
'skillmd-no-local-binary': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
@@ -437,6 +442,11 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'browse-basic': 'gate',
|
||||
'browse-snapshot': 'gate',
|
||||
|
||||
// Hermetic isolation — gate (deterministic env/config assertions; if the
|
||||
// clean room breaks, every other eval's signal is contaminated)
|
||||
'hermetic-canary': 'gate',
|
||||
'hermetic-sentinel': 'gate',
|
||||
|
||||
// SKILL.md setup — gate (if setup breaks, no skill works)
|
||||
'skillmd-setup-discovery': 'gate',
|
||||
'skillmd-no-local-binary': 'gate',
|
||||
@@ -782,6 +792,7 @@ export const LLM_JUDGE_TOUCHFILES: Record<string, string[]> = {
|
||||
*/
|
||||
export const GLOBAL_TOUCHFILES = [
|
||||
'test/helpers/session-runner.ts', // All E2E tests use this runner
|
||||
'test/helpers/hermetic-env.ts', // Changes every E2E child's environment
|
||||
'test/helpers/eval-store.ts', // All E2E tests store results here
|
||||
'test/helpers/touchfiles.ts', // Self-referential — reclassifying wrong is dangerous
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user