Merge remote-tracking branch 'origin/main' into garrytan/trunk-land-skill

# Conflicts: # CHANGELOG.md # VERSION # package.json
2026-06-24 02:29:59 +02:00 · 2026-06-17 08:36:46 -07:00
parent e0bb11dfc9 c7ae63201a
commit 2e102232e4
159 changed files with 15237 additions and 500 deletions
@@ -36,6 +36,7 @@ import {
 import * as fs from 'fs';
 import * as path from 'path';
 import { resolveClaudeBinary as resolveClaudeBinaryShared } from '../../browse/src/claude-bin';
+import { hermeticChildEnv } from './hermetic-env';
 import type { SkillTestResult } from './session-runner';

 // ---------------------------------------------------------------------------
@@ -300,12 +301,17 @@ export async function runAgentSdkTest(
  const queryImpl: QueryProvider = opts.queryProvider ?? query;
  const model = opts.model ?? 'claude-opus-4-7';

-  // NOTE on GSTACK_HEADLESS: the SDK child inherits process.env, so headless
-  // classification for eval/E2E runs is set by the `test:gate` / `test:evals`
-  // package.json scripts (scoped to that invocation), NOT mutated here. We must not
-  // pass sdkOpts.env (it breaks the SDK auth pipeline — see CLAUDE.md) and must not
-  // mutate process.env ambiently (it would leak headless into later interactive-path
-  // tests in the same Bun process — Codex review finding).
+  // NOTE on env: the SDK child gets the COMPLETE hermetic env (allowlist
+  // scrub + ANTHROPIC_API_KEY + hermetic CLAUDE_CONFIG_DIR/GSTACK_HOME), with
+  // per-test opts.env merging last. The historical "passing env: breaks SDK
+  // auth" failure (old CLAUDE.md warning) was partial-env replacement —
+  // Options.env REPLACES the child's entire environment, so an object without
+  // the key killed auth. A complete env is safe (validated 2026-06-12 via
+  // query() with hermeticChildEnv(): success, real cost, Bash tool working).
+  // Do not mutate process.env ambiently here (it would leak into later
+  // interactive-path tests in the same Bun process — Codex review finding);
+  // ambient ANTHROPIC_API_KEY mutation by tests still works because the
+  // builder reads process.env at call time.

  let attempt = 0;
  let lastErr: unknown = null;
@@ -356,7 +362,7 @@ export async function runAgentSdkTest(
        permissionMode: resolvedPermissionMode,
        allowDangerouslySkipPermissions: resolvedPermissionMode === 'bypassPermissions',
        settingSources: opts.settingSources ?? [],
-        env: opts.env,
+        env: hermeticChildEnv(opts.env),
        pathToClaudeCodeExecutable: opts.pathToClaudeCodeExecutable,
        ...(hasCanUseTool ? { canUseTool: opts.canUseTool } : {}),
      };
@@ -145,6 +145,9 @@ export const CARVE_GUARDS: Record<string, CarveGuard> = {
    maxSkeletonBytes: 90_000,
    minUnionBytes: 80_000,
    mustContain: ['SCOPE EXPANSION', 'SELECTIVE EXPANSION', 'HOLD SCOPE', 'SCOPE REDUCTION'],
+    // Default-on Codex outside-voice (codexPreflight block + CODEX_MODE branch
+    // prose replacing the smaller opt-in question) lands this ~5.2% over baseline.
+    maxSizeRatio: 1.08,
  },
  'plan-eng-review': {
    skill: 'plan-eng-review',
@@ -162,9 +165,11 @@ export const CARVE_GUARDS: Record<string, CarveGuard> = {
    minUnionBytes: 70_000,
    mustContain: ['Architecture', 'Code Quality', 'Test', 'Performance'],
    // Cross-cutting preamble growth (v1.57.2.0 AUQ-failure prose fallback + the
-    // decision-memory nudge + the v1.57.4.0 Boil-the-Ocean rename) lands this just
-    // over the strict 1.05; small headroom for the shared preamble additions.
-    maxSizeRatio: 1.06,
+    // decision-memory nudge + the v1.57.4.0 Boil-the-Ocean rename) plus the
+    // default-on Codex outside-voice (codexPreflight block + CODEX_MODE branch
+    // prose, replacing the smaller opt-in question) land this at ~6.6% over the
+    // v1.53.0.0 baseline. Headroom for those intentional additions.
+    maxSizeRatio: 1.08,
  },
  'plan-design-review': {
    skill: 'plan-design-review',
@@ -178,7 +183,9 @@ export const CARVE_GUARDS: Record<string, CarveGuard> = {
      gateAfterStop: 'EXIT PLAN MODE GATE',
    },
    behavioral: 'plan',
-    maxSkeletonBytes: 82_000,
+    // +Conductor AUQ-default-prose rule + one-way/continuation safety in the
+    // always-loaded AskUserQuestion Format section.
+    maxSkeletonBytes: 84_000,
    minUnionBytes: 70_000,
    mustContain: ['design', 'visual'],
  },
@@ -194,9 +201,14 @@ export const CARVE_GUARDS: Record<string, CarveGuard> = {
      gateAfterStop: 'EXIT PLAN MODE GATE',
    },
    behavioral: 'plan',
-    maxSkeletonBytes: 76_000,
+    // +Conductor AUQ-default-prose rule + one-way/destructive prose safety +
+    // continuation protocol in the always-loaded AskUserQuestion Format section.
+    maxSkeletonBytes: 78_000,
    minUnionBytes: 70_000,
    mustContain: ['developer experience', 'Getting Started'],
+    // Default-on Codex outside-voice (codexPreflight block + CODEX_MODE branch
+    // prose replacing the smaller opt-in question) lands this ~5.7% over baseline.
+    maxSizeRatio: 1.08,
  },
  'office-hours': {
    skill: 'office-hours',
@@ -229,14 +241,20 @@ export const CARVE_GUARDS: Record<string, CarveGuard> = {
      gateAfterStop: undefined,
    },
    behavioral: 'prompt',
-    maxSkeletonBytes: 50_000,
+    // +Conductor AUQ-default-prose rule + one-way/continuation safety in the
+    // always-loaded AskUserQuestion Format section.
+    maxSkeletonBytes: 53_000,
    minUnionBytes: 55_000,
    mustContain: ['CHANGELOG', 'Diataxis', 'coverage'],
-    // The AUQ-failure prose fallback (v1.57.2.0) adds ~2KB to every skill's
-    // always-loaded preamble; on this small carved skeleton that lands at ~5.9%
-    // over the pre-carve/pre-AUQ v1.53.0.0 baseline. Headroom for the
-    // cross-cutting addition; all other skills keep the strict 1.05 ceiling.
-    maxSizeRatio: 1.08,
+    // Two intentional additions stack on this small skill: the AUQ-failure prose
+    // fallback (v1.57.2.0, ~2KB to every preamble) AND the new default-on Codex
+    // documentation-review section (codexPreflight + prompt + apply-gate, carved
+    // into release-body so the SKELETON stays under maxSkeletonBytes). On a ~55KB
+    // baseline that whole new capability is ~18.6% of union bytes. The doc review
+    // is a deliberate new feature, not preamble creep; the union ceiling is raised
+    // to match while the skeleton budget (maxSkeletonBytes: 53_000) still caps
+    // the always-loaded cost.
+    maxSizeRatio: 1.20,
  },
  'design-consultation': {
    skill: 'design-consultation',
@@ -250,7 +268,9 @@ export const CARVE_GUARDS: Record<string, CarveGuard> = {
      gateAfterStop: undefined,
    },
    behavioral: 'prompt',
-    maxSkeletonBytes: 64_000,
+    // +Conductor AUQ-default-prose rule + one-way/continuation safety in the
+    // always-loaded AskUserQuestion Format section.
+    maxSkeletonBytes: 67_000,
    minUnionBytes: 72_000,
    mustContain: ['Typography', 'Color', 'Aesthetic Direction'],
    // Cross-cutting preamble growth (v1.57.2.0 AUQ-failure prose fallback ~2KB +
@@ -286,7 +306,9 @@ export const CARVE_GUARDS: Record<string, CarveGuard> = {
      gateAfterStop: undefined,
    },
    behavioral: 'prompt',
-    maxSkeletonBytes: 70_000,
+    // +Conductor AUQ-default-prose rule + one-way/continuation safety in the
+    // always-loaded AskUserQuestion Format section.
+    maxSkeletonBytes: 73_000,
    minUnionBytes: 72_000,
    mustContain: ['OWASP', 'STRIDE', 'daily', 'comprehensive', 'verif'],
    // cso keeps its mode-dispatch + FP-filtering phases always-loaded, so the
@@ -24,6 +24,7 @@
 import * as fs from 'fs';
 import * as os from 'os';
 import * as path from 'path';
+import { hermeticChildEnv, isHermeticEnabled } from './hermetic-env';

 /** Strip ANSI escapes for pattern-matching against visible text. */
 export function stripAnsi(s: string): string {
@@ -120,6 +121,13 @@ export interface ClaudePtySession {
  exited(): boolean;
  /** Exit code, if known. */
  exitCode(): number | null;
+  /**
+   * The hermetic CLAUDE_CONFIG_DIR this session's claude was pointed at, or
+   * null when EVALS_HERMETIC=0. Forensics: hermetic plan files live under
+   * `<hermeticConfigDir>/plans/` (extractPlanFilePath still matches them —
+   * the dir name ends in `/.claude` by contract).
+   */
+  hermeticConfigDir: string | null;
  /**
   * Send SIGINT, then SIGKILL after 1s. Always safe to call multiple times.
   * Awaits process exit before resolving.
@@ -1143,8 +1151,17 @@ export async function launchClaudePty(
  if (permissionMode !== null) {
    args.push('--permission-mode', permissionMode);
  }
+  // Hermetic children get zero MCP servers; gated on the same call-time
+  // check as the env scrub so EVALS_HERMETIC=0 restores operator MCP too.
+  // Before opts.extraArgs so a test could theoretically supply --mcp-config.
+  const hermetic = isHermeticEnabled();
+  if (hermetic) args.push('--strict-mcp-config');
  if (opts.extraArgs) args.push(...opts.extraArgs);

+  // Hermetic by default (test/helpers/hermetic-env.ts): operator session
+  // context never reaches the child; per-test opts.env merges last.
+  const childEnv = hermeticChildEnv(opts.env);
+
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const proc = (Bun as any).spawn([claudePath, ...args], {
    terminal: {
@@ -1155,7 +1172,7 @@ export async function launchClaudePty(
      },
    },
    cwd,
-    env: { ...process.env, ...(opts.env ?? {}) },
+    env: childEnv,
  });

  // Track exit so waitForAny can fail fast if claude crashes.
@@ -1307,6 +1324,7 @@ export async function launchClaudePty(
    pid: () => proc.pid as number | undefined,
    exited: () => exited,
    exitCode: () => exitCodeCaptured,
+    hermeticConfigDir: hermetic ? childEnv.CLAUDE_CONFIG_DIR ?? null : null,
    close,
  };
 }
@@ -15,6 +15,7 @@
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
+import { hermeticChildEnv } from './hermetic-env';

 // --- Interfaces ---

@@ -201,15 +202,18 @@ export async function runCodexSkill(opts: {
    // Build codex exec command
    const args = ['exec', prompt, '--json', '-s', sandbox];

-    // Spawn codex with temp HOME so it discovers our installed skill
+    // Spawn codex with temp HOME so it discovers our installed skill.
+    // Hermetic scrub (test/helpers/hermetic-env.ts) with codex's auth surface
+    // re-admitted: codex auths from $HOME/.codex (copied into tempHome above)
+    // plus OPENAI_API_KEY/CODEX_* when present. HOME override merges last.
    const proc = Bun.spawn(['codex', ...args], {
      cwd: cwd || skillDir,
      stdout: 'pipe',
      stderr: 'pipe',
-      env: {
-        ...process.env,
-        HOME: tempHome,
-      },
+      env: hermeticChildEnv(
+        { HOME: tempHome },
+        { extraAllow: ['OPENAI_API_KEY', 'CODEX_*'] },
+      ),
    });

    // Race against timeout
@@ -14,6 +14,7 @@
 */

 import * as path from 'path';
+import { hermeticChildEnv } from './hermetic-env';

 // --- Interfaces ---

@@ -122,11 +123,16 @@ export async function runGeminiSkill(opts: {
  // Build gemini command
  const args = ['-p', prompt, '--output-format', 'stream-json', '--yolo'];

-  // Spawn gemini — uses real HOME for auth, cwd for skill discovery
+  // Spawn gemini — uses real HOME for auth (~/.gemini; HOME is allowlisted),
+  // cwd for skill discovery. Hermetic scrub with gemini's auth surface
+  // re-admitted (previously this spawn inherited the full operator env).
  const proc = Bun.spawn(['gemini', ...args], {
    cwd: cwd || process.cwd(),
    stdout: 'pipe',
    stderr: 'pipe',
+    env: hermeticChildEnv(undefined, {
+      extraAllow: ['GEMINI_API_KEY', 'GOOGLE_API_KEY', 'GOOGLE_APPLICATION_CREDENTIALS', 'GOOGLE_CLOUD_*', 'GEMINI_*'],
+    }),
  });

  // Race against timeout
@@ -0,0 +1,269 @@
+/**
+ * Unit tests for the hermetic child-env builder. Free tier — no API calls.
+ *
+ * Pins three contracts:
+ * 1. Allowlist semantics: contamination vars dropped, basics/auth/network
+ *    kept, overrides merge last, EVALS_HERMETIC=0 is byte-identical legacy.
+ * 2. Seed-config shape: 20-char key suffix, trusted dirs, undefined-key safe.
+ * 3. Dir lifecycle: /.claude suffix (extractPlanFilePath contract —
+ *    claude-pty-runner.ts:191), sync singleton reuse, pid-aware GC.
+ */
+
+import { describe, test, expect, afterAll } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import {
+  buildHermeticEnv,
+  buildSeedConfig,
+  isHermeticEnabled,
+  getHermeticDirs,
+  gcStaleHermeticDirs,
+  hermeticChildEnv,
+} from './hermetic-env';
+
+const CONTAMINATED: NodeJS.ProcessEnv = { // simulated operator shell: mixes vars the tests expect kept with vars they expect dropped
+  PATH: '/usr/bin', HOME: '/Users/op', TMPDIR: '/tmp', TERM: 'xterm',
+  ANTHROPIC_API_KEY: 'sk-ant-0123456789abcdefghijklmn',
+  ANTHROPIC_BASE_URL: 'https://proxy.example/api',
+  ANTHROPIC_MODEL: 'sneaky-model-override',
+  EVALS_MODEL: 'claude-sonnet-4-6',
+  GITHUB_ACTIONS: 'true',
+  HTTPS_PROXY: 'http://corp:3128',
+  NODE_EXTRA_CA_CERTS: '/etc/corp.pem',
+  CONDUCTOR_WORKSPACE_PATH: '/Users/op/conductor/ws',
+  CONDUCTOR_SESSION: '1',
+  CLAUDECODE: '1',
+  CLAUDE_CODE_ENTRYPOINT: 'cli',
+  CLAUDE_CONFIG_DIR: '/Users/op/.claude',
+  GSTACK_HOME: '/Users/op/.gstack',
+  GSTACK_HEADLESS_DEFAULT: 'x',
+  MCP_TIMEOUT: '5000',
+  GBRAIN_ENDPOINT: 'http://localhost:1234',
+  OPENAI_API_KEY: 'sk-openai-secret',
+  VOYAGE_API_KEY: 'vg-secret',
+  GH_TOKEN: 'gho_secret',
+  SSH_AUTH_SOCK: '/tmp/ssh.sock',
+  GIT_AUTHOR_NAME: 'Op',
+};
+
+const HERMETIC_VARS = { CLAUDE_CONFIG_DIR: '/x/.claude', GSTACK_HOME: '/x/gstack-home' }; // stand-in hermeticVars; the tests expect these to replace the operator values above
+
+describe('buildHermeticEnv allowlist', () => {
+  const env = buildHermeticEnv(CONTAMINATED, HERMETIC_VARS); // shared scrub result; the first four tests only read it
+
+  test('keeps process basics, network, CI, and eval knobs', () => {
+    expect(env.PATH).toBe('/usr/bin');
+    expect(env.HOME).toBe('/Users/op');
+    expect(env.EVALS_MODEL).toBe('claude-sonnet-4-6');
+    expect(env.GITHUB_ACTIONS).toBe('true');
+    expect(env.HTTPS_PROXY).toBe('http://corp:3128');
+    expect(env.NODE_EXTRA_CA_CERTS).toBe('/etc/corp.pem');
+  });
+
+  test('keeps named auth vars but not the broad ANTHROPIC_ prefix', () => {
+    expect(env.ANTHROPIC_API_KEY).toBe(CONTAMINATED.ANTHROPIC_API_KEY);
+    expect(env.ANTHROPIC_BASE_URL).toBe(CONTAMINATED.ANTHROPIC_BASE_URL);
+    expect(env.ANTHROPIC_MODEL).toBeUndefined(); // behavior knob, not auth
+  });
+
+  test('drops session-context and operator-credential vars', () => {
+    for (const k of [ // none of these is an exact allowlist entry or starts with EVALS_/GITHUB_
+      'CONDUCTOR_WORKSPACE_PATH', 'CONDUCTOR_SESSION', 'CLAUDECODE',
+      'CLAUDE_CODE_ENTRYPOINT', 'GSTACK_HEADLESS_DEFAULT', 'MCP_TIMEOUT',
+      'GBRAIN_ENDPOINT', 'OPENAI_API_KEY', 'VOYAGE_API_KEY', 'GH_TOKEN',
+      'SSH_AUTH_SOCK', 'GIT_AUTHOR_NAME',
+    ]) {
+      expect(env[k]).toBeUndefined();
+    }
+  });
+
+  test('redirects CLAUDE_CONFIG_DIR and GSTACK_HOME to hermetic values', () => {
+    expect(env.CLAUDE_CONFIG_DIR).toBe('/x/.claude');
+    expect(env.GSTACK_HOME).toBe('/x/gstack-home');
+  });
+
+  test('overrides merge last — per-test re-contamination is deliberate', () => {
+    const e = buildHermeticEnv(CONTAMINATED, HERMETIC_VARS, {
+      CONDUCTOR_WORKSPACE_PATH: '/tmp/test-ws',
+      GSTACK_HOME: '/tmp/test-home',
+      GSTACK_HEADLESS: '',
+    });
+    expect(e.CONDUCTOR_WORKSPACE_PATH).toBe('/tmp/test-ws');
+    expect(e.GSTACK_HOME).toBe('/tmp/test-home'); // override beats the hermetic redirect
+    expect(e.GSTACK_HEADLESS).toBe(''); // an empty string is a defined value and is kept
+  });
+
+  test('promotes GSTACK_ANTHROPIC_API_KEY when canonical absent (shared shim fn)', () => {
+    const base = { ...CONTAMINATED } as NodeJS.ProcessEnv;
+    delete base.ANTHROPIC_API_KEY;
+    base.GSTACK_ANTHROPIC_API_KEY = 'sk-ant-promoted-9876543210';
+    const e = buildHermeticEnv(base, HERMETIC_VARS);
+    expect(e.ANTHROPIC_API_KEY).toBe('sk-ant-promoted-9876543210');
+    expect(e.GSTACK_ANTHROPIC_API_KEY).toBeUndefined(); // GSTACK_* still dropped
+  });
+
+  test('extraAllow re-admits exact names and prefixes per runner', () => {
+    const e = buildHermeticEnv(CONTAMINATED, HERMETIC_VARS, undefined, {
+      extraAllow: ['OPENAI_API_KEY', 'GIT_*'], // one exact name, one prefix rule
+    });
+    expect(e.OPENAI_API_KEY).toBe('sk-openai-secret');
+    expect(e.GIT_AUTHOR_NAME).toBe('Op');
+    expect(e.GH_TOKEN).toBeUndefined(); // not in extraAllow
+  });
+
+  test('TERM falls back when base omits it', () => {
+    const base = { ...CONTAMINATED } as NodeJS.ProcessEnv;
+    delete base.TERM;
+    expect(buildHermeticEnv(base, HERMETIC_VARS).TERM).toBe('xterm-256color');
+  });
+});
+
+describe('EVALS_HERMETIC=0 escape hatch', () => {
+  test('returns byte-identical legacy env, overrides still last', () => {
+    const base = { ...CONTAMINATED, EVALS_HERMETIC: '0' } as NodeJS.ProcessEnv;
+    const e = buildHermeticEnv(base, HERMETIC_VARS, { GSTACK_HEADLESS: '1' });
+    // Legacy passthrough: every base var survives and HERMETIC_VARS is NOT applied.
+    expect(e.CONDUCTOR_WORKSPACE_PATH).toBe(CONTAMINATED.CONDUCTOR_WORKSPACE_PATH);
+    expect(e.CLAUDE_CONFIG_DIR).toBe('/Users/op/.claude');
+    expect(e.GSTACK_HOME).toBe('/Users/op/.gstack');
+    expect(e.GSTACK_HEADLESS).toBe('1');
+    expect(e).toEqual({ ...(base as Record<string, string>), GSTACK_HEADLESS: '1' }); // whole-object check: nothing added, nothing scrubbed
+  });
+
+  test('isHermeticEnabled reads at call time (ESM-hoist safety)', () => {
+    const prev = process.env.EVALS_HERMETIC; // restored in finally so the pin cannot leak into other tests
+    try {
+      process.env.EVALS_HERMETIC = '0';
+      expect(isHermeticEnabled()).toBe(false);
+      process.env.EVALS_HERMETIC = '1';
+      expect(isHermeticEnabled()).toBe(true);
+      delete process.env.EVALS_HERMETIC;
+      expect(isHermeticEnabled()).toBe(true);
+    } finally {
+      if (prev === undefined) delete process.env.EVALS_HERMETIC;
+      else process.env.EVALS_HERMETIC = prev;
+    }
+  });
+});
+
+describe('buildSeedConfig', () => {
+  // Single fixture key shared by the cases that need one.
+  const SAMPLE_KEY = 'sk-ant-0123456789abcdefghijklmn';
+
+  test('stores only the 20-char key suffix and trusts the given dirs', () => {
+    const seed = buildSeedConfig({ apiKey: SAMPLE_KEY, trustedDirs: ['/repo/root'] }) as any;
+    expect(seed.hasCompletedOnboarding).toBe(true);
+
+    // The approved list carries exactly one entry: the trailing 20 characters of the key.
+    const [firstApproved] = seed.customApiKeyResponses.approved;
+    expect(seed.customApiKeyResponses.approved).toHaveLength(1);
+    expect(firstApproved).toHaveLength(20);
+    expect(SAMPLE_KEY.endsWith(firstApproved)).toBe(true);
+
+    // The trusted dir is pre-accepted on both project flags.
+    const project = seed.projects['/repo/root'];
+    expect(project.hasTrustDialogAccepted).toBe(true);
+    expect(project.hasCompletedProjectOnboarding).toBe(true);
+  });
+
+  test('apiKey undefined → omits customApiKeyResponses, does not throw', () => {
+    const keyless = buildSeedConfig({ apiKey: undefined, trustedDirs: [] }) as any;
+    expect(keyless.customApiKeyResponses).toBeUndefined();
+    expect(keyless.hasCompletedOnboarding).toBe(true);
+  });
+
+  test('no full key material anywhere in the seed', () => {
+    const serialized = JSON.stringify(buildSeedConfig({ apiKey: SAMPLE_KEY, trustedDirs: [] }));
+    expect(serialized.includes(SAMPLE_KEY)).toBe(false);
+  });
+});
+
+describe('getHermeticDirs lifecycle', () => {
+  test('configDir ends in /.claude — extractPlanFilePath contract', () => {
+    // The PTY runner's plan-path extractor anchors on `.claude/plans/` under a
+    // /var|/tmp prefix, so the trailing `.claude` segment is what lets hermetic
+    // plan files be found without touching the extractor.
+    const { configDir } = getHermeticDirs();
+    const expectedSuffix = `${path.sep}.claude`;
+    expect(configDir.endsWith(expectedSuffix)).toBe(true);
+    expect(configDir.startsWith(os.tmpdir())).toBe(true);
+  });
+
+  test('sync singleton: repeat calls return the same dirs', () => {
+    const first = getHermeticDirs();
+    const second = getHermeticDirs();
+    expect(first).toBe(second);
+  });
+
+  test('seeds .claude.json in the config dir', () => {
+    const seedPath = path.join(getHermeticDirs().configDir, '.claude.json');
+    const seed = JSON.parse(fs.readFileSync(seedPath, 'utf-8'));
+    expect(seed.hasCompletedOnboarding).toBe(true);
+    // This test file lives two levels below the repo root, which is the dir the seed trusts.
+    const root = path.resolve(__dirname, '..', '..');
+    expect(seed.projects[root].hasTrustDialogAccepted).toBe(true);
+  });
+});
+
+describe('gcStaleHermeticDirs', () => {
+  /**
+   * Runs `body` against a fresh scratch directory and always removes it
+   * afterwards, so a failing expect() cannot leak the fixture tree into
+   * os.tmpdir().
+   */
+  function withScratchDir(prefix: string, body: (tmp: string) => void): void {
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+    try {
+      body(tmp);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  }
+
+  test('removes dead-pid dirs, keeps live-pid and foreign dirs', () => {
+    withScratchDir('hermetic-gc-test-', (tmp) => {
+      // Find a pid that is definitely dead: spawn-and-reap is overkill; use a
+      // huge pid beyond pid_max on macOS/Linux defaults.
+      const deadPid = 99999999;
+      const dead = path.join(tmp, `gstack-hermetic-${deadPid}-abc`);
+      const live = path.join(tmp, `gstack-hermetic-${process.pid}-abc`);
+      const foreign = path.join(tmp, 'unrelated-dir');
+      const malformed = path.join(tmp, 'gstack-hermetic-notapid-abc');
+      for (const d of [dead, live, foreign, malformed]) fs.mkdirSync(d);
+      // GC only reclaims dirs older than its 1h age floor (PID-reuse guard);
+      // backdate the dead-pid dir's mtime so it qualifies.
+      const old = new Date(Date.now() - 2 * 60 * 60 * 1000);
+      fs.utimesSync(dead, old, old);
+
+      gcStaleHermeticDirs(tmp);
+
+      expect(fs.existsSync(dead)).toBe(false);
+      expect(fs.existsSync(live)).toBe(true);
+      expect(fs.existsSync(foreign)).toBe(true);
+      expect(fs.existsSync(malformed)).toBe(true); // never guess on malformed names
+    });
+  });
+
+  test('keeps a fresh dead-pid dir (PID-reuse grace window)', () => {
+    withScratchDir('hermetic-gc-fresh-', (tmp) => {
+      // Dead pid but just created — must survive GC, else PID reuse could delete
+      // a dir whose original pid exited and got recycled to a live process.
+      const freshDead = path.join(tmp, 'gstack-hermetic-99999999-xyz');
+      fs.mkdirSync(freshDead);
+      gcStaleHermeticDirs(tmp);
+      expect(fs.existsSync(freshDead)).toBe(true);
+    });
+  });
+});
+
+describe('hermeticChildEnv composition', () => {
+  /**
+   * Runs `body` with process.env.EVALS_HERMETIC pinned to `value`
+   * (undefined = unset) and restores the previous state afterwards, even when
+   * an expect() throws. Both tests pin the flag explicitly so the suite gives
+   * the same result whether or not the operator exported EVALS_HERMETIC.
+   */
+  function withEvalsHermetic(value: string | undefined, body: () => void): void {
+    const prev = process.env.EVALS_HERMETIC;
+    try {
+      if (value === undefined) delete process.env.EVALS_HERMETIC;
+      else process.env.EVALS_HERMETIC = value;
+      body();
+    } finally {
+      if (prev === undefined) delete process.env.EVALS_HERMETIC;
+      else process.env.EVALS_HERMETIC = prev;
+    }
+  }
+
+  test('hermetic by default: redirects config dirs, drops contamination', () => {
+    // "Default" means the flag is unset. Without the pin, running the suite
+    // under EVALS_HERMETIC=0 would turn this into a legacy-passthrough call
+    // and fail every assertion below.
+    withEvalsHermetic(undefined, () => {
+      // process.env in a real test run may carry CONDUCTOR_*/CLAUDECODE — the
+      // composition must scrub them and point at the singleton dirs.
+      const e = hermeticChildEnv({ GSTACK_HEADLESS: '1' });
+      const dirs = getHermeticDirs();
+      expect(e.CLAUDE_CONFIG_DIR).toBe(dirs.configDir);
+      expect(e.GSTACK_HOME).toBe(dirs.gstackHome);
+      expect(e.GSTACK_HEADLESS).toBe('1');
+      expect(e.CLAUDECODE).toBeUndefined();
+      expect(e.CONDUCTOR_WORKSPACE_PATH).toBeUndefined();
+    });
+  });
+
+  test('EVALS_HERMETIC=0: legacy passthrough of live process.env', () => {
+    withEvalsHermetic('0', () => {
+      const e = hermeticChildEnv({ EXTRA: 'x' });
+      expect(e.PATH).toBe(process.env.PATH as string);
+      expect(e.EXTRA).toBe('x');
+      // No hermetic redirection in legacy mode.
+      expect(e.CLAUDE_CONFIG_DIR).toBe(process.env.CLAUDE_CONFIG_DIR as any);
+    });
+  });
+});
+
+afterAll(() => {
+  // Intentionally empty: getHermeticDirs() registers a process 'exit' hook that removes runRoot.
+});
@@ -0,0 +1,276 @@
+/**
+ * Hermetic child environment for E2E test runners.
+ *
+ * Local E2E runs spawn `claude` (and codex/gemini/SDK) children that, until
+ * this module, inherited the operator's full session context: ~/.claude
+ * (user CLAUDE.md, .claude.json MCP servers incl. gbrain + Conductor,
+ * skills), ~/.gstack decision logs, and CONDUCTOR_-/CLAUDECODE-style env vars.
+ * CI was hermetic only by accident (fresh Docker /home/runner). This module
+ * makes local children see a CI-equivalent clean room by default.
+ *
+ *   operator shell (contaminated)            hermetic child env
+ *   ┌─────────────────────────────┐  buildHermeticEnv()
+ *   │ PATH, HOME, TMPDIR, ...     │── allowlist ─────────► kept
+ *   │ HTTP(S)_PROXY, SSL_CERT_*   │── allowlist ─────────► kept (network)
+ *   │ ANTHROPIC_API_KEY/BASE_URL/ │── named list ────────► kept (auth)
+ *   │   AUTH_TOKEN                │
+ *   │ GSTACK_ANTHROPIC_API_KEY    │── promotedEnv() ─────► ANTHROPIC_API_KEY
+ *   │ CONDUCTOR_*, CLAUDECODE,    │
+ *   │ CLAUDE_*, GSTACK_*, MCP_*,  │── dropped ───────────► ∅
+ *   │ GBRAIN_*, GH_TOKEN, ...     │
+ *   └─────────────────────────────┘
+ *      + per-runner extraAllow (codex: OpenAI vars; gemini: Google vars)
+ *      + CLAUDE_CONFIG_DIR=<runRoot>/.claude  GSTACK_HOME=<runRoot>/gstack-home
+ *      + per-test overrides spread LAST
+ *
+ * Escape hatch: EVALS_HERMETIC=0 restores the legacy contaminated env
+ * byte-identically (runners must also gate --strict-mcp-config on
+ * isHermeticEnabled() so the escape hatch restores args too).
+ *
+ * isHermeticEnabled() is evaluated at CALL time, never at module load —
+ * ESM hoists imports above any in-file `process.env.EVALS_HERMETIC = '0'`
+ * assignment, so a module-load-time read would silently ignore test pins.
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { promotedEnv } from '../../lib/conductor-env-shim';
+import { isProcessAlive } from '../../browse/src/error-handling';
+
+/** Exact env names a hermetic child keeps. A name that is not listed here, matches no
+ * ALLOW_PREFIXES entry, and is not re-admitted via opts.extraAllow is dropped. */
+const ALLOW_EXACT = new Set([
+  // Process basics
+  'PATH', 'HOME', 'TMPDIR', 'TERM', 'COLORTERM', 'LANG', 'LC_ALL', 'SHELL',
+  'USER', 'LOGNAME', 'TZ', 'NODE_ENV', 'CI',
+  // Browser/runtime caches the child legitimately shares with the operator
+  'PLAYWRIGHT_BROWSERS_PATH',
+  // Network reachability — without these, children on proxied networks can't
+  // reach the Anthropic API at all
+  'HTTP_PROXY', 'HTTPS_PROXY', 'NO_PROXY',
+  'http_proxy', 'https_proxy', 'no_proxy',
+  'SSL_CERT_FILE', 'SSL_CERT_DIR', 'NODE_EXTRA_CA_CERTS',
+  // Auth — named, NOT the broad ANTHROPIC_* prefix: a prefix rule would
+  // smuggle model/beta/debug knobs that change eval behavior
+  'ANTHROPIC_API_KEY',   // the auth credential evals require
+  'ANTHROPIC_BASE_URL',  // API endpoint override (corp proxies)
+  'ANTHROPIC_AUTH_TOKEN', // bearer-token auth variant
+]);
+
+/** Prefix rules: eval-harness knobs + CI metadata. Deliberately NOT here:
+ * CONDUCTOR_* / CLAUDE_* (incl. CLAUDECODE, CLAUDE_CODE_ENTRYPOINT) /
+ * GSTACK_* / MCP_* / GBRAIN_* — session-context contamination; and operator
+ * credentials (GH_TOKEN, SSH_AUTH_SOCK, GIT_*, OPENAI_API_KEY,
+ * VOYAGE_API_KEY) — CI doesn't have them and eval children have no business
+ * using them. A test that legitimately needs one opts in via its own env
+ * override; a provider runner (codex/gemini) re-admits its auth vars via
+ * opts.extraAllow. */
+const ALLOW_PREFIXES = ['EVALS_', 'GITHUB_']; // matched with String.startsWith in buildHermeticEnv
+
+export interface HermeticEnvOpts {
+  /** Per-runner additional allowed names (exact match) or prefixes (entries
+   * ending in '*'). Example: codex runner passes ['OPENAI_API_KEY', 'CODEX_*']. */
+  extraAllow?: string[]; // NOTE: a bare '*' strips to the empty prefix, which matches every name
+}
+
+/**
+ * Reports whether the hermetic scrub is active for `env`: true unless
+ * EVALS_HERMETIC is exactly the string '0'. Evaluated on every call, never
+ * cached at module load (see the module doc on ESM import hoisting).
+ */
+export function isHermeticEnabled(env: NodeJS.ProcessEnv = process.env): boolean {
+  const optedOut = env.EVALS_HERMETIC === '0';
+  return !optedOut;
+}
+
+/**
+ * Pure allowlist scrub of `base`; performs no I/O.
+ *
+ * Layering, lowest to highest precedence:
+ *   1. entries of promotedEnv(base) whose name is allowed (ALLOW_EXACT,
+ *      ALLOW_PREFIXES, or opts.extraAllow),
+ *   2. a TERM fallback of 'xterm-256color' when TERM is missing or empty,
+ *   3. `hermeticVars`,
+ *   4. `overrides` — always last, so per-test env (GSTACK_HOME,
+ *      CONDUCTOR_WORKSPACE_PATH, GSTACK_HEADLESS opt-out) wins over the scrub.
+ *
+ * When `base.EVALS_HERMETIC` is '0' the scrub and `hermeticVars` are skipped:
+ * the result is `base` with `overrides` applied on top.
+ *
+ * Entries whose value is undefined are skipped in every layer, so an
+ * undefined override does not remove a name set by a lower layer.
+ */
+export function buildHermeticEnv(
+  base: NodeJS.ProcessEnv,
+  hermeticVars: Record<string, string>,
+  overrides?: Record<string, string | undefined>,
+  opts?: HermeticEnvOpts,
+): Record<string, string> {
+  // Copies every defined entry of `src` onto `dest`.
+  const copyDefined = (
+    dest: Record<string, string>,
+    src: Record<string, string | undefined>,
+  ): void => {
+    for (const [name, value] of Object.entries(src)) {
+      if (value !== undefined) dest[name] = value;
+    }
+  };
+
+  if (!isHermeticEnabled(base)) {
+    // Escape hatch: operator env passes through, overrides still applied last.
+    const passthrough: Record<string, string> = {};
+    copyDefined(passthrough, base);
+    copyDefined(passthrough, overrides ?? {});
+    return passthrough;
+  }
+
+  const promoted = promotedEnv(base);
+
+  // Split extraAllow into exact names and prefixes (the trailing '*' is stripped).
+  const exactExtras = new Set<string>();
+  const prefixExtras: string[] = [];
+  (opts?.extraAllow ?? []).forEach((entry) => {
+    if (entry.endsWith('*')) {
+      prefixExtras.push(entry.slice(0, -1));
+    } else {
+      exactExtras.add(entry);
+    }
+  });
+
+  const isAllowed = (name: string): boolean => {
+    if (ALLOW_EXACT.has(name) || exactExtras.has(name)) return true;
+    if (ALLOW_PREFIXES.some((prefix) => name.startsWith(prefix))) return true;
+    return prefixExtras.some((prefix) => name.startsWith(prefix));
+  };
+
+  const scrubbed: Record<string, string> = {};
+  for (const [name, value] of Object.entries(promoted)) {
+    if (value !== undefined && isAllowed(name)) scrubbed[name] = value;
+  }
+  if (!scrubbed.TERM) scrubbed.TERM = 'xterm-256color';
+  Object.assign(scrubbed, hermeticVars);
+  copyDefined(scrubbed, overrides ?? {});
+  return scrubbed;
+}
+
+export interface SeedConfigOpts {
+  /** When undefined (operator has no key exported), customApiKeyResponses is
+   * omitted — the child fails auth exactly as it would today, no throw here. */
+  apiKey: string | undefined;
+  trustedDirs: string[]; // each dir becomes a projects[dir] entry with both trust/onboarding flags set to true
+}
+
+/**
+ * Builds the minimal $CLAUDE_CONFIG_DIR/.claude.json content for a child that
+ * starts from a fresh config dir.
+ *
+ * Per the original author's notes (verified 2026-06-12 on claude 2.1.175):
+ * print mode (`claude -p`) with ANTHROPIC_API_KEY needs no seed at all — a
+ * fresh empty config dir ran non-interactively. The seed exists for the PTY
+ * path, where first-run TUI prompts do appear:
+ * - hasCompletedOnboarding: suppresses the onboarding flow
+ * - customApiKeyResponses.approved: suppresses the "use this API key?"
+ *   prompt; each entry is the key's LAST 20 CHARS (shape taken from a real
+ *   ~/.claude.json)
+ * - projects[dir].hasTrustDialogAccepted: pre-trusts repo-cwd PTY sessions
+ *   (the pty-runner's 15s trust-watcher stays as the fallback for temp cwds)
+ * bypassPermissionsModeAccepted was considered and left out: it is absent
+ * from a real config even with --dangerously-skip-permissions in daily use.
+ *
+ * @param opts.apiKey      Key whose last 20 chars are recorded as approved;
+ *                         when falsy, customApiKeyResponses is omitted.
+ * @param opts.trustedDirs Directories to pre-trust.
+ * @returns A plain JSON-serializable object; the full key never appears in it.
+ */
+export function buildSeedConfig(opts: SeedConfigOpts): Record<string, unknown> {
+  const { apiKey, trustedDirs } = opts;
+
+  const projectEntries = trustedDirs.map((dir): [string, Record<string, boolean>] => [
+    dir,
+    { hasTrustDialogAccepted: true, hasCompletedProjectOnboarding: true },
+  ]);
+
+  const seed: Record<string, unknown> = {
+    hasCompletedOnboarding: true,
+    projects: Object.fromEntries(projectEntries),
+  };
+
+  if (!apiKey) return seed;
+
+  // Only the trailing 20 characters are stored, never the whole key.
+  seed.customApiKeyResponses = { approved: [apiKey.slice(-20)] };
+  return seed;
+}
+
+export interface HermeticDirs {
+  /** Ends in `/.claude` — load-bearing: extractPlanFilePath in
+   * claude-pty-runner.ts:191 anchors plan-file paths on `.claude/plans/`
+   * under a /var|/tmp prefix. Renaming this segment breaks PTY plan tests. */
+  configDir: string;
+  gstackHome: string; // <runRoot>/gstack-home; hermeticChildEnv exports it as GSTACK_HOME
+  runRoot: string; // per-process mkdtemp dir under os.tmpdir(), named gstack-hermetic-<pid>-<random>
+}
+
+const DIR_PREFIX = 'gstack-hermetic-'; // run roots are named <DIR_PREFIX><pid>-<random>; the GC parses the pid back out
+
+let cachedDirs: HermeticDirs | null = null; // per-process memo for getHermeticDirs(); set only after a successful seed
+
+/**
+ * Absolute path of the repository root, used as the trusted dir in the seed.
+ * This module sits at <root>/test/helpers, so the root is two levels up.
+ */
+function repoRoot(): string {
+  const twoLevelsUp = path.join('..', '..');
+  return path.resolve(__dirname, twoLevelsUp);
+}
+
+/**
+ * Sync memoized per-process singleton — intentionally NO async gap between
+ * the cache check and create+seed, so concurrent first calls under
+ * `bun test --concurrent` cannot double-create or observe a half-seeded dir.
+ * Shared across all tests in the process: that matches CI's within-job
+ * shared /home/runner (operator isolation, not per-test isolation).
+ */
+export function getHermeticDirs(): HermeticDirs {
+  if (cachedDirs) return cachedDirs;
+
+  gcStaleHermeticDirs(); // sweep leftovers of earlier processes before creating our own dir
+
+  // Embed our pid so the GC of future processes can check liveness.
+  const runRoot = fs.mkdtempSync(path.join(os.tmpdir(), `${DIR_PREFIX}${process.pid}-`));
+  const configDir = path.join(runRoot, '.claude');
+  const gstackHome = path.join(runRoot, 'gstack-home');
+
+  // A half-seeded config dir means children hang on first-run prompts until
+  // the test timeout — far worse than failing loudly here. So we throw on
+  // failure, but tear down the partial dir first: an unseeded runRoot named
+  // with our (alive) pid would be skipped by this process's GC and leak until
+  // process exit, so remove it before rethrowing.
+  try {
+    fs.mkdirSync(configDir, { recursive: true });
+    fs.mkdirSync(gstackHome, { recursive: true });
+    const seed = buildSeedConfig({
+      apiKey: process.env.ANTHROPIC_API_KEY ?? process.env.GSTACK_ANTHROPIC_API_KEY, // NOTE(review): `??` falls back only when the first is undefined, not ''; read once per process
+      trustedDirs: [repoRoot()],
+    });
+    fs.writeFileSync(path.join(configDir, '.claude.json'), JSON.stringify(seed, null, 2));
+  } catch (err) {
+    try { fs.rmSync(runRoot, { recursive: true, force: true }); } catch { /* best-effort */ }
+    throw err;
+  }
+
+  process.on('exit', () => {
+    // Exit handlers cannot await: sync best-effort removal only. Anything
+    // left behind is reclaimed by the next process's pid-aware GC.
+    try { fs.rmSync(runRoot, { recursive: true, force: true }); } catch { /* GC reclaims */ }
+  });
+
+  cachedDirs = { configDir, gstackHome, runRoot }; // assigned last, so a failed seed is retried on the next call
+  return cachedDirs;
+}
+
+/** A dir younger than this (by its mtime) is never GC'd even if its pid looks
+ * dead — guards against PID reuse deleting a freshly-created dir whose original
+ * pid exited and was recycled to an unrelated live process between create and GC. */
+const GC_MIN_AGE_MS = 60 * 60 * 1000; // 1h
+
+/**
+ * Reclaim leftovers from crashed runs under `tmpDir`. Two signals, both
+ * required: the pid embedded in the dir name is dead AND the dir's mtime is
+ * older than GC_MIN_AGE_MS. Pid-alone would risk PID-reuse false-deletes of
+ * live dirs; age-alone would delete the config of any live run that outlasts
+ * the age floor out from under it.
+ *
+ * Names whose pid segment is not a plain positive decimal integer are never
+ * touched. Every filesystem failure is swallowed: GC is best-effort and must
+ * not fail the caller. Exported for tests.
+ *
+ * @param tmpDir Directory to scan; defaults to os.tmpdir().
+ */
+export function gcStaleHermeticDirs(tmpDir: string = os.tmpdir()): void {
+  let entries: string[];
+  try { entries = fs.readdirSync(tmpDir); } catch { return; }
+  const now = Date.now();
+  for (const name of entries) {
+    if (!name.startsWith(DIR_PREFIX)) continue;
+    const [pidStr = ''] = name.slice(DIR_PREFIX.length).split('-');
+    // Strict decimal digits only. Number() on its own also accepts '1e3',
+    // '0x10' and '12.0', which would make a foreign dir look like it names a
+    // real pid — never guess on malformed names.
+    if (!/^[1-9]\d*$/.test(pidStr)) continue;
+    const pid = Number(pidStr);
+    if (!Number.isSafeInteger(pid)) continue; // absurdly long digit runs are not pids
+    if (pid === process.pid || isProcessAlive(pid)) continue;
+    const full = path.join(tmpDir, name);
+    try {
+      if (now - fs.statSync(full).mtimeMs < GC_MIN_AGE_MS) continue; // too fresh
+    } catch { continue; } // vanished or unreadable — leave it
+    try { fs.rmSync(full, { recursive: true, force: true }); } catch { /* best-effort */ }
+  }
+}
+
+/**
+ * The composition the runners call: scrub the live process.env, point the
+ * child at the per-process hermetic dirs, and apply per-test `overrides`
+ * last. With EVALS_HERMETIC=0 no hermetic dirs are created and the legacy
+ * env (process.env plus overrides) is returned instead.
+ *
+ * @param overrides Per-test env entries; they win over everything else.
+ * @param opts      Per-runner allowlist extensions (see HermeticEnvOpts).
+ */
+export function hermeticChildEnv(
+  overrides?: Record<string, string | undefined>,
+  opts?: HermeticEnvOpts,
+): Record<string, string> {
+  const hermeticVars: Record<string, string> = {};
+  if (isHermeticEnabled()) {
+    // Only touch the filesystem when the hermetic path is actually taken.
+    const { configDir, gstackHome } = getHermeticDirs();
+    hermeticVars.CLAUDE_CONFIG_DIR = configDir;
+    hermeticVars.GSTACK_HOME = gstackHome;
+  }
+  return buildHermeticEnv(process.env, hermeticVars, overrides, opts);
+}
@@ -210,7 +210,11 @@ const MONOLITH_INVARIANTS: ParityInvariant[] = [
    skill: 'review',
    mustContain: ['confidence', 'P1', 'P2'],
    mustHaveHeadings: ['## Preamble', '## When to invoke'],
-    maxSizeRatio: 1.05,
+    // The adversarial step swapped its bare `command -v codex` check for the shared
+    // codexPreflight() block (install + auth tri-state + CODEX_MODE branch prose),
+    // landing ~6.3% over the v1.53.0.0 baseline. Intentional: it adds proper
+    // not-installed vs not-authed handling, not slop.
+    maxSizeRatio: 1.08,
    minBytes: 70_000,
  },
  {
@@ -10,6 +10,7 @@ import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
 import { getProjectEvalDir } from './eval-store';
+import { hermeticChildEnv, isHermeticEnabled } from './hermetic-env';

 const GSTACK_DEV_DIR = path.join(os.homedir(), '.gstack-dev');
 const HEARTBEAT_PATH = path.join(GSTACK_DEV_DIR, 'e2e-live.json'); // heartbeat stays global
@@ -167,6 +168,10 @@ export async function runSkillTest(options: {
    '--max-turns', String(maxTurns),
    '--allowed-tools', ...allowedTools,
  ];
+  // Hermetic children get zero MCP servers (no --mcp-config is passed).
+  // Gated on the same call-time check as the env scrub so EVALS_HERMETIC=0
+  // restores operator MCP along with the operator env.
+  if (isHermeticEnabled()) args.push('--strict-mcp-config');

  // Write prompt to a temp file OUTSIDE workingDirectory to avoid race conditions
  // where afterAll cleanup deletes the dir before cat reads the file (especially
@@ -176,11 +181,14 @@ export async function runSkillTest(options: {

  const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], {
    cwd: workingDirectory,
+    // Hermetic by default (see test/helpers/hermetic-env.ts): operator
+    // session context (CONDUCTOR_*, CLAUDECODE, ~/.claude config, ~/.gstack)
+    // never reaches the child; EVALS_HERMETIC=0 restores the legacy env.
    // Default GSTACK_HEADLESS=1 so eval/E2E runs classify as headless (BLOCK on an
    // AskUserQuestion failure rather than emit a prose question no human reads). A
    // suite exercising the INTERACTIVE prose-fallback path opts out by passing
    // `env: { GSTACK_HEADLESS: '' }` — extraEnv wins because it spreads last.
-    env: { ...process.env, GSTACK_HEADLESS: '1', ...extraEnv },
+    env: hermeticChildEnv({ GSTACK_HEADLESS: '1', ...extraEnv }),
    stdout: 'pipe',
    stderr: 'pipe',
  });
@@ -36,6 +36,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  'browse-basic':    ['browse/src/**', 'browse/test/test-server.ts'],
  'browse-snapshot': ['browse/src/**', 'browse/test/test-server.ts'],

+  // Hermetic isolation canaries (hermetic-env.ts is also a GLOBAL touchfile;
+  // these entries exist so the canaries themselves stay tier-classified)
+  'hermetic-canary':   ['test/helpers/hermetic-env.ts', 'test/helpers/session-runner.ts', 'test/skill-e2e-hermetic-canary.test.ts', 'lib/conductor-env-shim.ts'],
+  'hermetic-sentinel': ['test/helpers/hermetic-env.ts', 'test/helpers/session-runner.ts', 'test/skill-e2e-hermetic-canary.test.ts', 'lib/conductor-env-shim.ts'],
+
  // SKILL.md setup + preamble (depend on ROOT SKILL.md + gen-skill-docs)
  'skillmd-setup-discovery':  ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
  'skillmd-no-local-binary':  ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
@@ -111,7 +116,12 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  // written a never-ask preference, AUQ should still auto-decide rather than
  // surfacing the question. Touches the question-tuning + preference
  // infrastructure plus the resolvers that own the AUTO_DECIDE preamble.
-  'auto-decide-preserved':        ['scripts/resolvers/question-tuning.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'plan-ceo-review/**', 'bin/gstack-question-preference', 'bin/gstack-config', 'bin/gstack-slug', 'test/helpers/claude-pty-runner.ts'],
+  'auto-decide-preserved':        ['scripts/resolvers/question-tuning.ts', 'scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-preamble-bash.ts', 'scripts/resolvers/preamble/generate-completion-status.ts', 'plan-ceo-review/**', 'bin/gstack-question-preference', 'bin/gstack-config', 'bin/gstack-slug', 'hosts/claude/hooks/question-preference-hook.ts', 'lib/is-conductor.ts', 'test/helpers/claude-pty-runner.ts'],
+
+  // Conductor → prose decision brief (Conductor signal makes prose the default;
+  // the PreToolUse hook denies the flaky tool). Touches the resolver that owns
+  // the Conductor rule, the preamble signal, the hook, and the detection helper.
+  'conductor-prose':              ['scripts/resolvers/preamble/generate-ask-user-format.ts', 'scripts/resolvers/preamble/generate-preamble-bash.ts', 'scripts/resolvers/preamble.ts', 'plan-eng-review/**', 'hosts/claude/hooks/question-preference-hook.ts', 'lib/is-conductor.ts', 'test/helpers/claude-pty-runner.ts', 'test/skill-e2e-conductor-prose.test.ts'],

  // Real-PTY E2E batch (#6 new tests on the harness).
  // Each one tests behavior the SDK harness can't observe (rendered TTY,
@@ -291,6 +301,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  'design-shotgun-session':         ['design-shotgun/**', 'scripts/resolvers/design.ts'],
  'design-shotgun-full':            ['design-shotgun/**', 'design/src/**', 'browse/src/**'],

+  // /diagram (diagram-render bundle consumers). Triplet = deterministic
+  // functional (gate); authoring quality = LLM-judged benchmark (periodic).
+  'diagram-triplet':            ['diagram/**', 'lib/diagram-render/**', 'browse/src/write-commands.ts', 'browse/src/read-commands.ts'],
+  'diagram-authoring-quality':  ['diagram/**', 'lib/diagram-render/**', 'test/helpers/llm-judge.ts'],
+
  // gstack-upgrade
  'gstack-upgrade-happy-path': ['gstack-upgrade/**'],

@@ -435,6 +450,11 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
  'browse-basic': 'gate',
  'browse-snapshot': 'gate',

+  // Hermetic isolation — gate (deterministic env/config assertions; if the
+  // clean room breaks, every other eval's signal is contaminated)
+  'hermetic-canary': 'gate',
+  'hermetic-sentinel': 'gate',
+
  // SKILL.md setup — gate (if setup breaks, no skill works)
  'skillmd-setup-discovery': 'gate',
  'skillmd-no-local-binary': 'gate',
@@ -508,6 +528,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
  // v1.21+ auto-mode regression tests
  'office-hours-auto-mode': 'gate',
  'auto-decide-preserved': 'periodic',
+  'conductor-prose': 'periodic',
  'e2e-harness-audit': 'gate',

  // Real-PTY E2E batch — tier classification:
@@ -659,6 +680,10 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
  'design-shotgun-session': 'gate',
  'design-shotgun-full': 'periodic',

+  // /diagram — triplet is deterministic functional, judge is a quality benchmark
+  'diagram-triplet': 'gate',
+  'diagram-authoring-quality': 'periodic',
+
  // gstack-upgrade
  'gstack-upgrade-happy-path': 'gate',

@@ -779,6 +804,7 @@ export const LLM_JUDGE_TOUCHFILES: Record<string, string[]> = {
 */
 export const GLOBAL_TOUCHFILES = [
  'test/helpers/session-runner.ts',  // All E2E tests use this runner
+  'test/helpers/hermetic-env.ts',    // Changes every E2E child's environment
  'test/helpers/eval-store.ts',      // All E2E tests store results here
  'test/helpers/touchfiles.ts',      // Self-referential — reclassifying wrong is dangerous
 ];