From 371a7e684ad54e1d751ccd6005977747f251184f Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Fri, 24 Apr 2026 01:24:12 -0700
Subject: [PATCH] =?UTF-8?q?fix(test):=20E2E=20privacy=20gate=20=E2=80=94?=
 =?UTF-8?q?=20ambient=20env=20+=20skill-file=20prompt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes to get the E2E test actually running end-to-end (the first
attempt failed at the SDK auth step, the second at the assertion step):

1. Don't pass an explicit `env:` object to runAgentSdkTest. The SDK's
   auth pipeline misses ANTHROPIC_API_KEY when env is supplied as an
   object (verified against the plan-mode-no-op test, which passes no
   env and authenticates cleanly). Mutate process.env before the call
   instead, and restore the originals in finally so other tests don't
   inherit the ambient mutation.

2. The "Run /learn with no arguments" user prompt was too narrow — the
   model reduced it to a direct action and skipped the preamble
   privacy-gate directives entirely, so zero AskUserQuestions fired.
   Mirror the plan-mode-no-op pattern: point the model at the skill
   file on disk and ask it to follow every preamble directive. Bumped
   maxTurns from 6 to 10 to give the preamble room to execute.

Verified both tests pass under
`EVALS=1 EVALS_TIER=periodic bun test test/skill-e2e-brain-privacy-gate.test.ts`
against a real ANTHROPIC_API_KEY. Cost: ~$0.30-$0.50 per test, per run.

Co-Authored-By: Claude Opus 4.7 (1M context) --- test/skill-e2e-brain-privacy-gate.test.ts | 55 +++++++++++++++++------ 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/test/skill-e2e-brain-privacy-gate.test.ts b/test/skill-e2e-brain-privacy-gate.test.ts index e4dd5e26..491e27b2 100644 --- a/test/skill-e2e-brain-privacy-gate.test.ts +++ b/test/skill-e2e-brain-privacy-gate.test.ts @@ -59,22 +59,38 @@ describeE2E('gbrain-sync privacy gate fires once via preamble', () => { const askUserQuestions: Array<{ input: Record }> = []; const binary = resolveClaudeBinary(); + // Ambient env mutations — restored in finally so other tests in the file + // don't inherit them. + const origGstackHome = process.env.GSTACK_HOME; + const origPath = process.env.PATH; + process.env.GSTACK_HOME = gstackHome; + process.env.PATH = `${fakeBinDir}:${process.env.PATH ?? '/usr/bin:/bin:/opt/homebrew/bin'}`; + try { - // Pick a small skill with the preamble. `/learn` is read-only + - // short, which keeps the token cost down. The preamble fires - // regardless of which skill we pick. + // Pick a small skill with the preamble and load it via Read to force + // the model to execute every preamble directive. A narrow "run /learn" + // prompt often gets reduced to a direct action, skipping the preamble + // gates. Mirror the plan-mode-no-op test pattern: ask the model to + // follow the skill's instructions in full. + const learnSkill = path.resolve( + import.meta.dir, + '..', + 'learn', + 'SKILL.md' + ); await runAgentSdkTest({ systemPrompt: { type: 'preset', preset: 'claude_code' }, userPrompt: - 'Run /learn with no arguments. Just report the learnings count and answer any AskUserQuestion that fires.', + `Read the skill file at ${learnSkill} and follow its instructions from the top, including every preamble directive. Execute every bash block. 
If any AskUserQuestion fires, present it.`, workingDirectory: gstackHome, - maxTurns: 6, + maxTurns: 10, allowedTools: ['Read', 'Grep', 'Glob', 'Bash'], - env: { - GSTACK_HOME: gstackHome, - // Prepend the fake gbrain to PATH so the preamble's detection wins. - PATH: `${fakeBinDir}:${process.env.PATH ?? '/usr/bin:/bin:/opt/homebrew/bin'}`, - }, + // NOTE: do NOT pass `env:` here. When the Agent SDK gets an explicit + // env object, its auth pipeline doesn't pick up ANTHROPIC_API_KEY the + // same way as when env is undefined (SDK-internal detail, verified + // against the plan-mode-no-op test which passes no env and auths + // cleanly). Instead, mutate process.env before the call so the SDK + // inherits our overrides ambiently. ...(binary ? { pathToClaudeCodeExecutable: binary } : {}), canUseTool: async (toolName, input) => { if (toolName === 'AskUserQuestion') { @@ -125,6 +141,11 @@ describeE2E('gbrain-sync privacy gate fires once via preamble', () => { // (The preamble is supposed to be idempotent within a session.) expect(privacyQuestions.length).toBe(1); } finally { + // Restore ambient env before other tests. + if (origGstackHome === undefined) delete process.env.GSTACK_HOME; + else process.env.GSTACK_HOME = origGstackHome; + if (origPath === undefined) delete process.env.PATH; + else process.env.PATH = origPath; fs.rmSync(gstackHome, { recursive: true, force: true }); fs.rmSync(fakeBinDir, { recursive: true, force: true }); } @@ -150,6 +171,12 @@ describeE2E('gbrain-sync privacy gate fires once via preamble', () => { const askUserQuestions: Array<{ input: Record }> = []; const binary = resolveClaudeBinary(); + // Ambient env mutations (see note on the first test). + const origGstackHome = process.env.GSTACK_HOME; + const origPath = process.env.PATH; + process.env.GSTACK_HOME = gstackHome; + process.env.PATH = `${fakeBinDir}:${process.env.PATH ?? 
'/usr/bin:/bin:/opt/homebrew/bin'}`; + try { await runAgentSdkTest({ systemPrompt: { type: 'preset', preset: 'claude_code' }, @@ -158,10 +185,6 @@ describeE2E('gbrain-sync privacy gate fires once via preamble', () => { workingDirectory: gstackHome, maxTurns: 4, allowedTools: ['Read', 'Grep', 'Glob', 'Bash'], - env: { - GSTACK_HOME: gstackHome, - PATH: `${fakeBinDir}:${process.env.PATH ?? '/usr/bin:/bin:/opt/homebrew/bin'}`, - }, ...(binary ? { pathToClaudeCodeExecutable: binary } : {}), canUseTool: async (toolName, input) => { if (toolName === 'AskUserQuestion') { @@ -193,6 +216,10 @@ describeE2E('gbrain-sync privacy gate fires once via preamble', () => { }); expect(privacyQuestions.length).toBe(0); } finally { + if (origGstackHome === undefined) delete process.env.GSTACK_HOME; + else process.env.GSTACK_HOME = origGstackHome; + if (origPath === undefined) delete process.env.PATH; + else process.env.PATH = origPath; fs.rmSync(gstackHome, { recursive: true, force: true }); fs.rmSync(fakeBinDir, { recursive: true, force: true }); }