diff --git a/CLAUDE.md b/CLAUDE.md index d75a7a45..9a7edc28 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -20,9 +20,11 @@ bun run eval:compare # compare two eval runs (auto-picks most recent) bun run eval:summary # aggregate stats across all eval runs ``` -`test:evals` requires `ANTHROPIC_API_KEY`. E2E tests stream progress in real-time -(tool-by-tool via `--output-format stream-json --verbose`). Results are persisted -to `~/.gstack-dev/evals/` with auto-comparison against the previous run. +`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`) +use Codex's own auth from `~/.codex/` config — no `OPENAI_API_KEY` env var needed. +E2E tests stream progress in real-time (tool-by-tool via `--output-format stream-json +--verbose`). Results are persisted to `~/.gstack-dev/evals/` with auto-comparison +against the previous run. **Diff-based test selection:** `test:evals` and `test:e2e` auto-select tests based on `git diff` against the base branch. Each test declares its file dependencies in diff --git a/test/codex-e2e.test.ts b/test/codex-e2e.test.ts index 91fc7abc..99fc46bb 100644 --- a/test/codex-e2e.test.ts +++ b/test/codex-e2e.test.ts @@ -7,7 +7,7 @@ * * Prerequisites: * - `codex` binary installed (npm install -g @openai/codex) - * - OPENAI_API_KEY env var set + * - Codex authenticated via ~/.codex/ config (no OPENAI_API_KEY env var needed) * - EVALS=1 env var set (same gate as Claude E2E tests) * * Skips gracefully when prerequisites are not met. @@ -34,11 +34,11 @@ const CODEX_AVAILABLE = (() => { } catch { return false; } })(); -const HAS_API_KEY = !!process.env.OPENAI_API_KEY; const evalsEnabled = !!process.env.EVALS; -// Skip all tests if codex is not available, API key is not set, or EVALS is not set -const SKIP = !CODEX_AVAILABLE || !HAS_API_KEY || !evalsEnabled; +// Skip all tests if codex is not available or EVALS is not set. +// Note: Codex uses its own auth from ~/.codex/ config — no OPENAI_API_KEY env var needed. 
+const SKIP = !CODEX_AVAILABLE || !evalsEnabled; const describeCodex = SKIP ? describe.skip : describe; @@ -47,8 +47,6 @@ if (!evalsEnabled) { // Silent — same as Claude E2E tests, EVALS=1 required } else if (!CODEX_AVAILABLE) { process.stderr.write('\nCodex E2E: SKIPPED — codex binary not found (install: npm i -g @openai/codex)\n'); -} else if (!HAS_API_KEY) { - process.stderr.write('\nCodex E2E: SKIPPED — OPENAI_API_KEY not set\n'); } // --- Diff-based test selection --- @@ -155,7 +153,7 @@ describeCodex('Codex E2E', () => { const result = await runCodexSkill({ skillDir, prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.', - timeoutMs: 300_000, + timeoutMs: 540_000, cwd: ROOT, skillName: 'gstack-review', }); @@ -183,5 +181,5 @@ describeCodex('Codex E2E', () => { outputLower.includes('p1') || outputLower.includes('p2'); expect(hasReviewContent).toBe(true); - }, 360_000); + }, 600_000); }); diff --git a/test/helpers/codex-session-runner.ts b/test/helpers/codex-session-runner.ts index c4d0256f..77b45020 100644 --- a/test/helpers/codex-session-runner.ts +++ b/test/helpers/codex-session-runner.ts @@ -164,10 +164,30 @@ export async function runCodexSkill(opts: { // Set up temp HOME with skill installed const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-e2e-')); + const realHome = os.homedir(); try { installSkillToTempHome(skillDir, name, tempHome); + // Copy real Codex auth config so codex can authenticate from temp HOME. + // Codex stores auth in ~/.codex/ — we need the config but not the skills + // (we install our own test skills above). 
+ const realCodexConfig = path.join(realHome, '.codex'); + const tempCodexDir = path.join(tempHome, '.codex'); + if (fs.existsSync(realCodexConfig)) { + // Copy every top-level entry of real ~/.codex/ except skills/ into temp ~/.codex/, skipping any that already exist there + // (skills/ is already set up by installSkillToTempHome) + const entries = fs.readdirSync(realCodexConfig); + for (const entry of entries) { + if (entry === 'skills') continue; // don't clobber our test skills + const src = path.join(realCodexConfig, entry); + const dst = path.join(tempCodexDir, entry); + if (!fs.existsSync(dst)) { + fs.cpSync(src, dst, { recursive: true }); + } + } + } + // Build codex exec command const args = ['exec', prompt, '--json', '-s', sandbox];