mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
fix: Codex E2E uses ~/.codex/ auth, not OPENAI_API_KEY
- Remove OPENAI_API_KEY gate from test prerequisites
- Copy real ~/.codex/ auth config into temp HOME so codex can authenticate
- Increase review test timeout to 540s (codex does thorough 60+ tool call reviews)
- Document in CLAUDE.md that Codex uses its own auth config

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -20,9 +20,11 @@ bun run eval:compare # compare two eval runs (auto-picks most recent)
|
||||
bun run eval:summary # aggregate stats across all eval runs
|
||||
```
|
||||
|
||||
`test:evals` requires `ANTHROPIC_API_KEY`. E2E tests stream progress in real-time
|
||||
(tool-by-tool via `--output-format stream-json --verbose`). Results are persisted
|
||||
to `~/.gstack-dev/evals/` with auto-comparison against the previous run.
|
||||
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
|
||||
use Codex's own auth from `~/.codex/` config — no `OPENAI_API_KEY` env var needed.
|
||||
E2E tests stream progress in real-time (tool-by-tool via `--output-format stream-json
|
||||
--verbose`). Results are persisted to `~/.gstack-dev/evals/` with auto-comparison
|
||||
against the previous run.
|
||||
|
||||
**Diff-based test selection:** `test:evals` and `test:e2e` auto-select tests based
|
||||
on `git diff` against the base branch. Each test declares its file dependencies in
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Prerequisites:
|
||||
* - `codex` binary installed (npm install -g @openai/codex)
|
||||
* - OPENAI_API_KEY env var set
|
||||
* - Codex authenticated via ~/.codex/ config (no OPENAI_API_KEY env var needed)
|
||||
* - EVALS=1 env var set (same gate as Claude E2E tests)
|
||||
*
|
||||
* Skips gracefully when prerequisites are not met.
|
||||
@@ -34,11 +34,11 @@ const CODEX_AVAILABLE = (() => {
|
||||
} catch { return false; }
|
||||
})();
|
||||
|
||||
const HAS_API_KEY = !!process.env.OPENAI_API_KEY;
|
||||
const evalsEnabled = !!process.env.EVALS;
|
||||
|
||||
// Skip all tests if codex is not available, API key is not set, or EVALS is not set
|
||||
const SKIP = !CODEX_AVAILABLE || !HAS_API_KEY || !evalsEnabled;
|
||||
// Skip all tests if codex is not available or EVALS is not set.
|
||||
// Note: Codex uses its own auth from ~/.codex/ config — no OPENAI_API_KEY env var needed.
|
||||
const SKIP = !CODEX_AVAILABLE || !evalsEnabled;
|
||||
|
||||
const describeCodex = SKIP ? describe.skip : describe;
|
||||
|
||||
@@ -47,8 +47,6 @@ if (!evalsEnabled) {
|
||||
// Silent — same as Claude E2E tests, EVALS=1 required
|
||||
} else if (!CODEX_AVAILABLE) {
|
||||
process.stderr.write('\nCodex E2E: SKIPPED — codex binary not found (install: npm i -g @openai/codex)\n');
|
||||
} else if (!HAS_API_KEY) {
|
||||
process.stderr.write('\nCodex E2E: SKIPPED — OPENAI_API_KEY not set\n');
|
||||
}
|
||||
|
||||
// --- Diff-based test selection ---
|
||||
@@ -155,7 +153,7 @@ describeCodex('Codex E2E', () => {
|
||||
const result = await runCodexSkill({
|
||||
skillDir,
|
||||
prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.',
|
||||
timeoutMs: 300_000,
|
||||
timeoutMs: 540_000,
|
||||
cwd: ROOT,
|
||||
skillName: 'gstack-review',
|
||||
});
|
||||
@@ -183,5 +181,5 @@ describeCodex('Codex E2E', () => {
|
||||
outputLower.includes('p1') ||
|
||||
outputLower.includes('p2');
|
||||
expect(hasReviewContent).toBe(true);
|
||||
}, 360_000);
|
||||
}, 600_000);
|
||||
});
|
||||
|
||||
@@ -164,10 +164,30 @@ export async function runCodexSkill(opts: {
|
||||
|
||||
// Set up temp HOME with skill installed
|
||||
const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-e2e-'));
|
||||
const realHome = os.homedir();
|
||||
|
||||
try {
|
||||
installSkillToTempHome(skillDir, name, tempHome);
|
||||
|
||||
// Copy real Codex auth config so codex can authenticate from temp HOME.
|
||||
// Codex stores auth in ~/.codex/ — we need the config but not the skills
|
||||
// (we install our own test skills above).
|
||||
const realCodexConfig = path.join(realHome, '.codex');
|
||||
const tempCodexDir = path.join(tempHome, '.codex');
|
||||
if (fs.existsSync(realCodexConfig)) {
|
||||
// Copy every entry except skills/ from real ~/.codex/ into temp ~/.codex/, skipping entries that already exist there
|
||||
// (skills/ is already set up by installSkillToTempHome)
|
||||
const entries = fs.readdirSync(realCodexConfig);
|
||||
for (const entry of entries) {
|
||||
if (entry === 'skills') continue; // don't clobber our test skills
|
||||
const src = path.join(realCodexConfig, entry);
|
||||
const dst = path.join(tempCodexDir, entry);
|
||||
if (!fs.existsSync(dst)) {
|
||||
fs.cpSync(src, dst, { recursive: true });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build codex exec command
|
||||
const args = ['exec', prompt, '--json', '-s', sandbox];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user