mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-07 05:56:41 +02:00
620f5dbaea
Applied from the adversarial subagent pass during /review on this branch: - test/benchmark-cli.test.ts — new "NOT READY path fires when auth env vars are stripped" test. The default dry-run test always showed OK on dev machines with auth, hiding regressions in the remediation-hint branch. Stripped env (no auth vars, HOME→empty tmpdir) now force-exercises gpt + gemini NOT READY paths and asserts every NOT READY line includes a concrete remediation hint (install/login/export). (claude adapter's os.homedir() call is Bun-cached; the 2-of-3 adapter coverage is sufficient to exercise the branch.) - test/taste-engine.test.ts — session-cap test rewritten to seed the profile with 50 entries + one real CLI call, instead of 55 sequential subprocess spawns. Same coverage (FIFO eviction at the boundary), ~5s faster CI time. Also pins first-casing-wins on the Geist/GEIST merge assertion — bumpPref() keeps the first-arrival casing, so the test documents that policy. - test/skill-e2e-benchmark-providers.test.ts — workdir creation moved from module-load into beforeAll, cleanup added in afterAll. Previous shape leaked a /tmp/bench-e2e-* dir every CI run. - test/publish-dry-run.test.ts — removed unused empty test/helpers mkdirSync from the sandbox setup. The bin doesn't import from there, so the empty dir was a footgun for future maintainers. - test/helpers/providers/gpt.ts — expanded the inline comment on `--skip-git-repo-check` to explicitly note that `-s read-only` is now load-bearing safety (the trust prompt was the secondary boundary; removing read-only while keeping skip-git-repo-check would be unsafe). Net: 45 passing tests (was 44), session-cap test 5s faster, one real regression surface covered that didn't exist before. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
128 lines
5.0 KiB
TypeScript
128 lines
5.0 KiB
TypeScript
import type { ProviderAdapter, RunOpts, RunResult, AvailabilityCheck } from './types';
|
|
import { estimateCostUsd } from '../pricing';
|
|
import { execFileSync, spawnSync } from 'child_process';
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
import * as os from 'os';
|
|
|
|
/**
 * GPT adapter — wraps the OpenAI `codex` CLI (`codex exec` with `--json` output).
 *
 * Codex uses ~/.codex/ for auth (not OPENAI_API_KEY). The --json flag emits
 * JSONL events; we parse `turn.completed` for usage and `item.completed`
 * (`agent_message` / `command_execution`) for output aggregation.
 */
export class GptAdapter implements ProviderAdapter {
  /** Model name reported (and priced) when neither the event stream nor the caller names one. */
  private static readonly DEFAULT_MODEL = 'gpt-5.4';

  readonly name = 'gpt';
  readonly family = 'gpt' as const;

  /**
   * Cheap readiness probe: checks that a `codex` binary resolves on PATH and
   * that a `~/.codex/` directory exists.
   *
   * @returns `{ ok: true }` when both checks pass; otherwise `{ ok: false, reason }`
   *          where `reason` carries a concrete remediation hint.
   */
  async available(): Promise<AvailabilityCheck> {
    const res = spawnSync('sh', ['-c', 'command -v codex'], { timeout: 2000 });
    if (res.status !== 0) {
      return { ok: false, reason: 'codex CLI not found on PATH. Install: npm i -g @openai/codex' };
    }
    // Auth sniff: ~/.codex/ should contain auth state after `codex login`
    const codexDir = path.join(os.homedir(), '.codex');
    if (!fs.existsSync(codexDir)) {
      return { ok: false, reason: 'No ~/.codex/ found. Run `codex login` to authenticate via ChatGPT.' };
    }
    return { ok: true };
  }

  /**
   * Run one prompt through `codex exec` and aggregate its JSONL event stream.
   *
   * The child process is spawned with `execFileSync`, so this call blocks until
   * codex exits or `opts.timeoutMs` elapses. Failures never reject: they are
   * returned as an empty result whose `error` field carries a coarse code.
   *
   * @param opts prompt, workdir, optional model / timeout / extra CLI args.
   * @returns aggregated output, token usage, tool-call count and duration.
   */
  async run(opts: RunOpts): Promise<RunResult> {
    const start = Date.now();
    // `-s read-only` is load-bearing safety. With `--skip-git-repo-check` we
    // bypass codex's interactive trust prompt for unknown directories (benchmarks
    // often run in temp dirs / non-git paths), so the read-only sandbox is now
    // the only boundary preventing codex from mutating the workdir. If you ever
    // remove `-s read-only`, drop `--skip-git-repo-check` too.
    // NOTE(review): the prompt is passed positionally right after `exec`; a prompt
    // starting with `-` may be parsed by codex as a flag — confirm against the CLI.
    const args = ['exec', opts.prompt, '-C', opts.workdir, '-s', 'read-only', '--skip-git-repo-check', '--json'];
    if (opts.model) args.push('-m', opts.model);
    if (opts.extraArgs) args.push(...opts.extraArgs);

    try {
      const out = execFileSync('codex', args, {
        cwd: opts.workdir,
        timeout: opts.timeoutMs,
        encoding: 'utf-8',
        maxBuffer: 32 * 1024 * 1024,
      });
      const parsed = this.parseJsonl(out);
      return {
        output: parsed.output,
        tokens: parsed.tokens,
        durationMs: Date.now() - start,
        toolCalls: parsed.toolCalls,
        modelUsed: parsed.modelUsed || opts.model || GptAdapter.DEFAULT_MODEL,
      };
    } catch (err: unknown) {
      const durationMs = Date.now() - start;
      return this.emptyResult(durationMs, this.classifyFailure(err, opts.timeoutMs), opts.model);
    }
  }

  /**
   * Estimate the USD cost of a run by delegating to `estimateCostUsd`.
   *
   * @param tokens input / output (and optionally cached) token counts.
   * @param model  model name to price against; defaults to the adapter's default model.
   */
  estimateCost(tokens: { input: number; output: number; cached?: number }, model?: string): number {
    return estimateCostUsd(tokens, model ?? GptAdapter.DEFAULT_MODEL);
  }

  /**
   * Map an error thrown while running codex onto a coarse error code.
   *
   * Checks are ordered: timeout (SIGTERM / ETIMEDOUT) first, then auth and
   * rate-limit patterns matched against stderr, then `unknown`.
   *
   * @param err       whatever the spawn / parse step threw.
   * @param timeoutMs the timeout the run was started with (used in the message only).
   */
  private classifyFailure(err: unknown, timeoutMs: RunOpts['timeoutMs']): RunResult['error'] {
    const e = (err ?? {}) as { code?: string; stderr?: Buffer | string; signal?: string; message?: string };
    const stderr = e.stderr?.toString() ?? '';
    if (e.signal === 'SIGTERM' || e.code === 'ETIMEDOUT') {
      return { code: 'timeout', reason: `exceeded ${timeoutMs}ms` };
    }
    if (/unauthorized|auth|login/i.test(stderr)) {
      return { code: 'auth', reason: stderr.slice(0, 400) };
    }
    if (/rate[- ]?limit|429/i.test(stderr)) {
      return { code: 'rate_limit', reason: stderr.slice(0, 400) };
    }
    // `||` rather than `??`: stderr is always a string here, so a nullish chain
    // could never reach the 'unknown' fallback and an empty message + empty
    // stderr would yield an empty reason.
    const detail = e.message || stderr || 'unknown';
    return { code: 'unknown', reason: detail.slice(0, 400) };
  }

  /**
   * Parse codex exec --json JSONL stream.
   * Key events:
   *  - item.completed with item.type === 'agent_message' → text output
   *  - item.completed with item.type === 'command_execution' → tool call
   *  - turn.completed → usage.input_tokens, usage.output_tokens
   *  - thread.started → session id (not used here)
   *
   * Lines that are blank, not valid JSON, or not JSON objects are skipped.
   * Token counts are summed across all `turn.completed` events; agent messages
   * are joined with newlines in arrival order.
   */
  private parseJsonl(raw: string): { output: string; tokens: { input: number; output: number }; toolCalls: number; modelUsed?: string } {
    const messages: string[] = [];
    let inputTokens = 0;
    let outputTokens = 0;
    let toolCalls = 0;
    let modelUsed: string | undefined;

    for (const line of raw.split('\n')) {
      const text = line.trim();
      if (!text) continue;

      let parsed: unknown;
      try {
        parsed = JSON.parse(text);
      } catch {
        // skip malformed lines — codex stderr can leak in
        continue;
      }
      // Valid JSON that is not an object (null, numbers, strings) is not an event.
      if (typeof parsed !== 'object' || parsed === null) continue;

      const evt = parsed as {
        type?: unknown;
        item?: { type?: unknown; text?: unknown } | null;
        usage?: { input_tokens?: unknown; output_tokens?: unknown } | null;
        model?: unknown;
      };

      if (evt.type === 'item.completed' && evt.item) {
        if (evt.item.type === 'agent_message' && typeof evt.item.text === 'string') {
          messages.push(evt.item.text);
        } else if (evt.item.type === 'command_execution') {
          toolCalls += 1;
        }
      } else if (evt.type === 'turn.completed') {
        const usage = evt.usage ?? {};
        inputTokens += GptAdapter.toCount(usage.input_tokens);
        outputTokens += GptAdapter.toCount(usage.output_tokens);
        if (typeof evt.model === 'string' && evt.model) modelUsed = evt.model;
      }
    }

    // Concatenate exactly as an incremental `output += (output ? '\n' : '') + text`
    // would, so an empty first message does not produce a leading newline.
    let output = '';
    for (const msg of messages) {
      output += (output ? '\n' : '') + msg;
    }
    return { output, tokens: { input: inputTokens, output: outputTokens }, toolCalls, modelUsed };
  }

  /** Coerce an untrusted JSON value to a token count: finite numbers pass through, anything else counts as 0. */
  private static toCount(value: unknown): number {
    return typeof value === 'number' && Number.isFinite(value) ? value : 0;
  }

  /**
   * Build the result returned for a failed run: no output, zero tokens and
   * tool calls, plus the supplied error.
   *
   * @param durationMs wall-clock time spent before the failure.
   * @param error      classified failure to attach.
   * @param model      model the caller requested, if any; falls back to the default model.
   */
  private emptyResult(durationMs: number, error: RunResult['error'], model?: string): RunResult {
    return {
      output: '',
      tokens: { input: 0, output: 0 },
      durationMs,
      toolCalls: 0,
      modelUsed: model ?? GptAdapter.DEFAULT_MODEL,
      error,
    };
  }
}
|