mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-30 14:59:41 +02:00
feat: Codex E2E test harness — verify skills work on Codex CLI
New test infrastructure:
- CodexSessionRunner: spawns codex exec, parses JSONL stream, returns structured results (output, reasoning, toolCalls, tokens)
- JSONL parser ported from Python (codex/SKILL.md.tmpl) to TypeScript
- Temp HOME skill installation for Codex discovery testing

E2E tests (gated behind EVALS=1 + codex + OPENAI_API_KEY):
- codex-discover-skill: installs skill, verifies Codex finds it
- codex-review-findings: runs gstack-review via Codex, validates output

Integrates with existing eval infrastructure:
- Diff-based test selection via touchfiles
- Eval persistence via EvalCollector
- bun run test:codex / test:codex:all convenience scripts

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+7
-5
@@ -12,11 +12,13 @@
|
||||
"gen:skill-docs": "bun run scripts/gen-skill-docs.ts",
|
||||
"dev": "bun run browse/src/cli.ts",
|
||||
"server": "bun run browse/src/server.ts",
|
||||
"test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts",
|
||||
"test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts",
|
||||
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts",
|
||||
"test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts",
|
||||
"test:e2e:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts",
|
||||
"test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts",
|
||||
"test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
||||
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
||||
"test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
||||
"test:e2e:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
||||
"test:codex": "EVALS=1 bun test test/codex-e2e.test.ts",
|
||||
"test:codex:all": "EVALS=1 EVALS_ALL=1 bun test test/codex-e2e.test.ts",
|
||||
"skill:check": "bun run scripts/skill-check.ts",
|
||||
"dev:skill": "bun run scripts/dev-skill.ts",
|
||||
"start": "bun run browse/src/server.ts",
|
||||
|
||||
Reference in New Issue
Block a user