mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 19:25:10 +02:00
feat: wire runId + testName + diagnostics through all E2E tests
Generate a per-session runId, pass testName + runId to every runSkillTest() call, and wire exit_reason/timeout_at_turn/last_tool_call through recordE2E(). Add an eval:watch script entry to package.json.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+2
-1
@@ -20,7 +20,8 @@
     "start": "bun run browse/src/server.ts",
     "eval:list": "bun run scripts/eval-list.ts",
     "eval:compare": "bun run scripts/eval-compare.ts",
-    "eval:summary": "bun run scripts/eval-summary.ts"
+    "eval:summary": "bun run scripts/eval-summary.ts",
+    "eval:watch": "bun run scripts/eval-watch.ts"
   },
   "dependencies": {
     "playwright": "^1.58.2",
Reference in New Issue
Block a user