mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 19:25:10 +02:00
feat: eval CLI tools + docs cleanup
Add eval:list, eval:compare, eval:summary CLI scripts for exploring eval history from ~/.gstack-dev/evals/. eval:compare reuses the shared comparison functions from eval-store.ts. - eval:list: sorted table with branch/tier/cost filters - eval:compare: thin wrapper around compareEvalResults + formatComparison - eval:summary: aggregate stats, flaky test detection, branch rankings - Remove unused @anthropic-ai/claude-agent-sdk from devDependencies - Update CLAUDE.md: streaming docs, eval CLI commands, remove Agent SDK refs - Add GH Actions eval upload (P2) and web dashboard (P3) to TODOS.md Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+4
-2
@@ -17,7 +17,10 @@
|
||||
"test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts",
|
||||
"skill:check": "bun run scripts/skill-check.ts",
|
||||
"dev:skill": "bun run scripts/dev-skill.ts",
|
||||
"start": "bun run browse/src/server.ts"
|
||||
"start": "bun run browse/src/server.ts",
|
||||
"eval:list": "bun run scripts/eval-list.ts",
|
||||
"eval:compare": "bun run scripts/eval-compare.ts",
|
||||
"eval:summary": "bun run scripts/eval-summary.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"playwright": "^1.58.2",
|
||||
@@ -37,7 +40,6 @@
|
||||
"devtools"
|
||||
],
|
||||
"devDependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.2.75",
|
||||
"@anthropic-ai/sdk": "^0.78.0"
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user