feat: diff-based test selection for E2E and LLM-judge evals

Each test declares file dependencies in a TOUCHFILES map. The test runner
checks git diff against the base branch and only runs tests whose
dependencies were modified. Global touchfiles (session-runner, eval-store,
gen-skill-docs) trigger all tests.

New scripts: test:e2e:all, test:evals:all, eval:select

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-17 11:28:03 -07:00
parent c99757b522
commit ad41c5d853
7 changed files with 650 additions and 49 deletions
+4 -1
View File
@@ -14,14 +14,17 @@
"server": "bun run browse/src/server.ts",
"test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts",
"test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts",
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts",
"test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts",
"test:e2e:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-e2e.test.ts",
"skill:check": "bun run scripts/skill-check.ts",
"dev:skill": "bun run scripts/dev-skill.ts",
"start": "bun run browse/src/server.ts",
"eval:list": "bun run scripts/eval-list.ts",
"eval:compare": "bun run scripts/eval-compare.ts",
"eval:summary": "bun run scripts/eval-summary.ts",
"eval:watch": "bun run scripts/eval-watch.ts"
"eval:watch": "bun run scripts/eval-watch.ts",
"eval:select": "bun run scripts/eval-select.ts"
},
"dependencies": {
"playwright": "^1.58.2",