test: E2E + LLM-judge evals for deploy skills

- 4 E2E tests: land-and-deploy (Fly.io detection + deploy report),
  canary (monitoring report structure), benchmark (perf report schema),
  setup-deploy (platform detection → CLAUDE.md config)
- 4 LLM-judge evals: workflow quality for all 4 new skills
- Touchfile entries for diff-based test selection (E2E + LLM-judge)
- 460 free tests pass, 0 fail

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-20 07:16:45 -07:00
parent 198cd2dcad
commit 0d1d2e970b
3 changed files with 344 additions and 1 deletions
+12
View File
@@ -98,6 +98,12 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
// gstack-upgrade
'gstack-upgrade-happy-path': ['gstack-upgrade/**'],
// Deploy skills
'land-and-deploy-workflow': ['land-and-deploy/**', 'scripts/gen-skill-docs.ts'],
'canary-workflow': ['canary/**', 'browse/src/**'],
'benchmark-workflow': ['benchmark/**', 'browse/src/**'],
'setup-deploy-workflow': ['setup-deploy/**', 'scripts/gen-skill-docs.ts'],
// Skill routing — journey-stage tests (depend on ALL skill descriptions)
'journey-ideation': ['*/SKILL.md.tmpl', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
'journey-plan-eng': ['*/SKILL.md.tmpl', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
@@ -140,6 +146,12 @@ export const LLM_JUDGE_TOUCHFILES: Record<string, string[]> = {
'design-review/SKILL.md fix loop': ['design-review/SKILL.md', 'design-review/SKILL.md.tmpl'],
'design-consultation/SKILL.md research': ['design-consultation/SKILL.md', 'design-consultation/SKILL.md.tmpl'],
// Deploy skills
'land-and-deploy/SKILL.md workflow': ['land-and-deploy/SKILL.md', 'land-and-deploy/SKILL.md.tmpl'],
'canary/SKILL.md monitoring loop': ['canary/SKILL.md', 'canary/SKILL.md.tmpl'],
'benchmark/SKILL.md perf collection': ['benchmark/SKILL.md', 'benchmark/SKILL.md.tmpl'],
'setup-deploy/SKILL.md platform setup': ['setup-deploy/SKILL.md', 'setup-deploy/SKILL.md.tmpl'],
// Other skills
'retro/SKILL.md instructions': ['retro/SKILL.md', 'retro/SKILL.md.tmpl'],
'qa-only/SKILL.md workflow': ['qa-only/SKILL.md', 'qa-only/SKILL.md.tmpl'],