test: add validation + E2E tests for spec review, sketch, benefits-from

Unit tests (32 new assertions):
- SPEC_REVIEW_LOOP: 5 dimensions, Agent dispatch, 3 iterations, quality
  score, metrics path, convergence guard, graceful failure
- DESIGN_SKETCH: DESIGN.md awareness, wireframe, $B goto/screenshot,
  rough aesthetic, skip conditions
- BENEFITS_FROM: prerequisite offer in CEO + eng review, graceful
  decline, skills without benefits-from don't get offer
- office-hours structure: spec review loop, adversarial dimensions,
  visual sketch section

E2E tests (2 new):
- office-hours-spec-review: verifies agent understands the spec review
  loop from SKILL.md
- plan-ceo-review-benefits: verifies agent understands the skill
  chaining offer

Touchfiles updated for diff-based test selection.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-19 21:10:37 -07:00
parent ab82e21957
commit b8dcca0b5b
4 changed files with 291 additions and 0 deletions
+8
View File
@@ -57,9 +57,13 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
'review-base-branch': ['review/**'],
'review-design-lite': ['review/**', 'test/fixtures/review-eval-design-slop.*'],
// Office Hours
'office-hours-spec-review': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
// Plan reviews
'plan-ceo-review': ['plan-ceo-review/**'],
'plan-ceo-review-selective': ['plan-ceo-review/**'],
'plan-ceo-review-benefits': ['plan-ceo-review/**', 'scripts/gen-skill-docs.ts'],
'plan-eng-review': ['plan-eng-review/**'],
'plan-eng-review-artifact': ['plan-eng-review/**'],
@@ -136,6 +140,10 @@ export const LLM_JUDGE_TOUCHFILES: Record<string, string[]> = {
'design-review/SKILL.md fix loop': ['design-review/SKILL.md', 'design-review/SKILL.md.tmpl'],
'design-consultation/SKILL.md research': ['design-consultation/SKILL.md', 'design-consultation/SKILL.md.tmpl'],
// Office Hours
'office-hours/SKILL.md spec review': ['office-hours/SKILL.md', 'office-hours/SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
'office-hours/SKILL.md design sketch': ['office-hours/SKILL.md', 'office-hours/SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
// Other skills
'retro/SKILL.md instructions': ['retro/SKILL.md', 'retro/SKILL.md.tmpl'],
'qa-only/SKILL.md workflow': ['qa-only/SKILL.md', 'qa-only/SKILL.md.tmpl'],