From bbe88b3f71c8a796f0e1b0a08ba773665feff756 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Tue, 17 Mar 2026 11:01:38 -0700 Subject: [PATCH] fix: raise test gen cap to 20, add validation tests for user flow coverage - Raise Step 3.4 test generation cap from 10 to 20 (code + user flow combined) - Add 3 validation tests: codepath tracing, user flow mapping, diagram sections --- ship/SKILL.md | 2 +- ship/SKILL.md.tmpl | 2 +- test/skill-validation.test.ts | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/ship/SKILL.md b/ship/SKILL.md index 9c48b942..8a3e8e2c 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -557,7 +557,7 @@ If test framework detected (or bootstrapped in Step 2.5): - Run each test. Passes → commit as `test: coverage for {feature}` - Fails → fix once. Still fails → revert, note gap in diagram. -Caps: 30 code paths max, 10 tests generated max, 2-min per-test exploration cap. +Caps: 30 code paths max, 20 tests generated max (code + user flow combined), 2-min per-test exploration cap. If no test framework AND user declined bootstrap → diagram only, no generation. Note: "Test generation skipped — no test framework configured." diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl index cbf487ce..2c9707c1 100644 --- a/ship/SKILL.md.tmpl +++ b/ship/SKILL.md.tmpl @@ -288,7 +288,7 @@ If test framework detected (or bootstrapped in Step 2.5): - Run each test. Passes → commit as `test: coverage for {feature}` - Fails → fix once. Still fails → revert, note gap in diagram. -Caps: 30 code paths max, 10 tests generated max, 2-min per-test exploration cap. +Caps: 30 code paths max, 20 tests generated max (code + user flow combined), 2-min per-test exploration cap. If no test framework AND user declined bootstrap → diagram only, no generation. Note: "Test generation skipped — no test framework configured." diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 62ec3fdc..54e03a4d 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -891,6 +891,30 @@ describe('Step 3.4 test coverage audit', () => { const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); expect(content).toContain('vibe coding becomes yolo coding'); }); + + test('Step 3.4 traces actual codepaths, not just syntax', () => { + const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); + expect(content).toContain('Trace every codepath'); + expect(content).toContain('Trace data flow'); + expect(content).toContain('Diagram the execution'); + }); + + test('Step 3.4 maps user flows and interaction edge cases', () => { + const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); + expect(content).toContain('Map user flows'); + expect(content).toContain('Interaction edge cases'); + expect(content).toContain('Double-click'); + expect(content).toContain('Navigate away'); + expect(content).toContain('Error states the user can see'); + expect(content).toContain('Empty/zero/boundary states'); + }); + + test('Step 3.4 diagram includes USER FLOW COVERAGE section', () => { + const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); + expect(content).toContain('USER FLOW COVERAGE'); + expect(content).toContain('Code paths:'); + expect(content).toContain('User flows:'); + }); }); // --- Retro test health validation ---