diff --git a/test/skill-e2e-office-hours-phase4.test.ts b/test/skill-e2e-office-hours-phase4.test.ts index 7f2481cc..adc7c25c 100644 --- a/test/skill-e2e-office-hours-phase4.test.ts +++ b/test/skill-e2e-office-hours-phase4.test.ts @@ -35,8 +35,12 @@ import * as os from 'os'; const evalCollector = createEvalCollector('e2e-office-hours-phase4'); -// Format predicates — same shape as skill-e2e-plan-format.test.ts. -const RECOMMENDATION_RE = /[Rr]ecommendation:[*\s]*Choose/; +// Format predicates. The strict `Recommendation:[*\s]*Choose` regex used by +// skill-e2e-plan-format pins down a specific template-example wording ("Choose +// [X]"). The format spec at scripts/resolvers/preamble/generate-ask-user-format.ts +// only requires `Recommendation: <choice> because <reason>` — `<choice>` can +// be the bare option label. judgeRecommendation.present (deterministic) checks +// this canonical shape correctly; we don't need a redundant strict regex here. const BECAUSE_RE = /\bbecause\b/i; // At least 2 numbered/lettered options (A/B or 1/2). Office-hours Phase 4 says // "2-3 distinct alternatives," so 2+ is the minimum bar. @@ -123,8 +127,8 @@ After writing the file with that ONE Phase 4 question, stop. Do not continue to const captured = fs.readFileSync(outFile, 'utf-8'); expect(captured.length).toBeGreaterThan(100); - // Format-spec compliance. - expect(captured).toMatch(RECOMMENDATION_RE); + // Format-spec compliance. judgeRecommendation below covers the + // Recommendation: line itself; these regexes catch cheap structural shape. expect(captured).toMatch(BECAUSE_RE); expect(captured).toMatch(TWO_OPTIONS_RE); // Phase-4 specificity: prevents a stray earlier-phase AUQ from false-passing. 
diff --git a/test/skill-e2e-plan-format.test.ts b/test/skill-e2e-plan-format.test.ts index a2f65c85..4038afa3 100644 --- a/test/skill-e2e-plan-format.test.ts +++ b/test/skill-e2e-plan-format.test.ts @@ -34,11 +34,13 @@ import * as os from 'os'; const evalCollector = createEvalCollector('e2e-plan-format'); // Regex predicates applied to captured AskUserQuestion content. -// RECOMMENDATION regex is lenient on intervening markdown markers (e.g. -// agent writes `**RECOMMENDATION:** Choose` — the `**` closers are benign). -// Post v1.7.0.0: "Recommendation:" (mixed-case) is the canonical form per -// the Pros/Cons format; accept both cases for backward compatibility. -const RECOMMENDATION_RE = /[Rr]ecommendation:[*\s]*Choose/; +// Recommendation-line presence + substance is now graded by judgeRecommendation +// (deterministic regex for present/commits/has_because, Haiku for substance); +// the prior strict `[Rr]ecommendation:[*\s]*Choose` regex pinned down a +// template-example wording ("Choose [X]") that the format spec doesn't require +// — the canonical form per generate-ask-user-format.ts is just +// `Recommendation: <choice> because <reason>`, where <choice> is the bare +// option label. judgeRecommendation.present covers the canonical shape. const COMPLETENESS_RE = /Completeness:\s*\d{1,2}\/10/; const KIND_NOTE_RE = /options differ in kind/i; @@ -142,9 +144,8 @@ After writing the file, stop. Do not continue the review.`, const captured = fs.readFileSync(outFile, 'utf-8'); expect(captured.length).toBeGreaterThan(100); - // Kind-differentiated: RECOMMENDATION required, Completeness: N/10 must NOT appear, - // "options differ in kind" note must appear. - expect(captured).toMatch(RECOMMENDATION_RE); + // Kind-differentiated: Completeness: N/10 must NOT appear, "options differ + // in kind" note must appear. Recommendation presence is checked by the judge. expect(captured).not.toMatch(COMPLETENESS_RE); expect(captured).toMatch(KIND_NOTE_RE); @@ -212,8 +213,8 @@ After writing the file, stop. 
Do not continue the review.`, const captured = fs.readFileSync(outFile, 'utf-8'); expect(captured.length).toBeGreaterThan(100); - // Coverage-differentiated: both RECOMMENDATION and Completeness: N/10 required. - expect(captured).toMatch(RECOMMENDATION_RE); + // Coverage-differentiated: Completeness: N/10 required. Recommendation + // presence checked by the judge. expect(captured).toMatch(COMPLETENESS_RE); const recScore = await judgeRecommendation(captured); @@ -281,8 +282,8 @@ After writing the file with that ONE question, stop. Do not continue the review. const captured = fs.readFileSync(outFile, 'utf-8'); expect(captured.length).toBeGreaterThan(100); - // Coverage-differentiated: both RECOMMENDATION and Completeness: N/10 required. - expect(captured).toMatch(RECOMMENDATION_RE); + // Coverage-differentiated: Completeness: N/10 required. Recommendation + // presence checked by the judge. expect(captured).toMatch(COMPLETENESS_RE); const recScore = await judgeRecommendation(captured); @@ -347,9 +348,8 @@ After writing the file with that ONE question, stop. Do not continue the review. const captured = fs.readFileSync(outFile, 'utf-8'); expect(captured.length).toBeGreaterThan(100); - // Kind-differentiated: RECOMMENDATION required, Completeness: N/10 must NOT appear, - // "options differ in kind" note must appear. - expect(captured).toMatch(RECOMMENDATION_RE); + // Kind-differentiated: Completeness: N/10 must NOT appear, "options differ + // in kind" note must appear. Recommendation presence checked by the judge. expect(captured).not.toMatch(COMPLETENESS_RE); expect(captured).toMatch(KIND_NOTE_RE);