From 7658179879945f3b12c242a0f15405c243f57eb4 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 1 May 2026 18:16:11 -0700 Subject: [PATCH] test(judge): pin every hedging-regex alternate with a fixture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coverage audit flagged 5 unpinned alternates in the choice-portion hedging regex (depends? on, depending, if .+ then, or maybe, whichever). Only "either" was previously exercised, leaving 5 deterministic regex branches with no fixture — a typo in any alternate would have shipped silently. Add one fixture per hedge form. Mix of has-because (LLM call) and no-because (deterministic-only) cases keeps total Haiku cost at ~$0.015 extra per fixture run while taking branch coverage from 9/14 → 14/14. Fixture passes 30/30 expect() calls in 20.7s. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/llm-judge-recommendation.test.ts | 28 ++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/test/llm-judge-recommendation.test.ts b/test/llm-judge-recommendation.test.ts index bc4ac84c..2145088a 100644 --- a/test/llm-judge-recommendation.test.ts +++ b/test/llm-judge-recommendation.test.ts @@ -104,14 +104,24 @@ Net: ...`); expect(noRec.has_because).toBe(false); expect(noRec.reason_substance).toBe(1); - // HEDGING: "either A or B" — fails commits. - const hedging = await judgeRecommendation(buildAUQ( - 'Recommendation: Choose either B or C because both ship faster than A.', - )); - expect(hedging.present).toBe(true); - expect( - hedging.commits, - `expected commits=false for "either B or C"; got ${hedging.commits}: ${hedging.reasoning}`, - ).toBe(false); + // HEDGING: each alternate in the hedging regex is exercised separately. + // `commits` is decided by the deterministic regex whenever the choice portion + // contains hedge vocabulary; has-because fixtures may still cost an LLM call.
+ const hedgeForms = [ + ['either B or C', 'Recommendation: Choose either B or C because both ship faster than A.'], + ['depends on traffic', 'Recommendation: A depends on traffic — pick B if read-heavy.'], + ['depending on the team', 'Recommendation: depending on the team, A or B is fine.'], + ['if X then Y', 'Recommendation: if low-traffic then A, otherwise B because both work.'], + ['or maybe', 'Recommendation: A or maybe B because both ship in V1.'], + ['whichever fits', 'Recommendation: whichever fits the team — A or B both work.'], + ]; + for (const [label, text] of hedgeForms) { + const score = await judgeRecommendation(buildAUQ(text)); + expect(score.present, `[hedge:${label}] present should be true`).toBe(true); + expect( + score.commits, + `[hedge:${label}] expected commits=false; got ${score.commits}. text="${text}"`, + ).toBe(false); + } }, 240_000); });