diff --git a/test/helpers/llm-judge.ts b/test/helpers/llm-judge.ts index 6ce4ca67..5647cb31 100644 --- a/test/helpers/llm-judge.ts +++ b/test/helpers/llm-judge.ts @@ -2,7 +2,9 @@ * Shared LLM-as-judge helpers for eval and E2E tests. * * Provides callJudge (generic JSON-from-LLM), judge (doc quality scorer), - * and outcomeJudge (planted-bug detection scorer). + * outcomeJudge (planted-bug detection scorer), judgePosture (mode-posture + * regression scorer), and judgeRecommendation (AskUserQuestion recommendation + * substance scorer). * * Requires: ANTHROPIC_API_KEY env var */ @@ -33,15 +35,32 @@ export interface PostureScore { export type PostureMode = 'expansion' | 'forcing' | 'builder'; +export interface RecommendationScore { + /** Deterministic: a "Recommendation:" / "RECOMMENDATION:" line is present. */ + present: boolean; + /** Deterministic: the recommendation names exactly one option (no hedging). */ + commits: boolean; + /** Deterministic: the literal token "because " follows the choice. */ + has_because: boolean; + /** Haiku judge, 1-5: specificity of the because-clause. See rubric in judgeRecommendation. */ + reason_substance: number; + /** Extracted because-clause text, for diagnostics in test output. */ + reason_text: string; + /** Judge's brief explanation. Empty when judge was skipped (no because-clause). */ + reasoning: string; +} + /** - * Call claude-sonnet-4-6 with a prompt, extract JSON response. - * Retries once on 429 rate limit errors. + * Call an Anthropic model with a prompt, extract JSON response. + * Retries once on 429 rate limit errors. Defaults to Sonnet 4.6 for + * existing callers; pass a model id (e.g. claude-haiku-4-5-20251001) + * for cheaper bounded judgments like judgeRecommendation. 
 */ -export async function callJudge<T>(prompt: string): Promise<T> { +export async function callJudge<T>(prompt: string, model: string = 'claude-sonnet-4-6'): Promise<T> { const client = new Anthropic(); const makeRequest = () => client.messages.create({ - model: 'claude-sonnet-4-6', + model, max_tokens: 1024, messages: [{ role: 'user', content: prompt }], }); @@ -190,3 +209,93 @@ Here is the output to evaluate: ${text}`); } + +/** + * Score the quality of an AskUserQuestion's recommendation line. + * + * Layered design: + * 1. Deterministic regex parse for present / commits / has_because. These + * don't need an LLM. + * 2. Haiku 4.5 judges only the 1-5 reason_substance axis on a tight rubric + * scoped to the because-clause itself (with the menu as context). + * + * Returns reason_substance = 1 with diagnostic reasoning when the because-clause + * is missing — no LLM call needed; substance is implicitly absent. + * + * Format spec: scripts/resolvers/preamble/generate-ask-user-format.ts + * Recommendation: <choice> because <reason> + */ +export async function judgeRecommendation(askUserText: string): Promise<RecommendationScore> { + // Deterministic checks. The format spec requires: + // "Recommendation: <choice> because <reason>" + // Match case-insensitive on the leading word, allow optional markdown + // emphasis markers (** or __) the agent sometimes adds. + const recLine = askUserText.match( + /^[*_]*\s*recommendation\s*[*_]*\s*:\s*(.+)$/im, + ); + const present = !!recLine; + const recBody = recLine?.[1]?.trim() ?? ''; + + // commits: reject obvious hedging language. The format-spec self-check + // requires the recommendation to name ONE choice; "either", "depending on", + // "if X then" all signal the model bailed on the commitment. + const commits = present && !/\b(either|depends? on|depending|if .+ then|or maybe|whichever)\b/i.test(recBody); + + // has_because: literal "because" token in the body, per the format spec. 
+ const becauseMatch = recBody.match(/\bbecause\s+(.+?)$/i); + const has_because = !!becauseMatch; + const reason_text = becauseMatch?.[1]?.trim() ?? ''; + + // If the because-clause is absent, the substance score is implicitly 1. + // Skip the LLM call — there is nothing to grade. + if (!present || !has_because || !reason_text) { + return { + present, + commits, + has_because, + reason_substance: 1, + reason_text, + reasoning: present + ? 'No "because " clause found in recommendation line — substance scored 1 by deterministic check.' + : 'No "Recommendation:" line found in captured text — substance scored 1 by deterministic check.', + }; + } + + // LLM judge: rate the because-clause specifically, 1-5. + // The full askUserText is included as context so the judge can tell whether + // the reason names a tradeoff specific to the chosen option vs an alternative, + // but the score is about the because-clause itself, not the surrounding menu. + const prompt = `You are scoring the quality of one specific line in an AskUserQuestion: the "Recommendation: <choice> because <reason>" line. Score the because-clause substance on a 1-5 scale. + +Rubric: +- 5: Reason names a SPECIFIC TRADEOFF that distinguishes the chosen option from at least one alternative (e.g. "because hybrid ships V1 in gstack-only without blocking on cross-repo gbrain coordination", "because Postgres preserves ACID guarantees the workflow already depends on"). +- 4: Reason is concrete and option-specific but does NOT explicitly compare against an alternative (e.g. "because Redis gives sub-millisecond reads under load", "because the new schema removes the JOIN we were paying for"). +- 3: Reason is real but generic — could apply to many options ("because it's faster", "because it's simpler", "because it ships sooner"). +- 2: Reason restates the option label or is near-tautological ("because it's the hybrid one", "because that's the recommended approach"). 
+- 1: Reason is boilerplate / empty ("because it's better", "because it works", "because it's the right choice"). + +You are scoring the because-clause itself, not the surrounding pros/cons or option labels. The menu is context only. + +Extracted because-clause: +${reason_text} + +Full AskUserQuestion (context only — do NOT score this): +${askUserText.slice(0, 4000)} + +Respond with ONLY valid JSON: +{"reason_substance": N, "reasoning": "one sentence explanation citing the specific words that drove the score"}`; + + const out = await callJudge<{ reason_substance: number; reasoning: string }>( + prompt, + 'claude-haiku-4-5-20251001', + ); + + return { + present, + commits, + has_because, + reason_substance: out.reason_substance, + reason_text, + reasoning: out.reasoning, + }; +}