From 6b99df9df7127de38ec676f8cf8cec8a359837f4 Mon Sep 17 00:00:00 2001
From: Garry Tan <garrytan@gmail.com>
Date: Thu, 23 Apr 2026 16:41:36 -0700
Subject: [PATCH] feat(preamble): upgrade AskUserQuestion format to Pros/Cons
 decision brief
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Part 4 of 4 (plan: ~/.claude/plans/system-instruction-you-are-working-polymorphic-twilight.md).

Every AskUserQuestion now renders as a decision brief, not a bullet list:
D-numbered header, ELI10, Stakes-if-we-pick-wrong, Recommendation, Pros/Cons
with ✅/❌ markers per option, closing Net: tradeoff synthesis.

scripts/resolvers/preamble/generate-ask-user-format.ts
- Full rewrite. Preserves prior rules (Re-ground, ELI10, Recommend,
  Completeness, Options) and adds:
  - D-numbering per skill invocation (model-level, not runtime state)
  - Stakes line (pain avoided / capability unlocked / consequence named)
  - Pros/Cons block with min 2 ✅ + 1 ❌ per option, min 40 chars/bullet
  - Hard-stop escape: "✅ No cons — this is a hard-stop choice" for
    genuine one-sided choices (destructive-action confirmations)
  - Neutral-posture handling (CT1-compliant): (recommended) label
    STAYS on default option to preserve AUTO_DECIDE contract; neutrality
    expressed as prose in Recommendation line only
  - Net line closes the decision with a one-sentence tradeoff frame
  - Rule 11: tool_use mandate (prose "Question:" blocks don't count)
  - Self-check list before emitting

test/skill-validation.test.ts
- Update format assertions to check for new Pros/Cons tokens
  (Pros / cons:, Recommendation: <choice>, Net:, ELI10, Stakes if we
  pick wrong:, ✅, ❌) across all tier-2+ skills
- Old "RECOMMENDATION: Choose" expectation removed (the new format uses
  mixed-case "Recommendation:" with no literal "Choose")

test/skill-e2e-plan-format.test.ts
- Add v1.7.0.0 format token regexes (PROS_CONS_HEADER_RE, PRO_BULLET_RE,
  CON_BULLET_RE, NET_LINE_RE, D_NUMBER_RE, STAKES_RE)
- Existing RECOMMENDATION_RE loosened to accept mixed-case "Recommendation:"
  (canonical v1.7.0.0 form) alongside all-caps (legacy). Tests are
  additive — the strict new-format gate is the upcoming cadence eval.

Regenerated 30 SKILL.md files via bun run gen:skill-docs.

Verified:
- bun test: 319 pass (1 pre-existing security-bench fixture oversize
  failure on main, unrelated — confirmed via git stash test on main HEAD)
- New format tokens render in all tier-2+ skills (plan-ceo-review,
  plan-eng-review, ship, office-hours verified)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 autoplan/SKILL.md                             | 131 +++++++++++++++--
 canary/SKILL.md                               | 131 +++++++++++++++--
 codex/SKILL.md                                | 131 +++++++++++++++--
 context-restore/SKILL.md                      | 131 +++++++++++++++--
 context-save/SKILL.md                         | 131 +++++++++++++++--
 cso/SKILL.md                                  | 131 +++++++++++++++--
 design-consultation/SKILL.md                  | 131 +++++++++++++++--
 design-html/SKILL.md                          | 131 +++++++++++++++--
 design-review/SKILL.md                        | 131 +++++++++++++++--
 design-shotgun/SKILL.md                       | 131 +++++++++++++++--
 devex-review/SKILL.md                         | 131 +++++++++++++++--
 document-release/SKILL.md                     | 131 +++++++++++++++--
 health/SKILL.md                               | 131 +++++++++++++++--
 investigate/SKILL.md                          | 131 +++++++++++++++--
 land-and-deploy/SKILL.md                      | 131 +++++++++++++++--
 learn/SKILL.md                                | 131 +++++++++++++++--
 office-hours/SKILL.md                         | 131 +++++++++++++++--
 open-gstack-browser/SKILL.md                  | 131 +++++++++++++++--
 pair-agent/SKILL.md                           | 131 +++++++++++++++--
 plan-ceo-review/SKILL.md                      | 131 +++++++++++++++--
 plan-design-review/SKILL.md                   | 131 +++++++++++++++--
 plan-devex-review/SKILL.md                    | 131 +++++++++++++++--
 plan-eng-review/SKILL.md                      | 131 +++++++++++++++--
 plan-tune/SKILL.md                            | 131 +++++++++++++++--
 qa-only/SKILL.md                              | 131 +++++++++++++++--
 qa/SKILL.md                                   | 131 +++++++++++++++--
 retro/SKILL.md                                | 131 +++++++++++++++--
 review/SKILL.md                               | 131 +++++++++++++++--
 .../preamble/generate-ask-user-format.ts      | 132 ++++++++++++++++--
 setup-deploy/SKILL.md                         | 131 +++++++++++++++--
 ship/SKILL.md                                 | 131 +++++++++++++++--
 test/skill-e2e-plan-format.test.ts            |  17 ++-
 test/skill-validation.test.ts                 |  15 +-
 33 files changed, 3842 insertions(+), 252 deletions(-)
diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md
index f8fe68c8..64ce55b8 100644
--- a/autoplan/SKILL.md
+++ b/autoplan/SKILL.md
@@ -360,17 +360,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/canary/SKILL.md b/canary/SKILL.md
index 9772ac1d..5aee0a95 100644
--- a/canary/SKILL.md
+++ b/canary/SKILL.md
@@ -352,17 +352,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/codex/SKILL.md b/codex/SKILL.md
index 73e13b20..d9be0205 100644
--- a/codex/SKILL.md
+++ b/codex/SKILL.md
@@ -354,17 +354,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/context-restore/SKILL.md b/context-restore/SKILL.md
index bf064120..f5857992 100644
--- a/context-restore/SKILL.md
+++ b/context-restore/SKILL.md
@@ -356,17 +356,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/context-save/SKILL.md b/context-save/SKILL.md
index f5857d29..911ffba1 100644
--- a/context-save/SKILL.md
+++ b/context-save/SKILL.md
@@ -356,17 +356,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/cso/SKILL.md b/cso/SKILL.md
index 36f2424a..65e77cc1 100644
--- a/cso/SKILL.md
+++ b/cso/SKILL.md
@@ -357,17 +357,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md
index a24b0d75..fb1edb4b 100644
--- a/design-consultation/SKILL.md
+++ b/design-consultation/SKILL.md
@@ -357,17 +357,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/design-html/SKILL.md b/design-html/SKILL.md
index db74d537..fe9d733f 100644
--- a/design-html/SKILL.md
+++ b/design-html/SKILL.md
@@ -359,17 +359,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/design-review/SKILL.md b/design-review/SKILL.md
index dbbfd3e0..25f89ae6 100644
--- a/design-review/SKILL.md
+++ b/design-review/SKILL.md
@@ -357,17 +357,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/design-shotgun/SKILL.md b/design-shotgun/SKILL.md
index 6c83f505..3a82c947 100644
--- a/design-shotgun/SKILL.md
+++ b/design-shotgun/SKILL.md
@@ -354,17 +354,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/devex-review/SKILL.md b/devex-review/SKILL.md
index 0ffebe9a..5c127e1d 100644
--- a/devex-review/SKILL.md
+++ b/devex-review/SKILL.md
@@ -357,17 +357,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/document-release/SKILL.md b/document-release/SKILL.md
index 79512453..0ec61add 100644
--- a/document-release/SKILL.md
+++ b/document-release/SKILL.md
@@ -354,17 +354,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/health/SKILL.md b/health/SKILL.md
index 777ece54..346f3804 100644
--- a/health/SKILL.md
+++ b/health/SKILL.md
@@ -354,17 +354,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/investigate/SKILL.md b/investigate/SKILL.md
index 40aa1258..faee2d17 100644
--- a/investigate/SKILL.md
+++ b/investigate/SKILL.md
@@ -371,17 +371,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/land-and-deploy/SKILL.md b/land-and-deploy/SKILL.md
index 4c92d221..a5a5ccdb 100644
--- a/land-and-deploy/SKILL.md
+++ b/land-and-deploy/SKILL.md
@@ -351,17 +351,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/learn/SKILL.md b/learn/SKILL.md
index 8fa8d854..64fdcaf5 100644
--- a/learn/SKILL.md
+++ b/learn/SKILL.md
@@ -354,17 +354,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md
index fa020ea1..ddf4e6c5 100644
--- a/office-hours/SKILL.md
+++ b/office-hours/SKILL.md
@@ -362,17 +362,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/open-gstack-browser/SKILL.md b/open-gstack-browser/SKILL.md
index acde2c80..35400dcc 100644
--- a/open-gstack-browser/SKILL.md
+++ b/open-gstack-browser/SKILL.md
@@ -351,17 +351,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/pair-agent/SKILL.md b/pair-agent/SKILL.md
index bbffa9f0..bd1e2d00 100644
--- a/pair-agent/SKILL.md
+++ b/pair-agent/SKILL.md
@@ -352,17 +352,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md
index e9239452..faab9932 100644
--- a/plan-ceo-review/SKILL.md
+++ b/plan-ceo-review/SKILL.md
@@ -358,17 +358,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md
index d464b20a..4925123c 100644
--- a/plan-design-review/SKILL.md
+++ b/plan-design-review/SKILL.md
@@ -355,17 +355,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/plan-devex-review/SKILL.md b/plan-devex-review/SKILL.md
index bf62fed1..7d71c55b 100644
--- a/plan-devex-review/SKILL.md
+++ b/plan-devex-review/SKILL.md
@@ -359,17 +359,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md
index 387564b5..b3bebe2e 100644
--- a/plan-eng-review/SKILL.md
+++ b/plan-eng-review/SKILL.md
@@ -357,17 +357,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/plan-tune/SKILL.md b/plan-tune/SKILL.md
index 907284a3..2f709ec8 100644
--- a/plan-tune/SKILL.md
+++ b/plan-tune/SKILL.md
@@ -365,17 +365,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md
index 01b0d87f..d5715712 100644
--- a/qa-only/SKILL.md
+++ b/qa-only/SKILL.md
@@ -353,17 +353,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/qa/SKILL.md b/qa/SKILL.md
index a7a7fd4b..a465d8e8 100644
--- a/qa/SKILL.md
+++ b/qa/SKILL.md
@@ -359,17 +359,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/retro/SKILL.md b/retro/SKILL.md
index 395b6833..a809018d 100644
--- a/retro/SKILL.md
+++ b/retro/SKILL.md
@@ -352,17 +352,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/review/SKILL.md b/review/SKILL.md
index 9e1fa083..91456e31 100644
--- a/review/SKILL.md
+++ b/review/SKILL.md
@@ -356,17 +356,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/scripts/resolvers/preamble/generate-ask-user-format.ts b/scripts/resolvers/preamble/generate-ask-user-format.ts
index 58ec324d..e06e7022 100644
--- a/scripts/resolvers/preamble/generate-ask-user-format.ts
+++ b/scripts/resolvers/preamble/generate-ask-user-format.ts
@@ -3,16 +3,130 @@ import type { TemplateContext } from '../types';
 export function generateAskUserFormat(_ctx: TemplateContext): string {
   return `## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the \`_BRANCH\` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with \`RECOMMENDATION: Choose [X] because [one-line reason]\` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with \`Completeness: N/10\` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip \`Completeness: N/10\` entirely and write one line: \`Note: options differ in kind, not coverage — no completeness score.\` Do not fabricate filler scores.
-5. **Options:** Lettered options: \`A) ... B) ... C) ...\` — when an option involves effort, show both scales: \`(human: ~X / CC: ~Y)\`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.`;
+\`\`\`
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+\`\`\`
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is \`D1\`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., \`/plan-ceo-review\` running \`/office-hours\` inline) starts its own
+   D1; label as \`D1 (office-hours)\` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   \`_BRANCH\` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** \`Recommendation: <choice> because <one-line
+   reason>\` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The \`(recommended)\` label on the
+   option is REQUIRED — \`scripts/resolvers/question-tuning.ts\` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each \`Completeness: N/10\` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   \`Note: options differ in kind, not coverage — no completeness score.\`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** \`✅ Simple\` is not a pro. \`✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser\` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet \`✅ No cons — this is a
+     hard-stop choice\` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: \`Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way\`. The \`(recommended)\` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   \`— this is a taste call\` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: \`(human: ~2 days / CC: ~15 min)\`.
+
+11. **Tool_use, not prose.** A markdown block labeled \`Question:\` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the \`options\` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.`;
 }
-
diff --git a/setup-deploy/SKILL.md b/setup-deploy/SKILL.md
index 81554e6d..52671f72 100644
--- a/setup-deploy/SKILL.md
+++ b/setup-deploy/SKILL.md
@@ -355,17 +355,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/ship/SKILL.md b/ship/SKILL.md
index ab9c1430..a8f5e5a4 100644
--- a/ship/SKILL.md
+++ b/ship/SKILL.md
@@ -357,17 +357,132 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
 
 ## AskUserQuestion Format
 
-**ALWAYS follow this structure for every AskUserQuestion call. All four elements are non-skippable. If you find yourself about to skip any of them, stop and back up.**
+**ALWAYS follow this structure for every AskUserQuestion call. Every element is non-skippable. If you find yourself about to skip any of them, stop and back up.**
 
-1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
-2. **Simplify (ELI10, ALWAYS):** Explain what's happening in plain English a smart 16-year-old could follow. Concrete examples and analogies, not function names or internal jargon. Say what it DOES, not what it's called. State the stakes: what breaks if we pick wrong. This is NOT optional verbosity and it is NOT preamble — the user is about to make a decision and needs context. Even if you'd normally stay terse, emit the ELI10 paragraph. The user will ask for it anyway; do it the first time.
-3. **Recommend (ALWAYS):** Every question ends with `RECOMMENDATION: Choose [X] because [one-line reason]` on its own line. Never omit it. Never collapse it into the options list. Required for every AskUserQuestion, regardless of whether the options are coverage-differentiated or different-in-kind.
-4. **Score completeness (when meaningful):** When options differ in coverage (e.g. full test coverage vs happy path vs shortcut, complete error handling vs partial), score each with `Completeness: N/10` on its own line. Calibration: 10 = complete (all edge cases, full coverage), 7 = happy path only, 3 = shortcut. Flag any option ≤5 where a higher-completeness option exists. When options differ in kind (picking a review posture, picking an architectural approach, cherry-pick Add/Defer/Skip, choosing between two different kinds of systems), the completeness axis doesn't apply — skip `Completeness: N/10` entirely and write one line: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate filler scores.
-5. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)`
+### Required shape
 
-Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+Every AskUserQuestion reads like a decision brief, not a bullet list:
 
-Per-skill instructions may add additional formatting rules on top of this baseline.
+```
+D<N> — <one-line question title>
+
+ELI10: <plain English a 16-year-old could follow, 2-4 sentences, name the stakes>
+
+Stakes if we pick wrong: <one sentence on what breaks, what user sees, what's lost>
+
+Recommendation: <choice> because <one-line reason>
+
+Completeness: A=X/10, B=Y/10   (or: Note: options differ in kind, not coverage — no completeness score)
+
+Pros / cons:
+
+A) <option label> (recommended)
+  ✅ <pro — concrete, observable, ≥40 chars>
+  ✅ <pro>
+  ❌ <con — honest, ≥40 chars>
+
+B) <option label>
+  ✅ <pro>
+  ❌ <con>
+
+Net: <one-line synthesis of what you're actually trading off>
+```
+
+### Element rules
+
+1. **D-numbering.** First question in a skill invocation is `D1`. Increment per
+   question within the same skill. This is a model-level instruction, not a
+   runtime counter — you count your own questions. Nested skill invocation
+   (e.g., `/plan-ceo-review` running `/office-hours` inline) starts its own
+   D1; label as `D1 (office-hours)` to disambiguate when the user will see
+   both. Drift is expected over long sessions; minor inconsistency is fine.
+
+2. **Re-ground.** Before ELI10, state the project, current branch (use the
+   `_BRANCH` value from the preamble, NOT conversation history or gitStatus),
+   and the current plan/task. 1-2 sentences. Assume the user hasn't looked at
+   this window in 20 minutes.
+
+3. **ELI10 (ALWAYS).** Explain in plain English a smart 16-year-old could
+   follow. Concrete examples and analogies, not function names. Say what it
+   DOES, not what it's called. This is not preamble — the user is about to
+   make a decision and needs context. Even in terse mode, emit the ELI10.
+
+4. **Stakes if we pick wrong (ALWAYS).** One sentence naming what breaks in
+   concrete terms (pain avoided / capability unlocked / consequence named).
+   "Users see a 3-second spinner" beats "performance may degrade." Forces
+   the trade-off to be real.
+
+5. **Recommendation (ALWAYS).** `Recommendation: <choice> because <one-line
+   reason>` on its own line. Never omit it. Required for every AskUserQuestion,
+   even when neutral-posture (see rule 8). The `(recommended)` label on the
+   option is REQUIRED — `scripts/resolvers/question-tuning.ts` reads it to
+   power the AUTO_DECIDE path. Omitting it breaks auto-decide.
+
+6. **Completeness scoring (when meaningful).** When options differ in
+   coverage (full test coverage vs happy path vs shortcut, complete error
+   handling vs partial), score each `Completeness: N/10` on its own line.
+   Calibration: 10 = complete, 7 = happy path only, 3 = shortcut. Flag any
+   option ≤5 where a higher-completeness option exists. When options differ
+   in kind (review posture, architectural A-vs-B, cherry-pick Add/Defer/Skip,
+   two different kinds of systems), SKIP the score and write one line:
+   `Note: options differ in kind, not coverage — no completeness score.`
+   Do NOT fabricate filler scores — empty 10/10 on every option is worse
+   than no score.
+
+7. **Pros / cons block.** Every option gets per-bullet ✅ (pro) and ❌ (con)
+   markers. Rules:
+   - **Minimum 2 pros and 1 con per option.** If you can't name a con for
+     the recommended option, the recommendation is hollow — go find one. If
+     you can't name a pro for the rejected option, the question isn't real.
+   - **Minimum 40 characters per bullet.** `✅ Simple` is not a pro. `✅
+     Reuses the YAML frontmatter format already in MEMORY.md, zero new
+     parser` is a pro. Concrete, observable, specific.
+   - **Hard-stop escape** for genuinely one-sided choices (destructive-action
+     confirmation, one-way doors): a single bullet `✅ No cons — this is a
+     hard-stop choice` satisfies the rule. Use sparingly; overuse flips a
+     decision brief into theater.
+
+8. **Net line (ALWAYS).** Closes the decision with a one-sentence synthesis
+   of what the user is actually trading off. From the reference screenshot:
+   *"The new-format case is speculative. The copy-format case is immediate
+   leverage. Copy now, evolve later if a real pattern emerges."* Not a
+   summary — a verdict frame.
+
+9. **Neutral-posture handling.** When the skill explicitly says "neutral
+   recommendation posture" (SELECTIVE EXPANSION cherry-picks, taste calls,
+   kind-differentiated choices where neither side dominates), the
+   Recommendation line reads: `Recommendation: <default-choice> — this is a
+   taste call, no strong preference either way`. The `(recommended)` label
+   STAYS on the default option (machine-readable hint for AUTO_DECIDE). The
+   `— this is a taste call` prose is the human-readable neutrality signal.
+   Both coexist.
+
+10. **Effort both-scales.** When an option involves effort, show both human
+    and CC scales: `(human: ~2 days / CC: ~15 min)`.
+
+11. **Tool_use, not prose.** A markdown block labeled `Question:` is not a
+    question — the user never sees it as interactive. If you wrote one in
+    prose, stop and reissue as an actual AskUserQuestion tool_use. The rich
+    markdown goes in the question body; the `options` array stays short
+    labels (A, B, C).
+
+### Self-check before emitting
+
+Before calling AskUserQuestion, verify:
+- [ ] D<N> header present
+- [ ] ELI10 paragraph present (stakes line too)
+- [ ] Recommendation line present with concrete reason
+- [ ] Completeness scored (coverage) OR kind-note present (kind)
+- [ ] Every option has ≥2 ✅ and ≥1 ❌, each ≥40 chars (or hard-stop escape)
+- [ ] (recommended) label on one option (even for neutral-posture — see rule 9)
+- [ ] Net line closes the decision
+- [ ] You are calling the tool, not writing prose
+
+If you'd need to read the source to understand your own explanation, it's
+too complex — simplify before emitting.
+
+Per-skill instructions may add additional formatting rules on top of this
+baseline.
 
 ## Model-Specific Behavioral Patch (claude)
 
diff --git a/test/skill-e2e-plan-format.test.ts b/test/skill-e2e-plan-format.test.ts
index da1a1102..0532ca24 100644
--- a/test/skill-e2e-plan-format.test.ts
+++ b/test/skill-e2e-plan-format.test.ts
@@ -35,10 +35,25 @@ const evalCollector = createEvalCollector('e2e-plan-format');
 // Regex predicates applied to captured AskUserQuestion content.
 // RECOMMENDATION regex is lenient on intervening markdown markers (e.g.
 // agent writes `**RECOMMENDATION:** Choose` — the `**` closers are benign).
-const RECOMMENDATION_RE = /RECOMMENDATION:[*\s]*Choose/;
+// Post v1.7.0.0: "Recommendation:" (mixed-case) is the canonical form per
+// the Pros/Cons format; accept both cases for backward compatibility.
+const RECOMMENDATION_RE = /[Rr]ecommendation:[*\s]*Choose/;
 const COMPLETENESS_RE = /Completeness:\s*\d{1,2}\/10/;
 const KIND_NOTE_RE = /options differ in kind/i;
 
+// v1.7.0.0 Pros/Cons format tokens. Tests are additive: existing
+// RECOMMENDATION / Completeness / kind-note assertions still hold; new
+// format tokens are asserted ONLY when the capture is from a v1.7+
+// skill rendering. Presence is optional for backward compatibility during
+// rollout; the periodic-tier cadence+format eval (see skill-e2e-plan-cadence)
+// is the strict gate for the new format.
+const PROS_CONS_HEADER_RE = /Pros\s*\/\s*cons:/i;
+const PRO_BULLET_RE = /^\s*✅\s+\S/m;
+const CON_BULLET_RE = /^\s*❌\s+\S/m;
+const NET_LINE_RE = /^Net:\s+\S/m;
+const D_NUMBER_RE = /^D\d+\s+—/m;
+const STAKES_RE = /Stakes if we pick wrong:/i;
+
 const SAMPLE_PLAN = `# Plan: Add User Dashboard
 
 ## Context
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index ecbd81e5..625bc0a1 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -566,10 +566,21 @@ describe('v0.4.1 preamble features', () => {
   const skillsWithPreamble = [...tier1Skills, ...tier2PlusSkills];
 
   for (const skill of tier2PlusSkills) {
-    test(`${skill} contains RECOMMENDATION format`, () => {
+    test(`${skill} contains AskUserQuestion Pros/Cons format`, () => {
       const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
-      expect(content).toContain('RECOMMENDATION: Choose');
+      // v1.7.0.0 Pros/Cons format tokens. The preamble resolver
+      // (generate-ask-user-format.ts) injects all of these into every
+      // tier-2+ skill. Drop any of them and the test catches it on the
+      // next `bun test` run.
       expect(content).toContain('AskUserQuestion');
+      expect(content).toContain('Pros / cons:');
+      expect(content).toContain('Recommendation: <choice>');
+      expect(content).toContain('Net:');
+      expect(content).toContain('ELI10');
+      expect(content).toContain('Stakes if we pick wrong:');
+      // Concrete format markers must be documented in the resolver text
+      expect(content).toMatch(/✅/);
+      expect(content).toMatch(/❌/);
     });
   }