Merge remote-tracking branch 'origin/main' into garrytan/plan-flag-unresolved-issues

This commit is contained in:
Garry Tan
2026-06-07 22:49:09 -07:00
103 changed files with 5273 additions and 2281 deletions
+63 -8
View File
@@ -1,7 +1,25 @@
name: PR Title Sync
# WHY pull_request_target (not pull_request): the default GITHUB_TOKEN is
# READ-ONLY on fork PRs under `pull_request`, so the title-sync backstop could
# never `gh pr edit` a fork/agent PR. `pull_request_target` runs in the base-repo
# context with a write token, which fixes fork coverage.
#
# WHY this is SAFE (pull_request_target is the most dangerous trigger):
# - We check out the BASE repo (no `ref:`), so the only code we execute is
# trusted base-repo infra (bin/gstack-pr-title-rewrite.sh). We NEVER check
# out or run PR-head/fork code.
# - Every attacker-controlled PR field (title, head repo, head sha) arrives via
# `env:` and is referenced as a shell-quoted "$VAR". We NEVER inline a
# `${{ github.event.pull_request.* }}` expression inside the run: script
# (that would execute a crafted title as shell).
# - The PR-head VERSION is read as DATA via the API (raw media type), from the
# head repo at the head sha — never by checking out the head.
# test/pr-title-sync-workflow-safety.test.ts is the static tripwire for all of
# the above and fails CI if any of it regresses.
on:
pull_request:
pull_request_target:
types: [opened, synchronize, edited]
paths:
- 'VERSION'
@@ -19,25 +37,62 @@ jobs:
pull-requests: write
if: github.actor != 'github-actions[bot]'
steps:
- name: Checkout PR head
# Base repo only — trusted infra (the rewrite helper). No PR-head checkout.
- name: Checkout base repo (trusted)
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ${{ github.event.pull_request.head.sha }}
- name: Rewrite PR title to match VERSION
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUM: ${{ github.event.pull_request.number }}
# Attacker-controlled on fork PRs — env-only, never inlined into run:.
OLD_TITLE: ${{ github.event.pull_request.title }}
BASE_REPO: ${{ github.repository }}
HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
run: |
set -euo pipefail
chmod +x ./bin/gstack-pr-title-rewrite.sh
VERSION=$(cat VERSION | tr -d '[:space:]')
NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE")
if [ "$NEW_TITLE" = "$OLD_TITLE" ]; then
echo "Title already correct; no change."
if [ "$HEAD_REPO" = "$BASE_REPO" ]; then IS_FORK=0; else IS_FORK=1; fi
# Read the PR-head VERSION as data (raw bytes), from the head repo at
# the head sha. Guard the assignment itself: under `set -e` a bare
# `VERSION=$(...)` would abort the step before any later [ -z ] check.
if ! VERSION=$(gh api -H "Accept: application/vnd.github.raw" \
"repos/$HEAD_REPO/contents/VERSION?ref=$HEAD_SHA" 2>/dev/null | tr -d '[:space:]'); then
VERSION=""
fi
if [ -z "$VERSION" ]; then
# Same-repo read failure should never happen — fail loudly so we
# notice. A fork miss (public-contents quirk, private fork) is a
# convenience gap, not a gate — warn and skip so the check stays green.
if [ "$IS_FORK" = "0" ]; then
echo "::error::Could not read VERSION from same-repo PR head ($HEAD_SHA)."
exit 1
fi
echo "::warning::Could not read VERSION from fork $HEAD_REPO ($HEAD_SHA); skipping title sync."
exit 0
fi
# The helper rejects a malformed VERSION (exit 2). Same policy: loud for
# same-repo, soft for forks. Never echo the raw (attacker-controlled)
# title — Actions still parses ::workflow-command:: from stdout.
if ! NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE"); then
if [ "$IS_FORK" = "0" ]; then
echo "::error::Could not compute title for VERSION '$VERSION' on PR #$PR_NUM."
exit 1
fi
echo "::warning::Could not compute title for fork PR #$PR_NUM; skipping."
exit 0
fi
if [ "$NEW_TITLE" = "$OLD_TITLE" ]; then
echo "PR #$PR_NUM title already correct; no change."
exit 0
fi
echo "Rewriting: $OLD_TITLE -> $NEW_TITLE"
gh pr edit "$PR_NUM" --title "$NEW_TITLE"
echo "PR #$PR_NUM title synced to VERSION."
+179
View File
@@ -1,5 +1,184 @@
# Changelog
## [1.57.3.0] - 2026-06-07
## **Every PR `/ship` opens gets the version stamped into its title, fork and agent PRs included.**
## **The rule rides in the always-loaded part of the skill now, and a guard keeps it there.**
`/ship` stamps `vX.Y.Z.W` onto the title of every PR or MR it creates or updates, so
the version is the first thing you read in the PR list. That rule now lives in the
always-loaded core of the ship skill instead of an on-demand section, so the agent
applies it whether or not it opened the section that spells out the full procedure.
A CI workflow backs this up: it rewrites a title to match VERSION on every PR that
bumps the version, and it now reaches fork and agent PRs too, which a read-only token
could never touch before. Two free tests lock the behavior in so it cannot drift on
the next refactor.
### The numbers that matter
Reproduce with `bun test test/carve-section-ordering.test.ts test/pr-title-sync-workflow-safety.test.ts`
and `bun run eval:select`.
| Property | Before | After |
|---|---|---|
| Where the title rule loads | on-demand section only (since v1.54.0.0) | always-loaded skeleton + on-demand detail |
| Fork / agent PR title sync | none (read-only token under `pull_request`) | covered via hardened `pull_request_target` |
| Test proving the rule stays put | none | carve-guard registry asserts it on every PR |
| CI injection guard for the title workflow | none | static tripwire fails CI on unsafe patterns |
The title workflow now runs with a write token in the base-repo context but never
checks out or executes PR-head code, and every attacker-controlled field reaches the
script through `env:`, never inlined. A static test fails CI if either rule regresses.
### What this means for you
Ship a branch and the PR shows up titled `v1.57.3.0 fix: ...` without you touching it,
even when the PR came from a fork. The agent no longer needs to read the right section
at the right moment for the version to land in the title, and the next person who slims
the ship skill cannot quietly strand the rule again, because a free test on every PR
checks that it is still there.
### Itemized changes
#### Added
- Carve-guard coverage for the ship PR-title invariant: the registry now asserts the
`v$NEW_VERSION` rule and the title helper stay in the always-loaded skeleton, while
the full create and update procedure stays in the on-demand section.
- Static CI-safety test for the title-sync workflow that fails the build if it checks
out PR-head code or inlines an attacker-controlled PR field into a shell step.
#### Changed
- The PR/MR title-version rule is always-loaded in `/ship` again, so the version
prefix lands on every PR the workflow creates or updates.
- The PR title-sync CI workflow now covers fork and agent PRs through a hardened
`pull_request_target` trigger (base-repo checkout only, PR fields passed via `env:`,
VERSION read as data from the PR head).
#### Fixed
- A path token in the ship PR-body section that rendered literally instead of resolving
now uses the correct helper path, so the Linked Spec auto-detect step runs as written.
## [1.57.2.0] - 2026-06-08
## **When the question picker breaks mid-skill, gstack asks in plain text instead of stalling.**
## **Every skill detects a dead AskUserQuestion and falls back to a full decision brief you answer by typing a letter.**
AskUserQuestion is how every gstack skill asks you to decide. When the host's question
tool fails at runtime, which Conductor's MCP integration currently does intermittently,
skills used to stall or hard-block. Now each skill detects the failure, works out
whether a human is actually present, and if so re-renders the exact same decision as a
text message: a plain-English explanation of the issue, a completeness score on each
choice, and a recommendation with its reason, one paragraph per choice. You answer by
typing a single letter. Headless eval runs still block cleanly (no human to answer);
orchestrator sessions keep auto-choosing. This whole release was built and reviewed
through that fallback, because the Conductor tool was down the entire session.
### The numbers that matter
No production benchmark for a reliability path like this. These are the behavior and
coverage facts, verifiable with `bun test test/gstack-session-kind.test.ts
test/resolver-ask-user-format.test.ts test/auq-error-fallback-hook.test.ts`.
| When AskUserQuestion fails | Before | After |
|---|---|---|
| Interactive session (human present) | stall / hard BLOCK | full prose decision brief, answer by letter |
| Headless eval / CI | BLOCK | BLOCK (unchanged, correct) |
| Orchestrator (OpenClaw) session | undefined | auto-choose recommended (contract kept) |
| Session kinds detected | 0 | 3 (interactive / headless / spawned) |
| New tests guarding the path | 0 | 34 |
The text brief is not a degraded stub. It carries the same three things the picker
shows: a clear explanation of what is being decided, a `Completeness: X/10` on every
choice, and a recommendation with the reason it wins.
### What this means for you
If your host's question tool flakes out, a skill no longer dies on you. You get the
same decision to make, in text, and you reply with a letter. Nothing changes when the
tool works normally. If you run gstack headless, those sessions still block on a needed
question exactly as before, so eval determinism is intact.
### Itemized changes
#### Added
- `gstack-session-kind` classifies each session as interactive, headless, or spawned,
echoed as `SESSION_KIND` at skill start so any skill can branch on it.
- Plain-text fallback for AskUserQuestion: on a tool failure in an interactive session,
the skill renders the full decision brief (issue ELI10 + per-choice completeness +
recommendation) as markdown you answer by typing a letter, then stops and waits.
- A defensive hook that, when an AskUserQuestion call errors, reminds the agent to run
the fallback for the current session kind.
#### Changed
- AskUserQuestion is still sent as a normal tool call; the prose path applies only when
the tool is unavailable or erroring, and never on a `[plan-tune auto-decide]` result.
#### Fixed
- Section-loading tests use the canonical kebab test names, so the test-coverage gate
matches them.
- External-host doc-freshness checks are deterministic, no longer dependent on a prior
full regeneration.
#### For contributors
- The eval/E2E runners set `GSTACK_HEADLESS=1` so headless runs classify correctly;
interactive-path suites opt out per-run.
- Per-skill `maxSizeRatio` override in the carve-guards registry; `document-release`
gets 1.08 headroom for the cross-cutting preamble addition while every other skill
keeps the 1.05 ceiling.
## [1.57.0.0] - 2026-06-07
## **Three more heavyweight skills load lighter, and every carved skill finally has a test that proves it loads.**
## **`/cso`, `/document-release`, and `/design-consultation` shed ~49KB of always-loaded prose; CI now blocks any carve that ships without its guards.**
gstack splits its biggest skills into a small always-loaded skeleton plus on-demand
sections that load only when a step needs them. This release carves three more,
`/document-release`, `/design-consultation`, and `/cso`, so the first time you invoke
them the agent reads far less. It also closes a gap from the earlier carves: only two
of six already-carved skills had a test proving an agent actually reads the section it
was told to read. Now all nine carved skills are guarded the same way, and CI blocks
any future carve that ships without its guards. `/cso` got extra care: its mode
dispatch and false-positive-filtering rules stay always-loaded, so a security audit
can never run with a rule stranded in an unread section.
### The numbers that matter
Measured with `wc -c <skill>/SKILL.md`; the skeleton+sections union is reproduced by
`bun test test/parity-suite.test.ts test/skill-size-budget.test.ts`.
| Skill | Always-loaded before | After | Δ |
|---|---|---|---|
| /design-consultation | 80,719 B | 59,229 B | **27%** |
| /document-release | 59,256 B | 45,797 B | **23%** |
| /cso | 79,383 B | 65,117 B | **18%** |
| Carved skills with a section-load guard | 2 of 6 | 9 of 9 | **full coverage** |
Total always-loaded prose across the three skills drops about 49KB (~12K tokens) on
first invoke, with nothing lost: every line moved into an on-demand section the
skeleton points at, and the parity suite checks the union still contains it.
### What this means for you
Run `/cso`, `/document-release`, or `/design-consultation` and the agent does less
reading before it starts working, so the session stays leaner. The carve pattern is
now safe to extend: a free static test runs on every PR and a behavioral test runs
weekly to prove the agent reads each section, so future slimming can't quietly drop
behavior. Nothing about how you invoke these skills changed.
### Itemized changes
#### Added
- Canonical carved-skill guard registry (`test/helpers/carve-guards.ts`): one source of truth for which skills are carved and what each must preserve. `parity-harness.ts` and `skill-size-budget.ts` derive their carved-skill lists from it.
- Carve guard suite: data-driven static ordering test, behavioral section-loading test (periodic), a completeness meta-guard that fails CI if a carved skill lacks its guards, and negative tests proving the guards actually fire.
- `/cso`, `/document-release`, and `/design-consultation` carved into skeleton + on-demand sections.
#### Changed
- `/cso` keeps its mode dispatch (`## Arguments`, `## Mode Resolution`), always-run phases, and false-positive-filtering exceptions always-loaded; an earliest-use invariant enforces that dispatch appears before any on-demand read.
#### For contributors
- Redaction, taxonomy, and parity content tests now read the skeleton+sections union so relocated prose still counts toward coverage.
- Real-session section-read canary deferred to TODOS (the deterministic guards ship first).
## [1.56.1.0] - 2026-06-03
## **`/sync-gbrain` can no longer delete your repo. Cleanup now refuses any directory it cannot prove it created.**
+4 -1
View File
@@ -45,6 +45,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -124,7 +127,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
+51
View File
@@ -2283,3 +2283,54 @@ into `test/helpers/fake-gbrain.ts` when the second consumer arrives
runs).
**Depends on:** None.
### P2: Real-session carve canary (E3, deferred from carve-guard plan)
**What:** Wire a real-session section-Read-miss canary on top of the
carved skills. When a real user session drives a carved skill and the
agent does NOT Read a section the skeleton's STOP directive pointed it
at, log it (salted, content-free) to
`~/.gstack/analytics/section-reads.jsonl` and surface drift via
`bun run eval:summary`. Non-blocking alert, never a merge gate
(real-session data is non-deterministic).
**Why:** The static (E2) + behavioral (T2) guards prove carves are
structurally sound and that a real agent Reads sections in a controlled
eval. They do NOT see production drift — a prompt-context change that
makes live agents start skipping a section. The canary is the only
mechanism that catches that, from real usage.
**Context:** Deferred from the carve-guard-hardening plan (D5→T2, codex
outside-voice #7). `test/helpers/transcript-section-logger.ts` exists but
is built for deterministic test transcripts + ship action fingerprints,
NOT real-session drift — it needs rework before it can back this. Ship
the deterministic guards first; add this once they've proven useful. The
carved-skill set + each skill's `requiredReads` are already declared in
`test/helpers/carve-guards.ts`, so the canary reads its expectations
from there.
**Effort:** M (human ~2d, CC ~4h).
**Depends on:** `transcript-section-logger.ts` real-session-drift rework.
### P2: Harden behavioral section-loading test hermeticity
**What:** `captureSectionReads` in `test/helpers/auq-sdk-capture.ts` accepts ANY
Read whose path matches `sections/<file>.md`. The skeleton's STOP-Read directive
points at the gstack-root install path (`scripts/resolvers/sections.ts` builds it
from `ctx.paths.skillRoot`), not the planted fixture copy. So a run can satisfy
the section-read assertion by reading the GLOBAL install's section instead of the
hermetic fixture.
**Why:** A behavioral test that passes by reading the global install doesn't prove
THIS branch's carved section loads. If the fixture's section were broken but the
global install's weren't, the test would still pass.
**Context:** Codex outside-voice finding on the carve-guard ship (v1.57.0.0).
Pre-existing in `auq-sdk-capture.ts` — affects `skill-e2e-ship-section-loading`,
`skill-e2e-plan-ceo-review-section-loading`, and the new
`carve-section-loading.test.ts`. Fix: match the fixture's ABSOLUTE sections path
(the `planDir` copy), not a bare `sections/<file>.md` regex; or rewrite the STOP
path to the fixture during the run.
**Effort:** S (human ~3h, CC ~30min). **Depends on:** None.
+1 -1
View File
@@ -1 +1 @@
1.56.1.0
1.57.3.0
+27 -4
View File
@@ -54,6 +54,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -133,7 +136,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -305,11 +308,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -389,7 +412,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+4 -1
View File
@@ -48,6 +48,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -127,7 +130,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
+4 -1
View File
@@ -48,6 +48,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -127,7 +130,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
+53
View File
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
# gstack-session-kind — classify the current agent session so skills know whether
# a human can answer an interactive prompt (AskUserQuestion).
#
# Usage: gstack-session-kind → prints one of: spawned | headless | interactive
#
# Used by the preamble (generate-preamble-bash.ts) which echoes
# SESSION_KIND: <value>
# so the AskUserQuestion-failure fallback rule can branch without a shell-out at
# failure time:
# spawned → orchestrator session (OpenClaw). Auto-choose recommended option
# per the skill's SPAWNED_SESSION block. Never prose, never BLOCKED.
# headless → no human present (claude -p evals / CI). BLOCK on AUQ failure.
# interactive → a human is present. Prose-fallback on AUQ failure.
#
# Detection is best-effort. On ANY ambiguity it prints `interactive` — BLOCK only on
# a positive headless signal, since a stray prose message in an unmarked one-shot
# `-p` run just ends the turn (harmless), whereas wrongly BLOCKING a real human is not.
#
# Why env vars and not TTY/entrypoint: an interactive Conductor session reports
# CLAUDE_CODE_ENTRYPOINT=sdk-ts with no TTY — identical to a headless SDK eval. The
# signals that actually discriminate are the host/orchestrator/CI env markers below.
set -euo pipefail
# 1. Orchestrator-spawned session (OpenClaw). Authoritative block lives in the skill;
# we only surface the classification.
if [ -n "${OPENCLAW_SESSION:-}" ]; then
echo "spawned"
exit 0
fi
# 2. Explicit headless override (set by the eval/E2E harness for determinism).
if [ -n "${GSTACK_HEADLESS:-}" ]; then
echo "headless"
exit 0
fi
# 3. Positive interactive-host signals: a human-driven host is present.
# - Conductor app sets CONDUCTOR_* workspace vars.
# - Plain interactive `claude` CLI sets CLAUDE_CODE_ENTRYPOINT=cli.
if [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ] || [ "${CLAUDE_CODE_ENTRYPOINT:-}" = "cli" ]; then
echo "interactive"
exit 0
fi
# 4. CI / automation markers with no interactive host → headless.
if [ -n "${CI:-}" ] || [ -n "${GITHUB_ACTIONS:-}" ]; then
echo "headless"
exit 0
fi
# 5. No positive headless signal → assume a human is present (degrade-safe default).
echo "interactive"
+4 -1
View File
@@ -46,6 +46,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -125,7 +128,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
+27 -4
View File
@@ -46,6 +46,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -125,7 +128,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -297,11 +300,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -381,7 +404,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -49,6 +49,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -128,7 +131,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -300,11 +303,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -384,7 +407,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -49,6 +49,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -128,7 +131,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -300,11 +303,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -384,7 +407,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+41 -253
View File
@@ -52,6 +52,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -131,7 +134,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -303,11 +306,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -387,7 +410,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
@@ -746,6 +769,18 @@ When the user types `/cso`, run this skill.
6. Phases 0, 1, 12, 13, 14 ALWAYS run regardless of scope flag.
7. If WebSearch is unavailable, skip checks that require it and note: "WebSearch unavailable — proceeding with local-only analysis."
---
## Section index — Read each section when its situation applies
This skill is a decision-tree skeleton. The steps below point to on-demand
sections. Read a section in full before doing its step; do not work from memory.
| When | Read this section |
|------|-------------------|
| running the scope-dependent audit phases (Phases 2-11) selected by the resolved mode, after the Phase 0 stack detection and Phase 1 attack-surface census | `sections/audit-phases.md` |
---
## Important: Use the Grep tool for all code searches
The bash blocks throughout this skill show WHAT patterns to search for, not HOW to run them. Use Claude Code's Grep tool (which handles permissions and access correctly) rather than raw bash grep. The bash blocks are illustrative examples — do NOT copy-paste them into a terminal. Do NOT use `| head` to truncate results.
@@ -870,255 +905,8 @@ INFRASTRUCTURE SURFACE
Secret management: [env vars | KMS | vault | unknown]
```
### Phase 2: Secrets Archaeology
Scan git history for leaked credentials, check tracked `.env` files, find CI configs with inline secrets.
**Canonical pattern catalog.** The HIGH-tier credential prefixes the archaeology
greps below target (AKIA, ghp_, sk-ant-, sk_live_, xoxb-, `-----BEGIN ... PRIVATE
KEY-----`, etc.) are the same set `/spec`'s in-flight redaction blocks on. The full
3-tier taxonomy (HIGH credentials, MEDIUM PII/legal/internal, LOW) is generated from
and lives in `lib/redact-patterns.ts` — the single source of truth shared by the
`gstack-redact` engine, `/spec`, `/ship`, and the `/document-*` skills.
**Git history — known secret prefixes:**
```bash
git log -p --all -S "AKIA" --diff-filter=A -- "*.env" "*.yml" "*.yaml" "*.json" "*.toml" 2>/dev/null
git log -p --all -S "sk-" --diff-filter=A -- "*.env" "*.yml" "*.json" "*.ts" "*.js" "*.py" 2>/dev/null
git log -p --all -G "ghp_|gho_|github_pat_" 2>/dev/null
git log -p --all -G "xoxb-|xoxp-|xapp-" 2>/dev/null
git log -p --all -G "password|secret|token|api_key" -- "*.env" "*.yml" "*.json" "*.conf" 2>/dev/null
```
**.env files tracked by git:**
```bash
git ls-files '*.env' '.env.*' 2>/dev/null | grep -v '.example\|.sample\|.template'
grep -q "^\.env$\|^\.env\.\*" .gitignore 2>/dev/null && echo ".env IS gitignored" || echo "WARNING: .env NOT in .gitignore"
```
**CI configs with inline secrets (not using secret stores):**
```bash
for f in $(find .github/workflows -maxdepth 1 \( -name '*.yml' -o -name '*.yaml' \) 2>/dev/null) .gitlab-ci.yml .circleci/config.yml; do
[ -f "$f" ] && grep -n "password:\|token:\|secret:\|api_key:" "$f" | grep -v '\${{' | grep -v 'secrets\.'
done 2>/dev/null
```
**Severity:** CRITICAL for active secret patterns in git history (AKIA, sk_live_, ghp_, xoxb-). HIGH for .env tracked by git, CI configs with inline credentials. MEDIUM for suspicious .env.example values.
**FP rules:** Placeholders ("your_", "changeme", "TODO") excluded. Test fixtures excluded unless same value in non-test code. Rotated secrets still flagged (they were exposed). `.env.local` in `.gitignore` is expected.
**Diff mode:** Replace `git log -p --all` with `git log -p <base>..HEAD`.
### Phase 3: Dependency Supply Chain
Goes beyond `npm audit`. Checks actual supply chain risk.
**Package manager detection:**
```bash
[ -f package.json ] && echo "DETECTED: npm/yarn/bun"
[ -f Gemfile ] && echo "DETECTED: bundler"
[ -f requirements.txt ] || [ -f pyproject.toml ] && echo "DETECTED: pip"
[ -f Cargo.toml ] && echo "DETECTED: cargo"
[ -f go.mod ] && echo "DETECTED: go"
```
**Standard vulnerability scan:** Run whichever package manager's audit tool is available. Each tool is optional — if not installed, note it in the report as "SKIPPED — tool not installed" with install instructions. This is informational, NOT a finding. The audit continues with whatever tools ARE available.
**Install scripts in production deps (supply chain attack vector):** For Node.js projects with hydrated `node_modules`, check production dependencies for `preinstall`, `postinstall`, or `install` scripts.
**Lockfile integrity:** Check that lockfiles exist AND are tracked by git.
**Severity:** CRITICAL for known CVEs (high/critical) in direct deps. HIGH for install scripts in prod deps / missing lockfile. MEDIUM for abandoned packages / medium CVEs / lockfile not tracked.
**FP rules:** devDependency CVEs are MEDIUM max. `node-gyp`/`cmake` install scripts expected (MEDIUM not HIGH). No-fix-available advisories without known exploits excluded. Missing lockfile for library repos (not apps) is NOT a finding.
### Phase 4: CI/CD Pipeline Security
Check who can modify workflows and what secrets they can access.
**GitHub Actions analysis:** For each workflow file, check for:
- Unpinned third-party actions (not SHA-pinned) — use Grep for `uses:` lines missing `@[sha]`
- `pull_request_target` (dangerous: fork PRs get write access)
- Script injection via `${{ github.event.* }}` in `run:` steps
- Secrets as env vars (could leak in logs)
- CODEOWNERS protection on workflow files
**Severity:** CRITICAL for `pull_request_target` + checkout of PR code / script injection via `${{ github.event.*.body }}` in `run:` steps. HIGH for unpinned third-party actions / secrets as env vars without masking. MEDIUM for missing CODEOWNERS on workflow files.
**FP rules:** First-party `actions/*` unpinned = MEDIUM not HIGH. `pull_request_target` without PR ref checkout is safe (precedent #11). Secrets in `with:` blocks (not `env:`/`run:`) are handled by runtime.
### Phase 5: Infrastructure Shadow Surface
Find shadow infrastructure with excessive access.
**Dockerfiles:** For each Dockerfile, check for missing `USER` directive (runs as root), secrets passed as `ARG`, `.env` files copied into images, exposed ports.
**Config files with prod credentials:** Use Grep to search for database connection strings (postgres://, mysql://, mongodb://, redis://) in config files, excluding localhost/127.0.0.1/example.com. Check for staging/dev configs referencing prod.
**IaC security:** For Terraform files, check for `"*"` in IAM actions/resources, hardcoded secrets in `.tf`/`.tfvars`. For K8s manifests, check for privileged containers, hostNetwork, hostPID.
**Severity:** CRITICAL for prod DB URLs with credentials in committed config / `"*"` IAM on sensitive resources / secrets baked into Docker images. HIGH for root containers in prod / staging with prod DB access / privileged K8s. MEDIUM for missing USER directive / exposed ports without documented purpose.
**FP rules:** `docker-compose.yml` for local dev with localhost = not a finding (precedent #12). Terraform `"*"` in `data` sources (read-only) excluded. K8s manifests in `test/`/`dev/`/`local/` with localhost networking excluded.
### Phase 6: Webhook & Integration Audit
Find inbound endpoints that accept anything.
**Webhook routes:** Use Grep to find files containing webhook/hook/callback route patterns. For each file, check whether it also contains signature verification (signature, hmac, verify, digest, x-hub-signature, stripe-signature, svix). Files with webhook routes but NO signature verification are findings.
**TLS verification disabled:** Use Grep to search for patterns like `verify.*false`, `VERIFY_NONE`, `InsecureSkipVerify`, `NODE_TLS_REJECT_UNAUTHORIZED.*0`.
**OAuth scope analysis:** Use Grep to find OAuth configurations and check for overly broad scopes.
**Verification approach (code-tracing only — NO live requests):** For webhook findings, trace the handler code to determine if signature verification exists anywhere in the middleware chain (parent router, middleware stack, API gateway config). Do NOT make actual HTTP requests to webhook endpoints.
**Severity:** CRITICAL for webhooks without any signature verification. HIGH for TLS verification disabled in prod code / overly broad OAuth scopes. MEDIUM for undocumented outbound data flows to third parties.
**FP rules:** TLS disabled in test code excluded. Internal service-to-service webhooks on private networks = MEDIUM max. Webhook endpoints behind API gateway that handles signature verification upstream are NOT findings — but require evidence.
### Phase 7: LLM & AI Security
Check for AI/LLM-specific vulnerabilities. This is a new attack class.
Use Grep to search for these patterns:
- **Prompt injection vectors:** User input flowing into system prompts or tool schemas — look for string interpolation near system prompt construction
- **Unsanitized LLM output:** `dangerouslySetInnerHTML`, `v-html`, `innerHTML`, `.html()`, `raw()` rendering LLM responses
- **Tool/function calling without validation:** `tool_choice`, `function_call`, `tools=`, `functions=`
- **AI API keys in code (not env vars):** `sk-` patterns, hardcoded API key assignments
- **Eval/exec of LLM output:** `eval()`, `exec()`, `Function()`, `new Function` processing AI responses
**Key checks (beyond grep):**
- Trace user content flow — does it enter system prompts or tool schemas?
- RAG poisoning: can external documents influence AI behavior via retrieval?
- Tool calling permissions: are LLM tool calls validated before execution?
- Output sanitization: is LLM output treated as trusted (rendered as HTML, executed as code)?
- Cost/resource attacks: can a user trigger unbounded LLM calls?
**Severity:** CRITICAL for user input in system prompts / unsanitized LLM output rendered as HTML / eval of LLM output. HIGH for missing tool call validation / exposed AI API keys. MEDIUM for unbounded LLM calls / RAG without input validation.
**FP rules:** User content in the user-message position of an AI conversation is NOT prompt injection (precedent #13). Only flag when user content enters system prompts, tool schemas, or function-calling contexts.
### Phase 8: Skill Supply Chain
Scan installed Claude Code skills for malicious patterns. 36% of published skills have security flaws, 13.4% are outright malicious (Snyk ToxicSkills research).
**Tier 1 — repo-local (automatic):** Scan the repo's local skills directory for suspicious patterns:
```bash
ls -la .claude/skills/ 2>/dev/null
```
Use Grep to search all local skill SKILL.md files for suspicious patterns:
- `curl`, `wget`, `fetch`, `http`, `exfiltrat` (network exfiltration)
- `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `env.`, `process.env` (credential access)
- `IGNORE PREVIOUS`, `system override`, `disregard`, `forget your instructions` (prompt injection)
**Tier 2 — global skills (requires permission):** Before scanning globally installed skills or user settings, use AskUserQuestion:
"Phase 8 can scan your globally installed AI coding agent skills and hooks for malicious patterns. This reads files outside the repo. Want to include this?"
Options: A) Yes — scan global skills too B) No — repo-local only
If approved, run the same Grep patterns on globally installed skill files and check hooks in user settings.
**Severity:** CRITICAL for credential exfiltration attempts / prompt injection in skill files. HIGH for suspicious network calls / overly broad tool permissions. MEDIUM for skills from unverified sources without review.
**FP rules:** gstack's own skills are trusted (check if skill path resolves to a known repo). Skills that use `curl` for legitimate purposes (downloading tools, health checks) need context — only flag when the target URL is suspicious or when the command includes credential variables.
### Phase 9: OWASP Top 10 Assessment
For each OWASP category, perform targeted analysis. Use the Grep tool for all searches — scope file extensions to detected stacks from Phase 0.
#### A01: Broken Access Control
- Check for missing auth on controllers/routes (skip_before_action, skip_authorization, public, no_auth)
- Check for direct object reference patterns (params[:id], req.params.id, request.args.get)
- Can user A access user B's resources by changing IDs?
- Is there horizontal/vertical privilege escalation?
#### A02: Cryptographic Failures
- Weak crypto (MD5, SHA1, DES, ECB) or hardcoded secrets
- Is sensitive data encrypted at rest and in transit?
- Are keys/secrets properly managed (env vars, not hardcoded)?
#### A03: Injection
- SQL injection: raw queries, string interpolation in SQL
- Command injection: system(), exec(), spawn(), popen
- Template injection: render with params, eval(), html_safe, raw()
- LLM prompt injection: see Phase 7 for comprehensive coverage
#### A04: Insecure Design
- Rate limits on authentication endpoints?
- Account lockout after failed attempts?
- Business logic validated server-side?
#### A05: Security Misconfiguration
- CORS configuration (wildcard origins in production?)
- CSP headers present?
- Debug mode / verbose errors in production?
#### A06: Vulnerable and Outdated Components
See **Phase 3 (Dependency Supply Chain)** for comprehensive component analysis.
#### A07: Identification and Authentication Failures
- Session management: creation, storage, invalidation
- Password policy: complexity, rotation, breach checking
- MFA: available? enforced for admin?
- Token management: JWT expiration, refresh rotation
#### A08: Software and Data Integrity Failures
See **Phase 4 (CI/CD Pipeline Security)** for pipeline protection analysis.
- Deserialization inputs validated?
- Integrity checking on external data?
#### A09: Security Logging and Monitoring Failures
- Authentication events logged?
- Authorization failures logged?
- Admin actions audit-trailed?
- Logs protected from tampering?
#### A10: Server-Side Request Forgery (SSRF)
- URL construction from user input?
- Internal service reachability from user-controlled URLs?
- Allowlist/blocklist enforcement on outbound requests?
### Phase 10: STRIDE Threat Model
For each major component identified in Phase 0, evaluate:
```
COMPONENT: [Name]
Spoofing: Can an attacker impersonate a user/service?
Tampering: Can data be modified in transit/at rest?
Repudiation: Can actions be denied? Is there an audit trail?
Information Disclosure: Can sensitive data leak?
Denial of Service: Can the component be overwhelmed?
Elevation of Privilege: Can a user gain unauthorized access?
```
### Phase 11: Data Classification
Classify all data handled by the application:
```
DATA CLASSIFICATION
═══════════════════
RESTRICTED (breach = legal liability):
- Passwords/credentials: [where stored, how protected]
- Payment data: [where stored, PCI compliance status]
- PII: [what types, where stored, retention policy]
CONFIDENTIAL (breach = business damage):
- API keys: [where stored, rotation policy]
- Business logic: [trade secrets in code?]
- User behavior data: [analytics, tracking]
INTERNAL (breach = embarrassment):
- System logs: [what they contain, who can access]
- Configuration: [what's exposed in error messages]
PUBLIC:
- Marketing content, documentation, public APIs
```
> **STOP.** Before running the scope-dependent audit phases (Phases 2-11) selected by the resolved mode, after the Phase 0 stack detection and Phase 1 attack-surface census, Read `~/.claude/skills/gstack/cso/sections/audit-phases.md` and execute it
> in full. Do not work from memory — that section is the source of truth for this step.
### Phase 12: False Positive Filtering + Active Verification
Before producing findings, run every candidate through this filter.
+6 -249
View File
@@ -67,6 +67,11 @@ When the user types `/cso`, run this skill.
6. Phases 0, 1, 12, 13, 14 ALWAYS run regardless of scope flag.
7. If WebSearch is unavailable, skip checks that require it and note: "WebSearch unavailable — proceeding with local-only analysis."
---
{{SECTION_INDEX:cso}}
---
## Important: Use the Grep tool for all code searches
The bash blocks throughout this skill show WHAT patterns to search for, not HOW to run them. Use Claude Code's Grep tool (which handles permissions and access correctly) rather than raw bash grep. The bash blocks are illustrative examples — do NOT copy-paste them into a terminal. Do NOT use `| head` to truncate results.
@@ -155,255 +160,7 @@ INFRASTRUCTURE SURFACE
Secret management: [env vars | KMS | vault | unknown]
```
### Phase 2: Secrets Archaeology
Scan git history for leaked credentials, check tracked `.env` files, find CI configs with inline secrets.
**Canonical pattern catalog.** The HIGH-tier credential prefixes the archaeology
greps below target (AKIA, ghp_, sk-ant-, sk_live_, xoxb-, `-----BEGIN ... PRIVATE
KEY-----`, etc.) are the same set `/spec`'s in-flight redaction blocks on. The full
3-tier taxonomy (HIGH credentials, MEDIUM PII/legal/internal, LOW) is generated from
and lives in `lib/redact-patterns.ts` — the single source of truth shared by the
`gstack-redact` engine, `/spec`, `/ship`, and the `/document-*` skills.
**Git history — known secret prefixes:**
```bash
git log -p --all -S "AKIA" --diff-filter=A -- "*.env" "*.yml" "*.yaml" "*.json" "*.toml" 2>/dev/null
git log -p --all -S "sk-" --diff-filter=A -- "*.env" "*.yml" "*.json" "*.ts" "*.js" "*.py" 2>/dev/null
git log -p --all -G "ghp_|gho_|github_pat_" 2>/dev/null
git log -p --all -G "xoxb-|xoxp-|xapp-" 2>/dev/null
git log -p --all -G "password|secret|token|api_key" -- "*.env" "*.yml" "*.json" "*.conf" 2>/dev/null
```
**.env files tracked by git:**
```bash
git ls-files '*.env' '.env.*' 2>/dev/null | grep -v '.example\|.sample\|.template'
grep -q "^\.env$\|^\.env\.\*" .gitignore 2>/dev/null && echo ".env IS gitignored" || echo "WARNING: .env NOT in .gitignore"
```
**CI configs with inline secrets (not using secret stores):**
```bash
for f in $(find .github/workflows -maxdepth 1 \( -name '*.yml' -o -name '*.yaml' \) 2>/dev/null) .gitlab-ci.yml .circleci/config.yml; do
[ -f "$f" ] && grep -n "password:\|token:\|secret:\|api_key:" "$f" | grep -v '\${{' | grep -v 'secrets\.'
done 2>/dev/null
```
**Severity:** CRITICAL for active secret patterns in git history (AKIA, sk_live_, ghp_, xoxb-). HIGH for .env tracked by git, CI configs with inline credentials. MEDIUM for suspicious .env.example values.
**FP rules:** Placeholders ("your_", "changeme", "TODO") excluded. Test fixtures excluded unless same value in non-test code. Rotated secrets still flagged (they were exposed). `.env.local` in `.gitignore` is expected.
**Diff mode:** Replace `git log -p --all` with `git log -p <base>..HEAD`.
### Phase 3: Dependency Supply Chain
Goes beyond `npm audit`. Checks actual supply chain risk.
**Package manager detection:**
```bash
[ -f package.json ] && echo "DETECTED: npm/yarn/bun"
[ -f Gemfile ] && echo "DETECTED: bundler"
[ -f requirements.txt ] || [ -f pyproject.toml ] && echo "DETECTED: pip"
[ -f Cargo.toml ] && echo "DETECTED: cargo"
[ -f go.mod ] && echo "DETECTED: go"
```
**Standard vulnerability scan:** Run whichever package manager's audit tool is available. Each tool is optional — if not installed, note it in the report as "SKIPPED — tool not installed" with install instructions. This is informational, NOT a finding. The audit continues with whatever tools ARE available.
**Install scripts in production deps (supply chain attack vector):** For Node.js projects with hydrated `node_modules`, check production dependencies for `preinstall`, `postinstall`, or `install` scripts.
**Lockfile integrity:** Check that lockfiles exist AND are tracked by git.
**Severity:** CRITICAL for known CVEs (high/critical) in direct deps. HIGH for install scripts in prod deps / missing lockfile. MEDIUM for abandoned packages / medium CVEs / lockfile not tracked.
**FP rules:** devDependency CVEs are MEDIUM max. `node-gyp`/`cmake` install scripts expected (MEDIUM not HIGH). No-fix-available advisories without known exploits excluded. Missing lockfile for library repos (not apps) is NOT a finding.
### Phase 4: CI/CD Pipeline Security
Check who can modify workflows and what secrets they can access.
**GitHub Actions analysis:** For each workflow file, check for:
- Unpinned third-party actions (not SHA-pinned) — use Grep for `uses:` lines missing `@[sha]`
- `pull_request_target` (dangerous: fork PRs get write access)
- Script injection via `${{ github.event.* }}` in `run:` steps
- Secrets as env vars (could leak in logs)
- CODEOWNERS protection on workflow files
**Severity:** CRITICAL for `pull_request_target` + checkout of PR code / script injection via `${{ github.event.*.body }}` in `run:` steps. HIGH for unpinned third-party actions / secrets as env vars without masking. MEDIUM for missing CODEOWNERS on workflow files.
**FP rules:** First-party `actions/*` unpinned = MEDIUM not HIGH. `pull_request_target` without PR ref checkout is safe (precedent #11). Secrets in `with:` blocks (not `env:`/`run:`) are handled by runtime.
### Phase 5: Infrastructure Shadow Surface
Find shadow infrastructure with excessive access.
**Dockerfiles:** For each Dockerfile, check for missing `USER` directive (runs as root), secrets passed as `ARG`, `.env` files copied into images, exposed ports.
**Config files with prod credentials:** Use Grep to search for database connection strings (postgres://, mysql://, mongodb://, redis://) in config files, excluding localhost/127.0.0.1/example.com. Check for staging/dev configs referencing prod.
**IaC security:** For Terraform files, check for `"*"` in IAM actions/resources, hardcoded secrets in `.tf`/`.tfvars`. For K8s manifests, check for privileged containers, hostNetwork, hostPID.
**Severity:** CRITICAL for prod DB URLs with credentials in committed config / `"*"` IAM on sensitive resources / secrets baked into Docker images. HIGH for root containers in prod / staging with prod DB access / privileged K8s. MEDIUM for missing USER directive / exposed ports without documented purpose.
**FP rules:** `docker-compose.yml` for local dev with localhost = not a finding (precedent #12). Terraform `"*"` in `data` sources (read-only) excluded. K8s manifests in `test/`/`dev/`/`local/` with localhost networking excluded.
### Phase 6: Webhook & Integration Audit
Find inbound endpoints that accept anything.
**Webhook routes:** Use Grep to find files containing webhook/hook/callback route patterns. For each file, check whether it also contains signature verification (signature, hmac, verify, digest, x-hub-signature, stripe-signature, svix). Files with webhook routes but NO signature verification are findings.
**TLS verification disabled:** Use Grep to search for patterns like `verify.*false`, `VERIFY_NONE`, `InsecureSkipVerify`, `NODE_TLS_REJECT_UNAUTHORIZED.*0`.
**OAuth scope analysis:** Use Grep to find OAuth configurations and check for overly broad scopes.
**Verification approach (code-tracing only — NO live requests):** For webhook findings, trace the handler code to determine if signature verification exists anywhere in the middleware chain (parent router, middleware stack, API gateway config). Do NOT make actual HTTP requests to webhook endpoints.
**Severity:** CRITICAL for webhooks without any signature verification. HIGH for TLS verification disabled in prod code / overly broad OAuth scopes. MEDIUM for undocumented outbound data flows to third parties.
**FP rules:** TLS disabled in test code excluded. Internal service-to-service webhooks on private networks = MEDIUM max. Webhook endpoints behind API gateway that handles signature verification upstream are NOT findings — but require evidence.
### Phase 7: LLM & AI Security
Check for AI/LLM-specific vulnerabilities. This is a new attack class.
Use Grep to search for these patterns:
- **Prompt injection vectors:** User input flowing into system prompts or tool schemas — look for string interpolation near system prompt construction
- **Unsanitized LLM output:** `dangerouslySetInnerHTML`, `v-html`, `innerHTML`, `.html()`, `raw()` rendering LLM responses
- **Tool/function calling without validation:** `tool_choice`, `function_call`, `tools=`, `functions=`
- **AI API keys in code (not env vars):** `sk-` patterns, hardcoded API key assignments
- **Eval/exec of LLM output:** `eval()`, `exec()`, `Function()`, `new Function` processing AI responses
**Key checks (beyond grep):**
- Trace user content flow — does it enter system prompts or tool schemas?
- RAG poisoning: can external documents influence AI behavior via retrieval?
- Tool calling permissions: are LLM tool calls validated before execution?
- Output sanitization: is LLM output treated as trusted (rendered as HTML, executed as code)?
- Cost/resource attacks: can a user trigger unbounded LLM calls?
**Severity:** CRITICAL for user input in system prompts / unsanitized LLM output rendered as HTML / eval of LLM output. HIGH for missing tool call validation / exposed AI API keys. MEDIUM for unbounded LLM calls / RAG without input validation.
**FP rules:** User content in the user-message position of an AI conversation is NOT prompt injection (precedent #13). Only flag when user content enters system prompts, tool schemas, or function-calling contexts.
### Phase 8: Skill Supply Chain
Scan installed Claude Code skills for malicious patterns. 36% of published skills have security flaws, 13.4% are outright malicious (Snyk ToxicSkills research).
**Tier 1 — repo-local (automatic):** Scan the repo's local skills directory for suspicious patterns:
```bash
ls -la .claude/skills/ 2>/dev/null
```
Use Grep to search all local skill SKILL.md files for suspicious patterns:
- `curl`, `wget`, `fetch`, `http`, `exfiltrat` (network exfiltration)
- `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `env.`, `process.env` (credential access)
- `IGNORE PREVIOUS`, `system override`, `disregard`, `forget your instructions` (prompt injection)
**Tier 2 — global skills (requires permission):** Before scanning globally installed skills or user settings, use AskUserQuestion:
"Phase 8 can scan your globally installed AI coding agent skills and hooks for malicious patterns. This reads files outside the repo. Want to include this?"
Options: A) Yes — scan global skills too B) No — repo-local only
If approved, run the same Grep patterns on globally installed skill files and check hooks in user settings.
**Severity:** CRITICAL for credential exfiltration attempts / prompt injection in skill files. HIGH for suspicious network calls / overly broad tool permissions. MEDIUM for skills from unverified sources without review.
**FP rules:** gstack's own skills are trusted (check if skill path resolves to a known repo). Skills that use `curl` for legitimate purposes (downloading tools, health checks) need context — only flag when the target URL is suspicious or when the command includes credential variables.
### Phase 9: OWASP Top 10 Assessment
For each OWASP category, perform targeted analysis. Use the Grep tool for all searches — scope file extensions to detected stacks from Phase 0.
#### A01: Broken Access Control
- Check for missing auth on controllers/routes (skip_before_action, skip_authorization, public, no_auth)
- Check for direct object reference patterns (params[:id], req.params.id, request.args.get)
- Can user A access user B's resources by changing IDs?
- Is there horizontal/vertical privilege escalation?
#### A02: Cryptographic Failures
- Weak crypto (MD5, SHA1, DES, ECB) or hardcoded secrets
- Is sensitive data encrypted at rest and in transit?
- Are keys/secrets properly managed (env vars, not hardcoded)?
#### A03: Injection
- SQL injection: raw queries, string interpolation in SQL
- Command injection: system(), exec(), spawn(), popen
- Template injection: render with params, eval(), html_safe, raw()
- LLM prompt injection: see Phase 7 for comprehensive coverage
#### A04: Insecure Design
- Rate limits on authentication endpoints?
- Account lockout after failed attempts?
- Business logic validated server-side?
#### A05: Security Misconfiguration
- CORS configuration (wildcard origins in production?)
- CSP headers present?
- Debug mode / verbose errors in production?
#### A06: Vulnerable and Outdated Components
See **Phase 3 (Dependency Supply Chain)** for comprehensive component analysis.
#### A07: Identification and Authentication Failures
- Session management: creation, storage, invalidation
- Password policy: complexity, rotation, breach checking
- MFA: available? enforced for admin?
- Token management: JWT expiration, refresh rotation
#### A08: Software and Data Integrity Failures
See **Phase 4 (CI/CD Pipeline Security)** for pipeline protection analysis.
- Deserialization inputs validated?
- Integrity checking on external data?
#### A09: Security Logging and Monitoring Failures
- Authentication events logged?
- Authorization failures logged?
- Admin actions audit-trailed?
- Logs protected from tampering?
#### A10: Server-Side Request Forgery (SSRF)
- URL construction from user input?
- Internal service reachability from user-controlled URLs?
- Allowlist/blocklist enforcement on outbound requests?
### Phase 10: STRIDE Threat Model
For each major component identified in Phase 0, evaluate:
```
COMPONENT: [Name]
Spoofing: Can an attacker impersonate a user/service?
Tampering: Can data be modified in transit/at rest?
Repudiation: Can actions be denied? Is there an audit trail?
Information Disclosure: Can sensitive data leak?
Denial of Service: Can the component be overwhelmed?
Elevation of Privilege: Can a user gain unauthorized access?
```
### Phase 11: Data Classification
Classify all data handled by the application:
```
DATA CLASSIFICATION
═══════════════════
RESTRICTED (breach = legal liability):
- Passwords/credentials: [where stored, how protected]
- Payment data: [where stored, PCI compliance status]
- PII: [what types, where stored, retention policy]
CONFIDENTIAL (breach = business damage):
- API keys: [where stored, rotation policy]
- Business logic: [trade secrets in code?]
- User behavior data: [analytics, tracking]
INTERNAL (breach = embarrassment):
- System logs: [what they contain, who can access]
- Configuration: [what's exposed in error messages]
PUBLIC:
- Marketing content, documentation, public APIs
```
{{SECTION:audit-phases}}
### Phase 12: False Positive Filtering + Active Verification
Before producing findings, run every candidate through this filter.
+253
View File
@@ -0,0 +1,253 @@
<!-- AUTO-GENERATED from audit-phases.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
**Scope gate (read first).** This section holds every scope-dependent phase (2-11), but you run ONLY the phases your resolved mode selected back in `## Mode Resolution` (always-loaded in the skeleton). Phases 0, 1, 12, 13, 14 always run; Phases 2-11 are scope-gated. "Execute in full" means work through this section applying that selection, NOT run a phase your mode did not select just because its prose lives here. Example: `--owasp` runs Phase 9 from this section, not Phases 2-8/10/11.
### Phase 2: Secrets Archaeology
Scan git history for leaked credentials, check tracked `.env` files, find CI configs with inline secrets.
**Canonical pattern catalog.** The HIGH-tier credential prefixes the archaeology
greps below target (AKIA, ghp_, sk-ant-, sk_live_, xoxb-, `-----BEGIN ... PRIVATE
KEY-----`, etc.) are the same set `/spec`'s in-flight redaction blocks on. The full
3-tier taxonomy (HIGH credentials, MEDIUM PII/legal/internal, LOW) is generated from
and lives in `lib/redact-patterns.ts` — the single source of truth shared by the
`gstack-redact` engine, `/spec`, `/ship`, and the `/document-*` skills.
**Git history — known secret prefixes:**
```bash
git log -p --all -S "AKIA" --diff-filter=A -- "*.env" "*.yml" "*.yaml" "*.json" "*.toml" 2>/dev/null
git log -p --all -S "sk-" --diff-filter=A -- "*.env" "*.yml" "*.json" "*.ts" "*.js" "*.py" 2>/dev/null
git log -p --all -G "ghp_|gho_|github_pat_" 2>/dev/null
git log -p --all -G "xoxb-|xoxp-|xapp-" 2>/dev/null
git log -p --all -G "password|secret|token|api_key" -- "*.env" "*.yml" "*.json" "*.conf" 2>/dev/null
```
**.env files tracked by git:**
```bash
git ls-files '*.env' '.env.*' 2>/dev/null | grep -v '.example\|.sample\|.template'
grep -q "^\.env$\|^\.env\.\*" .gitignore 2>/dev/null && echo ".env IS gitignored" || echo "WARNING: .env NOT in .gitignore"
```
**CI configs with inline secrets (not using secret stores):**
```bash
for f in $(find .github/workflows -maxdepth 1 \( -name '*.yml' -o -name '*.yaml' \) 2>/dev/null) .gitlab-ci.yml .circleci/config.yml; do
[ -f "$f" ] && grep -n "password:\|token:\|secret:\|api_key:" "$f" | grep -v '\${{' | grep -v 'secrets\.'
done 2>/dev/null
```
**Severity:** CRITICAL for active secret patterns in git history (AKIA, sk_live_, ghp_, xoxb-). HIGH for .env tracked by git, CI configs with inline credentials. MEDIUM for suspicious .env.example values.
**FP rules:** Placeholders ("your_", "changeme", "TODO") excluded. Test fixtures excluded unless same value in non-test code. Rotated secrets still flagged (they were exposed). `.env.local` in `.gitignore` is expected.
**Diff mode:** Replace `git log -p --all` with `git log -p <base>..HEAD`.
### Phase 3: Dependency Supply Chain
Goes beyond `npm audit`. Checks actual supply chain risk.
**Package manager detection:**
```bash
[ -f package.json ] && echo "DETECTED: npm/yarn/bun"
[ -f Gemfile ] && echo "DETECTED: bundler"
[ -f requirements.txt ] || [ -f pyproject.toml ] && echo "DETECTED: pip"
[ -f Cargo.toml ] && echo "DETECTED: cargo"
[ -f go.mod ] && echo "DETECTED: go"
```
**Standard vulnerability scan:** Run whichever package manager's audit tool is available. Each tool is optional — if not installed, note it in the report as "SKIPPED — tool not installed" with install instructions. This is informational, NOT a finding. The audit continues with whatever tools ARE available.
**Install scripts in production deps (supply chain attack vector):** For Node.js projects with hydrated `node_modules`, check production dependencies for `preinstall`, `postinstall`, or `install` scripts.
**Lockfile integrity:** Check that lockfiles exist AND are tracked by git.
**Severity:** CRITICAL for known CVEs (high/critical) in direct deps. HIGH for install scripts in prod deps / missing lockfile. MEDIUM for abandoned packages / medium CVEs / lockfile not tracked.
**FP rules:** devDependency CVEs are MEDIUM max. `node-gyp`/`cmake` install scripts expected (MEDIUM not HIGH). No-fix-available advisories without known exploits excluded. Missing lockfile for library repos (not apps) is NOT a finding.
### Phase 4: CI/CD Pipeline Security
Check who can modify workflows and what secrets they can access.
**GitHub Actions analysis:** For each workflow file, check for:
- Unpinned third-party actions (not SHA-pinned) — use Grep for `uses:` lines missing `@[sha]`
- `pull_request_target` (dangerous: fork PRs get write access)
- Script injection via `${{ github.event.* }}` in `run:` steps
- Secrets as env vars (could leak in logs)
- CODEOWNERS protection on workflow files
**Severity:** CRITICAL for `pull_request_target` + checkout of PR code / script injection via `${{ github.event.*.body }}` in `run:` steps. HIGH for unpinned third-party actions / secrets as env vars without masking. MEDIUM for missing CODEOWNERS on workflow files.
**FP rules:** First-party `actions/*` unpinned = MEDIUM not HIGH. `pull_request_target` without PR ref checkout is safe (precedent #11). Secrets in `with:` blocks (not `env:`/`run:`) are handled by runtime.
### Phase 5: Infrastructure Shadow Surface
Find shadow infrastructure with excessive access.
**Dockerfiles:** For each Dockerfile, check for missing `USER` directive (runs as root), secrets passed as `ARG`, `.env` files copied into images, exposed ports.
**Config files with prod credentials:** Use Grep to search for database connection strings (postgres://, mysql://, mongodb://, redis://) in config files, excluding localhost/127.0.0.1/example.com. Check for staging/dev configs referencing prod.
**IaC security:** For Terraform files, check for `"*"` in IAM actions/resources, hardcoded secrets in `.tf`/`.tfvars`. For K8s manifests, check for privileged containers, hostNetwork, hostPID.
**Severity:** CRITICAL for prod DB URLs with credentials in committed config / `"*"` IAM on sensitive resources / secrets baked into Docker images. HIGH for root containers in prod / staging with prod DB access / privileged K8s. MEDIUM for missing USER directive / exposed ports without documented purpose.
**FP rules:** `docker-compose.yml` for local dev with localhost = not a finding (precedent #12). Terraform `"*"` in `data` sources (read-only) excluded. K8s manifests in `test/`/`dev/`/`local/` with localhost networking excluded.
### Phase 6: Webhook & Integration Audit
Find inbound endpoints that accept anything.
**Webhook routes:** Use Grep to find files containing webhook/hook/callback route patterns. For each file, check whether it also contains signature verification (signature, hmac, verify, digest, x-hub-signature, stripe-signature, svix). Files with webhook routes but NO signature verification are findings.
**TLS verification disabled:** Use Grep to search for patterns like `verify.*false`, `VERIFY_NONE`, `InsecureSkipVerify`, `NODE_TLS_REJECT_UNAUTHORIZED.*0`.
**OAuth scope analysis:** Use Grep to find OAuth configurations and check for overly broad scopes.
**Verification approach (code-tracing only — NO live requests):** For webhook findings, trace the handler code to determine if signature verification exists anywhere in the middleware chain (parent router, middleware stack, API gateway config). Do NOT make actual HTTP requests to webhook endpoints.
**Severity:** CRITICAL for webhooks without any signature verification. HIGH for TLS verification disabled in prod code / overly broad OAuth scopes. MEDIUM for undocumented outbound data flows to third parties.
**FP rules:** TLS disabled in test code excluded. Internal service-to-service webhooks on private networks = MEDIUM max. Webhook endpoints behind API gateway that handles signature verification upstream are NOT findings — but require evidence.
### Phase 7: LLM & AI Security
Check for AI/LLM-specific vulnerabilities. This is a new attack class.
Use Grep to search for these patterns:
- **Prompt injection vectors:** User input flowing into system prompts or tool schemas — look for string interpolation near system prompt construction
- **Unsanitized LLM output:** `dangerouslySetInnerHTML`, `v-html`, `innerHTML`, `.html()`, `raw()` rendering LLM responses
- **Tool/function calling without validation:** `tool_choice`, `function_call`, `tools=`, `functions=`
- **AI API keys in code (not env vars):** `sk-` patterns, hardcoded API key assignments
- **Eval/exec of LLM output:** `eval()`, `exec()`, `Function()`, `new Function` processing AI responses
**Key checks (beyond grep):**
- Trace user content flow — does it enter system prompts or tool schemas?
- RAG poisoning: can external documents influence AI behavior via retrieval?
- Tool calling permissions: are LLM tool calls validated before execution?
- Output sanitization: is LLM output treated as trusted (rendered as HTML, executed as code)?
- Cost/resource attacks: can a user trigger unbounded LLM calls?
**Severity:** CRITICAL for user input in system prompts / unsanitized LLM output rendered as HTML / eval of LLM output. HIGH for missing tool call validation / exposed AI API keys. MEDIUM for unbounded LLM calls / RAG without input validation.
**FP rules:** User content in the user-message position of an AI conversation is NOT prompt injection (precedent #13). Only flag when user content enters system prompts, tool schemas, or function-calling contexts.
### Phase 8: Skill Supply Chain
Scan installed Claude Code skills for malicious patterns. 36% of published skills have security flaws, 13.4% are outright malicious (Snyk ToxicSkills research).
**Tier 1 — repo-local (automatic):** Scan the repo's local skills directory for suspicious patterns:
```bash
ls -la .claude/skills/ 2>/dev/null
```
Use Grep to search all local skill SKILL.md files for suspicious patterns:
- `curl`, `wget`, `fetch`, `http`, `exfiltrat` (network exfiltration)
- `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `env.`, `process.env` (credential access)
- `IGNORE PREVIOUS`, `system override`, `disregard`, `forget your instructions` (prompt injection)
**Tier 2 — global skills (requires permission):** Before scanning globally installed skills or user settings, use AskUserQuestion:
"Phase 8 can scan your globally installed AI coding agent skills and hooks for malicious patterns. This reads files outside the repo. Want to include this?"
Options: A) Yes — scan global skills too B) No — repo-local only
If approved, run the same Grep patterns on globally installed skill files and check hooks in user settings.
**Severity:** CRITICAL for credential exfiltration attempts / prompt injection in skill files. HIGH for suspicious network calls / overly broad tool permissions. MEDIUM for skills from unverified sources without review.
**FP rules:** gstack's own skills are trusted (check if skill path resolves to a known repo). Skills that use `curl` for legitimate purposes (downloading tools, health checks) need context — only flag when the target URL is suspicious or when the command includes credential variables.
### Phase 9: OWASP Top 10 Assessment
For each OWASP category, perform targeted analysis. Use the Grep tool for all searches — scope file extensions to detected stacks from Phase 0.
#### A01: Broken Access Control
- Check for missing auth on controllers/routes (skip_before_action, skip_authorization, public, no_auth)
- Check for direct object reference patterns (params[:id], req.params.id, request.args.get)
- Can user A access user B's resources by changing IDs?
- Is there horizontal/vertical privilege escalation?
#### A02: Cryptographic Failures
- Weak crypto (MD5, SHA1, DES, ECB) or hardcoded secrets
- Is sensitive data encrypted at rest and in transit?
- Are keys/secrets properly managed (env vars, not hardcoded)?
#### A03: Injection
- SQL injection: raw queries, string interpolation in SQL
- Command injection: system(), exec(), spawn(), popen
- Template injection: render with params, eval(), html_safe, raw()
- LLM prompt injection: see Phase 7 for comprehensive coverage
#### A04: Insecure Design
- Rate limits on authentication endpoints?
- Account lockout after failed attempts?
- Business logic validated server-side?
#### A05: Security Misconfiguration
- CORS configuration (wildcard origins in production?)
- CSP headers present?
- Debug mode / verbose errors in production?
#### A06: Vulnerable and Outdated Components
See **Phase 3 (Dependency Supply Chain)** for comprehensive component analysis.
#### A07: Identification and Authentication Failures
- Session management: creation, storage, invalidation
- Password policy: complexity, rotation, breach checking
- MFA: available? enforced for admin?
- Token management: JWT expiration, refresh rotation
#### A08: Software and Data Integrity Failures
See **Phase 4 (CI/CD Pipeline Security)** for pipeline protection analysis.
- Deserialization inputs validated?
- Integrity checking on external data?
#### A09: Security Logging and Monitoring Failures
- Authentication events logged?
- Authorization failures logged?
- Admin actions audit-trailed?
- Logs protected from tampering?
#### A10: Server-Side Request Forgery (SSRF)
- URL construction from user input?
- Internal service reachability from user-controlled URLs?
- Allowlist/blocklist enforcement on outbound requests?
### Phase 10: STRIDE Threat Model
For each major component identified in Phase 0, evaluate:
```
COMPONENT: [Name]
Spoofing: Can an attacker impersonate a user/service?
Tampering: Can data be modified in transit/at rest?
Repudiation: Can actions be denied? Is there an audit trail?
Information Disclosure: Can sensitive data leak?
Denial of Service: Can the component be overwhelmed?
Elevation of Privilege: Can a user gain unauthorized access?
```
### Phase 11: Data Classification
Classify all data handled by the application:
```
DATA CLASSIFICATION
═══════════════════
RESTRICTED (breach = legal liability):
- Passwords/credentials: [where stored, how protected]
- Payment data: [where stored, PCI compliance status]
- PII: [what types, where stored, retention policy]
CONFIDENTIAL (breach = business damage):
- API keys: [where stored, rotation policy]
- Business logic: [trade secrets in code?]
- User behavior data: [analytics, tracking]
INTERNAL (breach = embarrassment):
- System logs: [what they contain, who can access]
- Configuration: [what's exposed in error messages]
PUBLIC:
- Marketing content, documentation, public APIs
```
+251
View File
@@ -0,0 +1,251 @@
**Scope gate (read first).** This section holds every scope-dependent phase (2-11), but you run ONLY the phases your resolved mode selected back in `## Mode Resolution` (always-loaded in the skeleton). Phases 0, 1, 12, 13, 14 always run; Phases 2-11 are scope-gated. "Execute in full" means work through this section applying that selection, NOT run a phase your mode did not select just because its prose lives here. Example: `--owasp` runs Phase 9 from this section, not Phases 2-8/10/11.
### Phase 2: Secrets Archaeology
Scan git history for leaked credentials, check tracked `.env` files, find CI configs with inline secrets.
**Canonical pattern catalog.** The HIGH-tier credential prefixes the archaeology
greps below target (AKIA, ghp_, sk-ant-, sk_live_, xoxb-, `-----BEGIN ... PRIVATE
KEY-----`, etc.) are the same set `/spec`'s in-flight redaction blocks on. The full
3-tier taxonomy (HIGH credentials, MEDIUM PII/legal/internal, LOW) is generated from
and lives in `lib/redact-patterns.ts` — the single source of truth shared by the
`gstack-redact` engine, `/spec`, `/ship`, and the `/document-*` skills.
**Git history — known secret prefixes:**
```bash
git log -p --all -S "AKIA" --diff-filter=A -- "*.env" "*.yml" "*.yaml" "*.json" "*.toml" 2>/dev/null
git log -p --all -S "sk-" --diff-filter=A -- "*.env" "*.yml" "*.json" "*.ts" "*.js" "*.py" 2>/dev/null
git log -p --all -G "ghp_|gho_|github_pat_" 2>/dev/null
git log -p --all -G "xoxb-|xoxp-|xapp-" 2>/dev/null
git log -p --all -G "password|secret|token|api_key" -- "*.env" "*.yml" "*.json" "*.conf" 2>/dev/null
```
**.env files tracked by git:**
```bash
git ls-files '*.env' '.env.*' 2>/dev/null | grep -v '.example\|.sample\|.template'
grep -q "^\.env$\|^\.env\.\*" .gitignore 2>/dev/null && echo ".env IS gitignored" || echo "WARNING: .env NOT in .gitignore"
```
**CI configs with inline secrets (not using secret stores):**
```bash
for f in $(find .github/workflows -maxdepth 1 \( -name '*.yml' -o -name '*.yaml' \) 2>/dev/null) .gitlab-ci.yml .circleci/config.yml; do
[ -f "$f" ] && grep -n "password:\|token:\|secret:\|api_key:" "$f" | grep -v '\${{' | grep -v 'secrets\.'
done 2>/dev/null
```
**Severity:** CRITICAL for active secret patterns in git history (AKIA, sk_live_, ghp_, xoxb-). HIGH for .env tracked by git, CI configs with inline credentials. MEDIUM for suspicious .env.example values.
**FP rules:** Placeholders ("your_", "changeme", "TODO") excluded. Test fixtures excluded unless same value in non-test code. Rotated secrets still flagged (they were exposed). `.env.local` in `.gitignore` is expected.
**Diff mode:** Replace `git log -p --all` with `git log -p <base>..HEAD`.
### Phase 3: Dependency Supply Chain
Goes beyond `npm audit`. Checks actual supply chain risk.
**Package manager detection:**
```bash
[ -f package.json ] && echo "DETECTED: npm/yarn/bun"
[ -f Gemfile ] && echo "DETECTED: bundler"
[ -f requirements.txt ] || [ -f pyproject.toml ] && echo "DETECTED: pip"
[ -f Cargo.toml ] && echo "DETECTED: cargo"
[ -f go.mod ] && echo "DETECTED: go"
```
**Standard vulnerability scan:** Run whichever package manager's audit tool is available. Each tool is optional — if not installed, note it in the report as "SKIPPED — tool not installed" with install instructions. This is informational, NOT a finding. The audit continues with whatever tools ARE available.
**Install scripts in production deps (supply chain attack vector):** For Node.js projects with hydrated `node_modules`, check production dependencies for `preinstall`, `postinstall`, or `install` scripts.
**Lockfile integrity:** Check that lockfiles exist AND are tracked by git.
**Severity:** CRITICAL for known CVEs (high/critical) in direct deps. HIGH for install scripts in prod deps / missing lockfile. MEDIUM for abandoned packages / medium CVEs / lockfile not tracked.
**FP rules:** devDependency CVEs are MEDIUM max. `node-gyp`/`cmake` install scripts expected (MEDIUM not HIGH). No-fix-available advisories without known exploits excluded. Missing lockfile for library repos (not apps) is NOT a finding.
### Phase 4: CI/CD Pipeline Security
Check who can modify workflows and what secrets they can access.
**GitHub Actions analysis:** For each workflow file, check for:
- Unpinned third-party actions (not SHA-pinned) — use Grep for `uses:` lines missing `@[sha]`
- `pull_request_target` (dangerous: fork PRs get write access)
- Script injection via `${{ github.event.* }}` in `run:` steps
- Secrets as env vars (could leak in logs)
- CODEOWNERS protection on workflow files
**Severity:** CRITICAL for `pull_request_target` + checkout of PR code / script injection via `${{ github.event.*.body }}` in `run:` steps. HIGH for unpinned third-party actions / secrets as env vars without masking. MEDIUM for missing CODEOWNERS on workflow files.
**FP rules:** First-party `actions/*` unpinned = MEDIUM not HIGH. `pull_request_target` without PR ref checkout is safe (precedent #11). Secrets in `with:` blocks (not `env:`/`run:`) are handled by runtime.
### Phase 5: Infrastructure Shadow Surface
Find shadow infrastructure with excessive access.
**Dockerfiles:** For each Dockerfile, check for missing `USER` directive (runs as root), secrets passed as `ARG`, `.env` files copied into images, exposed ports.
**Config files with prod credentials:** Use Grep to search for database connection strings (postgres://, mysql://, mongodb://, redis://) in config files, excluding localhost/127.0.0.1/example.com. Check for staging/dev configs referencing prod.
**IaC security:** For Terraform files, check for `"*"` in IAM actions/resources, hardcoded secrets in `.tf`/`.tfvars`. For K8s manifests, check for privileged containers, hostNetwork, hostPID.
**Severity:** CRITICAL for prod DB URLs with credentials in committed config / `"*"` IAM on sensitive resources / secrets baked into Docker images. HIGH for root containers in prod / staging with prod DB access / privileged K8s. MEDIUM for missing USER directive / exposed ports without documented purpose.
**FP rules:** `docker-compose.yml` for local dev with localhost = not a finding (precedent #12). Terraform `"*"` in `data` sources (read-only) excluded. K8s manifests in `test/`/`dev/`/`local/` with localhost networking excluded.
### Phase 6: Webhook & Integration Audit
Find inbound endpoints that accept anything.
**Webhook routes:** Use Grep to find files containing webhook/hook/callback route patterns. For each file, check whether it also contains signature verification (signature, hmac, verify, digest, x-hub-signature, stripe-signature, svix). Files with webhook routes but NO signature verification are findings.
**TLS verification disabled:** Use Grep to search for patterns like `verify.*false`, `VERIFY_NONE`, `InsecureSkipVerify`, `NODE_TLS_REJECT_UNAUTHORIZED.*0`.
**OAuth scope analysis:** Use Grep to find OAuth configurations and check for overly broad scopes.
**Verification approach (code-tracing only — NO live requests):** For webhook findings, trace the handler code to determine if signature verification exists anywhere in the middleware chain (parent router, middleware stack, API gateway config). Do NOT make actual HTTP requests to webhook endpoints.
**Severity:** CRITICAL for webhooks without any signature verification. HIGH for TLS verification disabled in prod code / overly broad OAuth scopes. MEDIUM for undocumented outbound data flows to third parties.
**FP rules:** TLS disabled in test code excluded. Internal service-to-service webhooks on private networks = MEDIUM max. Webhook endpoints behind API gateway that handles signature verification upstream are NOT findings — but require evidence.
### Phase 7: LLM & AI Security
Check for AI/LLM-specific vulnerabilities. This is a new attack class.
Use Grep to search for these patterns:
- **Prompt injection vectors:** User input flowing into system prompts or tool schemas — look for string interpolation near system prompt construction
- **Unsanitized LLM output:** `dangerouslySetInnerHTML`, `v-html`, `innerHTML`, `.html()`, `raw()` rendering LLM responses
- **Tool/function calling without validation:** `tool_choice`, `function_call`, `tools=`, `functions=`
- **AI API keys in code (not env vars):** `sk-` patterns, hardcoded API key assignments
- **Eval/exec of LLM output:** `eval()`, `exec()`, `Function()`, `new Function` processing AI responses
**Key checks (beyond grep):**
- Trace user content flow — does it enter system prompts or tool schemas?
- RAG poisoning: can external documents influence AI behavior via retrieval?
- Tool calling permissions: are LLM tool calls validated before execution?
- Output sanitization: is LLM output treated as trusted (rendered as HTML, executed as code)?
- Cost/resource attacks: can a user trigger unbounded LLM calls?
**Severity:** CRITICAL for user input in system prompts / unsanitized LLM output rendered as HTML / eval of LLM output. HIGH for missing tool call validation / exposed AI API keys. MEDIUM for unbounded LLM calls / RAG without input validation.
**FP rules:** User content in the user-message position of an AI conversation is NOT prompt injection (precedent #13). Only flag when user content enters system prompts, tool schemas, or function-calling contexts.
### Phase 8: Skill Supply Chain
Scan installed Claude Code skills for malicious patterns. 36% of published skills have security flaws, 13.4% are outright malicious (Snyk ToxicSkills research).
**Tier 1 — repo-local (automatic):** Scan the repo's local skills directory for suspicious patterns:
```bash
ls -la .claude/skills/ 2>/dev/null
```
Use Grep to search all local skill SKILL.md files for suspicious patterns:
- `curl`, `wget`, `fetch`, `http`, `exfiltrat` (network exfiltration)
- `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `env.`, `process.env` (credential access)
- `IGNORE PREVIOUS`, `system override`, `disregard`, `forget your instructions` (prompt injection)
**Tier 2 — global skills (requires permission):** Before scanning globally installed skills or user settings, use AskUserQuestion:
"Phase 8 can scan your globally installed AI coding agent skills and hooks for malicious patterns. This reads files outside the repo. Want to include this?"
Options: A) Yes — scan global skills too B) No — repo-local only
If approved, run the same Grep patterns on globally installed skill files and check hooks in user settings.
**Severity:** CRITICAL for credential exfiltration attempts / prompt injection in skill files. HIGH for suspicious network calls / overly broad tool permissions. MEDIUM for skills from unverified sources without review.
**FP rules:** gstack's own skills are trusted (check if skill path resolves to a known repo). Skills that use `curl` for legitimate purposes (downloading tools, health checks) need context — only flag when the target URL is suspicious or when the command includes credential variables.
### Phase 9: OWASP Top 10 Assessment
For each OWASP category, perform targeted analysis. Use the Grep tool for all searches — scope file extensions to detected stacks from Phase 0.
#### A01: Broken Access Control
- Check for missing auth on controllers/routes (skip_before_action, skip_authorization, public, no_auth)
- Check for direct object reference patterns (params[:id], req.params.id, request.args.get)
- Can user A access user B's resources by changing IDs?
- Is there horizontal/vertical privilege escalation?
#### A02: Cryptographic Failures
- Weak crypto (MD5, SHA1, DES, ECB) or hardcoded secrets
- Is sensitive data encrypted at rest and in transit?
- Are keys/secrets properly managed (env vars, not hardcoded)?
#### A03: Injection
- SQL injection: raw queries, string interpolation in SQL
- Command injection: system(), exec(), spawn(), popen
- Template injection: render with params, eval(), html_safe, raw()
- LLM prompt injection: see Phase 7 for comprehensive coverage
#### A04: Insecure Design
- Rate limits on authentication endpoints?
- Account lockout after failed attempts?
- Business logic validated server-side?
#### A05: Security Misconfiguration
- CORS configuration (wildcard origins in production?)
- CSP headers present?
- Debug mode / verbose errors in production?
#### A06: Vulnerable and Outdated Components
See **Phase 3 (Dependency Supply Chain)** for comprehensive component analysis.
#### A07: Identification and Authentication Failures
- Session management: creation, storage, invalidation
- Password policy: complexity, rotation, breach checking
- MFA: available? enforced for admin?
- Token management: JWT expiration, refresh rotation
#### A08: Software and Data Integrity Failures
See **Phase 4 (CI/CD Pipeline Security)** for pipeline protection analysis.
- Deserialization inputs validated?
- Integrity checking on external data?
#### A09: Security Logging and Monitoring Failures
- Authentication events logged?
- Authorization failures logged?
- Admin actions audit-trailed?
- Logs protected from tampering?
#### A10: Server-Side Request Forgery (SSRF)
- URL construction from user input?
- Internal service reachability from user-controlled URLs?
- Allowlist/blocklist enforcement on outbound requests?
### Phase 10: STRIDE Threat Model
For each major component identified in Phase 0, evaluate:
```
COMPONENT: [Name]
Spoofing: Can an attacker impersonate a user/service?
Tampering: Can data be modified in transit/at rest?
Repudiation: Can actions be denied? Is there an audit trail?
Information Disclosure: Can sensitive data leak?
Denial of Service: Can the component be overwhelmed?
Elevation of Privilege: Can a user gain unauthorized access?
```
### Phase 11: Data Classification
Classify all data handled by the application:
```
DATA CLASSIFICATION
═══════════════════
RESTRICTED (breach = legal liability):
- Passwords/credentials: [where stored, how protected]
- Payment data: [where stored, PCI compliance status]
- PII: [what types, where stored, retention policy]
CONFIDENTIAL (breach = business damage):
- API keys: [where stored, rotation policy]
- Business logic: [trade secrets in code?]
- User behavior data: [analytics, tracking]
INTERNAL (breach = embarrassment):
- System logs: [what they contain, who can access]
- Configuration: [what's exposed in error messages]
PUBLIC:
- Marketing content, documentation, public APIs
```
+14
View File
@@ -0,0 +1,14 @@
{
"$schema": "https://gstack.dev/schemas/section-manifest.json",
"skill": "cso",
"version": 1,
"note": "PASSIVE registry (v2 plan T9 / CM2). id/file/title/trigger text ONLY. Mode dispatch (## Arguments, ## Mode Resolution), always-run phases (0,1), and FP-filtering exceptions (Phase 12) stay in the always-loaded skeleton; only the scope-dependent audit phases are on demand.",
"sections": [
{
"id": "audit-phases",
"file": "audit-phases.md",
"title": "Scope-dependent audit phases: secrets, dependencies, CI/CD, infra, webhooks, LLM/AI, skill supply chain, OWASP Top 10, STRIDE, data classification (Phases 2-11)",
"trigger": "running the scope-dependent audit phases (Phases 2-11) selected by the resolved mode, after the Phase 0 stack detection and Phase 1 attack-surface census"
}
]
}
+40 -410
View File
@@ -72,6 +72,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -151,7 +154,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -323,11 +326,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -407,7 +430,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
@@ -921,6 +944,17 @@ matches a past learning, display:
This makes the compounding visible. The user should see that gstack is getting
smarter on their codebase over time.
## Section index — Read each section when its situation applies
This skill is a decision-tree skeleton. The steps below point to on-demand
sections. Read a section in full before doing its step; do not work from memory.
| When | Read this section |
|------|-------------------|
| building the complete design-system proposal, drill-downs, the design preview, and writing DESIGN.md (Phases 3-6, after product context and research) | `sections/proposal-and-preview.md` |
---
## Phase 1: Product Context
Ask the user a single question that covers everything you need to know. Pre-fill what you can infer from the codebase.
@@ -1106,412 +1140,8 @@ Present subagent output under a `CLAUDE SUBAGENT (design direction):` header.
```
Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "codex-only", "subagent-only", or "unavailable".
## Phase 3: The Complete Proposal
This is the soul of the skill. Propose EVERYTHING as one coherent package.
**AskUserQuestion Q2 — present the full proposal with SAFE/RISK breakdown:**
```
Based on [product context] and [research findings / my design knowledge]:
AESTHETIC: [direction] — [one-line rationale]
DECORATION: [level] — [why this pairs with the aesthetic]
LAYOUT: [approach] — [why this fits the product type]
COLOR: [approach] + proposed palette (hex values) — [rationale]
TYPOGRAPHY: [3 font recommendations with roles] — [why these fonts]
SPACING: [base unit + density] — [rationale]
MOTION: [approach] — [rationale]
This system is coherent because [explain how choices reinforce each other].
SAFE CHOICES (category baseline — your users expect these):
- [2-3 decisions that match category conventions, with rationale for playing safe]
RISKS (where your product gets its own face):
- [2-3 deliberate departures from convention]
- For each risk: what it is, why it works, what you gain, what it costs
The safe choices keep you literate in your category. The risks are where
your product becomes memorable. Which risks appeal to you? Want to see
different ones? Or adjust anything else?
```
The SAFE/RISK breakdown is critical. Design coherence is table stakes — every product in a category can be coherent and still look identical. The real question is: where do you take creative risks? The agent should always propose at least 2 risks, each with a clear rationale for why the risk is worth taking and what the user gives up. Risks might include: an unexpected typeface for the category, a bold accent color nobody else uses, tighter or looser spacing than the norm, a layout approach that breaks from convention, motion choices that add personality.
**Options:** A) Looks great — generate the preview page. B) I want to adjust [section]. C) I want different risks — show me wilder options. D) Start over with a different direction. E) Skip the preview, just write DESIGN.md.
### Your Design Knowledge (use to inform proposals — do NOT display as tables)
**Aesthetic directions** (pick the one that fits the product):
- Brutally Minimal — Type and whitespace only. No decoration. Modernist.
- Maximalist Chaos — Dense, layered, pattern-heavy. Y2K meets contemporary.
- Retro-Futuristic — Vintage tech nostalgia. CRT glow, pixel grids, warm monospace.
- Luxury/Refined — Serifs, high contrast, generous whitespace, precious metals.
- Playful/Toy-like — Rounded, bouncy, bold primaries. Approachable and fun.
- Editorial/Magazine — Strong typographic hierarchy, asymmetric grids, pull quotes.
- Brutalist/Raw — Exposed structure, system fonts, visible grid, no polish.
- Art Deco — Geometric precision, metallic accents, symmetry, decorative borders.
- Organic/Natural — Earth tones, rounded forms, hand-drawn texture, grain.
- Industrial/Utilitarian — Function-first, data-dense, monospace accents, muted palette.
**Decoration levels:** minimal (typography does all the work) / intentional (subtle texture, grain, or background treatment) / expressive (full creative direction, layered depth, patterns)
**Layout approaches:** grid-disciplined (strict columns, predictable alignment) / creative-editorial (asymmetry, overlap, grid-breaking) / hybrid (grid for app, creative for marketing)
**Color approaches:** restrained (1 accent + neutrals, color is rare and meaningful) / balanced (primary + secondary, semantic colors for hierarchy) / expressive (color as a primary design tool, bold palettes)
**Motion approaches:** minimal-functional (only transitions that aid comprehension) / intentional (subtle entrance animations, meaningful state transitions) / expressive (full choreography, scroll-driven, playful)
**Font recommendations by purpose:**
- Display/Hero: Satoshi, General Sans, Instrument Serif, Fraunces, Clash Grotesk, Cabinet Grotesk
- Body: Instrument Sans, DM Sans, Source Sans 3, Geist, Plus Jakarta Sans, Outfit
- Data/Tables: Geist (tabular-nums), DM Sans (tabular-nums), JetBrains Mono, IBM Plex Mono
- Code: JetBrains Mono, Fira Code, Berkeley Mono, Geist Mono
**Font blacklist** (never recommend):
Papyrus, Comic Sans, Lobster, Impact, Jokerman, Bleeding Cowboys, Permanent Marker, Bradley Hand, Brush Script, Hobo, Trajan, Raleway, Clash Display, Courier New (for body)
**Overused fonts** (never recommend as primary — use only if user specifically requests):
Inter, Roboto, Arial, Helvetica, Open Sans, Lato, Montserrat, Poppins, Space Grotesk.
Space Grotesk is on the list specifically because every AI design tool converges on it
as "the safe alternative to Inter." That's the convergence trap. Treat it the same as
Inter: only use if the user asks for it by name.
**Anti-convergence directive:** Across multiple generations in the same project, VARY
light/dark, fonts, and aesthetic directions. Never propose the same choices twice
without explicit justification. If the user's prior session used Geist + dark + editorial,
propose something different this time (or explicitly acknowledge you're doubling down
because it fits the brief). Convergence across generations is slop.
**AI slop anti-patterns** (never include in your recommendations):
- Purple/violet gradients as default accent
- 3-column feature grid with icons in colored circles
- Centered everything with uniform spacing
- Uniform bubbly border-radius on all elements
- Gradient buttons as the primary CTA pattern
- Generic stock-photo-style hero sections
- system-ui / -apple-system as the primary display or body font (the "I gave up on typography" signal)
- "Built for X" / "Designed for Y" marketing copy patterns
### Coherence Validation
When the user overrides one section, check if the rest still coheres. Flag mismatches with a gentle nudge — never block:
- Brutalist/Minimal aesthetic + expressive motion → "Heads up: brutalist aesthetics usually pair with minimal motion. Your combo is unusual — which is fine if intentional. Want me to suggest motion that fits, or keep it?"
- Expressive color + restrained decoration → "Bold palette with minimal decoration can work, but the colors will carry a lot of weight. Want me to suggest decoration that supports the palette?"
- Creative-editorial layout + data-heavy product → "Editorial layouts are gorgeous but can fight data density. Want me to show how a hybrid approach keeps both?"
- Always accept the user's final choice. Never refuse to proceed.
---
## Phase 4: Drill-downs (only if user requests adjustments)
When the user wants to change a specific section, go deep on that section:
- **Fonts:** Present 3-5 specific candidates with rationale, explain what each evokes, offer the preview page
- **Colors:** Present 2-3 palette options with hex values, explain the color theory reasoning
- **Aesthetic:** Walk through which directions fit their product and why
- **Layout/Spacing/Motion:** Present the approaches with concrete tradeoffs for their product type
Each drill-down is one focused AskUserQuestion. After the user decides, re-check coherence with the rest of the system.
---
## Phase 5: Design System Preview (default ON)
This phase generates visual previews of the proposed design system. Two paths depending on whether the gstack designer is available.
### Path A: AI Mockups (if DESIGN_READY)
Generate AI-rendered mockups showing the proposed design system applied to realistic screens for this product. This is far more powerful than an HTML preview — the user sees what their product could actually look like.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_DESIGN_DIR="$HOME/.gstack/projects/$SLUG/designs/design-system-$(date +%Y%m%d)"
mkdir -p "$_DESIGN_DIR"
echo "DESIGN_DIR: $_DESIGN_DIR"
```
Construct a design brief from the Phase 3 proposal (aesthetic, colors, typography, spacing, layout) and the product context from Phase 1:
```bash
$D variants --brief "<product name: [name]. Product type: [type]. Aesthetic: [direction]. Colors: primary [hex], secondary [hex], neutrals [range]. Typography: display [font], body [font]. Layout: [approach]. Show a realistic [page type] screen with [specific content for this product].>" --count 3 --output-dir "$_DESIGN_DIR/"
```
Run quality check on each variant:
```bash
$D check --image "$_DESIGN_DIR/variant-A.png" --brief "<the original brief>"
```
Show each variant inline (Read tool on each PNG) for instant preview.
**Before presenting to the user, self-gate:** For each variant, ask yourself: *"Would
a human designer be embarrassed to put their name on this?"* If yes, discard the
variant and regenerate. This is a hard gate. A mediocre AI mockup is worse than no
mockup. Embarrassment triggers include: purple gradient hero, 3-column SaaS grid,
centered-everything, Inter body text, generic stock-photo vibe, system-ui font,
gradient CTA button, bubble-radius everything. Any of those = reject and regenerate.
Tell the user: "I've generated 3 visual directions applying your design system to a realistic [product type] screen. Pick your favorite in the comparison board that just opened in your browser. You can also remix elements across variants."
### Comparison Board + Feedback Loop
Create the comparison board and serve it over HTTP:
```bash
$D compare --images "$_DESIGN_DIR/variant-A.png,$_DESIGN_DIR/variant-B.png,$_DESIGN_DIR/variant-C.png" --output "$_DESIGN_DIR/design-board.html" --serve
```
This command generates the board HTML, starts an HTTP server on a random port,
and opens it in the user's default browser. **Run it in the background** with `&`
because the server needs to stay running while the user interacts with the board.
Parse the board URL from stderr output. Default daemon path:
`BOARD_URL: http://127.0.0.1:N/boards/<id>/` (already includes the per-board
path; use this for the AskUserQuestion URL AND as the base for the reload
endpoint). Legacy `--no-daemon` path emits `SERVE_STARTED: port=XXXXX` and
serves a single board at `/`, with reload at `/api/reload` — only relevant
when an external caller explicitly passes `--no-daemon`.
**PRIMARY WAIT: AskUserQuestion with board URL**
After the board is serving, use AskUserQuestion to wait for the user. Include the
board URL so they can click it if they lost the browser tab:
"I've opened a comparison board with the design variants:
<BOARD_URL> — Rate them, leave comments, remix
elements you like, and click Submit when you're done. Let me know when you've
submitted your feedback (or paste your preferences here). If you clicked
Regenerate or Remix on the board, tell me and I'll generate new variants."
Substitute `<BOARD_URL>` with the URL parsed from stderr (the daemon path
emits `BOARD_URL: http://127.0.0.1:N/boards/<id>/`).
**Do NOT use AskUserQuestion to ask which variant the user prefers.** The comparison
board IS the chooser. AskUserQuestion is just the blocking wait mechanism.
**After the user responds to AskUserQuestion:**
Check for feedback files next to the board HTML:
- `$_DESIGN_DIR/feedback.json` — written when user clicks Submit (final choice)
- `$_DESIGN_DIR/feedback-pending.json` — written when user clicks Regenerate/Remix/More Like This
```bash
if [ -f "$_DESIGN_DIR/feedback.json" ]; then
echo "SUBMIT_RECEIVED"
cat "$_DESIGN_DIR/feedback.json"
elif [ -f "$_DESIGN_DIR/feedback-pending.json" ]; then
echo "REGENERATE_RECEIVED"
cat "$_DESIGN_DIR/feedback-pending.json"
rm "$_DESIGN_DIR/feedback-pending.json"
else
echo "NO_FEEDBACK_FILE"
fi
```
The feedback JSON has this shape:
```json
{
"preferred": "A",
"ratings": { "A": 4, "B": 3, "C": 2 },
"comments": { "A": "Love the spacing" },
"overall": "Go with A, bigger CTA",
"regenerated": false
}
```
**If `feedback.json` found:** The user clicked Submit on the board.
Read `preferred`, `ratings`, `comments`, `overall` from the JSON. Proceed with
the approved variant.
**If `feedback-pending.json` found:** The user clicked Regenerate/Remix on the board.
1. Read `regenerateAction` from the JSON (`"different"`, `"match"`, `"more_like_B"`,
`"remix"`, or custom text)
2. If `regenerateAction` is `"remix"`, read `remixSpec` (e.g. `{"layout":"A","colors":"B"}`)
3. Generate new variants with `$D iterate` or `$D variants` using updated brief
4. Create new board: `$D compare --images "..." --output "$_DESIGN_DIR/design-board.html"`
5. Reload the board in the user's browser (same tab) — the URL is per-board
under daemon mode, so use `<BOARD_URL>` (from the `BOARD_URL:` stderr
line) as the base:
`curl -s -X POST "${BOARD_URL}api/reload" -H 'Content-Type: application/json' -d '{"html":"$_DESIGN_DIR/design-board.html"}'`
Under `--no-daemon` the reload endpoint is `/api/reload` at the legacy
port; this path only matters if the caller explicitly opted out of the
daemon.
6. The board auto-refreshes. **AskUserQuestion again** with the same board URL to
wait for the next round of feedback. Repeat until `feedback.json` appears.
**If `NO_FEEDBACK_FILE`:** The user typed their preferences directly in the
AskUserQuestion response instead of using the board. Use their text response
as the feedback.
**POLLING FALLBACK:** Only use polling if `$D serve` fails (no port available).
In that case, show each variant inline using the Read tool (so the user can see them),
then use AskUserQuestion:
"The comparison board server failed to start. I've shown the variants above.
Which do you prefer? Any feedback?"
**After receiving feedback (any path):** Output a clear summary confirming
what was understood:
"Here's what I understood from your feedback:
PREFERRED: Variant [X]
RATINGS: [list]
YOUR NOTES: [comments]
DIRECTION: [overall]
Is this right?"
Use AskUserQuestion to verify before proceeding.
**Save the approved choice:**
```bash
echo '{"approved_variant":"<V>","feedback":"<FB>","date":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","screen":"<SCREEN>","branch":"'$(git branch --show-current 2>/dev/null)'"}' > "$_DESIGN_DIR/approved.json"
```
After the user picks a direction:
- Use `$D extract --image "$_DESIGN_DIR/variant-<CHOSEN>.png"` to analyze the approved mockup and extract design tokens (colors, typography, spacing) that will populate DESIGN.md in Phase 6. This grounds the design system in what was actually approved visually, not just what was described in text.
- If the user wants to iterate further: `$D iterate --feedback "<user's feedback>" --output "$_DESIGN_DIR/refined.png"`
**Plan mode vs. implementation mode:**
- **If in plan mode:** Add the approved mockup path (the full `$_DESIGN_DIR` path) and extracted tokens to the plan file under an "## Approved Design Direction" section. The design system gets written to DESIGN.md when the plan is implemented.
- **If NOT in plan mode:** Proceed directly to Phase 6 and write DESIGN.md with the extracted tokens.
### Path B: HTML Preview Page (fallback if DESIGN_NOT_AVAILABLE)
Generate a polished HTML preview page and open it in the user's browser. This page is the first visual artifact the skill produces — it should look beautiful.
```bash
PREVIEW_FILE="/tmp/design-consultation-preview-$(date +%s).html"
```
Write the preview HTML to `$PREVIEW_FILE`, then open it:
```bash
open "$PREVIEW_FILE"
```
### Preview Page Requirements (Path B only)
The agent writes a **single, self-contained HTML file** (no framework dependencies) that:
1. **Loads proposed fonts** from Google Fonts (or Bunny Fonts) via `<link>` tags
2. **Uses the proposed color palette** throughout — dogfood the design system
3. **Shows the product name** (not "Lorem Ipsum") as the hero heading
4. **Font specimen section:**
- Each font candidate shown in its proposed role (hero heading, body paragraph, button label, data table row)
- Side-by-side comparison if multiple candidates for one role
- Real content that matches the product (e.g., civic tech → government data examples)
5. **Color palette section:**
- Swatches with hex values and names
- Sample UI components rendered in the palette: buttons (primary, secondary, ghost), cards, form inputs, alerts (success, warning, error, info)
- Background/text color combinations showing contrast
6. **Realistic product mockups** — this is what makes the preview page powerful. Based on the project type from Phase 1, render 2-3 realistic page layouts using the full design system:
- **Dashboard / web app:** sample data table with metrics, sidebar nav, header with user avatar, stat cards
- **Marketing site:** hero section with real copy, feature highlights, testimonial block, CTA
- **Settings / admin:** form with labeled inputs, toggle switches, dropdowns, save button
- **Auth / onboarding:** login form with social buttons, branding, input validation states
- Use the product name, realistic content for the domain, and the proposed spacing/layout/border-radius. The user should see their product (roughly) before writing any code.
7. **Light/dark mode toggle** using CSS custom properties and a JS toggle button
8. **Clean, professional layout** — the preview page IS a taste signal for the skill
9. **Responsive** — looks good on any screen width
The page should make the user think "oh nice, they thought of this." It's selling the design system by showing what the product could feel like, not just listing hex codes and font names.
If `open` fails (headless environment), tell the user: *"I wrote the preview to [path] — open it in your browser to see the fonts and colors rendered."*
If the user says skip the preview, go directly to Phase 6.
---
## Phase 6: Write DESIGN.md & Confirm
If `$D extract` was used in Phase 5 (Path A), use the extracted tokens as the primary source for DESIGN.md values — colors, typography, and spacing grounded in the approved mockup rather than text descriptions alone. Merge extracted tokens with the Phase 3 proposal (the proposal provides rationale and context; the extraction provides exact values).
**If in plan mode:** Write the DESIGN.md content into the plan file as a "## Proposed DESIGN.md" section. Do NOT write the actual file — that happens at implementation time.
**If NOT in plan mode:** Write `DESIGN.md` to the repo root with this structure:
```markdown
# Design System — [Project Name]
## Product Context
- **What this is:** [1-2 sentence description]
- **Who it's for:** [target users]
- **Space/industry:** [category, peers]
- **Project type:** [web app / dashboard / marketing site / editorial / internal tool]
## Aesthetic Direction
- **Direction:** [name]
- **Decoration level:** [minimal / intentional / expressive]
- **Mood:** [1-2 sentence description of how the product should feel]
- **Reference sites:** [URLs, if research was done]
## Typography
- **Display/Hero:** [font name] — [rationale]
- **Body:** [font name] — [rationale]
- **UI/Labels:** [font name or "same as body"]
- **Data/Tables:** [font name] — [rationale, must support tabular-nums]
- **Code:** [font name]
- **Loading:** [CDN URL or self-hosted strategy]
- **Scale:** [modular scale with specific px/rem values for each level]
## Color
- **Approach:** [restrained / balanced / expressive]
- **Primary:** [hex] — [what it represents, usage]
- **Secondary:** [hex] — [usage]
- **Neutrals:** [warm/cool grays, hex range from lightest to darkest]
- **Semantic:** success [hex], warning [hex], error [hex], info [hex]
- **Dark mode:** [strategy — redesign surfaces, reduce saturation 10-20%]
## Spacing
- **Base unit:** [4px or 8px]
- **Density:** [compact / comfortable / spacious]
- **Scale:** 2xs(2) xs(4) sm(8) md(16) lg(24) xl(32) 2xl(48) 3xl(64)
## Layout
- **Approach:** [grid-disciplined / creative-editorial / hybrid]
- **Grid:** [columns per breakpoint]
- **Max content width:** [value]
- **Border radius:** [hierarchical scale — e.g., sm:4px, md:8px, lg:12px, full:9999px]
## Motion
- **Approach:** [minimal-functional / intentional / expressive]
- **Easing:** enter(ease-out) exit(ease-in) move(ease-in-out)
- **Duration:** micro(50-100ms) short(150-250ms) medium(250-400ms) long(400-700ms)
## Decisions Log
| Date | Decision | Rationale |
|------|----------|-----------|
| [today] | Initial design system created | Created by /design-consultation based on [product context / research] |
```
**Update CLAUDE.md** (or create it if it doesn't exist) — append this section:
```markdown
## Design System
Always read DESIGN.md before making any visual or UI decisions.
All font choices, colors, spacing, and aesthetic direction are defined there.
Do not deviate without explicit user approval.
In QA mode, flag any code that doesn't match DESIGN.md.
```
**AskUserQuestion Q-final — show summary and confirm:**
List all decisions. Flag any that used agent defaults without explicit user confirmation (the user should know what they're shipping). Options:
- A) Ship it — write DESIGN.md and CLAUDE.md
- B) I want to change something (specify what)
- C) Start over
After shipping DESIGN.md, if the session produced screen-level mockups or page layouts
(not just system-level tokens), suggest:
"Want to see this design system as working Pretext-native HTML? Run /design-html."
---
> **STOP.** Before building the complete design-system proposal, drill-downs, the design preview, and writing DESIGN.md (Phases 3-6, after product context and research), Read `~/.claude/skills/gstack/design-consultation/sections/proposal-and-preview.md` and execute it
> in full. Do not work from memory — that section is the source of truth for this step.
## Capture Learnings
If you discovered a non-obvious pattern, pitfall, or architectural insight during
+5 -294
View File
@@ -110,6 +110,10 @@ If `DESIGN_NOT_AVAILABLE`: Phase 5 falls back to the HTML preview page (still go
{{LEARNINGS_SEARCH}}
{{SECTION_INDEX:design-consultation}}
---
## Phase 1: Product Context
Ask the user a single question that covers everything you need to know. Pre-fill what you can infer from the codebase.
@@ -193,300 +197,7 @@ If the user said no research, skip entirely and proceed to Phase 3 using your bu
{{DESIGN_OUTSIDE_VOICES}}
## Phase 3: The Complete Proposal
This is the soul of the skill. Propose EVERYTHING as one coherent package.
**AskUserQuestion Q2 — present the full proposal with SAFE/RISK breakdown:**
```
Based on [product context] and [research findings / my design knowledge]:
AESTHETIC: [direction] — [one-line rationale]
DECORATION: [level] — [why this pairs with the aesthetic]
LAYOUT: [approach] — [why this fits the product type]
COLOR: [approach] + proposed palette (hex values) — [rationale]
TYPOGRAPHY: [3 font recommendations with roles] — [why these fonts]
SPACING: [base unit + density] — [rationale]
MOTION: [approach] — [rationale]
This system is coherent because [explain how choices reinforce each other].
SAFE CHOICES (category baseline — your users expect these):
- [2-3 decisions that match category conventions, with rationale for playing safe]
RISKS (where your product gets its own face):
- [2-3 deliberate departures from convention]
- For each risk: what it is, why it works, what you gain, what it costs
The safe choices keep you literate in your category. The risks are where
your product becomes memorable. Which risks appeal to you? Want to see
different ones? Or adjust anything else?
```
The SAFE/RISK breakdown is critical. Design coherence is table stakes — every product in a category can be coherent and still look identical. The real question is: where do you take creative risks? The agent should always propose at least 2 risks, each with a clear rationale for why the risk is worth taking and what the user gives up. Risks might include: an unexpected typeface for the category, a bold accent color nobody else uses, tighter or looser spacing than the norm, a layout approach that breaks from convention, motion choices that add personality.
**Options:** A) Looks great — generate the preview page. B) I want to adjust [section]. C) I want different risks — show me wilder options. D) Start over with a different direction. E) Skip the preview, just write DESIGN.md.
### Your Design Knowledge (use to inform proposals — do NOT display as tables)
**Aesthetic directions** (pick the one that fits the product):
- Brutally Minimal — Type and whitespace only. No decoration. Modernist.
- Maximalist Chaos — Dense, layered, pattern-heavy. Y2K meets contemporary.
- Retro-Futuristic — Vintage tech nostalgia. CRT glow, pixel grids, warm monospace.
- Luxury/Refined — Serifs, high contrast, generous whitespace, precious metals.
- Playful/Toy-like — Rounded, bouncy, bold primaries. Approachable and fun.
- Editorial/Magazine — Strong typographic hierarchy, asymmetric grids, pull quotes.
- Brutalist/Raw — Exposed structure, system fonts, visible grid, no polish.
- Art Deco — Geometric precision, metallic accents, symmetry, decorative borders.
- Organic/Natural — Earth tones, rounded forms, hand-drawn texture, grain.
- Industrial/Utilitarian — Function-first, data-dense, monospace accents, muted palette.
**Decoration levels:** minimal (typography does all the work) / intentional (subtle texture, grain, or background treatment) / expressive (full creative direction, layered depth, patterns)
**Layout approaches:** grid-disciplined (strict columns, predictable alignment) / creative-editorial (asymmetry, overlap, grid-breaking) / hybrid (grid for app, creative for marketing)
**Color approaches:** restrained (1 accent + neutrals, color is rare and meaningful) / balanced (primary + secondary, semantic colors for hierarchy) / expressive (color as a primary design tool, bold palettes)
**Motion approaches:** minimal-functional (only transitions that aid comprehension) / intentional (subtle entrance animations, meaningful state transitions) / expressive (full choreography, scroll-driven, playful)
**Font recommendations by purpose:**
- Display/Hero: Satoshi, General Sans, Instrument Serif, Fraunces, Clash Grotesk, Cabinet Grotesk
- Body: Instrument Sans, DM Sans, Source Sans 3, Geist, Plus Jakarta Sans, Outfit
- Data/Tables: Geist (tabular-nums), DM Sans (tabular-nums), JetBrains Mono, IBM Plex Mono
- Code: JetBrains Mono, Fira Code, Berkeley Mono, Geist Mono
**Font blacklist** (never recommend):
Papyrus, Comic Sans, Lobster, Impact, Jokerman, Bleeding Cowboys, Permanent Marker, Bradley Hand, Brush Script, Hobo, Trajan, Raleway, Clash Display, Courier New (for body)
**Overused fonts** (never recommend as primary — use only if user specifically requests):
Inter, Roboto, Arial, Helvetica, Open Sans, Lato, Montserrat, Poppins, Space Grotesk.
Space Grotesk is on the list specifically because every AI design tool converges on it
as "the safe alternative to Inter." That's the convergence trap. Treat it the same as
Inter: only use if the user asks for it by name.
**Anti-convergence directive:** Across multiple generations in the same project, VARY
light/dark, fonts, and aesthetic directions. Never propose the same choices twice
without explicit justification. If the user's prior session used Geist + dark + editorial,
propose something different this time (or explicitly acknowledge you're doubling down
because it fits the brief). Convergence across generations is slop.
**AI slop anti-patterns** (never include in your recommendations):
- Purple/violet gradients as default accent
- 3-column feature grid with icons in colored circles
- Centered everything with uniform spacing
- Uniform bubbly border-radius on all elements
- Gradient buttons as the primary CTA pattern
- Generic stock-photo-style hero sections
- system-ui / -apple-system as the primary display or body font (the "I gave up on typography" signal)
- "Built for X" / "Designed for Y" marketing copy patterns
### Coherence Validation
When the user overrides one section, check if the rest still coheres. Flag mismatches with a gentle nudge — never block:
- Brutalist/Minimal aesthetic + expressive motion → "Heads up: brutalist aesthetics usually pair with minimal motion. Your combo is unusual — which is fine if intentional. Want me to suggest motion that fits, or keep it?"
- Expressive color + restrained decoration → "Bold palette with minimal decoration can work, but the colors will carry a lot of weight. Want me to suggest decoration that supports the palette?"
- Creative-editorial layout + data-heavy product → "Editorial layouts are gorgeous but can fight data density. Want me to show how a hybrid approach keeps both?"
- Always accept the user's final choice. Never refuse to proceed.
---
## Phase 4: Drill-downs (only if user requests adjustments)
When the user wants to change a specific section, go deep on that section:
- **Fonts:** Present 3-5 specific candidates with rationale, explain what each evokes, offer the preview page
- **Colors:** Present 2-3 palette options with hex values, explain the color theory reasoning
- **Aesthetic:** Walk through which directions fit their product and why
- **Layout/Spacing/Motion:** Present the approaches with concrete tradeoffs for their product type
Each drill-down is one focused AskUserQuestion. After the user decides, re-check coherence with the rest of the system.
---
## Phase 5: Design System Preview (default ON)
This phase generates visual previews of the proposed design system. Two paths depending on whether the gstack designer is available.
### Path A: AI Mockups (if DESIGN_READY)
Generate AI-rendered mockups showing the proposed design system applied to realistic screens for this product. This is far more powerful than an HTML preview — the user sees what their product could actually look like.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_DESIGN_DIR="$HOME/.gstack/projects/$SLUG/designs/design-system-$(date +%Y%m%d)"
mkdir -p "$_DESIGN_DIR"
echo "DESIGN_DIR: $_DESIGN_DIR"
```
Construct a design brief from the Phase 3 proposal (aesthetic, colors, typography, spacing, layout) and the product context from Phase 1:
```bash
$D variants --brief "<product name: [name]. Product type: [type]. Aesthetic: [direction]. Colors: primary [hex], secondary [hex], neutrals [range]. Typography: display [font], body [font]. Layout: [approach]. Show a realistic [page type] screen with [specific content for this product].>" --count 3 --output-dir "$_DESIGN_DIR/"
```
Run quality check on each variant:
```bash
$D check --image "$_DESIGN_DIR/variant-A.png" --brief "<the original brief>"
```
Show each variant inline (Read tool on each PNG) for instant preview.
**Before presenting to the user, self-gate:** For each variant, ask yourself: *"Would
a human designer be embarrassed to put their name on this?"* If yes, discard the
variant and regenerate. This is a hard gate. A mediocre AI mockup is worse than no
mockup. Embarrassment triggers include: purple gradient hero, 3-column SaaS grid,
centered-everything, Inter body text, generic stock-photo vibe, system-ui font,
gradient CTA button, bubble-radius everything. Any of those = reject and regenerate.
Tell the user: "I've generated 3 visual directions applying your design system to a realistic [product type] screen. Pick your favorite in the comparison board that just opened in your browser. You can also remix elements across variants."
{{DESIGN_SHOTGUN_LOOP}}
After the user picks a direction:
- Use `$D extract --image "$_DESIGN_DIR/variant-<CHOSEN>.png"` to analyze the approved mockup and extract design tokens (colors, typography, spacing) that will populate DESIGN.md in Phase 6. This grounds the design system in what was actually approved visually, not just what was described in text.
- If the user wants to iterate further: `$D iterate --feedback "<user's feedback>" --output "$_DESIGN_DIR/refined.png"`
**Plan mode vs. implementation mode:**
- **If in plan mode:** Add the approved mockup path (the full `$_DESIGN_DIR` path) and extracted tokens to the plan file under an "## Approved Design Direction" section. The design system gets written to DESIGN.md when the plan is implemented.
- **If NOT in plan mode:** Proceed directly to Phase 6 and write DESIGN.md with the extracted tokens.
### Path B: HTML Preview Page (fallback if DESIGN_NOT_AVAILABLE)
Generate a polished HTML preview page and open it in the user's browser. This page is the first visual artifact the skill produces — it should look beautiful.
```bash
PREVIEW_FILE="/tmp/design-consultation-preview-$(date +%s).html"
```
Write the preview HTML to `$PREVIEW_FILE`, then open it:
```bash
open "$PREVIEW_FILE"
```
### Preview Page Requirements (Path B only)
The agent writes a **single, self-contained HTML file** (no framework dependencies) that:
1. **Loads proposed fonts** from Google Fonts (or Bunny Fonts) via `<link>` tags
2. **Uses the proposed color palette** throughout — dogfood the design system
3. **Shows the product name** (not "Lorem Ipsum") as the hero heading
4. **Font specimen section:**
- Each font candidate shown in its proposed role (hero heading, body paragraph, button label, data table row)
- Side-by-side comparison if multiple candidates for one role
- Real content that matches the product (e.g., civic tech → government data examples)
5. **Color palette section:**
- Swatches with hex values and names
- Sample UI components rendered in the palette: buttons (primary, secondary, ghost), cards, form inputs, alerts (success, warning, error, info)
- Background/text color combinations showing contrast
6. **Realistic product mockups** — this is what makes the preview page powerful. Based on the project type from Phase 1, render 2-3 realistic page layouts using the full design system:
- **Dashboard / web app:** sample data table with metrics, sidebar nav, header with user avatar, stat cards
- **Marketing site:** hero section with real copy, feature highlights, testimonial block, CTA
- **Settings / admin:** form with labeled inputs, toggle switches, dropdowns, save button
- **Auth / onboarding:** login form with social buttons, branding, input validation states
- Use the product name, realistic content for the domain, and the proposed spacing/layout/border-radius. The user should see their product (roughly) before writing any code.
7. **Light/dark mode toggle** using CSS custom properties and a JS toggle button
8. **Clean, professional layout** — the preview page IS a taste signal for the skill
9. **Responsive** — looks good on any screen width
The page should make the user think "oh nice, they thought of this." It's selling the design system by showing what the product could feel like, not just listing hex codes and font names.
If `open` fails (headless environment), tell the user: *"I wrote the preview to [path] — open it in your browser to see the fonts and colors rendered."*
If the user says skip the preview, go directly to Phase 6.
---
## Phase 6: Write DESIGN.md & Confirm
If `$D extract` was used in Phase 5 (Path A), use the extracted tokens as the primary source for DESIGN.md values — colors, typography, and spacing grounded in the approved mockup rather than text descriptions alone. Merge extracted tokens with the Phase 3 proposal (the proposal provides rationale and context; the extraction provides exact values).
**If in plan mode:** Write the DESIGN.md content into the plan file as a "## Proposed DESIGN.md" section. Do NOT write the actual file — that happens at implementation time.
**If NOT in plan mode:** Write `DESIGN.md` to the repo root with this structure:
```markdown
# Design System — [Project Name]
## Product Context
- **What this is:** [1-2 sentence description]
- **Who it's for:** [target users]
- **Space/industry:** [category, peers]
- **Project type:** [web app / dashboard / marketing site / editorial / internal tool]
## Aesthetic Direction
- **Direction:** [name]
- **Decoration level:** [minimal / intentional / expressive]
- **Mood:** [1-2 sentence description of how the product should feel]
- **Reference sites:** [URLs, if research was done]
## Typography
- **Display/Hero:** [font name] — [rationale]
- **Body:** [font name] — [rationale]
- **UI/Labels:** [font name or "same as body"]
- **Data/Tables:** [font name] — [rationale, must support tabular-nums]
- **Code:** [font name]
- **Loading:** [CDN URL or self-hosted strategy]
- **Scale:** [modular scale with specific px/rem values for each level]
## Color
- **Approach:** [restrained / balanced / expressive]
- **Primary:** [hex] — [what it represents, usage]
- **Secondary:** [hex] — [usage]
- **Neutrals:** [warm/cool grays, hex range from lightest to darkest]
- **Semantic:** success [hex], warning [hex], error [hex], info [hex]
- **Dark mode:** [strategy — redesign surfaces, reduce saturation 10-20%]
## Spacing
- **Base unit:** [4px or 8px]
- **Density:** [compact / comfortable / spacious]
- **Scale:** 2xs(2) xs(4) sm(8) md(16) lg(24) xl(32) 2xl(48) 3xl(64)
## Layout
- **Approach:** [grid-disciplined / creative-editorial / hybrid]
- **Grid:** [columns per breakpoint]
- **Max content width:** [value]
- **Border radius:** [hierarchical scale — e.g., sm:4px, md:8px, lg:12px, full:9999px]
## Motion
- **Approach:** [minimal-functional / intentional / expressive]
- **Easing:** enter(ease-out) exit(ease-in) move(ease-in-out)
- **Duration:** micro(50-100ms) short(150-250ms) medium(250-400ms) long(400-700ms)
## Decisions Log
| Date | Decision | Rationale |
|------|----------|-----------|
| [today] | Initial design system created | Created by /design-consultation based on [product context / research] |
```
**Update CLAUDE.md** (or create it if it doesn't exist) — append this section:
```markdown
## Design System
Always read DESIGN.md before making any visual or UI decisions.
All font choices, colors, spacing, and aesthetic direction are defined there.
Do not deviate without explicit user approval.
In QA mode, flag any code that doesn't match DESIGN.md.
```
**AskUserQuestion Q-final — show summary and confirm:**
List all decisions. Flag any that used agent defaults without explicit user confirmation (the user should know what they're shipping). Options:
- A) Ship it — write DESIGN.md and CLAUDE.md
- B) I want to change something (specify what)
- C) Start over
After shipping DESIGN.md, if the session produced screen-level mockups or page layouts
(not just system-level tokens), suggest:
"Want to see this design system as working Pretext-native HTML? Run /design-html."
---
{{SECTION:proposal-and-preview}}
{{LEARNINGS_LOG}}
{{GBRAIN_SAVE_RESULTS}}
@@ -0,0 +1,14 @@
{
"$schema": "https://gstack.dev/schemas/section-manifest.json",
"skill": "design-consultation",
"version": 1,
"note": "PASSIVE registry (v2 plan T9 / CM2). id/file/title/trigger text ONLY. The skeleton's decision-tree prose decides WHEN to read.",
"sections": [
{
"id": "proposal-and-preview",
"file": "proposal-and-preview.md",
"title": "The complete design-system proposal, drill-downs, design preview (AI mockups / HTML), and writing DESIGN.md (Phases 3-6)",
"trigger": "building the complete design-system proposal, drill-downs, the design preview, and writing DESIGN.md (Phases 3-6, after product context and research)"
}
]
}
@@ -0,0 +1,408 @@
<!-- AUTO-GENERATED from proposal-and-preview.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## Phase 3: The Complete Proposal
This is the soul of the skill. Propose EVERYTHING as one coherent package.
**AskUserQuestion Q2 — present the full proposal with SAFE/RISK breakdown:**
```
Based on [product context] and [research findings / my design knowledge]:
AESTHETIC: [direction] — [one-line rationale]
DECORATION: [level] — [why this pairs with the aesthetic]
LAYOUT: [approach] — [why this fits the product type]
COLOR: [approach] + proposed palette (hex values) — [rationale]
TYPOGRAPHY: [3 font recommendations with roles] — [why these fonts]
SPACING: [base unit + density] — [rationale]
MOTION: [approach] — [rationale]
This system is coherent because [explain how choices reinforce each other].
SAFE CHOICES (category baseline — your users expect these):
- [2-3 decisions that match category conventions, with rationale for playing safe]
RISKS (where your product gets its own face):
- [2-3 deliberate departures from convention]
- For each risk: what it is, why it works, what you gain, what it costs
The safe choices keep you literate in your category. The risks are where
your product becomes memorable. Which risks appeal to you? Want to see
different ones? Or adjust anything else?
```
The SAFE/RISK breakdown is critical. Design coherence is table stakes — every product in a category can be coherent and still look identical. The real question is: where do you take creative risks? The agent should always propose at least 2 risks, each with a clear rationale for why the risk is worth taking and what the user gives up. Risks might include: an unexpected typeface for the category, a bold accent color nobody else uses, tighter or looser spacing than the norm, a layout approach that breaks from convention, motion choices that add personality.
**Options:** A) Looks great — generate the preview page. B) I want to adjust [section]. C) I want different risks — show me wilder options. D) Start over with a different direction. E) Skip the preview, just write DESIGN.md.
### Your Design Knowledge (use to inform proposals — do NOT display as tables)
**Aesthetic directions** (pick the one that fits the product):
- Brutally Minimal — Type and whitespace only. No decoration. Modernist.
- Maximalist Chaos — Dense, layered, pattern-heavy. Y2K meets contemporary.
- Retro-Futuristic — Vintage tech nostalgia. CRT glow, pixel grids, warm monospace.
- Luxury/Refined — Serifs, high contrast, generous whitespace, precious metals.
- Playful/Toy-like — Rounded, bouncy, bold primaries. Approachable and fun.
- Editorial/Magazine — Strong typographic hierarchy, asymmetric grids, pull quotes.
- Brutalist/Raw — Exposed structure, system fonts, visible grid, no polish.
- Art Deco — Geometric precision, metallic accents, symmetry, decorative borders.
- Organic/Natural — Earth tones, rounded forms, hand-drawn texture, grain.
- Industrial/Utilitarian — Function-first, data-dense, monospace accents, muted palette.
**Decoration levels:** minimal (typography does all the work) / intentional (subtle texture, grain, or background treatment) / expressive (full creative direction, layered depth, patterns)
**Layout approaches:** grid-disciplined (strict columns, predictable alignment) / creative-editorial (asymmetry, overlap, grid-breaking) / hybrid (grid for app, creative for marketing)
**Color approaches:** restrained (1 accent + neutrals, color is rare and meaningful) / balanced (primary + secondary, semantic colors for hierarchy) / expressive (color as a primary design tool, bold palettes)
**Motion approaches:** minimal-functional (only transitions that aid comprehension) / intentional (subtle entrance animations, meaningful state transitions) / expressive (full choreography, scroll-driven, playful)
**Font recommendations by purpose:**
- Display/Hero: Satoshi, General Sans, Instrument Serif, Fraunces, Clash Grotesk, Cabinet Grotesk
- Body: Instrument Sans, DM Sans, Source Sans 3, Geist, Plus Jakarta Sans, Outfit
- Data/Tables: Geist (tabular-nums), DM Sans (tabular-nums), JetBrains Mono, IBM Plex Mono
- Code: JetBrains Mono, Fira Code, Berkeley Mono, Geist Mono
**Font blacklist** (never recommend):
Papyrus, Comic Sans, Lobster, Impact, Jokerman, Bleeding Cowboys, Permanent Marker, Bradley Hand, Brush Script, Hobo, Trajan, Raleway, Clash Display, Courier New (for body)
**Overused fonts** (never recommend as primary — use only if user specifically requests):
Inter, Roboto, Arial, Helvetica, Open Sans, Lato, Montserrat, Poppins, Space Grotesk.
Space Grotesk is on the list specifically because every AI design tool converges on it
as "the safe alternative to Inter." That's the convergence trap. Treat it the same as
Inter: only use if the user asks for it by name.
**Anti-convergence directive:** Across multiple generations in the same project, VARY
light/dark, fonts, and aesthetic directions. Never propose the same choices twice
without explicit justification. If the user's prior session used Geist + dark + editorial,
propose something different this time (or explicitly acknowledge you're doubling down
because it fits the brief). Convergence across generations is slop.
**AI slop anti-patterns** (never include in your recommendations):
- Purple/violet gradients as default accent
- 3-column feature grid with icons in colored circles
- Centered everything with uniform spacing
- Uniform bubbly border-radius on all elements
- Gradient buttons as the primary CTA pattern
- Generic stock-photo-style hero sections
- system-ui / -apple-system as the primary display or body font (the "I gave up on typography" signal)
- "Built for X" / "Designed for Y" marketing copy patterns
### Coherence Validation
When the user overrides one section, check if the rest still coheres. Flag mismatches with a gentle nudge — never block:
- Brutalist/Minimal aesthetic + expressive motion → "Heads up: brutalist aesthetics usually pair with minimal motion. Your combo is unusual — which is fine if intentional. Want me to suggest motion that fits, or keep it?"
- Expressive color + restrained decoration → "Bold palette with minimal decoration can work, but the colors will carry a lot of weight. Want me to suggest decoration that supports the palette?"
- Creative-editorial layout + data-heavy product → "Editorial layouts are gorgeous but can fight data density. Want me to show how a hybrid approach keeps both?"
- Always accept the user's final choice. Never refuse to proceed.
---
## Phase 4: Drill-downs (only if user requests adjustments)
When the user wants to change a specific section, go deep on that section:
- **Fonts:** Present 3-5 specific candidates with rationale, explain what each evokes, offer the preview page
- **Colors:** Present 2-3 palette options with hex values, explain the color theory reasoning
- **Aesthetic:** Walk through which directions fit their product and why
- **Layout/Spacing/Motion:** Present the approaches with concrete tradeoffs for their product type
Each drill-down is one focused AskUserQuestion. After the user decides, re-check coherence with the rest of the system.
---
## Phase 5: Design System Preview (default ON)
This phase generates visual previews of the proposed design system. Two paths depending on whether the gstack designer is available.
### Path A: AI Mockups (if DESIGN_READY)
Generate AI-rendered mockups showing the proposed design system applied to realistic screens for this product. This is far more powerful than an HTML preview — the user sees what their product could actually look like.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_DESIGN_DIR="$HOME/.gstack/projects/$SLUG/designs/design-system-$(date +%Y%m%d)"
mkdir -p "$_DESIGN_DIR"
echo "DESIGN_DIR: $_DESIGN_DIR"
```
Construct a design brief from the Phase 3 proposal (aesthetic, colors, typography, spacing, layout) and the product context from Phase 1:
```bash
$D variants --brief "<product name: [name]. Product type: [type]. Aesthetic: [direction]. Colors: primary [hex], secondary [hex], neutrals [range]. Typography: display [font], body [font]. Layout: [approach]. Show a realistic [page type] screen with [specific content for this product].>" --count 3 --output-dir "$_DESIGN_DIR/"
```
Run quality check on each variant:
```bash
$D check --image "$_DESIGN_DIR/variant-A.png" --brief "<the original brief>"
```
Show each variant inline (Read tool on each PNG) for instant preview.
**Before presenting to the user, self-gate:** For each variant, ask yourself: *"Would
a human designer be embarrassed to put their name on this?"* If yes, discard the
variant and regenerate. This is a hard gate. A mediocre AI mockup is worse than no
mockup. Embarrassment triggers include: purple gradient hero, 3-column SaaS grid,
centered-everything, Inter body text, generic stock-photo vibe, system-ui font,
gradient CTA button, bubble-radius everything. Any of those = reject and regenerate.
Tell the user: "I've generated 3 visual directions applying your design system to a realistic [product type] screen. Pick your favorite in the comparison board that just opened in your browser. You can also remix elements across variants."
### Comparison Board + Feedback Loop
Create the comparison board and serve it over HTTP:
```bash
$D compare --images "$_DESIGN_DIR/variant-A.png,$_DESIGN_DIR/variant-B.png,$_DESIGN_DIR/variant-C.png" --output "$_DESIGN_DIR/design-board.html" --serve
```
This command generates the board HTML, starts an HTTP server on a random port,
and opens it in the user's default browser. **Run it in the background** with `&`
because the server needs to stay running while the user interacts with the board.
Parse the board URL from stderr output. Default daemon path:
`BOARD_URL: http://127.0.0.1:N/boards/<id>/` (already includes the per-board
path; use this for the AskUserQuestion URL AND as the base for the reload
endpoint). Legacy `--no-daemon` path emits `SERVE_STARTED: port=XXXXX` and
serves a single board at `/`, with reload at `/api/reload` — only relevant
when an external caller explicitly passes `--no-daemon`.
**PRIMARY WAIT: AskUserQuestion with board URL**
After the board is serving, use AskUserQuestion to wait for the user. Include the
board URL so they can click it if they lost the browser tab:
"I've opened a comparison board with the design variants:
<BOARD_URL> — Rate them, leave comments, remix
elements you like, and click Submit when you're done. Let me know when you've
submitted your feedback (or paste your preferences here). If you clicked
Regenerate or Remix on the board, tell me and I'll generate new variants."
Substitute `<BOARD_URL>` with the URL parsed from stderr (the daemon path
emits `BOARD_URL: http://127.0.0.1:N/boards/<id>/`).
**Do NOT use AskUserQuestion to ask which variant the user prefers.** The comparison
board IS the chooser. AskUserQuestion is just the blocking wait mechanism.
**After the user responds to AskUserQuestion:**
Check for feedback files next to the board HTML:
- `$_DESIGN_DIR/feedback.json` — written when user clicks Submit (final choice)
- `$_DESIGN_DIR/feedback-pending.json` — written when user clicks Regenerate/Remix/More Like This
```bash
if [ -f "$_DESIGN_DIR/feedback.json" ]; then
echo "SUBMIT_RECEIVED"
cat "$_DESIGN_DIR/feedback.json"
elif [ -f "$_DESIGN_DIR/feedback-pending.json" ]; then
echo "REGENERATE_RECEIVED"
cat "$_DESIGN_DIR/feedback-pending.json"
rm "$_DESIGN_DIR/feedback-pending.json"
else
echo "NO_FEEDBACK_FILE"
fi
```
The feedback JSON has this shape:
```json
{
"preferred": "A",
"ratings": { "A": 4, "B": 3, "C": 2 },
"comments": { "A": "Love the spacing" },
"overall": "Go with A, bigger CTA",
"regenerated": false
}
```
**If `feedback.json` found:** The user clicked Submit on the board.
Read `preferred`, `ratings`, `comments`, `overall` from the JSON. Proceed with
the approved variant.
**If `feedback-pending.json` found:** The user clicked Regenerate/Remix on the board.
1. Read `regenerateAction` from the JSON (`"different"`, `"match"`, `"more_like_B"`,
`"remix"`, or custom text)
2. If `regenerateAction` is `"remix"`, read `remixSpec` (e.g. `{"layout":"A","colors":"B"}`)
3. Generate new variants with `$D iterate` or `$D variants` using updated brief
4. Create new board: `$D compare --images "..." --output "$_DESIGN_DIR/design-board.html"`
5. Reload the board in the user's browser (same tab) — the URL is per-board
under daemon mode, so use `<BOARD_URL>` (from the `BOARD_URL:` stderr
line) as the base:
`curl -s -X POST "${BOARD_URL}api/reload" -H 'Content-Type: application/json' -d '{"html":"$_DESIGN_DIR/design-board.html"}'`
Under `--no-daemon` the reload endpoint is `/api/reload` at the legacy
port; this path only matters if the caller explicitly opted out of the
daemon.
6. The board auto-refreshes. **AskUserQuestion again** with the same board URL to
wait for the next round of feedback. Repeat until `feedback.json` appears.
**If `NO_FEEDBACK_FILE`:** The user typed their preferences directly in the
AskUserQuestion response instead of using the board. Use their text response
as the feedback.
**POLLING FALLBACK:** Only use polling if `$D serve` fails (no port available).
In that case, show each variant inline using the Read tool (so the user can see them),
then use AskUserQuestion:
"The comparison board server failed to start. I've shown the variants above.
Which do you prefer? Any feedback?"
**After receiving feedback (any path):** Output a clear summary confirming
what was understood:
"Here's what I understood from your feedback:
PREFERRED: Variant [X]
RATINGS: [list]
YOUR NOTES: [comments]
DIRECTION: [overall]
Is this right?"
Use AskUserQuestion to verify before proceeding.
**Save the approved choice:**
```bash
echo '{"approved_variant":"<V>","feedback":"<FB>","date":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","screen":"<SCREEN>","branch":"'$(git branch --show-current 2>/dev/null)'"}' > "$_DESIGN_DIR/approved.json"
```
After the user picks a direction:
- Use `$D extract --image "$_DESIGN_DIR/variant-<CHOSEN>.png"` to analyze the approved mockup and extract design tokens (colors, typography, spacing) that will populate DESIGN.md in Phase 6. This grounds the design system in what was actually approved visually, not just what was described in text.
- If the user wants to iterate further: `$D iterate --feedback "<user's feedback>" --output "$_DESIGN_DIR/refined.png"`
**Plan mode vs. implementation mode:**
- **If in plan mode:** Add the approved mockup path (the full `$_DESIGN_DIR` path) and extracted tokens to the plan file under an "## Approved Design Direction" section. The design system gets written to DESIGN.md when the plan is implemented.
- **If NOT in plan mode:** Proceed directly to Phase 6 and write DESIGN.md with the extracted tokens.
### Path B: HTML Preview Page (fallback if DESIGN_NOT_AVAILABLE)
Generate a polished HTML preview page and open it in the user's browser. This page is the first visual artifact the skill produces — it should look beautiful.
```bash
PREVIEW_FILE="/tmp/design-consultation-preview-$(date +%s).html"
```
Write the preview HTML to `$PREVIEW_FILE`, then open it:
```bash
open "$PREVIEW_FILE"
```
### Preview Page Requirements (Path B only)
The agent writes a **single, self-contained HTML file** (no framework dependencies) that:
1. **Loads proposed fonts** from Google Fonts (or Bunny Fonts) via `<link>` tags
2. **Uses the proposed color palette** throughout — dogfood the design system
3. **Shows the product name** (not "Lorem Ipsum") as the hero heading
4. **Font specimen section:**
- Each font candidate shown in its proposed role (hero heading, body paragraph, button label, data table row)
- Side-by-side comparison if multiple candidates for one role
- Real content that matches the product (e.g., civic tech → government data examples)
5. **Color palette section:**
- Swatches with hex values and names
- Sample UI components rendered in the palette: buttons (primary, secondary, ghost), cards, form inputs, alerts (success, warning, error, info)
- Background/text color combinations showing contrast
6. **Realistic product mockups** — this is what makes the preview page powerful. Based on the project type from Phase 1, render 2-3 realistic page layouts using the full design system:
- **Dashboard / web app:** sample data table with metrics, sidebar nav, header with user avatar, stat cards
- **Marketing site:** hero section with real copy, feature highlights, testimonial block, CTA
- **Settings / admin:** form with labeled inputs, toggle switches, dropdowns, save button
- **Auth / onboarding:** login form with social buttons, branding, input validation states
- Use the product name, realistic content for the domain, and the proposed spacing/layout/border-radius. The user should see their product (roughly) before writing any code.
7. **Light/dark mode toggle** using CSS custom properties and a JS toggle button
8. **Clean, professional layout** — the preview page IS a taste signal for the skill
9. **Responsive** — looks good on any screen width
The page should make the user think "oh nice, they thought of this." It's selling the design system by showing what the product could feel like, not just listing hex codes and font names.
If `open` fails (headless environment), tell the user: *"I wrote the preview to [path] — open it in your browser to see the fonts and colors rendered."*
If the user says skip the preview, go directly to Phase 6.
---
## Phase 6: Write DESIGN.md & Confirm
If `$D extract` was used in Phase 5 (Path A), use the extracted tokens as the primary source for DESIGN.md values — colors, typography, and spacing grounded in the approved mockup rather than text descriptions alone. Merge extracted tokens with the Phase 3 proposal (the proposal provides rationale and context; the extraction provides exact values).
**If in plan mode:** Write the DESIGN.md content into the plan file as a "## Proposed DESIGN.md" section. Do NOT write the actual file — that happens at implementation time.
**If NOT in plan mode:** Write `DESIGN.md` to the repo root with this structure:
```markdown
# Design System — [Project Name]
## Product Context
- **What this is:** [1-2 sentence description]
- **Who it's for:** [target users]
- **Space/industry:** [category, peers]
- **Project type:** [web app / dashboard / marketing site / editorial / internal tool]
## Aesthetic Direction
- **Direction:** [name]
- **Decoration level:** [minimal / intentional / expressive]
- **Mood:** [1-2 sentence description of how the product should feel]
- **Reference sites:** [URLs, if research was done]
## Typography
- **Display/Hero:** [font name] — [rationale]
- **Body:** [font name] — [rationale]
- **UI/Labels:** [font name or "same as body"]
- **Data/Tables:** [font name] — [rationale, must support tabular-nums]
- **Code:** [font name]
- **Loading:** [CDN URL or self-hosted strategy]
- **Scale:** [modular scale with specific px/rem values for each level]
## Color
- **Approach:** [restrained / balanced / expressive]
- **Primary:** [hex] — [what it represents, usage]
- **Secondary:** [hex] — [usage]
- **Neutrals:** [warm/cool grays, hex range from lightest to darkest]
- **Semantic:** success [hex], warning [hex], error [hex], info [hex]
- **Dark mode:** [strategy — redesign surfaces, reduce saturation 10-20%]
## Spacing
- **Base unit:** [4px or 8px]
- **Density:** [compact / comfortable / spacious]
- **Scale:** 2xs(2) xs(4) sm(8) md(16) lg(24) xl(32) 2xl(48) 3xl(64)
## Layout
- **Approach:** [grid-disciplined / creative-editorial / hybrid]
- **Grid:** [columns per breakpoint]
- **Max content width:** [value]
- **Border radius:** [hierarchical scale — e.g., sm:4px, md:8px, lg:12px, full:9999px]
## Motion
- **Approach:** [minimal-functional / intentional / expressive]
- **Easing:** enter(ease-out) exit(ease-in) move(ease-in-out)
- **Duration:** micro(50-100ms) short(150-250ms) medium(250-400ms) long(400-700ms)
## Decisions Log
| Date | Decision | Rationale |
|------|----------|-----------|
| [today] | Initial design system created | Created by /design-consultation based on [product context / research] |
```
**Update CLAUDE.md** (or create it if it doesn't exist) — append this section:
```markdown
## Design System
Always read DESIGN.md before making any visual or UI decisions.
All font choices, colors, spacing, and aesthetic direction are defined there.
Do not deviate without explicit user approval.
In QA mode, flag any code that doesn't match DESIGN.md.
```
**AskUserQuestion Q-final — show summary and confirm:**
List all decisions. Flag any that used agent defaults without explicit user confirmation (the user should know what they're shipping). Options:
- A) Ship it — write DESIGN.md and CLAUDE.md
- B) I want to change something (specify what)
- C) Start over
After shipping DESIGN.md, if the session produced screen-level mockups or page layouts
(not just system-level tokens), suggest:
"Want to see this design system as working Pretext-native HTML? Run /design-html."
---
@@ -0,0 +1,294 @@
## Phase 3: The Complete Proposal
This is the soul of the skill. Propose EVERYTHING as one coherent package.
**AskUserQuestion Q2 — present the full proposal with SAFE/RISK breakdown:**
```
Based on [product context] and [research findings / my design knowledge]:
AESTHETIC: [direction] — [one-line rationale]
DECORATION: [level] — [why this pairs with the aesthetic]
LAYOUT: [approach] — [why this fits the product type]
COLOR: [approach] + proposed palette (hex values) — [rationale]
TYPOGRAPHY: [3 font recommendations with roles] — [why these fonts]
SPACING: [base unit + density] — [rationale]
MOTION: [approach] — [rationale]
This system is coherent because [explain how choices reinforce each other].
SAFE CHOICES (category baseline — your users expect these):
- [2-3 decisions that match category conventions, with rationale for playing safe]
RISKS (where your product gets its own face):
- [2-3 deliberate departures from convention]
- For each risk: what it is, why it works, what you gain, what it costs
The safe choices keep you literate in your category. The risks are where
your product becomes memorable. Which risks appeal to you? Want to see
different ones? Or adjust anything else?
```
The SAFE/RISK breakdown is critical. Design coherence is table stakes — every product in a category can be coherent and still look identical. The real question is: where do you take creative risks? The agent should always propose at least 2 risks, each with a clear rationale for why the risk is worth taking and what the user gives up. Risks might include: an unexpected typeface for the category, a bold accent color nobody else uses, tighter or looser spacing than the norm, a layout approach that breaks from convention, motion choices that add personality.
**Options:** A) Looks great — generate the preview page. B) I want to adjust [section]. C) I want different risks — show me wilder options. D) Start over with a different direction. E) Skip the preview, just write DESIGN.md.
### Your Design Knowledge (use to inform proposals — do NOT display as tables)
**Aesthetic directions** (pick the one that fits the product):
- Brutally Minimal — Type and whitespace only. No decoration. Modernist.
- Maximalist Chaos — Dense, layered, pattern-heavy. Y2K meets contemporary.
- Retro-Futuristic — Vintage tech nostalgia. CRT glow, pixel grids, warm monospace.
- Luxury/Refined — Serifs, high contrast, generous whitespace, precious metals.
- Playful/Toy-like — Rounded, bouncy, bold primaries. Approachable and fun.
- Editorial/Magazine — Strong typographic hierarchy, asymmetric grids, pull quotes.
- Brutalist/Raw — Exposed structure, system fonts, visible grid, no polish.
- Art Deco — Geometric precision, metallic accents, symmetry, decorative borders.
- Organic/Natural — Earth tones, rounded forms, hand-drawn texture, grain.
- Industrial/Utilitarian — Function-first, data-dense, monospace accents, muted palette.
**Decoration levels:** minimal (typography does all the work) / intentional (subtle texture, grain, or background treatment) / expressive (full creative direction, layered depth, patterns)
**Layout approaches:** grid-disciplined (strict columns, predictable alignment) / creative-editorial (asymmetry, overlap, grid-breaking) / hybrid (grid for app, creative for marketing)
**Color approaches:** restrained (1 accent + neutrals, color is rare and meaningful) / balanced (primary + secondary, semantic colors for hierarchy) / expressive (color as a primary design tool, bold palettes)
**Motion approaches:** minimal-functional (only transitions that aid comprehension) / intentional (subtle entrance animations, meaningful state transitions) / expressive (full choreography, scroll-driven, playful)
**Font recommendations by purpose:**
- Display/Hero: Satoshi, General Sans, Instrument Serif, Fraunces, Clash Grotesk, Cabinet Grotesk
- Body: Instrument Sans, DM Sans, Source Sans 3, Geist, Plus Jakarta Sans, Outfit
- Data/Tables: Geist (tabular-nums), DM Sans (tabular-nums), JetBrains Mono, IBM Plex Mono
- Code: JetBrains Mono, Fira Code, Berkeley Mono, Geist Mono
**Font blacklist** (never recommend):
Papyrus, Comic Sans, Lobster, Impact, Jokerman, Bleeding Cowboys, Permanent Marker, Bradley Hand, Brush Script, Hobo, Trajan, Raleway, Clash Display, Courier New (for body)
**Overused fonts** (never recommend as primary — use only if user specifically requests):
Inter, Roboto, Arial, Helvetica, Open Sans, Lato, Montserrat, Poppins, Space Grotesk.
Space Grotesk is on the list specifically because every AI design tool converges on it
as "the safe alternative to Inter." That's the convergence trap. Treat it the same as
Inter: only use if the user asks for it by name.
**Anti-convergence directive:** Across multiple generations in the same project, VARY
light/dark, fonts, and aesthetic directions. Never propose the same choices twice
without explicit justification. If the user's prior session used Geist + dark + editorial,
propose something different this time (or explicitly acknowledge you're doubling down
because it fits the brief). Convergence across generations is slop.
**AI slop anti-patterns** (never include in your recommendations):
- Purple/violet gradients as default accent
- 3-column feature grid with icons in colored circles
- Centered everything with uniform spacing
- Uniform bubbly border-radius on all elements
- Gradient buttons as the primary CTA pattern
- Generic stock-photo-style hero sections
- system-ui / -apple-system as the primary display or body font (the "I gave up on typography" signal)
- "Built for X" / "Designed for Y" marketing copy patterns
### Coherence Validation
When the user overrides one section, check if the rest still coheres. Flag mismatches with a gentle nudge — never block:
- Brutalist/Minimal aesthetic + expressive motion → "Heads up: brutalist aesthetics usually pair with minimal motion. Your combo is unusual — which is fine if intentional. Want me to suggest motion that fits, or keep it?"
- Expressive color + restrained decoration → "Bold palette with minimal decoration can work, but the colors will carry a lot of weight. Want me to suggest decoration that supports the palette?"
- Creative-editorial layout + data-heavy product → "Editorial layouts are gorgeous but can fight data density. Want me to show how a hybrid approach keeps both?"
- Always accept the user's final choice. Never refuse to proceed.
---
## Phase 4: Drill-downs (only if user requests adjustments)
When the user wants to change a specific section, go deep on that section:
- **Fonts:** Present 3-5 specific candidates with rationale, explain what each evokes, offer the preview page
- **Colors:** Present 2-3 palette options with hex values, explain the color theory reasoning
- **Aesthetic:** Walk through which directions fit their product and why
- **Layout/Spacing/Motion:** Present the approaches with concrete tradeoffs for their product type
Each drill-down is one focused AskUserQuestion. After the user decides, re-check coherence with the rest of the system.
---
## Phase 5: Design System Preview (default ON)
This phase generates visual previews of the proposed design system. Two paths depending on whether the gstack designer is available.
### Path A: AI Mockups (if DESIGN_READY)
Generate AI-rendered mockups showing the proposed design system applied to realistic screens for this product. This is far more powerful than an HTML preview — the user sees what their product could actually look like.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_DESIGN_DIR="$HOME/.gstack/projects/$SLUG/designs/design-system-$(date +%Y%m%d)"
mkdir -p "$_DESIGN_DIR"
echo "DESIGN_DIR: $_DESIGN_DIR"
```
Construct a design brief from the Phase 3 proposal (aesthetic, colors, typography, spacing, layout) and the product context from Phase 1:
```bash
$D variants --brief "<product name: [name]. Product type: [type]. Aesthetic: [direction]. Colors: primary [hex], secondary [hex], neutrals [range]. Typography: display [font], body [font]. Layout: [approach]. Show a realistic [page type] screen with [specific content for this product].>" --count 3 --output-dir "$_DESIGN_DIR/"
```
Run quality check on each variant:
```bash
$D check --image "$_DESIGN_DIR/variant-A.png" --brief "<the original brief>"
```
Show each variant inline (Read tool on each PNG) for instant preview.
**Before presenting to the user, self-gate:** For each variant, ask yourself: *"Would
a human designer be embarrassed to put their name on this?"* If yes, discard the
variant and regenerate. This is a hard gate. A mediocre AI mockup is worse than no
mockup. Embarrassment triggers include: purple gradient hero, 3-column SaaS grid,
centered-everything, Inter body text, generic stock-photo vibe, system-ui font,
gradient CTA button, bubble-radius everything. Any of those = reject and regenerate.
Tell the user: "I've generated 3 visual directions applying your design system to a realistic [product type] screen. Pick your favorite in the comparison board that just opened in your browser. You can also remix elements across variants."
{{DESIGN_SHOTGUN_LOOP}}
After the user picks a direction:
- Use `$D extract --image "$_DESIGN_DIR/variant-<CHOSEN>.png"` to analyze the approved mockup and extract design tokens (colors, typography, spacing) that will populate DESIGN.md in Phase 6. This grounds the design system in what was actually approved visually, not just what was described in text.
- If the user wants to iterate further: `$D iterate --feedback "<user's feedback>" --output "$_DESIGN_DIR/refined.png"`
**Plan mode vs. implementation mode:**
- **If in plan mode:** Add the approved mockup path (the full `$_DESIGN_DIR` path) and extracted tokens to the plan file under an "## Approved Design Direction" section. The design system gets written to DESIGN.md when the plan is implemented.
- **If NOT in plan mode:** Proceed directly to Phase 6 and write DESIGN.md with the extracted tokens.
### Path B: HTML Preview Page (fallback if DESIGN_NOT_AVAILABLE)
Generate a polished HTML preview page and open it in the user's browser. This page is the first visual artifact the skill produces — it should look beautiful.
```bash
PREVIEW_FILE="/tmp/design-consultation-preview-$(date +%s).html"
```
Write the preview HTML to `$PREVIEW_FILE`, then open it:
```bash
open "$PREVIEW_FILE"
```
### Preview Page Requirements (Path B only)
The agent writes a **single, self-contained HTML file** (no framework dependencies) that:
1. **Loads proposed fonts** from Google Fonts (or Bunny Fonts) via `<link>` tags
2. **Uses the proposed color palette** throughout — dogfood the design system
3. **Shows the product name** (not "Lorem Ipsum") as the hero heading
4. **Font specimen section:**
- Each font candidate shown in its proposed role (hero heading, body paragraph, button label, data table row)
- Side-by-side comparison if multiple candidates for one role
- Real content that matches the product (e.g., civic tech → government data examples)
5. **Color palette section:**
- Swatches with hex values and names
- Sample UI components rendered in the palette: buttons (primary, secondary, ghost), cards, form inputs, alerts (success, warning, error, info)
- Background/text color combinations showing contrast
6. **Realistic product mockups** — this is what makes the preview page powerful. Based on the project type from Phase 1, render 2-3 realistic page layouts using the full design system:
- **Dashboard / web app:** sample data table with metrics, sidebar nav, header with user avatar, stat cards
- **Marketing site:** hero section with real copy, feature highlights, testimonial block, CTA
- **Settings / admin:** form with labeled inputs, toggle switches, dropdowns, save button
- **Auth / onboarding:** login form with social buttons, branding, input validation states
- Use the product name, realistic content for the domain, and the proposed spacing/layout/border-radius. The user should see their product (roughly) before writing any code.
7. **Light/dark mode toggle** using CSS custom properties and a JS toggle button
8. **Clean, professional layout** — the preview page IS a taste signal for the skill
9. **Responsive** — looks good on any screen width
The page should make the user think "oh nice, they thought of this." It's selling the design system by showing what the product could feel like, not just listing hex codes and font names.
If `open` fails (headless environment), tell the user: *"I wrote the preview to [path] — open it in your browser to see the fonts and colors rendered."*
If the user says skip the preview, go directly to Phase 6.
---
## Phase 6: Write DESIGN.md & Confirm
If `$D extract` was used in Phase 5 (Path A), use the extracted tokens as the primary source for DESIGN.md values — colors, typography, and spacing grounded in the approved mockup rather than text descriptions alone. Merge extracted tokens with the Phase 3 proposal (the proposal provides rationale and context; the extraction provides exact values).
**If in plan mode:** Write the DESIGN.md content into the plan file as a "## Proposed DESIGN.md" section. Do NOT write the actual file — that happens at implementation time.
**If NOT in plan mode:** Write `DESIGN.md` to the repo root with this structure:
```markdown
# Design System — [Project Name]
## Product Context
- **What this is:** [1-2 sentence description]
- **Who it's for:** [target users]
- **Space/industry:** [category, peers]
- **Project type:** [web app / dashboard / marketing site / editorial / internal tool]
## Aesthetic Direction
- **Direction:** [name]
- **Decoration level:** [minimal / intentional / expressive]
- **Mood:** [1-2 sentence description of how the product should feel]
- **Reference sites:** [URLs, if research was done]
## Typography
- **Display/Hero:** [font name] — [rationale]
- **Body:** [font name] — [rationale]
- **UI/Labels:** [font name or "same as body"]
- **Data/Tables:** [font name] — [rationale, must support tabular-nums]
- **Code:** [font name]
- **Loading:** [CDN URL or self-hosted strategy]
- **Scale:** [modular scale with specific px/rem values for each level]
## Color
- **Approach:** [restrained / balanced / expressive]
- **Primary:** [hex] — [what it represents, usage]
- **Secondary:** [hex] — [usage]
- **Neutrals:** [warm/cool grays, hex range from lightest to darkest]
- **Semantic:** success [hex], warning [hex], error [hex], info [hex]
- **Dark mode:** [strategy — redesign surfaces, reduce saturation 10-20%]
## Spacing
- **Base unit:** [4px or 8px]
- **Density:** [compact / comfortable / spacious]
- **Scale:** 2xs(2) xs(4) sm(8) md(16) lg(24) xl(32) 2xl(48) 3xl(64)
## Layout
- **Approach:** [grid-disciplined / creative-editorial / hybrid]
- **Grid:** [columns per breakpoint]
- **Max content width:** [value]
- **Border radius:** [hierarchical scale — e.g., sm:4px, md:8px, lg:12px, full:9999px]
## Motion
- **Approach:** [minimal-functional / intentional / expressive]
- **Easing:** enter(ease-out) exit(ease-in) move(ease-in-out)
- **Duration:** micro(50-100ms) short(150-250ms) medium(250-400ms) long(400-700ms)
## Decisions Log
| Date | Decision | Rationale |
|------|----------|-----------|
| [today] | Initial design system created | Created by /design-consultation based on [product context / research] |
```
**Update CLAUDE.md** (or create it if it doesn't exist) — append this section:
```markdown
## Design System
Always read DESIGN.md before making any visual or UI decisions.
All font choices, colors, spacing, and aesthetic direction are defined there.
Do not deviate without explicit user approval.
In QA mode, flag any code that doesn't match DESIGN.md.
```
**AskUserQuestion Q-final — show summary and confirm:**
List all decisions. Flag any that used agent defaults without explicit user confirmation (the user should know what they're shipping). Options:
- A) Ship it — write DESIGN.md and CLAUDE.md
- B) I want to change something (specify what)
- C) Start over
After shipping DESIGN.md, if the session produced screen-level mockups or page layouts
(not just system-level tokens), suggest:
"Want to see this design system as working Pretext-native HTML? Run /design-html."
---
+27 -4
View File
@@ -53,6 +53,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -132,7 +135,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -304,11 +307,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -388,7 +411,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -67,6 +67,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -146,7 +149,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -318,11 +321,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -402,7 +425,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -52,6 +52,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -131,7 +134,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -303,11 +306,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -387,7 +410,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+32
View File
@@ -127,6 +127,38 @@ required for our hook to fire there.
(PostToolUse hooks can also set `additionalContext` to append to the tool
result; we don't need this for v1 capture.)
## PostToolUse on tool error (AUQ-failure fallback, OV3:B) — UNVERIFIED
The AUQ-failure prose fallback adds a defensive PostToolUse hook
(`hosts/claude/hooks/auq-error-fallback-hook.ts`) that, when an AskUserQuestion
call returns an error / missing result, injects `additionalContext` reminding the
model to run the prose fallback per `SESSION_KIND`. It uses the same
`additionalContext` mechanism documented above.
**Open question we could NOT settle in a harness:** does Claude Code invoke
PostToolUse hooks when an MCP tool call returns a transport/missing-result error
(the Conductor bug surfaces `[Tool result missing due to internal error]`)? The
docs above cover PostToolUse on *success*. We could not force the Conductor
internal MCP failure on demand to observe it.
**Decision (OV3:B = A):** build the hook defensively anyway.
- It is **inert on success** (only fires when `isErrorResponse(tool_response)` is
true) and **inert if the platform never invokes it** on the error path.
- The prompt-level fallback in `generate-ask-user-format.ts` covers the case
regardless — the hook is a reliability *layer*, not the mechanism.
- Its decision logic is unit-tested deterministically
(`test/auq-error-fallback-hook.test.ts`): given a synthetic error `tool_response`
+ each `SESSION_KIND`, it emits the correct directive; given a real answer it
defers.
**Recommended manual / partial spike (to close the gap later):** register a
throwaway PostToolUse hook that logs on fire, then (a) trigger a normal tool
error (e.g. a failing `Bash` call) to confirm PostToolUse fires on tool errors at
all, and (b) reproduce the Conductor MCP AUQ failure and check the log. If (b)
confirms a fire, promote the hook from "defensive/inert" to "verified". Until
then, treat the runtime layer as best-effort and the prompt-level fallback as the
guaranteed path.
## Settings.json snippet for T8 hook installer
```json
+27 -4
View File
@@ -52,6 +52,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -131,7 +134,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -303,11 +306,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -387,7 +410,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+40 -362
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
@@ -780,6 +803,17 @@ subjective decisions.
---
## Section index — Read each section when its situation applies
This skill is a decision-tree skeleton. The steps below point to on-demand
sections. Read a section in full before doing its step; do not work from memory.
| When | Read this section |
|------|-------------------|
| auditing each doc file and applying updates, polishing CHANGELOG voice, checking cross-doc consistency, cleaning up TODOS, the VERSION bump, and committing (Steps 2-9, after the coverage map in Step 1.5) | `sections/release-body.md` |
---
## Step 1: Pre-flight & Diff Analysis
1. Check the current branch. If on the base branch, **abort**: "You're on the base branch. Run from a feature branch."
@@ -856,364 +890,8 @@ When significant gaps are found, suggest running `/document-generate` to fill th
---
## Step 2: Per-File Documentation Audit
Read each documentation file and cross-reference it against the diff. Use these generic heuristics
(adapt to whatever project you're in — these are not gstack-specific):
**README.md:**
- Does it describe all features and capabilities visible in the diff?
- Are install/setup instructions consistent with the changes?
- Are examples, demos, and usage descriptions still valid?
- Are troubleshooting steps still accurate?
**ARCHITECTURE.md:**
- Do ASCII diagrams and component descriptions match the current code?
- Are design decisions and "why" explanations still accurate?
- Be conservative — only update things clearly contradicted by the diff. Architecture docs
describe things unlikely to change frequently.
**CONTRIBUTING.md — New contributor smoke test:**
- Walk through the setup instructions as if you are a brand new contributor.
- Are the listed commands accurate? Would each step succeed?
- Do test tier descriptions match the current test infrastructure?
- Are workflow descriptions (dev setup, operational learnings, etc.) current?
- Flag anything that would fail or confuse a first-time contributor.
**CLAUDE.md / project instructions:**
- Does the project structure section match the actual file tree?
- Are listed commands and scripts accurate?
- Do build/test instructions match what's in package.json (or equivalent)?
**Any other .md files:**
- Read the file, determine its purpose and audience.
- Cross-reference against the diff to check if it contradicts anything the file says.
For each file, classify needed updates as:
- **Auto-update** — Factual corrections clearly warranted by the diff: adding an item to a
table, updating a file path, fixing a count, updating a project structure tree.
- **Ask user** — Narrative changes, section removal, security model changes, large rewrites
(more than ~10 lines in one section), ambiguous relevance, adding entirely new sections.
---
## Step 3: Apply Auto-Updates
Make all clear, factual updates directly using the Edit tool.
For each file modified, output a one-line summary describing **what specifically changed** — not
just "Updated README.md" but "README.md: added /new-skill to skills table, updated skill count
from 9 to 10."
**Never auto-update:**
- README introduction or project positioning
- ARCHITECTURE philosophy or design rationale
- Security model descriptions
- Do not remove entire sections from any document
---
## Step 4: Ask About Risky/Questionable Changes
For each risky or questionable update identified in Step 2, use AskUserQuestion with:
- Context: project name, branch, which doc file, what we're reviewing
- The specific documentation decision
- `RECOMMENDATION: Choose [X] because [one-line reason]`
- Options including C) Skip — leave as-is
Apply approved changes immediately after each answer.
---
## Step 5: CHANGELOG Voice Polish
**CRITICAL — NEVER CLOBBER CHANGELOG ENTRIES.**
This step polishes voice. It does NOT rewrite, replace, or regenerate CHANGELOG content.
A real incident occurred where an agent replaced existing CHANGELOG entries when it should have
preserved them. This skill must NEVER do that.
**Rules:**
1. Read the entire CHANGELOG.md first. Understand what is already there.
2. Only modify wording within existing entries. Never delete, reorder, or replace entries.
3. Never regenerate a CHANGELOG entry from scratch. The entry was written by `/ship` from the
actual diff and commit history. It is the source of truth. You are polishing prose, not
rewriting history.
4. If an entry looks wrong or incomplete, use AskUserQuestion — do NOT silently fix it.
5. Use Edit tool with exact `old_string` matches — never use Write to overwrite CHANGELOG.md.
**If CHANGELOG was not modified in this branch:** skip this step.
**If CHANGELOG was modified in this branch**, review the entry for voice:
- **Sell test (Diataxis rubric):** Score each CHANGELOG entry 0-3:
- **1 point** — answers "What changed?" (reference: names the feature/fix)
- **1 point** — answers "Why should I care?" (explanation: user impact, pain removed)
- **1 point** — answers "How do I use it?" (how-to: command, flag, or link to docs)
- Entries scoring <2 need a rewrite. Entries scoring 3 are gold.
- Lead with what the user can now **do** — not implementation details.
- "You can now..." not "Refactored the..."
- Flag and rewrite any entry that reads like a commit message.
- Internal/contributor changes belong in a separate "### For contributors" subsection.
- Auto-fix minor voice adjustments. Use AskUserQuestion if a rewrite would alter meaning.
---
## Step 6: Cross-Doc Consistency & Discoverability Check
After auditing each file individually, do a cross-doc consistency pass:
1. Does the README's feature/capability list match what CLAUDE.md (or project instructions) describes?
2. Does ARCHITECTURE's component list match CONTRIBUTING's project structure description?
3. Does CHANGELOG's latest version match the VERSION file?
4. **Discoverability:** Is every documentation file reachable from README.md or CLAUDE.md? If
ARCHITECTURE.md exists but neither README nor CLAUDE.md links to it, flag it. Every doc
should be discoverable from one of the two entry-point files.
5. Flag any contradictions between documents. Auto-fix clear factual inconsistencies (e.g., a
version mismatch). Use AskUserQuestion for narrative contradictions.
---
## Step 7: TODOS.md Cleanup
This is a second pass that complements `/ship`'s Step 5.5. Read `review/TODOS-format.md` (if
available) for the canonical TODO item format.
If TODOS.md does not exist, skip this step.
1. **Completed items not yet marked:** Cross-reference the diff against open TODO items. If a
TODO is clearly completed by the changes in this branch, move it to the Completed section
with `**Completed:** vX.Y.Z.W (YYYY-MM-DD)`. Be conservative — only mark items with clear
evidence in the diff.
2. **Items needing description updates:** If a TODO references files or components that were
significantly changed, its description may be stale. Use AskUserQuestion to confirm whether
the TODO should be updated, completed, or left as-is.
3. **New deferred work:** Check the diff for `TODO`, `FIXME`, `HACK`, and `XXX` comments. For
each one that represents meaningful deferred work (not a trivial inline note), use
AskUserQuestion to ask whether it should be captured in TODOS.md.
---
## Step 8: VERSION Bump Question
**CRITICAL — NEVER BUMP VERSION WITHOUT ASKING.**
1. **If VERSION does not exist:** Skip silently.
2. Check if VERSION was already modified on this branch:
```bash
git diff <base>...HEAD -- VERSION
```
3. **If VERSION was NOT bumped:** Use AskUserQuestion:
- RECOMMENDATION: Choose C (Skip) because docs-only changes rarely warrant a version bump
- A) Bump PATCH (X.Y.Z+1) — if doc changes ship alongside code changes
- B) Bump MINOR (X.Y+1.0) — if this is a significant standalone release
- C) Skip — no version bump needed
4. **If VERSION was already bumped:** Do NOT skip silently. Instead, check whether the bump
still covers the full scope of changes on this branch:
a. Read the CHANGELOG entry for the current VERSION. What features does it describe?
b. Read the full diff (`git diff <base>...HEAD --stat` and `git diff <base>...HEAD --name-only`).
Are there significant changes (new features, new skills, new commands, major refactors)
that are NOT mentioned in the CHANGELOG entry for the current version?
c. **If the CHANGELOG entry covers everything:** Skip — output "VERSION: Already bumped to
vX.Y.Z, covers all changes."
d. **If there are significant uncovered changes:** Use AskUserQuestion explaining what the
current version covers vs what's new, and ask:
- RECOMMENDATION: Choose A because the new changes warrant their own version
- A) Bump to next patch (X.Y.Z+1) — give the new changes their own version
- B) Keep current version — add new changes to the existing CHANGELOG entry
- C) Skip — leave version as-is, handle later
The key insight: a VERSION bump set for "feature A" should not silently absorb "feature B"
if feature B is substantial enough to deserve its own version entry.
---
## Step 9: Commit & Output
**Empty check first:** Run `git status` (never use `-uall`). If no documentation files were
modified by any previous step, output "All documentation is up to date." and exit without
committing.
**Commit:**
1. Stage modified documentation files by name (never `git add -A` or `git add .`).
2. Create a single commit:
```bash
git commit -m "$(cat <<'EOF'
docs: update project documentation for vX.Y.Z.W
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)"
```
3. Push to the current branch:
```bash
git push
```
**PR/MR body update (idempotent, race-safe):**
1. Read the existing PR/MR body into a PID-unique tempfile (use the platform detected in Step 0):
**If GitHub:**
```bash
gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
```bash
glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('description',''))" > /tmp/gstack-pr-body-$$.md
```
2. If the tempfile already contains a `## Documentation` section, replace that section with the
updated content. If it does not contain one, append a `## Documentation` section at the end.
3. The Documentation section should include:
a. **Doc diff preview** — for each file modified, describe what specifically changed (e.g.,
"README.md: added /document-release to skills table, updated skill count from 9 to 10").
b. **Documentation debt** — if the coverage map from Step 1.5 found gaps, append a
`### Documentation Debt` subsection listing:
- Critical gaps: new public surface with zero documentation coverage
- Common gaps: features with reference-only coverage (no how-to or tutorial)
- Stale diagrams: architecture diagrams with entity names that drifted from the code
- Each item should include a one-line description of what's missing and which Diataxis
quadrant would fill it (e.g., "⚠️ `/new-skill` — has reference in AGENTS.md but no
how-to example in README")
If there are any documentation debt items, suggest adding a `docs-debt` label to the PR.
4. Redaction scan-at-sink, then write the updated body back. The body is already
in a temp file (`/tmp/gstack-pr-body-$$.md`); scan THAT file before editing so
the bytes scanned are the bytes sent:
```bash
REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null)
[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z')
~/.claude/skills/gstack/bin/gstack-redact --from-file /tmp/gstack-pr-body-$$.md --repo-visibility "${REDACT_VIS:-unknown}" --json
# exit 3 (HIGH) → do NOT edit, rotate+redact; exit 2 (MEDIUM) → confirm per finding.
```
**If GitHub:**
```bash
gh pr edit --body-file /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
Read the contents of `/tmp/gstack-pr-body-$$.md` using the Read tool, then pass it to `glab mr update` using a heredoc to avoid shell metacharacter issues:
```bash
glab mr update -d "$(cat <<'MRBODY'
<paste the file contents here>
MRBODY
)"
```
5. Clean up the tempfile:
```bash
rm -f /tmp/gstack-pr-body-$$.md
```
6. If `gh pr view` / `glab mr view` fails (no PR/MR exists): skip with message "No PR/MR found — skipping body update."
7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the
commit." and continue.
**PR/MR title sync (idempotent, always-on):**
PR titles must always start with `v<VERSION>` — same rule as `/ship`. If Step 8 bumped VERSION after `/ship` had already created the PR, the title is now stale. This sub-step fixes it.
1. Read the current VERSION:
```bash
V=$(cat VERSION 2>/dev/null | tr -d '[:space:]')
```
If `VERSION` does not exist or is empty, skip this sub-step entirely.
2. Read the current PR/MR title:
**If GitHub:**
```bash
CURRENT_TITLE=$(gh pr view --json title -q .title 2>/dev/null || true)
```
**If GitLab:**
```bash
CURRENT_TITLE=$(glab mr view -F json 2>/dev/null | jq -r .title 2>/dev/null || true)
```
If `CURRENT_TITLE` is empty (no open PR/MR), skip with message "No PR/MR found — skipping title sync."
3. Compute the corrected title using the shared helper (single source of truth — same one `/ship` uses):
```bash
NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$V" "$CURRENT_TITLE")
```
The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
4. If `NEW_TITLE` differs from `CURRENT_TITLE`, update it:
**If GitHub:**
```bash
gh pr edit --title "$NEW_TITLE"
```
**If GitLab:**
```bash
glab mr update -t "$NEW_TITLE"
```
5. If the edit command fails: warn "Could not update PR/MR title — documentation changes are still in the commit." and continue. Do not block on title sync failure.
**Structured doc health summary (final output):**
Output a scannable summary showing every documentation file's status:
```
Documentation health:
README.md [status] ([details])
ARCHITECTURE.md [status] ([details])
CONTRIBUTING.md [status] ([details])
CHANGELOG.md [status] ([details])
TODOS.md [status] ([details])
VERSION [status] ([details])
```
Where status is one of:
- Updated — with description of what changed
- Current — no changes needed
- Voice polished — wording adjusted
- Not bumped — user chose to skip
- Already bumped — version was set by /ship
- Skipped — file does not exist
If the coverage map from Step 1.5 identified any gaps, append:
```
Documentation coverage:
[entity] [reference] [how-to] [tutorial] [explanation]
/new-skill ✅ ❌ ❌ ❌
--new-flag ✅ ✅ ❌ ❌
Diagram drift:
ARCHITECTURE.md: "FooProcessor" renamed to "BarProcessor" in code — diagram may be stale
```
If all coverage is complete and no diagrams drifted, output: "Coverage: all shipped features have adequate documentation."
> **STOP.** Before auditing each doc file and applying updates, polishing CHANGELOG voice, checking cross-doc consistency, cleaning up TODOS, the VERSION bump, and committing (Steps 2-9, after the coverage map in Step 1.5), Read `~/.claude/skills/gstack/document-release/sections/release-body.md` and execute it
> in full. Do not work from memory — that section is the source of truth for this step.
---
+5 -358
View File
@@ -59,6 +59,10 @@ subjective decisions.
---
{{SECTION_INDEX:document-release}}
---
## Step 1: Pre-flight & Diff Analysis
1. Check the current branch. If on the base branch, **abort**: "You're on the base branch. Run from a feature branch."
@@ -135,364 +139,7 @@ When significant gaps are found, suggest running `/document-generate` to fill th
---
## Step 2: Per-File Documentation Audit
Read each documentation file and cross-reference it against the diff. Use these generic heuristics
(adapt to whatever project you're in — these are not gstack-specific):
**README.md:**
- Does it describe all features and capabilities visible in the diff?
- Are install/setup instructions consistent with the changes?
- Are examples, demos, and usage descriptions still valid?
- Are troubleshooting steps still accurate?
**ARCHITECTURE.md:**
- Do ASCII diagrams and component descriptions match the current code?
- Are design decisions and "why" explanations still accurate?
- Be conservative — only update things clearly contradicted by the diff. Architecture docs
describe things unlikely to change frequently.
**CONTRIBUTING.md — New contributor smoke test:**
- Walk through the setup instructions as if you are a brand new contributor.
- Are the listed commands accurate? Would each step succeed?
- Do test tier descriptions match the current test infrastructure?
- Are workflow descriptions (dev setup, operational learnings, etc.) current?
- Flag anything that would fail or confuse a first-time contributor.
**CLAUDE.md / project instructions:**
- Does the project structure section match the actual file tree?
- Are listed commands and scripts accurate?
- Do build/test instructions match what's in package.json (or equivalent)?
**Any other .md files:**
- Read the file, determine its purpose and audience.
- Cross-reference against the diff to check if it contradicts anything the file says.
For each file, classify needed updates as:
- **Auto-update** — Factual corrections clearly warranted by the diff: adding an item to a
table, updating a file path, fixing a count, updating a project structure tree.
- **Ask user** — Narrative changes, section removal, security model changes, large rewrites
(more than ~10 lines in one section), ambiguous relevance, adding entirely new sections.
---
## Step 3: Apply Auto-Updates
Make all clear, factual updates directly using the Edit tool.
For each file modified, output a one-line summary describing **what specifically changed** — not
just "Updated README.md" but "README.md: added /new-skill to skills table, updated skill count
from 9 to 10."
**Never auto-update:**
- README introduction or project positioning
- ARCHITECTURE philosophy or design rationale
- Security model descriptions
- Do not remove entire sections from any document
---
## Step 4: Ask About Risky/Questionable Changes
For each risky or questionable update identified in Step 2, use AskUserQuestion with:
- Context: project name, branch, which doc file, what we're reviewing
- The specific documentation decision
- `RECOMMENDATION: Choose [X] because [one-line reason]`
- Options including C) Skip — leave as-is
Apply approved changes immediately after each answer.
---
## Step 5: CHANGELOG Voice Polish
**CRITICAL — NEVER CLOBBER CHANGELOG ENTRIES.**
This step polishes voice. It does NOT rewrite, replace, or regenerate CHANGELOG content.
A real incident occurred where an agent replaced existing CHANGELOG entries when it should have
preserved them. This skill must NEVER do that.
**Rules:**
1. Read the entire CHANGELOG.md first. Understand what is already there.
2. Only modify wording within existing entries. Never delete, reorder, or replace entries.
3. Never regenerate a CHANGELOG entry from scratch. The entry was written by `/ship` from the
actual diff and commit history. It is the source of truth. You are polishing prose, not
rewriting history.
4. If an entry looks wrong or incomplete, use AskUserQuestion — do NOT silently fix it.
5. Use Edit tool with exact `old_string` matches — never use Write to overwrite CHANGELOG.md.
**If CHANGELOG was not modified in this branch:** skip this step.
**If CHANGELOG was modified in this branch**, review the entry for voice:
- **Sell test (Diataxis rubric):** Score each CHANGELOG entry 0-3:
- **1 point** — answers "What changed?" (reference: names the feature/fix)
- **1 point** — answers "Why should I care?" (explanation: user impact, pain removed)
- **1 point** — answers "How do I use it?" (how-to: command, flag, or link to docs)
- Entries scoring <2 need a rewrite. Entries scoring 3 are gold.
- Lead with what the user can now **do** — not implementation details.
- "You can now..." not "Refactored the..."
- Flag and rewrite any entry that reads like a commit message.
- Internal/contributor changes belong in a separate "### For contributors" subsection.
- Auto-fix minor voice adjustments. Use AskUserQuestion if a rewrite would alter meaning.
---
## Step 6: Cross-Doc Consistency & Discoverability Check
After auditing each file individually, do a cross-doc consistency pass:
1. Does the README's feature/capability list match what CLAUDE.md (or project instructions) describes?
2. Does ARCHITECTURE's component list match CONTRIBUTING's project structure description?
3. Does CHANGELOG's latest version match the VERSION file?
4. **Discoverability:** Is every documentation file reachable from README.md or CLAUDE.md? If
ARCHITECTURE.md exists but neither README nor CLAUDE.md links to it, flag it. Every doc
should be discoverable from one of the two entry-point files.
5. Flag any contradictions between documents. Auto-fix clear factual inconsistencies (e.g., a
version mismatch). Use AskUserQuestion for narrative contradictions.
---
## Step 7: TODOS.md Cleanup
This is a second pass that complements `/ship`'s Step 5.5. Read `review/TODOS-format.md` (if
available) for the canonical TODO item format.
If TODOS.md does not exist, skip this step.
1. **Completed items not yet marked:** Cross-reference the diff against open TODO items. If a
TODO is clearly completed by the changes in this branch, move it to the Completed section
with `**Completed:** vX.Y.Z.W (YYYY-MM-DD)`. Be conservative — only mark items with clear
evidence in the diff.
2. **Items needing description updates:** If a TODO references files or components that were
significantly changed, its description may be stale. Use AskUserQuestion to confirm whether
the TODO should be updated, completed, or left as-is.
3. **New deferred work:** Check the diff for `TODO`, `FIXME`, `HACK`, and `XXX` comments. For
each one that represents meaningful deferred work (not a trivial inline note), use
AskUserQuestion to ask whether it should be captured in TODOS.md.
---
## Step 8: VERSION Bump Question
**CRITICAL — NEVER BUMP VERSION WITHOUT ASKING.**
1. **If VERSION does not exist:** Skip silently.
2. Check if VERSION was already modified on this branch:
```bash
git diff <base>...HEAD -- VERSION
```
3. **If VERSION was NOT bumped:** Use AskUserQuestion:
- RECOMMENDATION: Choose C (Skip) because docs-only changes rarely warrant a version bump
- A) Bump PATCH (X.Y.Z+1) — if doc changes ship alongside code changes
- B) Bump MINOR (X.Y+1.0) — if this is a significant standalone release
- C) Skip — no version bump needed
4. **If VERSION was already bumped:** Do NOT skip silently. Instead, check whether the bump
still covers the full scope of changes on this branch:
a. Read the CHANGELOG entry for the current VERSION. What features does it describe?
b. Read the full diff (`git diff <base>...HEAD --stat` and `git diff <base>...HEAD --name-only`).
Are there significant changes (new features, new skills, new commands, major refactors)
that are NOT mentioned in the CHANGELOG entry for the current version?
c. **If the CHANGELOG entry covers everything:** Skip — output "VERSION: Already bumped to
vX.Y.Z, covers all changes."
d. **If there are significant uncovered changes:** Use AskUserQuestion explaining what the
current version covers vs what's new, and ask:
- RECOMMENDATION: Choose A because the new changes warrant their own version
- A) Bump to next patch (X.Y.Z+1) — give the new changes their own version
- B) Keep current version — add new changes to the existing CHANGELOG entry
- C) Skip — leave version as-is, handle later
The key insight: a VERSION bump set for "feature A" should not silently absorb "feature B"
if feature B is substantial enough to deserve its own version entry.
---
## Step 9: Commit & Output
**Empty check first:** Run `git status` (never use `-uall`). If no documentation files were
modified by any previous step, output "All documentation is up to date." and exit without
committing.
**Commit:**
1. Stage modified documentation files by name (never `git add -A` or `git add .`).
2. Create a single commit:
```bash
git commit -m "$(cat <<'EOF'
docs: update project documentation for vX.Y.Z.W
{{CO_AUTHOR_TRAILER}}
EOF
)"
```
3. Push to the current branch:
```bash
git push
```
**PR/MR body update (idempotent, race-safe):**
1. Read the existing PR/MR body into a PID-unique tempfile (use the platform detected in Step 0):
**If GitHub:**
```bash
gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
```bash
glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('description',''))" > /tmp/gstack-pr-body-$$.md
```
2. If the tempfile already contains a `## Documentation` section, replace that section with the
updated content. If it does not contain one, append a `## Documentation` section at the end.
3. The Documentation section should include:
a. **Doc diff preview** — for each file modified, describe what specifically changed (e.g.,
"README.md: added /document-release to skills table, updated skill count from 9 to 10").
b. **Documentation debt** — if the coverage map from Step 1.5 found gaps, append a
`### Documentation Debt` subsection listing:
- Critical gaps: new public surface with zero documentation coverage
- Common gaps: features with reference-only coverage (no how-to or tutorial)
- Stale diagrams: architecture diagrams with entity names that drifted from the code
- Each item should include a one-line description of what's missing and which Diataxis
quadrant would fill it (e.g., "⚠️ `/new-skill` — has reference in AGENTS.md but no
how-to example in README")
If there are any documentation debt items, suggest adding a `docs-debt` label to the PR.
4. Redaction scan-at-sink, then write the updated body back. The body is already
in a temp file (`/tmp/gstack-pr-body-$$.md`); scan THAT file before editing so
the bytes scanned are the bytes sent:
```bash
REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null)
[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z')
~/.claude/skills/gstack/bin/gstack-redact --from-file /tmp/gstack-pr-body-$$.md --repo-visibility "${REDACT_VIS:-unknown}" --json
# exit 3 (HIGH) → do NOT edit, rotate+redact; exit 2 (MEDIUM) → confirm per finding.
```
**If GitHub:**
```bash
gh pr edit --body-file /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
Read the contents of `/tmp/gstack-pr-body-$$.md` using the Read tool, then pass it to `glab mr update` using a heredoc to avoid shell metacharacter issues:
```bash
glab mr update -d "$(cat <<'MRBODY'
<paste the file contents here>
MRBODY
)"
```
5. Clean up the tempfile:
```bash
rm -f /tmp/gstack-pr-body-$$.md
```
6. If `gh pr view` / `glab mr view` fails (no PR/MR exists): skip with message "No PR/MR found — skipping body update."
7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the
commit." and continue.
**PR/MR title sync (idempotent, always-on):**
PR titles must always start with `v<VERSION>` — same rule as `/ship`. If Step 8 bumped VERSION after `/ship` had already created the PR, the title is now stale. This sub-step fixes it.
1. Read the current VERSION:
```bash
V=$(cat VERSION 2>/dev/null | tr -d '[:space:]')
```
If `VERSION` does not exist or is empty, skip this sub-step entirely.
2. Read the current PR/MR title:
**If GitHub:**
```bash
CURRENT_TITLE=$(gh pr view --json title -q .title 2>/dev/null || true)
```
**If GitLab:**
```bash
CURRENT_TITLE=$(glab mr view -F json 2>/dev/null | jq -r .title 2>/dev/null || true)
```
If `CURRENT_TITLE` is empty (no open PR/MR), skip with message "No PR/MR found — skipping title sync."
3. Compute the corrected title using the shared helper (single source of truth — same one `/ship` uses):
```bash
NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$V" "$CURRENT_TITLE")
```
The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
4. If `NEW_TITLE` differs from `CURRENT_TITLE`, update it:
**If GitHub:**
```bash
gh pr edit --title "$NEW_TITLE"
```
**If GitLab:**
```bash
glab mr update -t "$NEW_TITLE"
```
5. If the edit command fails: warn "Could not update PR/MR title — documentation changes are still in the commit." and continue. Do not block on title sync failure.
**Structured doc health summary (final output):**
Output a scannable summary showing every documentation file's status:
```
Documentation health:
README.md [status] ([details])
ARCHITECTURE.md [status] ([details])
CONTRIBUTING.md [status] ([details])
CHANGELOG.md [status] ([details])
TODOS.md [status] ([details])
VERSION [status] ([details])
```
Where status is one of:
- Updated — with description of what changed
- Current — no changes needed
- Voice polished — wording adjusted
- Not bumped — user chose to skip
- Already bumped — version was set by /ship
- Skipped — file does not exist
If the coverage map from Step 1.5 identified any gaps, append:
```
Documentation coverage:
[entity] [reference] [how-to] [tutorial] [explanation]
/new-skill ✅ ❌ ❌ ❌
--new-flag ✅ ✅ ❌ ❌
Diagram drift:
ARCHITECTURE.md: "FooProcessor" renamed to "BarProcessor" in code — diagram may be stale
```
If all coverage is complete and no diagrams drifted, output: "Coverage: all shipped features have adequate documentation."
{{SECTION:release-body}}
---
+14
View File
@@ -0,0 +1,14 @@
{
"$schema": "https://gstack.dev/schemas/section-manifest.json",
"skill": "document-release",
"version": 1,
"note": "PASSIVE registry (v2 plan T9 / CM2). id/file/title/trigger text ONLY. The skeleton's decision-tree prose decides WHEN to read. No machine predicate here.",
"sections": [
{
"id": "release-body",
"file": "release-body.md",
"title": "Per-file audit, auto-updates, risky-change asks, CHANGELOG voice polish, cross-doc consistency, TODOS cleanup, VERSION bump, commit + PR body (Steps 2-9)",
"trigger": "auditing each doc file and applying updates, polishing CHANGELOG voice, checking cross-doc consistency, cleaning up TODOS, the VERSION bump, and committing (Steps 2-9, after the coverage map in Step 1.5)"
}
]
}
+360
View File
@@ -0,0 +1,360 @@
<!-- AUTO-GENERATED from release-body.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## Step 2: Per-File Documentation Audit
Read each documentation file and cross-reference it against the diff. Use these generic heuristics
(adapt to whatever project you're in — these are not gstack-specific):
**README.md:**
- Does it describe all features and capabilities visible in the diff?
- Are install/setup instructions consistent with the changes?
- Are examples, demos, and usage descriptions still valid?
- Are troubleshooting steps still accurate?
**ARCHITECTURE.md:**
- Do ASCII diagrams and component descriptions match the current code?
- Are design decisions and "why" explanations still accurate?
- Be conservative — only update things clearly contradicted by the diff. Architecture docs
describe things unlikely to change frequently.
**CONTRIBUTING.md — New contributor smoke test:**
- Walk through the setup instructions as if you are a brand new contributor.
- Are the listed commands accurate? Would each step succeed?
- Do test tier descriptions match the current test infrastructure?
- Are workflow descriptions (dev setup, operational learnings, etc.) current?
- Flag anything that would fail or confuse a first-time contributor.
**CLAUDE.md / project instructions:**
- Does the project structure section match the actual file tree?
- Are listed commands and scripts accurate?
- Do build/test instructions match what's in package.json (or equivalent)?
**Any other .md files:**
- Read the file, determine its purpose and audience.
- Cross-reference against the diff to check if it contradicts anything the file says.
For each file, classify needed updates as:
- **Auto-update** — Factual corrections clearly warranted by the diff: adding an item to a
table, updating a file path, fixing a count, updating a project structure tree.
- **Ask user** — Narrative changes, section removal, security model changes, large rewrites
(more than ~10 lines in one section), ambiguous relevance, adding entirely new sections.
---
## Step 3: Apply Auto-Updates
Make all clear, factual updates directly using the Edit tool.
For each file modified, output a one-line summary describing **what specifically changed** — not
just "Updated README.md" but "README.md: added /new-skill to skills table, updated skill count
from 9 to 10."
**Never auto-update:**
- README introduction or project positioning
- ARCHITECTURE philosophy or design rationale
- Security model descriptions
- Do not remove entire sections from any document
---
## Step 4: Ask About Risky/Questionable Changes
For each risky or questionable update identified in Step 2, use AskUserQuestion with:
- Context: project name, branch, which doc file, what we're reviewing
- The specific documentation decision
- `RECOMMENDATION: Choose [X] because [one-line reason]`
- Options including C) Skip — leave as-is
Apply approved changes immediately after each answer.
---
## Step 5: CHANGELOG Voice Polish
**CRITICAL — NEVER CLOBBER CHANGELOG ENTRIES.**
This step polishes voice. It does NOT rewrite, replace, or regenerate CHANGELOG content.
A real incident occurred where an agent replaced existing CHANGELOG entries when it should have
preserved them. This skill must NEVER do that.
**Rules:**
1. Read the entire CHANGELOG.md first. Understand what is already there.
2. Only modify wording within existing entries. Never delete, reorder, or replace entries.
3. Never regenerate a CHANGELOG entry from scratch. The entry was written by `/ship` from the
actual diff and commit history. It is the source of truth. You are polishing prose, not
rewriting history.
4. If an entry looks wrong or incomplete, use AskUserQuestion — do NOT silently fix it.
5. Use Edit tool with exact `old_string` matches — never use Write to overwrite CHANGELOG.md.
**If CHANGELOG was not modified in this branch:** skip this step.
**If CHANGELOG was modified in this branch**, review the entry for voice:
- **Sell test (Diataxis rubric):** Score each CHANGELOG entry 0-3:
- **1 point** — answers "What changed?" (reference: names the feature/fix)
- **1 point** — answers "Why should I care?" (explanation: user impact, pain removed)
- **1 point** — answers "How do I use it?" (how-to: command, flag, or link to docs)
- Entries scoring <2 need a rewrite. Entries scoring 3 are gold.
- Lead with what the user can now **do** — not implementation details.
- "You can now..." not "Refactored the..."
- Flag and rewrite any entry that reads like a commit message.
- Internal/contributor changes belong in a separate "### For contributors" subsection.
- Auto-fix minor voice adjustments. Use AskUserQuestion if a rewrite would alter meaning.
---
## Step 6: Cross-Doc Consistency & Discoverability Check
After auditing each file individually, do a cross-doc consistency pass:
1. Does the README's feature/capability list match what CLAUDE.md (or project instructions) describes?
2. Does ARCHITECTURE's component list match CONTRIBUTING's project structure description?
3. Does CHANGELOG's latest version match the VERSION file?
4. **Discoverability:** Is every documentation file reachable from README.md or CLAUDE.md? If
ARCHITECTURE.md exists but neither README nor CLAUDE.md links to it, flag it. Every doc
should be discoverable from one of the two entry-point files.
5. Flag any contradictions between documents. Auto-fix clear factual inconsistencies (e.g., a
version mismatch). Use AskUserQuestion for narrative contradictions.
---
## Step 7: TODOS.md Cleanup
This is a second pass that complements `/ship`'s Step 5.5. Read `review/TODOS-format.md` (if
available) for the canonical TODO item format.
If TODOS.md does not exist, skip this step.
1. **Completed items not yet marked:** Cross-reference the diff against open TODO items. If a
TODO is clearly completed by the changes in this branch, move it to the Completed section
with `**Completed:** vX.Y.Z.W (YYYY-MM-DD)`. Be conservative — only mark items with clear
evidence in the diff.
2. **Items needing description updates:** If a TODO references files or components that were
significantly changed, its description may be stale. Use AskUserQuestion to confirm whether
the TODO should be updated, completed, or left as-is.
3. **New deferred work:** Check the diff for `TODO`, `FIXME`, `HACK`, and `XXX` comments. For
each one that represents meaningful deferred work (not a trivial inline note), use
AskUserQuestion to ask whether it should be captured in TODOS.md.
---
## Step 8: VERSION Bump Question
**CRITICAL — NEVER BUMP VERSION WITHOUT ASKING.**
1. **If VERSION does not exist:** Skip silently.
2. Check if VERSION was already modified on this branch:
```bash
git diff <base>...HEAD -- VERSION
```
3. **If VERSION was NOT bumped:** Use AskUserQuestion:
- RECOMMENDATION: Choose C (Skip) because docs-only changes rarely warrant a version bump
- A) Bump PATCH (X.Y.Z+1) — if doc changes ship alongside code changes
- B) Bump MINOR (X.Y+1.0) — if this is a significant standalone release
- C) Skip — no version bump needed
4. **If VERSION was already bumped:** Do NOT skip silently. Instead, check whether the bump
still covers the full scope of changes on this branch:
a. Read the CHANGELOG entry for the current VERSION. What features does it describe?
b. Read the full diff (`git diff <base>...HEAD --stat` and `git diff <base>...HEAD --name-only`).
Are there significant changes (new features, new skills, new commands, major refactors)
that are NOT mentioned in the CHANGELOG entry for the current version?
c. **If the CHANGELOG entry covers everything:** Skip — output "VERSION: Already bumped to
vX.Y.Z, covers all changes."
d. **If there are significant uncovered changes:** Use AskUserQuestion explaining what the
current version covers vs what's new, and ask:
- RECOMMENDATION: Choose A because the new changes warrant their own version
- A) Bump to next patch (X.Y.Z+1) — give the new changes their own version
- B) Keep current version — add new changes to the existing CHANGELOG entry
- C) Skip — leave version as-is, handle later
The key insight: a VERSION bump set for "feature A" should not silently absorb "feature B"
if feature B is substantial enough to deserve its own version entry.
---
## Step 9: Commit & Output
**Empty check first:** Run `git status` (never use `-uall`). If no documentation files were
modified by any previous step, output "All documentation is up to date." and exit without
committing.
**Commit:**
1. Stage modified documentation files by name (never `git add -A` or `git add .`).
2. Create a single commit:
```bash
git commit -m "$(cat <<'EOF'
docs: update project documentation for vX.Y.Z.W
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)"
```
3. Push to the current branch:
```bash
git push
```
**PR/MR body update (idempotent, race-safe):**
1. Read the existing PR/MR body into a PID-unique tempfile (use the platform detected in Step 0):
**If GitHub:**
```bash
gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
```bash
glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('description',''))" > /tmp/gstack-pr-body-$$.md
```
2. If the tempfile already contains a `## Documentation` section, replace that section with the
updated content. If it does not contain one, append a `## Documentation` section at the end.
3. The Documentation section should include:
a. **Doc diff preview** — for each file modified, describe what specifically changed (e.g.,
"README.md: added /document-release to skills table, updated skill count from 9 to 10").
b. **Documentation debt** — if the coverage map from Step 1.5 found gaps, append a
`### Documentation Debt` subsection listing:
- Critical gaps: new public surface with zero documentation coverage
- Common gaps: features with reference-only coverage (no how-to or tutorial)
- Stale diagrams: architecture diagrams with entity names that drifted from the code
- Each item should include a one-line description of what's missing and which Diataxis
quadrant would fill it (e.g., "⚠️ `/new-skill` — has reference in AGENTS.md but no
how-to example in README")
If there are any documentation debt items, suggest adding a `docs-debt` label to the PR.
4. Redaction scan-at-sink, then write the updated body back. The body is already
in a temp file (`/tmp/gstack-pr-body-$$.md`); scan THAT file before editing so
the bytes scanned are the bytes sent:
```bash
REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null)
[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z')
~/.claude/skills/gstack/bin/gstack-redact --from-file /tmp/gstack-pr-body-$$.md --repo-visibility "${REDACT_VIS:-unknown}" --json
# exit 3 (HIGH) → do NOT edit, rotate+redact; exit 2 (MEDIUM) → confirm per finding.
```
**If GitHub:**
```bash
gh pr edit --body-file /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
Read the contents of `/tmp/gstack-pr-body-$$.md` using the Read tool, then pass it to `glab mr update` using a heredoc to avoid shell metacharacter issues:
```bash
glab mr update -d "$(cat <<'MRBODY'
<paste the file contents here>
MRBODY
)"
```
5. Clean up the tempfile:
```bash
rm -f /tmp/gstack-pr-body-$$.md
```
6. If `gh pr view` / `glab mr view` fails (no PR/MR exists): skip with message "No PR/MR found — skipping body update."
7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the
commit." and continue.
**PR/MR title sync (idempotent, always-on):**
PR titles must always start with `v<VERSION>` — same rule as `/ship`. If Step 8 bumped VERSION after `/ship` had already created the PR, the title is now stale. This sub-step fixes it.
1. Read the current VERSION:
```bash
V=$(cat VERSION 2>/dev/null | tr -d '[:space:]')
```
If `VERSION` does not exist or is empty, skip this sub-step entirely.
2. Read the current PR/MR title:
**If GitHub:**
```bash
CURRENT_TITLE=$(gh pr view --json title -q .title 2>/dev/null || true)
```
**If GitLab:**
```bash
CURRENT_TITLE=$(glab mr view -F json 2>/dev/null | jq -r .title 2>/dev/null || true)
```
If `CURRENT_TITLE` is empty (no open PR/MR), skip with message "No PR/MR found — skipping title sync."
3. Compute the corrected title using the shared helper (single source of truth — same one `/ship` uses):
```bash
NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$V" "$CURRENT_TITLE")
```
The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
4. If `NEW_TITLE` differs from `CURRENT_TITLE`, update it:
**If GitHub:**
```bash
gh pr edit --title "$NEW_TITLE"
```
**If GitLab:**
```bash
glab mr update -t "$NEW_TITLE"
```
5. If the edit command fails: warn "Could not update PR/MR title — documentation changes are still in the commit." and continue. Do not block on title sync failure.
**Structured doc health summary (final output):**
Output a scannable summary showing every documentation file's status:
```
Documentation health:
README.md [status] ([details])
ARCHITECTURE.md [status] ([details])
CONTRIBUTING.md [status] ([details])
CHANGELOG.md [status] ([details])
TODOS.md [status] ([details])
VERSION [status] ([details])
```
Where status is one of:
- Updated — with description of what changed
- Current — no changes needed
- Voice polished — wording adjusted
- Not bumped — user chose to skip
- Already bumped — version was set by /ship
- Skipped — file does not exist
If the coverage map from Step 1.5 identified any gaps, append:
```
Documentation coverage:
[entity] [reference] [how-to] [tutorial] [explanation]
/new-skill ✅ ❌ ❌ ❌
--new-flag ✅ ✅ ❌ ❌
Diagram drift:
ARCHITECTURE.md: "FooProcessor" renamed to "BarProcessor" in code — diagram may be stale
```
If all coverage is complete and no diagrams drifted, output: "Coverage: all shipped features have adequate documentation."
@@ -0,0 +1,358 @@
## Step 2: Per-File Documentation Audit
Read each documentation file and cross-reference it against the diff. Use these generic heuristics
(adapt to whatever project you're in — these are not gstack-specific):
**README.md:**
- Does it describe all features and capabilities visible in the diff?
- Are install/setup instructions consistent with the changes?
- Are examples, demos, and usage descriptions still valid?
- Are troubleshooting steps still accurate?
**ARCHITECTURE.md:**
- Do ASCII diagrams and component descriptions match the current code?
- Are design decisions and "why" explanations still accurate?
- Be conservative — only update things clearly contradicted by the diff. Architecture docs
describe things unlikely to change frequently.
**CONTRIBUTING.md — New contributor smoke test:**
- Walk through the setup instructions as if you are a brand new contributor.
- Are the listed commands accurate? Would each step succeed?
- Do test tier descriptions match the current test infrastructure?
- Are workflow descriptions (dev setup, operational learnings, etc.) current?
- Flag anything that would fail or confuse a first-time contributor.
**CLAUDE.md / project instructions:**
- Does the project structure section match the actual file tree?
- Are listed commands and scripts accurate?
- Do build/test instructions match what's in package.json (or equivalent)?
**Any other .md files:**
- Read the file, determine its purpose and audience.
- Cross-reference against the diff to check if it contradicts anything the file says.
For each file, classify needed updates as:
- **Auto-update** — Factual corrections clearly warranted by the diff: adding an item to a
table, updating a file path, fixing a count, updating a project structure tree.
- **Ask user** — Narrative changes, section removal, security model changes, large rewrites
(more than ~10 lines in one section), ambiguous relevance, adding entirely new sections.
---
## Step 3: Apply Auto-Updates
Make all clear, factual updates directly using the Edit tool.
For each file modified, output a one-line summary describing **what specifically changed** — not
just "Updated README.md" but "README.md: added /new-skill to skills table, updated skill count
from 9 to 10."
**Never auto-update:**
- README introduction or project positioning
- ARCHITECTURE philosophy or design rationale
- Security model descriptions
- Do not remove entire sections from any document
---
## Step 4: Ask About Risky/Questionable Changes
For each risky or questionable update identified in Step 2, use AskUserQuestion with:
- Context: project name, branch, which doc file, what we're reviewing
- The specific documentation decision
- `RECOMMENDATION: Choose [X] because [one-line reason]`
- Options including C) Skip — leave as-is
Apply approved changes immediately after each answer.
---
## Step 5: CHANGELOG Voice Polish
**CRITICAL — NEVER CLOBBER CHANGELOG ENTRIES.**
This step polishes voice. It does NOT rewrite, replace, or regenerate CHANGELOG content.
A real incident occurred where an agent replaced existing CHANGELOG entries when it should have
preserved them. This skill must NEVER do that.
**Rules:**
1. Read the entire CHANGELOG.md first. Understand what is already there.
2. Only modify wording within existing entries. Never delete, reorder, or replace entries.
3. Never regenerate a CHANGELOG entry from scratch. The entry was written by `/ship` from the
actual diff and commit history. It is the source of truth. You are polishing prose, not
rewriting history.
4. If an entry looks wrong or incomplete, use AskUserQuestion — do NOT silently fix it.
5. Use Edit tool with exact `old_string` matches — never use Write to overwrite CHANGELOG.md.
**If CHANGELOG was not modified in this branch:** skip this step.
**If CHANGELOG was modified in this branch**, review the entry for voice:
- **Sell test (Diataxis rubric):** Score each CHANGELOG entry 0-3:
- **1 point** — answers "What changed?" (reference: names the feature/fix)
- **1 point** — answers "Why should I care?" (explanation: user impact, pain removed)
- **1 point** — answers "How do I use it?" (how-to: command, flag, or link to docs)
- Entries scoring <2 need a rewrite. Entries scoring 3 are gold.
- Lead with what the user can now **do** — not implementation details.
- "You can now..." not "Refactored the..."
- Flag and rewrite any entry that reads like a commit message.
- Internal/contributor changes belong in a separate "### For contributors" subsection.
- Auto-fix minor voice adjustments. Use AskUserQuestion if a rewrite would alter meaning.
---
## Step 6: Cross-Doc Consistency & Discoverability Check
After auditing each file individually, do a cross-doc consistency pass:
1. Does the README's feature/capability list match what CLAUDE.md (or project instructions) describes?
2. Does ARCHITECTURE's component list match CONTRIBUTING's project structure description?
3. Does CHANGELOG's latest version match the VERSION file?
4. **Discoverability:** Is every documentation file reachable from README.md or CLAUDE.md? If
ARCHITECTURE.md exists but neither README nor CLAUDE.md links to it, flag it. Every doc
should be discoverable from one of the two entry-point files.
5. Flag any contradictions between documents. Auto-fix clear factual inconsistencies (e.g., a
version mismatch). Use AskUserQuestion for narrative contradictions.
---
## Step 7: TODOS.md Cleanup
This is a second pass that complements `/ship`'s Step 5.5. Read `review/TODOS-format.md` (if
available) for the canonical TODO item format.
If TODOS.md does not exist, skip this step.
1. **Completed items not yet marked:** Cross-reference the diff against open TODO items. If a
TODO is clearly completed by the changes in this branch, move it to the Completed section
with `**Completed:** vX.Y.Z.W (YYYY-MM-DD)`. Be conservative — only mark items with clear
evidence in the diff.
2. **Items needing description updates:** If a TODO references files or components that were
significantly changed, its description may be stale. Use AskUserQuestion to confirm whether
the TODO should be updated, completed, or left as-is.
3. **New deferred work:** Check the diff for `TODO`, `FIXME`, `HACK`, and `XXX` comments. For
each one that represents meaningful deferred work (not a trivial inline note), use
AskUserQuestion to ask whether it should be captured in TODOS.md.
---
## Step 8: VERSION Bump Question
**CRITICAL — NEVER BUMP VERSION WITHOUT ASKING.**
1. **If VERSION does not exist:** Skip silently.
2. Check if VERSION was already modified on this branch:
```bash
git diff <base>...HEAD -- VERSION
```
3. **If VERSION was NOT bumped:** Use AskUserQuestion:
- RECOMMENDATION: Choose C (Skip) because docs-only changes rarely warrant a version bump
- A) Bump PATCH (X.Y.Z+1) — if doc changes ship alongside code changes
- B) Bump MINOR (X.Y+1.0) — if this is a significant standalone release
- C) Skip — no version bump needed
4. **If VERSION was already bumped:** Do NOT skip silently. Instead, check whether the bump
still covers the full scope of changes on this branch:
a. Read the CHANGELOG entry for the current VERSION. What features does it describe?
b. Read the full diff (`git diff <base>...HEAD --stat` and `git diff <base>...HEAD --name-only`).
Are there significant changes (new features, new skills, new commands, major refactors)
that are NOT mentioned in the CHANGELOG entry for the current version?
c. **If the CHANGELOG entry covers everything:** Skip — output "VERSION: Already bumped to
vX.Y.Z, covers all changes."
d. **If there are significant uncovered changes:** Use AskUserQuestion explaining what the
current version covers vs what's new, and ask:
- RECOMMENDATION: Choose A because the new changes warrant their own version
- A) Bump to next patch (X.Y.Z+1) — give the new changes their own version
- B) Keep current version — add new changes to the existing CHANGELOG entry
- C) Skip — leave version as-is, handle later
The key insight: a VERSION bump set for "feature A" should not silently absorb "feature B"
if feature B is substantial enough to deserve its own version entry.
---
## Step 9: Commit & Output
**Empty check first:** Run `git status` (never use `-uall`). If no documentation files were
modified by any previous step, output "All documentation is up to date." and exit without
committing.
**Commit:**
1. Stage modified documentation files by name (never `git add -A` or `git add .`).
2. Create a single commit:
```bash
git commit -m "$(cat <<'EOF'
docs: update project documentation for vX.Y.Z.W
{{CO_AUTHOR_TRAILER}}
EOF
)"
```
3. Push to the current branch:
```bash
git push
```
**PR/MR body update (idempotent, race-safe):**
1. Read the existing PR/MR body into a PID-unique tempfile (use the platform detected in Step 0):
**If GitHub:**
```bash
gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
```bash
glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('description',''))" > /tmp/gstack-pr-body-$$.md
```
2. If the tempfile already contains a `## Documentation` section, replace that section with the
updated content. If it does not contain one, append a `## Documentation` section at the end.
3. The Documentation section should include:
a. **Doc diff preview** — for each file modified, describe what specifically changed (e.g.,
"README.md: added /document-release to skills table, updated skill count from 9 to 10").
b. **Documentation debt** — if the coverage map from Step 1.5 found gaps, append a
`### Documentation Debt` subsection listing:
- Critical gaps: new public surface with zero documentation coverage
- Common gaps: features with reference-only coverage (no how-to or tutorial)
- Stale diagrams: architecture diagrams with entity names that drifted from the code
- Each item should include a one-line description of what's missing and which Diataxis
quadrant would fill it (e.g., "⚠️ `/new-skill` — has reference in AGENTS.md but no
how-to example in README")
If there are any documentation debt items, suggest adding a `docs-debt` label to the PR.
4. Redaction scan-at-sink, then write the updated body back. The body is already
in a temp file (`/tmp/gstack-pr-body-$$.md`); scan THAT file before editing so
the bytes scanned are the bytes sent:
```bash
REDACT_VIS=$(~/.claude/skills/gstack/bin/gstack-config get redact_repo_visibility 2>/dev/null)
[ -z "$REDACT_VIS" ] && REDACT_VIS=$(gh repo view --json visibility -q .visibility 2>/dev/null | tr 'A-Z' 'a-z')
~/.claude/skills/gstack/bin/gstack-redact --from-file /tmp/gstack-pr-body-$$.md --repo-visibility "${REDACT_VIS:-unknown}" --json
# exit 3 (HIGH) → do NOT edit, rotate+redact; exit 2 (MEDIUM) → confirm per finding.
```
**If GitHub:**
```bash
gh pr edit --body-file /tmp/gstack-pr-body-$$.md
```
**If GitLab:**
Read the contents of `/tmp/gstack-pr-body-$$.md` using the Read tool, then pass it to `glab mr update` using a heredoc to avoid shell metacharacter issues:
```bash
glab mr update -d "$(cat <<'MRBODY'
<paste the file contents here>
MRBODY
)"
```
5. Clean up the tempfile:
```bash
rm -f /tmp/gstack-pr-body-$$.md
```
6. If `gh pr view` / `glab mr view` fails (no PR/MR exists): skip with message "No PR/MR found — skipping body update."
7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the
commit." and continue.
**PR/MR title sync (idempotent, always-on):**
PR titles must always start with `v<VERSION>` — same rule as `/ship`. If Step 8 bumped VERSION after `/ship` had already created the PR, the title is now stale. This sub-step fixes it.
1. Read the current VERSION:
```bash
V=$(cat VERSION 2>/dev/null | tr -d '[:space:]')
```
If `VERSION` does not exist or is empty, skip this sub-step entirely.
2. Read the current PR/MR title:
**If GitHub:**
```bash
CURRENT_TITLE=$(gh pr view --json title -q .title 2>/dev/null || true)
```
**If GitLab:**
```bash
CURRENT_TITLE=$(glab mr view -F json 2>/dev/null | jq -r .title 2>/dev/null || true)
```
If `CURRENT_TITLE` is empty (no open PR/MR), skip with message "No PR/MR found — skipping title sync."
3. Compute the corrected title using the shared helper (single source of truth — same one `/ship` uses):
```bash
NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$V" "$CURRENT_TITLE")
```
The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
4. If `NEW_TITLE` differs from `CURRENT_TITLE`, update it:
**If GitHub:**
```bash
gh pr edit --title "$NEW_TITLE"
```
**If GitLab:**
```bash
glab mr update -t "$NEW_TITLE"
```
5. If the edit command fails: warn "Could not update PR/MR title — documentation changes are still in the commit." and continue. Do not block on title sync failure.
**Structured doc health summary (final output):**
Output a scannable summary showing every documentation file's status:
```
Documentation health:
README.md [status] ([details])
ARCHITECTURE.md [status] ([details])
CONTRIBUTING.md [status] ([details])
CHANGELOG.md [status] ([details])
TODOS.md [status] ([details])
VERSION [status] ([details])
```
Where status is one of:
- Updated — with description of what changed
- Current — no changes needed
- Voice polished — wording adjusted
- Not bumped — user chose to skip
- Already bumped — version was set by /ship
- Skipped — file does not exist
If the coverage map from Step 1.5 identified any gaps, append:
```
Documentation coverage:
[entity] [reference] [how-to] [tutorial] [explanation]
/new-skill ✅ ❌ ❌ ❌
--new-flag ✅ ✅ ❌ ❌
Diagram drift:
ARCHITECTURE.md: "FooProcessor" renamed to "BarProcessor" in code — diagram may be stale
```
If all coverage is complete and no diagrams drifted, output: "Coverage: all shipped features have adequate documentation."
+27 -4
View File
@@ -48,6 +48,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -127,7 +130,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -299,11 +302,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -383,7 +406,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+7
View File
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Bash shim — Claude Code hooks run `command` strings via /bin/sh, so this
# wrapper makes the TypeScript hook executable via bun. Settings.json
# references this file directly.
set -e
HERE="$(cd "$(dirname "$0")" && pwd)"
exec bun "$HERE/auq-error-fallback-hook.ts"
+202
View File
@@ -0,0 +1,202 @@
#!/usr/bin/env bun
/**
* PostToolUse hook for AskUserQuestion runtime reliability layer for the
* AUQ-failure prose fallback (OV3:B).
*
* When an AskUserQuestion call comes back as an ERROR / missing result (the
* Conductor MCP bug returns `[Tool result missing due to internal error]`), this
* hook injects `additionalContext` reminding the model of the failure-fallback
* rule, tailored to the session kind. It does NOT render the prose itself the
* model still emits it. The hook only guarantees the reminder fires at the moment
* of failure, instead of relying on the model noticing the error result and
* recalling the echoed SESSION_KIND.
*
* DEFENSIVE / INERT-IF-UNSUPPORTED: it is unverified whether Claude Code invokes
* PostToolUse hooks when an MCP tool returns a transport/missing-result error (we
* could not force that Conductor-internal failure in a harness see
* docs/spikes/claude-code-hook-mutation.md §"PostToolUse on tool error"). If the
* platform does NOT fire the hook on that path, this is simply never invoked no
* harm; the prompt-level fallback in generate-ask-user-format.ts still covers it.
* On a SUCCESSFUL AskUserQuestion (a real answer), the hook defers (no output).
*
* Triggered by ~/.claude/settings.json (registered by `setup` next to
* question-log-hook):
* PostToolUse matcher "(AskUserQuestion|mcp__.*__AskUserQuestion)"
*
* Invariants:
* - Always exits 0. A failing hook MUST NOT block the user's session.
* - Never triggers on a successful answer (would corrupt a normal AUQ).
* - Errors land in ~/.gstack/hook-errors.log.
*/
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
interface HookStdin {
tool_name?: string;
tool_response?: unknown;
cwd?: string;
}
function stateRoot(): string {
return (
process.env.GSTACK_STATE_ROOT ||
process.env.GSTACK_HOME ||
path.join(os.homedir(), '.gstack')
);
}
function logHookError(msg: string): void {
try {
const sr = stateRoot();
fs.mkdirSync(sr, { recursive: true });
fs.appendFileSync(
path.join(sr, 'hook-errors.log'),
`${new Date().toISOString()} auq-error-fallback-hook: ${msg}\n`,
);
} catch {
// last-resort swallow
}
}
function readStdin(): Promise<string> {
return new Promise((resolve) => {
let buf = '';
process.stdin.setEncoding('utf-8');
process.stdin.on('data', (chunk) => (buf += chunk));
process.stdin.on('end', () => resolve(buf));
process.stdin.on('error', () => resolve(buf));
setTimeout(() => resolve(buf), 2000);
});
}
/** No-op output — let the tool result stand untouched. */
function defer(): void {
process.stdout.write(
JSON.stringify({ hookSpecificOutput: { hookEventName: 'PostToolUse' } }),
);
process.exit(0);
}
function inject(additionalContext: string): void {
process.stdout.write(
JSON.stringify({
hookSpecificOutput: { hookEventName: 'PostToolUse', additionalContext },
}),
);
process.exit(0);
}
/**
* Decide whether the tool_response is an ERROR / missing result rather than a
* real answer. Conservative: only flag clear failure shapes, so a successful
* AskUserQuestion (which carries the user's choice) is never misread as failure.
*/
export function isErrorResponse(response: unknown): boolean {
if (response === null || response === undefined) return true;
if (typeof response === 'string') {
const s = response.trim();
if (s === '') return true;
// Match ONLY the specific missing-result sentinel phrase, not any string that
// merely contains "error" — a real answer like "Investigate the internal error"
// must NOT trigger the fallback. (Codex review finding.)
return /tool result missing/i.test(s);
}
if (typeof response === 'object') {
const rec = response as Record<string, unknown>;
// Structured flag must be the boolean true — not the substring "is_error" inside
// a serialized success payload like '{"is_error": false}'.
if (rec.is_error === true || rec.isError === true) return true;
if (typeof rec.error === 'string' && rec.error.trim() !== '') return true;
// Some hosts wrap the payload as { content: "..." } or { content: [{text}] }.
const content = rec.content;
if (typeof content === 'string') return /tool result missing/i.test(content);
if (Array.isArray(content)) {
const text = content
.map((c) => (typeof c === 'string' ? c : (c as Record<string, unknown>)?.text ?? ''))
.join(' ');
return /tool result missing/i.test(text);
}
}
return false;
}
/** Resolve SESSION_KIND via the shared helper (same classification the preamble
* echoes). Falls back to 'interactive' (degrade-safe) on any failure. */
export function sessionKind(cwd?: string): 'spawned' | 'headless' | 'interactive' {
try {
const here = path.dirname(new URL(import.meta.url).pathname);
const bin = path.resolve(here, '..', '..', '..', 'bin', 'gstack-session-kind');
const res = spawnSync(bin, [], {
encoding: 'utf-8',
timeout: 3000,
cwd: cwd && fs.existsSync(cwd) ? cwd : undefined,
});
const out = (res.stdout || '').trim();
if (out === 'spawned' || out === 'headless' || out === 'interactive') return out;
} catch (e) {
logHookError(`sessionKind failed: ${(e as Error).message}`);
}
return 'interactive';
}
/** The directive injected per session kind. Exported for unit testing. */
export function directiveFor(kind: 'spawned' | 'headless' | 'interactive'): string {
const lead =
'The AskUserQuestion call did not return a usable answer (error / missing result). ' +
'Per the AskUserQuestion failure-fallback rule: ';
switch (kind) {
case 'spawned':
return (
lead +
'SESSION_KIND=spawned — auto-choose the recommended option per the Spawned session block. ' +
'Do not emit prose, do not BLOCK.'
);
case 'headless':
return (
lead +
'SESSION_KIND=headless — report `BLOCKED — AskUserQuestion unavailable` and stop; no human can answer.'
);
case 'interactive':
default:
return (
lead +
'SESSION_KIND=interactive — render the decision as a PROSE message now: a clear ELI10 of the issue, ' +
'then a Recommendation line, then ONE paragraph per choice carrying its `(recommended)` marker, its ' +
'`Completeness: X/10`, and 2-4 sentences of reasoning. Tell the user to reply with a letter, then STOP. ' +
'(Retry the call once first only if no answer could have surfaced.)'
);
}
}
async function main(): Promise<void> {
const raw = await readStdin();
if (!raw.trim()) return defer();
let stdin: HookStdin;
try {
stdin = JSON.parse(raw);
} catch (e) {
logHookError(`stdin parse failed: ${(e as Error).message}`);
return defer();
}
const toolName = stdin.tool_name || '';
if (toolName !== 'AskUserQuestion' && !/^mcp__.+__AskUserQuestion$/.test(toolName)) {
return defer();
}
if (!isErrorResponse(stdin.tool_response)) return defer();
inject(directiveFor(sessionKind(stdin.cwd)));
}
// Only run the stdin→stdout pipeline when executed as a hook, not when imported
// by the unit test (which exercises the exported pure functions).
if (import.meta.main) {
main().catch((e) => {
logHookError(`main crash: ${(e as Error).message}`);
defer();
});
}
+27 -4
View File
@@ -87,6 +87,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -166,7 +169,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -338,11 +341,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -422,7 +445,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -52,6 +52,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -131,7 +134,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -303,11 +306,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -387,7 +410,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -53,6 +53,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -132,7 +135,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -304,11 +307,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -388,7 +411,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -56,6 +56,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -135,7 +138,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -307,11 +310,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -391,7 +414,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -45,6 +45,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -124,7 +127,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -296,11 +299,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -380,7 +403,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -46,6 +46,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -125,7 +128,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -297,11 +300,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -381,7 +404,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -48,6 +48,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -127,7 +130,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -299,11 +302,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -383,7 +406,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+4 -1
View File
@@ -47,6 +47,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -162,7 +165,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
+27 -4
View File
@@ -83,6 +83,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -162,7 +165,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -334,11 +337,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -418,7 +441,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -45,6 +45,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -124,7 +127,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -296,11 +299,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -380,7 +403,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "gstack",
"version": "1.56.1.0",
"version": "1.57.3.0",
"description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
"license": "MIT",
"type": "module",
+27 -4
View File
@@ -47,6 +47,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -126,7 +129,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -298,11 +301,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -382,7 +405,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -77,6 +77,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -156,7 +159,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -328,11 +331,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -412,7 +435,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -49,6 +49,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -128,7 +131,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -300,11 +303,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -384,7 +407,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -55,6 +55,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -134,7 +137,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -306,11 +309,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -390,7 +413,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -53,6 +53,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -132,7 +135,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -304,11 +307,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -388,7 +411,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -58,6 +58,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -137,7 +140,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -309,11 +312,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -393,7 +416,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -48,6 +48,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -127,7 +130,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -299,11 +302,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -383,7 +406,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -54,6 +54,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -133,7 +136,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -305,11 +308,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -389,7 +412,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -65,6 +65,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -144,7 +147,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -316,11 +319,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -400,7 +423,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -46,6 +46,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -125,7 +128,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -297,11 +300,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -381,7 +404,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
@@ -9,11 +9,31 @@ export function generateAskUserFormat(_ctx: TemplateContext): string {
**Rule:** if any \`mcp__*__AskUserQuestion\` variant is in your tool list, prefer it. Hosts may disable native AUQ via \`--disallowedTools AskUserQuestion\` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report \`BLOCKED — AskUserQuestion unavailable\`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only \`/plan-tune\` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains \`[plan-tune auto-decide] <id> → <option>\` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug e.g. Conductor's MCP AskUserQuestion is flaky and returns \`[Tool result missing due to internal error]\`).
- If it was present and **errored** (not absent), retry the SAME call **once** but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on \`SESSION_KIND\` (echoed by the preamble; empty/absent ⇒ \`interactive\`):
- \`spawned\` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- \`headless\`\`BLOCKED — AskUserQuestion unavailable\`; stop and wait (no human can answer).
- \`interactive\` → **prose fallback** (below).
**Prose fallback render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not / bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** explicit \`Completeness: X/10\` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** a \`Recommendation: <choice> because <reason>\` line plus the \`(recommended)\` marker on that choice.
Layout: a \`D<N>\` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its \`(recommended)\` marker, its \`Completeness: X/10\`, and 2-4 sentences of reasoning — never a bare bullet list; a closing \`Net:\` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
\`\`\`
D<N> <one-line question title>
@@ -93,7 +113,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose unless the documented failure fallback applies (then: prose with the mandatory triad issue ELI10, per-choice Completeness, Recommendation + \`(recommended)\` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \\u-escaped
- [ ] If you had 5+ options, you split (or batched into 4-groups) did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
@@ -26,7 +26,7 @@ In plan mode, allowed because they inform the plan: \`$B\`, \`$D\`, \`codex exec
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant \`mcp__*__AskUserQuestion\` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report \`BLOCKED — AskUserQuestion unavailable\` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.`;
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant \`mcp__*__AskUserQuestion\` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: \`headless\` → BLOCKED; \`interactive\` the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.`;
}
export function generateCompletionStatus(ctx: TemplateContext): string {
@@ -33,6 +33,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(${ctx.paths.binDir}/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=\${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(${ctx.paths.binDir}/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(${ctx.paths.binDir}/gstack-config get telemetry 2>/dev/null || true)
+22 -2
View File
@@ -1317,6 +1317,7 @@ fi
# already registered under that tag, the install is a no-op (no prompt).
PLAN_TUNE_LOG_HOOK="$SOURCE_GSTACK_DIR/hosts/claude/hooks/question-log-hook"
PLAN_TUNE_PREF_HOOK="$SOURCE_GSTACK_DIR/hosts/claude/hooks/question-preference-hook"
AUQ_ERROR_FALLBACK_HOOK="$SOURCE_GSTACK_DIR/hosts/claude/hooks/auq-error-fallback-hook"
PLAN_TUNE_INSTALL_MARKER="$HOME/.gstack/.plan-tune-hooks-prompted"
if [ "$NO_TEAM_MODE" -ne 1 ] \
@@ -1324,9 +1325,13 @@ if [ "$NO_TEAM_MODE" -ne 1 ] \
&& [ -x "$PLAN_TUNE_LOG_HOOK" ] \
&& [ -x "$PLAN_TUNE_PREF_HOOK" ]; then
# Already installed? Check the settings.json for our source tag.
# Already installed? Require BOTH the plan-tune source AND the AUQ-error-fallback
# source — so an existing install that predates the fallback hook re-runs the
# install (which is idempotent for the plan-tune hooks) and picks up the new one.
ALREADY_INSTALLED=0
if "$SETTINGS_HOOK" list-sources 2>/dev/null | grep -q "plan-tune-cathedral"; then
_HOOK_SOURCES=$("$SETTINGS_HOOK" list-sources 2>/dev/null || true)
if printf '%s' "$_HOOK_SOURCES" | grep -q "plan-tune-cathedral" \
&& printf '%s' "$_HOOK_SOURCES" | grep -q "auq-error-fallback"; then
ALREADY_INSTALLED=1
fi
@@ -1364,6 +1369,21 @@ if [ "$NO_TEAM_MODE" -ne 1 ] \
--command "$PLAN_TUNE_PREF_HOOK" \
--source plan-tune-cathedral \
--timeout 5
# AskUserQuestion-failure prose-fallback reliability hook (OV3:B). Fires only when
# an AskUserQuestion call returns an error/missing result; inert on success and
# inert if the platform doesn't invoke PostToolUse on tool errors. MUST use its
# OWN source tag: gstack-settings-hook dedupes by (event, matcher, source) and
# REPLACES the entry's hooks, so sharing 'plan-tune-cathedral' would overwrite the
# question-log capture hook (same event+matcher). A distinct source = a second
# PostToolUse entry; both run in parallel.
if [ -x "$AUQ_ERROR_FALLBACK_HOOK" ]; then
"$SETTINGS_HOOK" add-event \
--event PostToolUse \
--matcher '(AskUserQuestion|mcp__.*__AskUserQuestion)' \
--command "$AUQ_ERROR_FALLBACK_HOOK" \
--source auq-error-fallback \
--timeout 5
fi
}
if [ "$ALREADY_INSTALLED" -eq 1 ]; then
+4 -1
View File
@@ -42,6 +42,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -121,7 +124,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
+27 -4
View File
@@ -49,6 +49,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -128,7 +131,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -300,11 +303,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -384,7 +407,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -48,6 +48,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -127,7 +130,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -299,11 +302,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -383,7 +406,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+29 -4
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
@@ -1202,6 +1225,8 @@ git push -u origin <branch-name>
---
**PR/MR title invariant (always applies — do not skip even if you don't open the section below):** Any PR or MR you create OR update in the next step MUST have a title that starts with `v$NEW_VERSION` (the version bumped in Step 12), in the format `v<NEW_VERSION> <type>: <summary>`. Never create or edit a PR/MR title without this prefix. Compute the correct title with the single source of truth helper: `~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "<current title>"`. The full create/update procedure (idempotency, redaction scan, self-check) is in the section below.
> **STOP.** Before syncing docs and creating or updating the PR/MR (Steps 18-19), Read `~/.claude/skills/gstack/ship/sections/pr-body.md` and execute it
> in full. Do not work from memory — that section is the source of truth for this step.
+2
View File
@@ -395,6 +395,8 @@ git push -u origin <branch-name>
---
**PR/MR title invariant (always applies — do not skip even if you don't open the section below):** Any PR or MR you create OR update in the next step MUST have a title that starts with `v$NEW_VERSION` (the version bumped in Step 12), in the format `v<NEW_VERSION> <type>: <summary>`. Never create or edit a PR/MR title without this prefix. Compute the correct title with the single source of truth helper: `~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "<current title>"`. The full create/update procedure (idempotency, redaction scan, self-check) is in the section below.
{{SECTION:pr-body}}
## Step 20: Persist ship metrics
+2 -2
View File
@@ -97,8 +97,8 @@ you missed it.>
## Linked Spec
<Auto-detect: look for /spec archives matching this branch via:
eval "$(${ctx.paths.binDir}/gstack-paths)"
eval "$(${ctx.paths.binDir}/gstack-slug)"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
eval "$(~/.claude/skills/gstack/bin/gstack-slug)"
CURRENT_BRANCH=$(git branch --show-current)
SPEC_ARCHIVES="$GSTACK_STATE_ROOT/projects/$SLUG/specs"
# Find newest archive whose spec_branch frontmatter matches current branch (or one of its
+2 -2
View File
@@ -95,8 +95,8 @@ you missed it.>
## Linked Spec
<Auto-detect: look for /spec archives matching this branch via:
eval "$(${ctx.paths.binDir}/gstack-paths)"
eval "$(${ctx.paths.binDir}/gstack-slug)"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
eval "$(~/.claude/skills/gstack/bin/gstack-slug)"
CURRENT_BRANCH=$(git branch --show-current)
SPEC_ARCHIVES="$GSTACK_STATE_ROOT/projects/$SLUG/specs"
# Find newest archive whose spec_branch frontmatter matches current branch (or one of its
+27 -4
View File
@@ -46,6 +46,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -125,7 +128,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -297,11 +300,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -381,7 +404,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+54 -8
View File
@@ -47,6 +47,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -126,7 +129,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -298,11 +301,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -382,7 +405,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
@@ -1045,6 +1068,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -1124,7 +1150,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -1296,11 +1322,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -1380,7 +1426,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -48,6 +48,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -127,7 +130,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -299,11 +302,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -383,7 +406,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+131
View File
@@ -0,0 +1,131 @@
/**
* auq-error-fallback-hook the OV3:B runtime reliability layer.
*
* Two layers of testing:
* - PURE functions (isErrorResponse, directiveFor): deterministic, the core logic.
* - INTEGRATION: spawn the hook as a PostToolUse process with synthetic stdin and
* a controlled env, assert it injects the right directive on an error result and
* stays inert on a real answer.
*
* NOTE: whether the Claude Code PLATFORM invokes PostToolUse on an MCP
* transport/missing-result error is unverified (could not force the Conductor
* bug in a harness see docs/spikes/claude-code-hook-mutation.md). These tests
* pin the hook's BEHAVIOR given it is invoked; the platform trigger is the
* documented residual risk. The hook is inert if never invoked.
*/
import { describe, test, expect } from 'bun:test';
import { spawnSync } from 'child_process';
import * as path from 'path';
import { isErrorResponse, directiveFor } from '../hosts/claude/hooks/auq-error-fallback-hook.ts';
const HOOK = path.resolve(__dirname, '..', 'hosts', 'claude', 'hooks', 'auq-error-fallback-hook.ts');
describe('isErrorResponse — only clear failures, never a real answer', () => {
test('null / undefined / empty string are failures', () => {
expect(isErrorResponse(null)).toBe(true);
expect(isErrorResponse(undefined)).toBe(true);
expect(isErrorResponse('')).toBe(true);
expect(isErrorResponse(' ')).toBe(true);
});
test('the Conductor missing-result string is a failure', () => {
expect(isErrorResponse('[Tool result missing due to internal error]')).toBe(true);
});
test('is_error: true / error-field / sentinel-in-content are failures', () => {
expect(isErrorResponse({ is_error: true })).toBe(true);
expect(isErrorResponse({ isError: true })).toBe(true);
expect(isErrorResponse({ error: 'boom' })).toBe(true);
expect(isErrorResponse({ content: 'Tool result missing due to internal error' })).toBe(true);
});
test('a real answer is NOT a failure (no false trigger)', () => {
expect(isErrorResponse({ answers: [{ option_label: 'A' }] })).toBe(false);
expect(isErrorResponse('A')).toBe(false);
// a choice that coincidentally contains "error" must not trip it
expect(isErrorResponse({ answers: [{ option_label: 'Fix the error' }] })).toBe(false);
expect(isErrorResponse('Investigate the login error')).toBe(false);
});
test('Codex review: narrow detection — generic "error"/"is_error" substrings do NOT trigger', () => {
// A real answer mentioning "internal error" must not be read as a failure.
expect(isErrorResponse('Investigate the internal error')).toBe(false);
// A serialized success payload containing the substring is_error:false must not trigger.
expect(isErrorResponse('{"is_error": false, "answer": "A"}')).toBe(false);
expect(isErrorResponse({ is_error: false })).toBe(false);
expect(isErrorResponse({ content: 'The page had an internal error we fixed' })).toBe(false);
});
});
describe('directiveFor — per-session-kind instruction', () => {
test('interactive directive demands the prose triad', () => {
const d = directiveFor('interactive');
expect(d).toMatch(/ELI10/);
expect(d).toMatch(/Completeness: X\/10/);
expect(d).toMatch(/\(recommended\)/);
expect(d).toMatch(/reply with a letter/i);
expect(d).toMatch(/STOP/);
});
test('headless directive BLOCKs', () => {
expect(directiveFor('headless')).toMatch(/BLOCKED — AskUserQuestion unavailable/);
});
test('spawned directive auto-chooses', () => {
expect(directiveFor('spawned')).toMatch(/auto-choose/i);
});
});
/** Spawn the hook with synthetic stdin + controlled env; parse its JSON stdout. */
function runHook(stdin: object, env: Record<string, string>): { additionalContext?: string } {
const res = spawnSync('bun', [HOOK], {
input: JSON.stringify(stdin),
encoding: 'utf-8',
env: { PATH: process.env.PATH ?? '/usr/bin:/bin', ...env },
});
const parsed = JSON.parse(res.stdout || '{}');
return parsed.hookSpecificOutput ?? {};
}
describe('hook integration — invoked as PostToolUse', () => {
test('error result + headless env → injects BLOCK directive', () => {
const out = runHook(
{ tool_name: 'mcp__conductor__AskUserQuestion', tool_response: '[Tool result missing due to internal error]' },
{ GSTACK_HEADLESS: '1' },
);
expect(out.additionalContext).toMatch(/BLOCKED — AskUserQuestion unavailable/);
});
test('error result + interactive env → injects prose-triad directive', () => {
const out = runHook(
{ tool_name: 'AskUserQuestion', tool_response: null },
{ CONDUCTOR_PORT: '55010' },
);
expect(out.additionalContext).toMatch(/render the decision as a PROSE message/i);
expect(out.additionalContext).toMatch(/Completeness: X\/10/);
});
test('error result + spawned env → injects auto-choose directive', () => {
const out = runHook(
{ tool_name: 'AskUserQuestion', tool_response: { is_error: true } },
{ OPENCLAW_SESSION: '1' },
);
expect(out.additionalContext).toMatch(/auto-choose/i);
});
test('SUCCESSFUL answer → no injection (inert on real answers)', () => {
const out = runHook(
{ tool_name: 'AskUserQuestion', tool_response: { answers: [{ option_label: 'A' }] } },
{ GSTACK_HEADLESS: '1' },
);
expect(out.additionalContext).toBeUndefined();
});
test('non-AUQ tool → defers (no injection)', () => {
const out = runHook(
{ tool_name: 'Bash', tool_response: null },
{ GSTACK_HEADLESS: '1' },
);
expect(out.additionalContext).toBeUndefined();
});
});
+5
View File
@@ -47,6 +47,11 @@ const MANDATORY: Array<{ name: string; re: RegExp }> = [
{ name: 'Completeness coverage rule', re: /Completeness\s*:/i },
{ name: 'kind-vs-coverage rule', re: /options differ in kind/i },
{ name: 'Self-check checklist', re: /Self-check before emitting/i },
// The runtime-failure fallback must be ALWAYS-LOADED too: when an AUQ call errors
// mid-skill, the model needs the prose-fallback rule in context that instant, not
// stranded in an on-demand section. Same guarantee as the format spec above.
{ name: 'AUQ-failure fallback subsection', re: /When AskUserQuestion is unavailable or a call fails/i },
{ name: 'fallback SESSION_KIND branch', re: /SESSION_KIND/ },
];
/** Per-skill AUQ rules that govern review-finding cadence. A carve may move
+22
View File
@@ -0,0 +1,22 @@
/**
* E1 carve-guard completeness meta-guard (GATE tier, free).
*
* Makes the carve gap impossible to reopen: every skill carved on disk (owns a
* sections/manifest.json) MUST be in the canonical CARVE_GUARDS registry, and
* vice-versa. Because the static (E2) and behavioral (T2) guards are data-driven
* FROM the registry, registry membership IS guard coverage so this set-parity
* check is the whole game (codex #2: no need to grep test source). Carve a 7th
* skill without a registry entry and this fails CI.
*/
import { describe, test, expect } from 'bun:test';
import * as path from 'path';
import { checkCompleteness } from './helpers/carve-guard-checks';
const ROOT = path.resolve(import.meta.dir, '..');
describe('carve-guard completeness (gate, free)', () => {
test('filesystem carved set == CARVE_GUARDS set, and every entry is consistent', () => {
expect(checkCompleteness(ROOT)).toEqual([]);
});
});
+100
View File
@@ -0,0 +1,100 @@
/**
* ET1 guard-of-guards negative tests (GATE tier, free).
*
* Proves the guards actually BITE. The happy-path E1/E2 tests prove the real
* skills pass; these prove a BROKEN carve fails. Without this, a logic bug in
* checkOrdering/checkCompleteness would pass silently and protect nothing the
* exact silent-pass failure class this whole effort exists to kill.
*
* The checks take an injectable `root` (codex #5), so we point the REAL guard
* functions at a temp fixture dir broken three ways not at a wrapper.
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import type { CarveGuard } from './helpers/carve-guards';
import { checkOrdering, checkCompleteness, discoverCarvedSkills } from './helpers/carve-guard-checks';
let root = '';
/** Write a syntactically-valid carved skill under `root`. */
function writeCarve(skill: string, opts: { stop: boolean; autoGen: boolean; leakBody: boolean }) {
const dir = path.join(root, skill);
const secDir = path.join(dir, 'sections');
fs.mkdirSync(secDir, { recursive: true });
fs.writeFileSync(
path.join(secDir, 'manifest.json'),
JSON.stringify({ skill, sections: [{ id: 'body', file: 'body.md', title: 'Body', trigger: 'doing the work' }] }),
);
const header = opts.autoGen ? '<!-- AUTO-GENERATED -->\n' : '';
fs.writeFileSync(path.join(secDir, 'body.md'), `${header}## Heavy Body\nThe real work lives here. MOVED_MARKER.\n`);
const stopLine = opts.stop ? '> **STOP.** Before doing the work, Read `sections/body.md` and execute it.\n' : '';
const leak = opts.leakBody ? 'MOVED_MARKER\n' : '';
fs.writeFileSync(
path.join(dir, 'SKILL.md'),
`# ${skill}\n## Step 0: Setup\nstays here\n## Section index\n| When | Read |\n${stopLine}${leak}## EXIT PLAN MODE GATE\n`,
);
}
const guardFor = (skill: string): CarveGuard => ({
skill,
expectedSections: ['body.md'],
requiredReads: ['body.md'],
scenario: 'do the work',
staticInvariants: {
mustStayInSkeleton: ['## Step 0: Setup'],
mustMoveToSection: ['MOVED_MARKER'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
maxSkeletonBytes: 999_999,
minUnionBytes: 0,
mustContain: [],
});
beforeAll(() => {
root = fs.mkdtempSync(path.join(os.tmpdir(), 'carve-neg-'));
});
afterAll(() => {
fs.rmSync(root, { recursive: true, force: true });
});
describe('guard-of-guards — the guards bite (gate, free)', () => {
test('a well-formed fixture carve passes checkOrdering (control)', () => {
writeCarve('goodskill', { stop: true, autoGen: true, leakBody: false });
expect(checkOrdering(root, guardFor('goodskill'))).toEqual([]);
fs.rmSync(path.join(root, 'goodskill'), { recursive: true, force: true });
});
test('E2 fails when the STOP-Read directive is removed', () => {
writeCarve('nostopskill', { stop: false, autoGen: true, leakBody: false });
const failures = checkOrdering(root, guardFor('nostopskill'));
expect(failures.some((f) => f.includes('no STOP-Read directive'))).toBe(true);
fs.rmSync(path.join(root, 'nostopskill'), { recursive: true, force: true });
});
test('E2 fails when heavy body leaks back into the skeleton', () => {
writeCarve('leakskill', { stop: true, autoGen: true, leakBody: true });
const failures = checkOrdering(root, guardFor('leakskill'));
expect(failures.some((f) => f.includes('still in the skeleton'))).toBe(true);
fs.rmSync(path.join(root, 'leakskill'), { recursive: true, force: true });
});
test('E2 fails when a section is hand-edited (no AUTO-GENERATED header)', () => {
writeCarve('handeditskill', { stop: true, autoGen: false, leakBody: false });
const failures = checkOrdering(root, guardFor('handeditskill'));
expect(failures.some((f) => f.includes('hand-edited'))).toBe(true);
fs.rmSync(path.join(root, 'handeditskill'), { recursive: true, force: true });
});
test('E1 fails when a skill is carved on disk but missing from the registry', () => {
writeCarve('unregisteredskill', { stop: true, autoGen: true, leakBody: false });
// Discovery sees it...
expect(discoverCarvedSkills(root)).toContain('unregisteredskill');
// ...and completeness flags it as an unguarded carve.
const failures = checkCompleteness(root);
expect(failures.some((f) => f.includes('unregisteredskill') && f.includes('NOT in CARVE_GUARDS'))).toBe(true);
fs.rmSync(path.join(root, 'unregisteredskill'), { recursive: true, force: true });
});
});
+97
View File
@@ -0,0 +1,97 @@
/**
* T2 data-driven behavioral section-loading guard (PERIODIC tier, paid, SDK capture).
*
* The behavioral proof that a REAL agent actually Reads each carved skill's
* required sections at runtime not just that the skeleton structure looks right
* (that's E2, free, per-PR). One file iterating the canonical CARVE_GUARDS
* registry (EQ2): registry membership IS the test, so "registered ⇒ asserted" is
* structural a carve can't be registered yet behaviorally unguarded.
*
* Per codex refined-plan pass:
* #2 ONE test() per skill, each with its own timeout + named failure output;
* a hung claude -p fails only its skill, not the whole file.
* #3 / D-CODEX(A) GSTACK_CARVE_SKILL=<name> runs only that skill's case, so
* the touchfile selector can scope cost to the changed skill; unset runs all.
* #7 each case drives the run with the registry's `scenario` (built to force
* the STOP-Read path) and asserts the required sections were Read.
*
* 'external' skills (ship, plan-ceo-review) have bespoke fixtures (git state,
* Step-0 mode loop) and keep their dedicated tests; E1 asserts those exist.
*/
import { describe, test, expect } from 'bun:test';
import { setupSkillDir, skillFromWorktree, captureSectionReads } from './helpers/auq-sdk-capture';
import { CARVE_GUARDS } from './helpers/carve-guards';
const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'periodic';
const describeE2E = shouldRun ? describe : describe.skip;
const runId = `carve-section-loading-${process.env.EVALS_RUN_ID ?? 'local'}`;
const only = process.env.GSTACK_CARVE_SKILL?.trim();
// A generic plan fixture for 'plan' behavioral skills (the review family).
const PLAN_MD = [
'# Plan: add an in-memory cache layer',
'',
'## Context',
'Reads hit the DB on every request. Add a process-local LRU cache in front of the',
'read path to cut DB load.',
'',
'## Approach',
'- Wrap the read repository in a cache that stores the last 1000 keys.',
'- Invalidate on write.',
'',
'## Out of scope',
'Distributed cache, cross-process coherence.',
'',
].join('\n');
describeE2E('carve behavioral section-loading (periodic, SDK capture)', () => {
for (const guard of Object.values(CARVE_GUARDS)) {
// 'external' carves keep their dedicated bespoke tests (E1 verifies those exist).
if (guard.behavioral === 'external') continue;
// Cost-scoped selection: when GSTACK_CARVE_SKILL is set, run only that skill.
if (only && only !== guard.skill) continue;
test(
`${guard.skill}: a real run Reads ${guard.requiredReads.join(', ')}`,
async () => {
const { skillMd, sectionsFrom } = skillFromWorktree(guard.skill);
const fixtures = guard.behavioral === 'plan' ? { 'PLAN.md': PLAN_MD } : {};
const planDir = setupSkillDir({
skillName: guard.skill,
skillMd,
sectionsFrom,
fixtures,
tmpPrefix: `gstack-${guard.skill}-secload-`,
});
const { readSections, reportProduced, output } = await captureSectionReads({
planDir,
skillName: guard.skill,
scenario: guard.scenario,
reportMarker: /report|review|summary|design doc|handoff/i,
testName: `${guard.skill} section-loading`,
runId,
});
const missing = guard.requiredReads.filter((s) => !readSections.has(s));
// Named failure output (codex #2): skill + expected + observed.
expect({
skill: guard.skill,
reportProduced,
expected: guard.requiredReads,
observed: [...readSections],
missing,
}).toEqual({
skill: guard.skill,
reportProduced: true,
expected: guard.requiredReads,
observed: expect.any(Array),
missing: [],
});
expect(output.trim().length).toBeGreaterThan(200);
},
360_000,
);
}
});
+27
View File
@@ -0,0 +1,27 @@
/**
* E2 carve static ordering guard (GATE tier, free, deterministic).
*
* The per-PR mechanical backstop for EVERY carved skill: it fails CI the moment a
* regen drops/weakens a skeleton's STOP-Read directive, strands a section, leaks
* heavy body back into the skeleton, or moves a post-STOP gate above the STOP.
*
* Data-driven from the canonical CARVE_GUARDS registry (EQ1) with per-skill
* invariants (codex outside-voice #3 NOT a copy of the ceo-specific test, which
* this generalizes and retires). One test() per skill so a failure names the skill.
*/
import { describe, test, expect } from 'bun:test';
import * as path from 'path';
import { CARVE_GUARDS } from './helpers/carve-guards';
import { checkOrdering } from './helpers/carve-guard-checks';
const ROOT = path.resolve(import.meta.dir, '..');
describe('carve static ordering (gate, free)', () => {
for (const guard of Object.values(CARVE_GUARDS)) {
test(`${guard.skill}: skeleton routes to sections correctly`, () => {
const failures = checkOrdering(ROOT, guard);
expect({ skill: guard.skill, failures }).toEqual({ skill: guard.skill, failures: [] });
});
}
});
+74 -53
View File
@@ -1,16 +1,22 @@
/**
* cso security-guidance preservation test (v1.45.0.0 T6).
* cso security-guidance preservation test.
*
* The cso skill carries load-bearing security prose: OWASP Top 10 mappings,
* STRIDE threat-model phrasing, "do not auto-fix without user approval"
* gates. Codex 2nd-pass critique #9: "cso exemption too broad ... should
* still get resolver dedup, catalog trim, sectioning if safe, and targeted
* evals around must-not-miss checks."
* cso carries load-bearing security prose: OWASP Top 10 mappings, STRIDE
* threat-model phrasing, mode dispatch, and false-positive-filtering exceptions
* that must NOT be auto-discarded.
*
* This test pins the must-not-miss checks. cso gets the same resolver gate
* (T2), jargon dedup (T3), and catalog trim (T4) as every other skill but
* its security-guidance body content stays intact. Future compression work
* that would strip this content fails CI here.
* cso is now carved (skeleton SKILL.md + sections/audit-phases.md). The
* scope-dependent audit phases (2-11) moved to the section; the mode dispatch
* (## Arguments, ## Mode Resolution), the always-run phases (0, 1), and the
* FP-filtering exceptions (Phase 12) stay always-loaded in the skeleton.
*
* Two distinct guarantees (codex outside-voice #5 earliest-use, not loose
* substrings):
* 1. PRESERVATION the security phrases survive somewhere in the union
* (skeleton + sections); a carve relocates, it never drops.
* 2. ALWAYS-LOADED CONTRACT dispatch + FP-filtering directives stay in the
* skeleton, and mode dispatch precedes any STOP-Read (a directive that
* decides which sections to read can't sit behind the STOP that reads them).
*/
import { describe, test, expect } from 'bun:test';
@@ -18,69 +24,84 @@ import * as fs from 'fs';
import * as path from 'path';
const REPO_ROOT = path.resolve(import.meta.dir, '..');
const CSO_SKILL = path.join(REPO_ROOT, 'cso', 'SKILL.md');
const CSO_DIR = path.join(REPO_ROOT, 'cso');
const CSO_SKELETON = path.join(CSO_DIR, 'SKILL.md');
const MUST_PRESERVE_PHRASES = [
// OWASP / STRIDE positioning
'OWASP',
'STRIDE',
// Mode discipline
'daily',
'comprehensive',
// Severity language
'confidence',
// Active verification requirement (codex critique: "active verification")
'verif', // covers "verify", "verification", "verified"
];
function readSkeleton(): string {
return fs.readFileSync(CSO_SKELETON, 'utf-8');
}
function readUnion(): string {
let text = readSkeleton();
const dir = path.join(CSO_DIR, 'sections');
if (fs.existsSync(dir)) {
for (const f of fs.readdirSync(dir).sort()) {
if (f.endsWith('.md') && !f.endsWith('.md.tmpl')) {
text += '\n' + fs.readFileSync(path.join(dir, f), 'utf-8');
}
}
}
return text;
}
const MUST_PRESERVE_HEADINGS = [
'## Preamble', // from PREAMBLE resolver
];
// Security content that must survive the carve (checked against the UNION).
const MUST_PRESERVE_PHRASES = ['OWASP', 'STRIDE', 'daily', 'comprehensive', 'confidence', 'verif'];
describe('cso skill preserves load-bearing security guidance', () => {
test('cso/SKILL.md exists and is non-trivial', () => {
expect(fs.existsSync(CSO_SKILL)).toBe(true);
const content = fs.readFileSync(CSO_SKILL, 'utf-8');
// cso is a content-heavy security skill; under 30 KB suggests stripping went too far.
expect(content.length).toBeGreaterThan(30_000);
test('cso skeleton exists and is non-trivial', () => {
expect(fs.existsSync(CSO_SKELETON)).toBe(true);
// Skeleton stays substantial: dispatch + always-run phases + FP filtering +
// report phases are all always-loaded. Under 30 KB means too much moved out.
expect(readSkeleton().length).toBeGreaterThan(30_000);
});
test('cso preserves required security phrases (case-insensitive)', () => {
const content = fs.readFileSync(CSO_SKILL, 'utf-8').toLowerCase();
const missing: string[] = [];
for (const phrase of MUST_PRESERVE_PHRASES) {
if (!content.includes(phrase.toLowerCase())) missing.push(phrase);
}
test('security phrases survive in the union (skeleton + sections)', () => {
const union = readUnion().toLowerCase();
const missing = MUST_PRESERVE_PHRASES.filter((p) => !union.includes(p.toLowerCase()));
if (missing.length > 0) {
throw new Error(
`cso/SKILL.md is missing required security phrases: ${missing.join(', ')}. ` +
`These are load-bearing for the skill's audit posture. If you intentionally ` +
`removed them, update this test with the new phrasing.`,
`cso union is missing required security phrases: ${missing.join(', ')}. ` +
`These are load-bearing. A carve relocates them; it must not drop them.`,
);
}
});
test('cso preserves required headings', () => {
const content = fs.readFileSync(CSO_SKILL, 'utf-8');
for (const heading of MUST_PRESERVE_HEADINGS) {
expect(content).toContain(heading);
test('ALWAYS-LOADED: mode dispatch + FP-filtering stay in the skeleton', () => {
const skeleton = readSkeleton();
// Dispatch must be always-loaded — the agent resolves scope before reading sections.
expect(skeleton).toContain('## Arguments');
expect(skeleton).toContain('## Mode Resolution');
// FP-filtering with its critical exceptions is mandatory and must not be on-demand.
expect(skeleton).toContain('Phase 12');
// The "SKILL.md files are NOT documentation" exception is a must-not-miss
// security directive (skill supply-chain findings); it stays always-loaded.
expect(skeleton).toContain('NOT documentation');
});
test('EARLIEST-USE: mode dispatch precedes any STOP-Read directive (codex #6)', () => {
const skeleton = readSkeleton();
const stop = skeleton.indexOf('> **STOP.**');
const modeRes = skeleton.indexOf('## Mode Resolution');
const args = skeleton.indexOf('## Arguments');
expect(modeRes).toBeGreaterThan(-1);
expect(args).toBeGreaterThan(-1);
if (stop >= 0) {
// A dispatch directive stranded after the STOP can't govern which sections to read.
expect(args).toBeLessThan(stop);
expect(modeRes).toBeLessThan(stop);
}
});
test('cso catalog trim landed (frontmatter description ≤ 200 chars)', () => {
const content = fs.readFileSync(CSO_SKILL, 'utf-8');
const content = readSkeleton();
const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
expect(fmMatch).not.toBeNull();
const fm = fmMatch![1];
const descMatch = fm.match(/^description:\s+(.+)$/m);
expect(descMatch).not.toBeNull();
const desc = descMatch![1].trim();
expect(desc.length).toBeLessThanOrEqual(200);
expect(desc).toContain('(gstack)');
const desc = fmMatch![1].match(/^description:\s+(.+)$/m);
expect(desc).not.toBeNull();
expect(desc![1].trim().length).toBeLessThanOrEqual(200);
expect(desc![1]).toContain('(gstack)');
});
test('cso routing prose moved to "## When to invoke" body section', () => {
const content = fs.readFileSync(CSO_SKILL, 'utf-8');
expect(content).toContain('## When to invoke this skill');
expect(readSkeleton()).toContain('## When to invoke this skill');
});
});
+14 -1
View File
@@ -14,7 +14,20 @@ import { HOST_PATHS } from "../scripts/resolvers/types";
import { PATTERNS } from "../lib/redact-patterns";
const ROOT = path.resolve(import.meta.dir, "..");
const CSO = fs.readFileSync(path.join(ROOT, "cso", "SKILL.md"), "utf-8");
// cso is carved (skeleton + sections/audit-phases.md). The Secrets Archaeology
// prose + secret prefixes moved into the section; check the union so relocated
// content still counts.
function unionSkill(skill: string): string {
let t = fs.readFileSync(path.join(ROOT, skill, "SKILL.md"), "utf-8");
const dir = path.join(ROOT, skill, "sections");
if (fs.existsSync(dir)) {
for (const f of fs.readdirSync(dir).sort()) {
if (f.endsWith(".md") && !f.endsWith(".md.tmpl")) t += "\n" + fs.readFileSync(path.join(dir, f), "utf-8");
}
}
return t;
}
const CSO = unionSkill("cso");
const ctx = { skillName: "cso", tmplPath: "", host: "claude" as const, paths: HOST_PATHS["claude"] };
describe("cso/spec taxonomy alignment", () => {
+15 -1
View File
@@ -6,7 +6,21 @@ import * as fs from "fs";
import * as path from "path";
const ROOT = path.resolve(import.meta.dir, "..");
const RELEASE = fs.readFileSync(path.join(ROOT, "document-release", "SKILL.md.tmpl"), "utf-8");
// document-release is carved (skeleton + sections/release-body.md). Step 9
// (commit + PR-body redaction scan) moved into the section template; check the
// union of SKILL.md.tmpl + sections/*.md.tmpl so the scan-before-edit ordering
// still verifies. document-generate is NOT carved (plain .md.tmpl).
function unionTmpl(skill: string): string {
let t = fs.readFileSync(path.join(ROOT, skill, "SKILL.md.tmpl"), "utf-8");
const dir = path.join(ROOT, skill, "sections");
if (fs.existsSync(dir)) {
for (const f of fs.readdirSync(dir).sort()) {
if (f.endsWith(".md.tmpl")) t += "\n" + fs.readFileSync(path.join(dir, f), "utf-8");
}
}
return t;
}
const RELEASE = unionTmpl("document-release");
const GENERATE = fs.readFileSync(path.join(ROOT, "document-generate", "SKILL.md.tmpl"), "utf-8");
describe("/document-release redaction", () => {
+27 -4
View File
@@ -50,6 +50,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -129,7 +132,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -301,11 +304,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -385,7 +408,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -36,6 +36,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <($GSTACK_BIN/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$($GSTACK_BIN/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$($GSTACK_BIN/gstack-config get telemetry 2>/dev/null || true)
@@ -115,7 +118,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -287,11 +290,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -371,7 +394,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+27 -4
View File
@@ -38,6 +38,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <($GSTACK_BIN/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$($GSTACK_BIN/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$($GSTACK_BIN/gstack-config get telemetry 2>/dev/null || true)
@@ -117,7 +120,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -289,11 +292,31 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -373,7 +396,7 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
+26 -1
View File
@@ -1,4 +1,4 @@
import { describe, test, expect } from 'bun:test';
import { describe, test, expect, beforeAll } from 'bun:test';
import { COMMAND_DESCRIPTIONS } from '../browse/src/commands';
import { SNAPSHOT_FLAGS } from '../browse/src/snapshot';
import * as fs from 'fs';
@@ -2125,6 +2125,21 @@ describe('Factory generation (--host factory)', () => {
import { ALL_HOST_CONFIGS, getExternalHosts } from '../hosts/index';
describe('Parameterized host smoke tests', () => {
// Regenerate every external host up front so the per-host `--dry-run` freshness
// checks are deterministic. These host dirs (.agents/.factory/.cursor/...) are
// gitignored regenerated artifacts, so the freshness check is really an
// idempotency/determinism check — it still catches non-deterministic gen, but no
// longer flakes on stale-on-disk state left by a missing `gen --host all` prestep
// (the canonical `bun test` does not run one). The tracked-claude freshness test
// (`generated files are fresh`) runs earlier and is unaffected.
beforeAll(() => {
for (const h of getExternalHosts()) {
Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', h.name], {
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
});
}
});
for (const hostConfig of getExternalHosts()) {
describe(`${hostConfig.displayName} (--host ${hostConfig.name})`, () => {
const hostDir = path.join(ROOT, hostConfig.hostSubdir, 'skills');
@@ -2208,6 +2223,16 @@ describe('Parameterized host smoke tests', () => {
// ─── --host all tests ────────────────────────────────────────
describe('--host all', () => {
// Same determinism guard as the parameterized block: make external hosts fresh on
// disk so `--host all --dry-run` reports FRESH regardless of prior state.
beforeAll(() => {
for (const h of getExternalHosts()) {
Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', h.name], {
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
});
}
});
test('--host all generates for all registered hosts', () => {
const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', 'all', '--dry-run'], {
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
+70
View File
@@ -0,0 +1,70 @@
/**
* gstack-session-kind classifies the session so skills know whether a human can
* answer an AskUserQuestion. Drives the AUQ-failure fallback branch:
* spawned auto-choose (orchestrator)
* headless BLOCK on AUQ failure
* interactive prose fallback on AUQ failure
*
* These permutations are the contract the resolver rule depends on. Run with a
* SCRUBBED env (the test process itself runs inside Conductor, so CONDUCTOR_* /
* CLAUDE_CODE_* would leak in and contaminate the classification).
*
* Free, deterministic, gate-tier.
*/
import { describe, test, expect } from 'bun:test';
import { execFileSync } from 'child_process';
import * as path from 'path';
const BIN = path.resolve(__dirname, '..', 'bin', 'gstack-session-kind');
/** Run the helper with ONLY the supplied env (plus PATH so bash resolves). */
function kind(env: Record<string, string>): string {
return execFileSync(BIN, [], {
env: { PATH: process.env.PATH ?? '/usr/bin:/bin', ...env },
encoding: 'utf-8',
}).trim();
}
describe('gstack-session-kind', () => {
test('OPENCLAW_SESSION → spawned (highest precedence)', () => {
expect(kind({ OPENCLAW_SESSION: '1' })).toBe('spawned');
// spawned wins even when other markers are also present
expect(kind({ OPENCLAW_SESSION: '1', GSTACK_HEADLESS: '1', CONDUCTOR_PORT: '5' })).toBe('spawned');
});
test('GSTACK_HEADLESS → headless', () => {
expect(kind({ GSTACK_HEADLESS: '1' })).toBe('headless');
});
test('CONDUCTOR_* → interactive (a human host is present)', () => {
expect(kind({ CONDUCTOR_WORKSPACE_PATH: '/tmp/ws' })).toBe('interactive');
expect(kind({ CONDUCTOR_PORT: '55010' })).toBe('interactive');
});
test('CLAUDE_CODE_ENTRYPOINT=cli → interactive', () => {
expect(kind({ CLAUDE_CODE_ENTRYPOINT: 'cli' })).toBe('interactive');
});
test('interactive host beats CI markers', () => {
expect(kind({ CONDUCTOR_PORT: '5', CI: '1' })).toBe('interactive');
});
test('CI / GITHUB_ACTIONS with no host → headless', () => {
expect(kind({ CI: '1' })).toBe('headless');
expect(kind({ GITHUB_ACTIONS: 'true' })).toBe('headless');
});
test('GSTACK_HEADLESS beats CONDUCTOR (explicit override wins)', () => {
expect(kind({ GSTACK_HEADLESS: '1', CONDUCTOR_PORT: '5' })).toBe('headless');
});
test('bare env → interactive (degrade-safe default)', () => {
expect(kind({})).toBe('interactive');
});
test('empty GSTACK_HEADLESS is treated as unset (interactive)', () => {
// The resolver/helper guard on -n, so an empty string must NOT mean headless —
// this is the opt-out path harness suites use to exercise the interactive branch.
expect(kind({ GSTACK_HEADLESS: '' })).toBe('interactive');
});
});
+7
View File
@@ -300,6 +300,13 @@ export async function runAgentSdkTest(
const queryImpl: QueryProvider = opts.queryProvider ?? query;
const model = opts.model ?? 'claude-opus-4-7';
// NOTE on GSTACK_HEADLESS: the SDK child inherits process.env, so headless
// classification for eval/E2E runs is set by the `test:gate` / `test:evals`
// package.json scripts (scoped to that invocation), NOT mutated here. We must not
// pass sdkOpts.env (it breaks the SDK auth pipeline — see CLAUDE.md) and must not
// mutate process.env ambiently (it would leak headless into later interactive-path
// tests in the same Bun process — Codex review finding).
let attempt = 0;
let lastErr: unknown = null;
+177
View File
@@ -0,0 +1,177 @@
/**
* Pure carve-guard check functions, with an injectable `root` (codex
* outside-voice #5, refined-plan pass) so the negative tests (T5) can point the
* REAL guards at a broken fixture dir instead of testing a wrapper.
*
* Used by:
* - test/carve-section-ordering.test.ts (E2) checkOrdering
* - test/carve-guard-completeness.test.ts (E1) discoverCarvedSkills + checkCompleteness
* - test/carve-guards-negative.test.ts (T5) both, against a fixture root
*
* Imports only the leaf data module (carve-guards.ts) + node stdlib no cycle.
*/
import * as fs from 'fs';
import * as path from 'path';
import { CARVE_GUARDS, type CarveGuard } from './carve-guards';
/** Every dir under `root` that owns a sections/manifest.json. Injectable for tests. */
export function discoverCarvedSkills(root: string): string[] {
return fs
.readdirSync(root, { withFileTypes: true })
.filter((d) => d.isDirectory())
.map((d) => d.name)
.filter((name) => fs.existsSync(path.join(root, name, 'sections', 'manifest.json')))
.sort();
}
function readSkeleton(root: string, skill: string): string {
return fs.readFileSync(path.join(root, skill, 'SKILL.md'), 'utf-8');
}
/** Skeleton + every sections/*.md unioned (relocated content still counts). */
function readUnion(root: string, skill: string): string {
let text = readSkeleton(root, skill);
const dir = path.join(root, skill, 'sections');
if (fs.existsSync(dir)) {
for (const f of fs.readdirSync(dir).sort()) {
if (f.endsWith('.md') && !f.endsWith('.md.tmpl')) {
text += '\n' + fs.readFileSync(path.join(dir, f), 'utf-8');
}
}
}
return text;
}
const STOP = '> **STOP.**';
/**
* Static ordering invariants for one carved skill. Returns a list of failure
* strings (empty = pass). Pure: takes `root` so it runs against the real repo or
* a fixture identically.
*/
export function checkOrdering(root: string, guard: CarveGuard): string[] {
const failures: string[] = [];
let skeleton: string;
try {
skeleton = readSkeleton(root, guard.skill);
} catch (err) {
return [`cannot read ${guard.skill}/SKILL.md: ${(err as Error).message}`];
}
const union = readUnion(root, guard.skill);
// 1. The skeleton routes to sections via a Section index + STOP-Read directives.
if (!skeleton.includes('## Section index')) {
failures.push('skeleton is missing the "## Section index" table');
}
if (!skeleton.includes(STOP)) {
failures.push('skeleton has no STOP-Read directive');
}
// 2. Every expected section is referenced by path AND generated (AUTO-GENERATED).
for (const file of guard.expectedSections) {
if (!skeleton.includes(`sections/${file}`)) {
failures.push(`skeleton does not reference sections/${file}`);
}
const secPath = path.join(root, guard.skill, 'sections', file);
if (!fs.existsSync(secPath)) {
failures.push(`section file missing: sections/${file}`);
} else if (!fs.readFileSync(secPath, 'utf-8').slice(0, 200).includes('AUTO-GENERATED')) {
failures.push(`sections/${file} is hand-edited (no AUTO-GENERATED header)`);
}
}
// 3. Pre-STOP anchors stay in the skeleton.
for (const anchor of guard.staticInvariants.mustStayInSkeleton) {
if (!skeleton.includes(anchor)) {
failures.push(`mustStayInSkeleton anchor missing from skeleton: "${anchor}"`);
}
}
// 3b. Earliest-use: dispatch directives must appear BEFORE the first STOP
// (codex #6 — a directive that governs which sections to read can't sit after
// the STOP that reads them).
const firstStopIdx = skeleton.indexOf(STOP);
for (const anchor of guard.staticInvariants.mustPrecedeStop ?? []) {
const at = skeleton.indexOf(anchor);
if (at < 0) {
failures.push(`mustPrecedeStop anchor missing from skeleton: "${anchor}"`);
} else if (firstStopIdx >= 0 && at > firstStopIdx) {
failures.push(`mustPrecedeStop anchor "${anchor}" appears AFTER the STOP (stranded)`);
}
}
// 4. Heavy body moved out of the skeleton but is preserved in the union.
for (const moved of guard.staticInvariants.mustMoveToSection) {
if (skeleton.includes(moved)) {
failures.push(`mustMoveToSection marker is still in the skeleton: "${moved}"`);
}
if (!union.includes(moved)) {
failures.push(`mustMoveToSection marker absent from the union (lost): "${moved}"`);
}
}
// 5. The post-STOP gate fires after the last STOP (review skills).
const gate = guard.staticInvariants.gateAfterStop;
if (gate) {
// Gate must fire after the LAST STOP (once all section work returns), not just
// the first — for multi-STOP skeletons a gate between two STOPs is stranded.
const lastStop = skeleton.lastIndexOf(STOP);
const lastGate = skeleton.lastIndexOf(gate);
if (lastGate < 0) {
failures.push(`gateAfterStop marker missing from skeleton: "${gate}"`);
} else if (lastStop >= 0 && lastGate < lastStop) {
failures.push(`gateAfterStop "${gate}" appears before the last STOP (stranded above it)`);
}
}
return failures;
}
/**
* Completeness (E1): the filesystem carved set must equal the registry set, both
* directions, and every registry entry must be internally consistent. Pure:
* takes `root`.
*/
export function checkCompleteness(root: string): string[] {
const failures: string[] = [];
const discovered = new Set(discoverCarvedSkills(root));
const registered = new Set(Object.keys(CARVE_GUARDS));
for (const skill of discovered) {
if (!registered.has(skill)) {
failures.push(`carved on disk but NOT in CARVE_GUARDS (unguarded carve): ${skill}`);
}
}
for (const skill of registered) {
if (!discovered.has(skill)) {
failures.push(`in CARVE_GUARDS but not carved on disk (stale registry entry): ${skill}`);
}
}
for (const [skill, g] of Object.entries(CARVE_GUARDS)) {
if (g.expectedSections.length === 0) {
failures.push(`${skill}: expectedSections is empty`);
}
if (g.requiredReads.length === 0) {
failures.push(`${skill}: requiredReads is empty (behavioral guard would be decorative)`);
}
for (const r of g.requiredReads) {
if (!g.expectedSections.includes(r)) {
failures.push(`${skill}: requiredRead "${r}" is not in expectedSections`);
}
}
// Behavioral guard exists: 'plan'/'prompt' are covered structurally by the
// data-driven loop (registry membership IS coverage); 'external' must name a
// dedicated test file that actually exists on disk.
if (g.behavioral === 'external') {
if (!g.externalTest) {
failures.push(`${skill}: behavioral 'external' but no externalTest path`);
} else if (!fs.existsSync(path.join(root, g.externalTest))) {
failures.push(`${skill}: externalTest missing on disk: ${g.externalTest}`);
}
}
}
return failures;
}
+290
View File
@@ -0,0 +1,290 @@
/**
* Canonical carved-skill guard registry the single source of truth for which
* skills are carved (skeleton SKILL.md + on-demand sections/*.md) and what each
* carve must guarantee.
*
* PURE LEAF DATA MODULE (codex outside-voice #1, refined-plan pass): this file
* has NO runtime imports `import type` only. parity-harness.ts and
* skill-size-budget.test.ts derive their carved-skill lists FROM here (no
* parallel hand-maintained lists), so a runtime import back into either of them
* would create a cycle. Keep it data.
*
* Consumers:
* - test/carve-section-ordering.test.ts (E2, gate) staticInvariants
* - test/carve-section-loading.test.ts (T2, periodic) requiredReads + scenario
* - test/carve-guard-completeness.test.ts (E1, gate) the set must equal the
* filesystem carved set
* - test/carve-guards-negative.test.ts (ET1, gate) injects a broken fixture
* - test/helpers/parity-harness.ts sectioned/maxSkeletonBytes/minBytes/mustContain
* - test/skill-size-budget.test.ts SECTIONS_EXTRACTED = CARVED_SKILLS
*
* Adding a carve = add one entry here (atomically, in the same commit as the
* skeleton + manifest + sections codex #4 so E1's bidirectional parity never
* false-positives mid-commit).
*/
/** Static (skeleton-shape) invariants the per-PR ordering guard (E2) asserts. */
export interface CarveStaticInvariants {
/**
* Substrings that MUST remain in the always-loaded skeleton. Empty = skip
* (the skill has no distinctive pre-STOP anchor worth pinning beyond the
* universal STOP/section-index checks E2 already runs).
*/
mustStayInSkeleton: string[];
/**
* Substrings that MUST appear in the skeleton BEFORE the first STOP-Read
* (earliest-use, codex #6). For cso: mode-dispatch directives (## Arguments,
* ## Mode Resolution) must be resolved before any section is read a dispatch
* directive stranded after the STOP can't govern which sections to read.
* Empty/undefined = skip (most skills).
*/
mustPrecedeStop?: string[];
/**
* Substrings that MUST be in the union (skeleton + sections) but MUST NOT be in
* the skeleton i.e. the heavy body that the carve relocated. Empty = skip.
*/
mustMoveToSection: string[];
/**
* If set, this marker must appear in the skeleton AFTER the last STOP-Read
* directive (e.g. the EXIT PLAN MODE GATE that fires once section work returns).
* Undefined = the skill has no post-STOP gate (operational/conversational carve).
*/
gateAfterStop?: string;
}
export interface CarveGuard {
skill: string;
/** Section .md filenames the manifest lists and the skeleton must STOP-Read. */
expectedSections: string[];
/**
* Sections the behavioral test (T2) asserts the agent actually Read when driven
* by `scenario`. A non-empty subset of expectedSections the ones the scenario
* is built to require. The registry owns this so "registered ⇒ asserted" is
* structural (codex #2), not policed.
*/
requiredReads: string[];
/**
* Fixture prompt that drives a real `claude -p` run down the STOP-Read path for
* this skill (codex #7). The behavioral test asserts the run reached the STOP
* (read requiredReads), not merely that nothing was read.
*/
scenario: string;
staticInvariants: CarveStaticInvariants;
/**
* How the behavioral guard (T2) exercises this skill:
* - 'plan' write a PLAN.md fixture, run the review against it
* - 'prompt' no fixture file; the scenario prompt alone drives the run
* - 'external' covered by a dedicated bespoke test (complex fixtures, e.g.
* ship's git/VERSION/CHANGELOG state). The data-driven loop
* skips it; E1 asserts `externalTest` exists instead.
*/
behavioral: 'plan' | 'prompt' | 'external';
/** Required when behavioral === 'external': path (repo-relative) to the dedicated test. */
externalTest?: string;
/** Parity: max bytes for the always-loaded skeleton (asserts the carve shrank it). */
maxSkeletonBytes: number;
/** Parity: min bytes for the skeleton+sections union (total behavior preserved). */
minUnionBytes: number;
/** Parity: content phrases the union must preserve. */
mustContain: string[];
/**
* Parity: optional per-skill override for the union size-growth ceiling vs the
* v1.53.0.0 baseline (default 1.05). Bumped only when a deliberate cross-cutting
* preamble feature legitimately grows a smaller carved skeleton past 5%.
*/
maxSizeRatio?: number;
}
export const CARVE_GUARDS: Record<string, CarveGuard> = {
ship: {
skill: 'ship',
expectedSections: [
'tests.md',
'test-coverage.md',
'plan-completion.md',
'review-army.md',
'greptile.md',
'adversarial.md',
'changelog.md',
'pr-body.md',
],
requiredReads: ['review-army.md', 'changelog.md'],
scenario:
'This is a FRESH version-changing ship: the branch has a real code change, VERSION still equals the base version (needs a bump), and CHANGELOG.md needs a new entry. Follow the skill flow for a version-changing ship: run the pre-landing review and prepare the CHANGELOG entry. Produce the ship plan / review report. Do NOT actually commit, push, or open a PR.',
staticInvariants: {
// The PR-title-version invariant MUST stay always-loaded: the v1.54.0.0
// carve stranded it in pr-body.md and PRs started landing with bare titles
// (CI backstop: test/pr-title-sync-workflow-safety.test.ts).
mustStayInSkeleton: ['v$NEW_VERSION', 'gstack-pr-title-rewrite'],
// ...while the full create/update procedure stays carved into pr-body.md
// (out of the skeleton, present in the union). Asserts BOTH PR paths
// survive: the create path and the idempotent update path.
mustMoveToSection: ['gh pr create --base', 'gh pr edit --title'],
// ship is operational (multi-STOP, not a plan review); no single post-STOP gate.
gateAfterStop: undefined,
},
behavioral: 'external',
externalTest: 'test/skill-e2e-ship-section-loading.test.ts',
maxSkeletonBytes: 90_000,
minUnionBytes: 120_000,
mustContain: ['VERSION', 'CHANGELOG', 'review', 'merge', 'PR'],
},
'plan-ceo-review': {
skill: 'plan-ceo-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md. Hold the current scope (HOLD SCOPE mode) — do not challenge or expand scope. Run the full CEO review and produce the review report.',
staticInvariants: {
mustStayInSkeleton: ['## Step 0: Nuclear Scope Challenge'],
mustMoveToSection: ['### Section 1: Architecture Review', '## Mode Quick Reference'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'external',
externalTest: 'test/skill-e2e-plan-ceo-review-section-loading.test.ts',
maxSkeletonBytes: 90_000,
minUnionBytes: 80_000,
mustContain: ['SCOPE EXPANSION', 'SELECTIVE EXPANSION', 'HOLD SCOPE', 'SCOPE REDUCTION'],
},
'plan-eng-review': {
skill: 'plan-eng-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md. Accept the current scope. Run the full engineering review (architecture, code quality, tests, performance) and produce the review report.',
staticInvariants: {
mustStayInSkeleton: ['### Step 0: Scope Challenge'],
mustMoveToSection: ['### 1. Architecture review'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'plan',
maxSkeletonBytes: 62_000,
minUnionBytes: 70_000,
mustContain: ['Architecture', 'Code Quality', 'Test', 'Performance'],
},
'plan-design-review': {
skill: 'plan-design-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md for design and UX. Accept the current scope. Run the full design review passes and produce the review report.',
staticInvariants: {
mustStayInSkeleton: [],
mustMoveToSection: ['### Pass 1: Information Architecture'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'plan',
maxSkeletonBytes: 82_000,
minUnionBytes: 70_000,
mustContain: ['design', 'visual'],
},
'plan-devex-review': {
skill: 'plan-devex-review',
expectedSections: ['review-sections.md'],
requiredReads: ['review-sections.md'],
scenario:
'Review the plan in PLAN.md for developer experience. Accept the current scope. Run the full DX review passes and produce the review report.',
staticInvariants: {
mustStayInSkeleton: [],
mustMoveToSection: ['### Pass 1: Getting Started Experience'],
gateAfterStop: 'EXIT PLAN MODE GATE',
},
behavioral: 'plan',
maxSkeletonBytes: 76_000,
minUnionBytes: 70_000,
mustContain: ['developer experience', 'Getting Started'],
},
'office-hours': {
skill: 'office-hours',
expectedSections: ['design-and-handoff.md'],
requiredReads: ['design-and-handoff.md'],
scenario:
'Run office hours for this product idea through to the end: have the diagnostic conversation, explore alternatives, then write the design doc and run the relationship handoff (Phases 5-6).',
staticInvariants: {
mustStayInSkeleton: [],
mustMoveToSection: [],
// office-hours is conversational; the design-doc/handoff section has no
// post-STOP review gate in the skeleton.
gateAfterStop: undefined,
},
behavioral: 'prompt',
maxSkeletonBytes: 96_000,
minUnionBytes: 70_000,
mustContain: ['design doc', 'problem statement'],
},
'document-release': {
skill: 'document-release',
expectedSections: ['release-body.md'],
requiredReads: ['release-body.md'],
scenario:
'A PR has shipped a new CLI flag and touched README.md and CHANGELOG.md. Skip the git pre-flight shell commands (assume the diff adds --new-flag and updates those two docs). Run the documentation workflow: build the coverage map, then audit the docs, apply updates, and polish the CHANGELOG voice. Produce the documentation health summary.',
staticInvariants: {
mustStayInSkeleton: ['## Step 1: Pre-flight', '## Step 1.5: Coverage Map'],
mustMoveToSection: ['## Step 2: Per-File Documentation Audit', '## Step 5: CHANGELOG Voice Polish'],
// Operational skill (no plan-mode review gate).
gateAfterStop: undefined,
},
behavioral: 'prompt',
maxSkeletonBytes: 50_000,
minUnionBytes: 55_000,
mustContain: ['CHANGELOG', 'Diataxis', 'coverage'],
// The AUQ-failure prose fallback (v1.57.2.0) adds ~2KB to every skill's
// always-loaded preamble; on this small carved skeleton that lands at ~5.9%
// over the pre-carve/pre-AUQ v1.53.0.0 baseline. Headroom for the
// cross-cutting addition; all other skills keep the strict 1.05 ceiling.
maxSizeRatio: 1.08,
},
'design-consultation': {
skill: 'design-consultation',
expectedSections: ['proposal-and-preview.md'],
requiredReads: ['proposal-and-preview.md'],
scenario:
'The user gave product context (a B2B analytics dashboard for ops teams) and declined the research phase. Skip browser/design tool setup. Proceed to build the complete design-system proposal, then write DESIGN.md. Produce the proposal and the DESIGN.md content.',
staticInvariants: {
mustStayInSkeleton: ['## Phase 0: Pre-checks', '## Phase 1: Product Context', '## Phase 2: Research'],
mustMoveToSection: ['## Phase 3: The Complete Proposal', '## Phase 6: Write DESIGN.md'],
gateAfterStop: undefined,
},
behavioral: 'prompt',
maxSkeletonBytes: 64_000,
minUnionBytes: 72_000,
mustContain: ['Typography', 'Color', 'Aesthetic Direction'],
},
cso: {
skill: 'cso',
expectedSections: ['audit-phases.md'],
requiredReads: ['audit-phases.md'],
scenario:
'Run a security audit on this repository in --owasp mode (OWASP Top 10 only). Resolve the mode, do the Phase 0 stack detection and Phase 1 attack-surface census, then run the scoped audit phases and produce the findings report. Skip any step that needs network access.',
staticInvariants: {
// Dispatch + always-run + FP-filtering phases are ALWAYS loaded (security).
mustStayInSkeleton: [
'## Arguments',
'## Mode Resolution',
'### Phase 0',
'### Phase 1',
'### Phase 12',
'### Phase 13',
'### Phase 14',
],
// Earliest-use: mode must be resolvable before any section is read (codex #6).
mustPrecedeStop: ['## Arguments', '## Mode Resolution'],
// Scope-dependent audit detail moved to the section.
mustMoveToSection: [
'### Phase 2: Secrets Archaeology',
'### Phase 9: OWASP Top 10 Assessment',
'### Phase 10: STRIDE Threat Model',
],
gateAfterStop: undefined,
},
behavioral: 'prompt',
maxSkeletonBytes: 70_000,
minUnionBytes: 72_000,
mustContain: ['OWASP', 'STRIDE', 'daily', 'comprehensive', 'verif'],
},
};
/** Sorted carved-skill names. Consumers derive their lists from this — no parallel lists. */
export const CARVED_SKILLS: readonly string[] = Object.freeze(
Object.keys(CARVE_GUARDS).sort(),
);
+32 -95
View File
@@ -22,6 +22,7 @@ import * as fs from 'fs';
import * as path from 'path';
import type { ParityBaseline, SkillBaselineEntry } from './capture-parity-baseline';
import { captureBaseline } from './capture-parity-baseline';
import { CARVE_GUARDS } from './carve-guards';
export interface ParityInvariant {
skill: string;
@@ -198,86 +199,13 @@ export function runParityChecks(opts: {
* Each entry pins what must-not-break in a skill family. Extend as future
* skills land. Phase B (v2.0.0.0) adds LLM-judge invariants on top of these.
*/
export const PARITY_INVARIANTS: ParityInvariant[] = [
{
skill: 'cso',
mustContain: ['OWASP', 'STRIDE', 'daily', 'comprehensive', 'verif'],
mustHaveHeadings: ['## Preamble', '## When to invoke'],
maxSizeRatio: 1.05,
minBytes: 30_000,
},
{
// Carved (v2 plan T9): skeleton SKILL.md + sections/*.md. Content checks run
// against the union (relocated phrases still count); size floors run against
// the union (total behavior preserved); maxSkeletonBytes asserts the
// always-loaded skeleton actually shrank from the ~167KB monolith.
skill: 'ship',
sectioned: true,
maxSkeletonBytes: 90_000,
mustContain: [
'VERSION',
'CHANGELOG',
'review',
'merge',
'PR',
],
mustHaveHeadings: ['## Preamble', '## When to invoke'],
maxSizeRatio: 1.05,
minBytes: 120_000,
},
{
// Carved (v2 plan T9): skeleton SKILL.md + sections/review-sections.md.
// Content + size floors run against the union (relocated prose still counts);
// maxSkeletonBytes asserts the always-loaded skeleton shrank from the ~138KB
// monolith to ~81KB (measured 80,731 B, -42%). Headroom to 90KB so a small
// skeleton edit doesn't trip CI, but a 10KB regression does.
skill: 'plan-ceo-review',
sectioned: true,
maxSkeletonBytes: 90_000,
mustContain: [
'SCOPE EXPANSION',
'SELECTIVE EXPANSION',
'HOLD SCOPE',
'SCOPE REDUCTION',
],
mustHaveHeadings: ['## Preamble', '## When to invoke'],
maxSizeRatio: 1.05,
minBytes: 80_000,
},
{
// Carved (v2 plan T9): skeleton + sections/review-sections.md. The 4-section
// review, outside voice, and required outputs moved to the section; content
// checks run against the union. Skeleton shrank 106,984 -> 54,892 B (-48.7%);
// maxSkeletonBytes 62KB = measured + headroom.
skill: 'plan-eng-review',
sectioned: true,
maxSkeletonBytes: 62_000,
mustContain: [
'Architecture',
'Code Quality',
'Test',
'Performance',
],
mustHaveHeadings: ['## Preamble', '## When to invoke'],
maxSizeRatio: 1.05,
minBytes: 70_000,
},
{
// Carved (v2 plan T9): skeleton + sections/review-sections.md. The 7 design
// passes + required outputs moved to the section; content checks run against
// the union. Skeleton shrank 112,057 -> 76,024 B (-32.2%); maxSkeletonBytes
// 82KB = measured + headroom.
skill: 'plan-design-review',
sectioned: true,
maxSkeletonBytes: 82_000,
mustContain: [
'design',
'visual',
],
mustHaveHeadings: ['## Preamble', '## When to invoke'],
maxSizeRatio: 1.05,
minBytes: 70_000,
},
/**
* Monolith (non-carved) invariants hand-written. Carved-skill invariants are
* generated from CARVE_GUARDS below (single source of truth), so they never drift
* from the size-budget / static / behavioral guards.
*/
const MONOLITH_INVARIANTS: ParityInvariant[] = [
// cso is now carved — its invariant is generated from CARVE_GUARDS below.
{
skill: 'review',
mustContain: ['confidence', 'P1', 'P2'],
@@ -299,21 +227,6 @@ export const PARITY_INVARIANTS: ParityInvariant[] = [
maxSizeRatio: 1.05,
minBytes: 30_000,
},
{
// Carved (v2 plan T9): skeleton SKILL.md + sections/design-and-handoff.md.
// Phase 5 (design doc) + Phase 6 (handoff) moved into the section, so
// 'design doc' / 'problem statement' now live there — content checks run
// against the union. maxSkeletonBytes asserts the always-loaded skeleton
// shrank from the ~118KB monolith to ~89KB (measured 88,975 B, -24.8%);
// headroom to 96KB so a small skeleton edit doesn't trip CI.
skill: 'office-hours',
sectioned: true,
maxSkeletonBytes: 96_000,
mustContain: ['design doc', 'problem statement'],
mustHaveHeadings: ['## Preamble', '## When to invoke'],
maxSizeRatio: 1.05,
minBytes: 70_000,
},
{
skill: 'autoplan',
mustContain: ['ceo', 'eng', 'design'],
@@ -322,3 +235,27 @@ export const PARITY_INVARIANTS: ParityInvariant[] = [
minBytes: 70_000,
},
];
/**
* Carved-skill invariants, GENERATED from the canonical CARVE_GUARDS registry
* (EQ1: single source of truth). Each carve's skeleton-shrink floor
* (maxSkeletonBytes), union floor (minUnionBytes), and content invariants
* (mustContain) live in carve-guards.ts; this just projects them into the parity
* shape. Adding a carve there auto-adds its union guard here which is how
* plan-devex-review (previously in SECTIONS_EXTRACTED but missing a sectioned
* parity invariant) is now guarded.
*/
const CARVED_INVARIANTS: ParityInvariant[] = Object.values(CARVE_GUARDS).map((g) => ({
skill: g.skill,
sectioned: true,
maxSkeletonBytes: g.maxSkeletonBytes,
minBytes: g.minUnionBytes,
mustContain: g.mustContain,
mustHaveHeadings: ['## Preamble', '## When to invoke'],
maxSizeRatio: g.maxSizeRatio ?? 1.05,
}));
export const PARITY_INVARIANTS: ParityInvariant[] = [
...MONOLITH_INVARIANTS,
...CARVED_INVARIANTS,
];
+3
View File
@@ -52,6 +52,9 @@ export class ClaudeAdapter implements ProviderAdapter {
timeout: opts.timeoutMs,
encoding: 'utf-8',
maxBuffer: 32 * 1024 * 1024,
// Default GSTACK_HEADLESS=1 so a benchmark run classifies as headless (an
// AskUserQuestion failure BLOCKs rather than emitting unanswerable prose).
env: { ...process.env, GSTACK_HEADLESS: '1' },
});
const parsed = this.parseOutput(out);
return {
+5 -1
View File
@@ -176,7 +176,11 @@ export async function runSkillTest(options: {
const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], {
cwd: workingDirectory,
env: extraEnv ? { ...process.env, ...extraEnv } : undefined,
// Default GSTACK_HEADLESS=1 so eval/E2E runs classify as headless (BLOCK on an
// AskUserQuestion failure rather than emit a prose question no human reads). A
// suite exercising the INTERACTIVE prose-fallback path opts out by passing
// `env: { GSTACK_HEADLESS: '' }` — extraEnv wins because it spreads last.
env: { ...process.env, GSTACK_HEADLESS: '1', ...extraEnv },
stdout: 'pipe',
stderr: 'pipe',
});
+6
View File
@@ -123,6 +123,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
'ship-idempotency-pty': ['ship/**', 'bin/gstack-next-version', 'bin/gstack-version-bump', 'scripts/resolvers/sections.ts', 'lib/worktree.ts', 'test/helpers/claude-pty-runner.ts'],
'ship-section-loading': ['ship/**', 'scripts/resolvers/sections.ts', 'scripts/gen-skill-docs.ts', 'test/helpers/auq-sdk-capture.ts', 'test/helpers/session-runner.ts'],
'plan-ceo-section-loading': ['plan-ceo-review/**', 'scripts/resolvers/sections.ts', 'scripts/gen-skill-docs.ts', 'test/helpers/auq-sdk-capture.ts', 'test/helpers/session-runner.ts'],
// Data-driven behavioral guard for the 'plan'/'prompt' carves (eng, design,
// devex, office-hours + future PR2 carves). One file iterating CARVE_GUARDS;
// the selector sets GSTACK_CARVE_SKILL=<name> to scope cost to the changed
// skill (D-CODEX A). Touching the registry/helper or sections.ts runs all.
'carve-section-loading': ['plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'office-hours/**', 'document-release/**', 'design-consultation/**', 'cso/**', 'test/helpers/carve-guards.ts', 'scripts/resolvers/sections.ts', 'scripts/gen-skill-docs.ts', 'test/helpers/auq-sdk-capture.ts', 'test/helpers/session-runner.ts'],
'autoplan-chain-pty': ['autoplan/**', 'plan-ceo-review/**', 'plan-design-review/**', 'plan-eng-review/**', 'plan-devex-review/**', 'test/fixtures/plans/ui-heavy-feature.md', 'test/helpers/claude-pty-runner.ts'],
'e2e-harness-audit': ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/claude-pty-runner.ts'],
@@ -512,6 +517,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
'ship-idempotency-pty': 'periodic', // ~$3/run, real /ship in plan mode
'ship-section-loading': 'periodic', // ~$3/run, real /ship; asserts section reads
'plan-ceo-section-loading': 'periodic', // ~$3-5/run, real /plan-ceo-review; asserts section read
'carve-section-loading': 'periodic', // ~$1-2/skill, data-driven; GSTACK_CARVE_SKILL scopes to one
'autoplan-chain-pty': 'periodic', // ~$8/run, all 3 phases sequential
// Per-finding count + review-report-at-bottom — periodic because each
+135
View File
@@ -0,0 +1,135 @@
/**
* pr-title-sync.yml is a `pull_request_target` workflow static injection
* tripwire (gate, free).
*
* The anxiety this kills: `pull_request_target` runs with a WRITE token in the
* base-repo context, even for fork PRs. That is what lets this workflow rewrite
* fork-PR titles (the backstop). It is also the single most dangerous workflow
* trigger in GitHub Actions. Two classic footguns turn it into remote code
* execution / token theft, and `actionlint` catches NEITHER:
*
* 1. Checking out the PR head (`actions/checkout` with a `ref:` pointing at
* `pull_request.head` / `head_ref`) and then running anything from it
* that executes attacker-controlled fork code with the write token.
* 2. Interpolating an attacker-controlled `${{ github.event.pull_request.* }}`
* field directly INSIDE a `run:` block the title/body are attacker-
* controlled and the `${{ }}` is expanded into the shell before execution,
* so a crafted title runs as code. Those fields MUST arrive via `env:` and
* be referenced as `"$VAR"` (shell-quoted), never inlined.
*
* This tripwire reads the workflow file directly and fails CI if either pattern
* reappears. Mirrors the static-grep invariant tests in browse/test
* (terminal-agent-pid-identity, server-sanitize-surrogates).
*
* Note: `gh api ... -q '.head.sha'` inside a run block is SAFE (reading PR
* metadata as data via a jq filter string, not `${{ }}` interpolation), so we
* ban the interpolation form specifically, not the literal substring `head.sha`.
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'node:fs';
import * as path from 'node:path';
const WORKFLOW = path.resolve(__dirname, '..', '.github', 'workflows', 'pr-title-sync.yml');
/** Indentation width (count of leading spaces) of a line. */
function indent(line: string): number {
const m = line.match(/^( *)/);
return m ? m[1].length : 0;
}
/**
* Return the lines that live inside a `run:` block, each tagged with its 1-based
* line number. Handles both `run: |` (multiline) and `run: <inline command>`.
*/
function runBlockLines(content: string): Array<{ n: number; text: string }> {
const lines = content.split('\n');
const out: Array<{ n: number; text: string }> = [];
let inRun = false;
let runIndent = -1;
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const n = i + 1;
const inlineRun = line.match(/^(\s*)run:\s*(\S.*)$/); // `run: echo foo`
const blockRun = /^(\s*)run:\s*(\|>?[+-]?)?\s*$/.test(line); // `run: |`
if (inlineRun && !/^\|/.test(inlineRun[2])) {
out.push({ n, text: inlineRun[2] });
inRun = false;
continue;
}
if (blockRun) {
inRun = true;
runIndent = indent(line);
continue;
}
if (inRun) {
if (line.trim() === '') {
out.push({ n, text: line });
continue;
}
// Block ends when a non-empty line is indented at or below the `run:` key.
if (indent(line) <= runIndent) {
inRun = false;
} else {
out.push({ n, text: line });
}
}
}
return out;
}
describe('pr-title-sync.yml pull_request_target safety', () => {
const content = fs.readFileSync(WORKFLOW, 'utf-8');
test('workflow file exists', () => {
expect(fs.existsSync(WORKFLOW)).toBe(true);
});
test('does NOT check out the PR head ref (no fork-code execution)', () => {
const offenders: string[] = [];
content.split('\n').forEach((line, i) => {
// A checkout `ref:` (or any `ref:`) pointing at the PR head is the footgun.
if (/ref:\s*\$\{\{[^}]*(pull_request\.head|head_ref)/.test(line)) {
offenders.push(` L${i + 1}: ${line.trim()}`);
}
});
if (offenders.length > 0) {
throw new Error(
`pr-title-sync.yml checks out the PR head under pull_request_target — that ` +
`runs attacker-controlled fork code with a write token. Check out the base ` +
`repo (no ref:) and read PR-head data via the API instead.\n` +
offenders.join('\n'),
);
}
});
test('does NOT interpolate ${{ github.event.pull_request.* }} inside a run: block', () => {
const offenders: string[] = [];
for (const { n, text } of runBlockLines(content)) {
if (/\$\{\{\s*github\.event\.pull_request/.test(text)) {
offenders.push(` L${n}: ${text.trim()}`);
}
}
if (offenders.length > 0) {
throw new Error(
`pr-title-sync.yml inlines an attacker-controlled PR field into a run: block ` +
`— a crafted PR title/body executes as shell. Pass it via env: and ` +
`reference "$VAR" (shell-quoted) instead.\n` +
offenders.join('\n'),
);
}
});
test('uses pull_request_target (the hardening is actually present)', () => {
// Positive assertion: if someone reverts to plain pull_request, the fork
// backstop silently stops working (read-only token). Keep it intentional.
expect(/^on:\s*$/m.test(content) || /\bpull_request_target\b/.test(content)).toBe(true);
expect(content).toMatch(/\bpull_request_target\b/);
});
test('passes the PR title through env:, not raw interpolation', () => {
// The safe pattern: OLD_TITLE: ${{ github.event.pull_request.title }} in an
// env: mapping, consumed as "$OLD_TITLE" in script.
expect(content).toMatch(/env:/);
expect(content).toMatch(/github\.event\.pull_request\.title/);
});
});
+87
View File
@@ -16,6 +16,8 @@
* for the weekly periodic eval to notice.
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import type { TemplateContext } from '../scripts/resolvers/types';
import { HOST_PATHS } from '../scripts/resolvers/types';
import { generateAskUserFormat } from '../scripts/resolvers/preamble/generate-ask-user-format';
@@ -161,3 +163,88 @@ describe('generateAskUserFormat — 5+ option split rule (slim inline + docs poi
expect(out).toContain('**Non-ASCII characters');
});
});
describe('generateAskUserFormat — runtime-failure prose fallback', () => {
const out = generateAskUserFormat(makeCtx());
test('documents the unavailable/failed subsection', () => {
expect(out).toMatch(/When AskUserQuestion is unavailable or a call fails/i);
});
test('carves out the auto-decide denial as NOT a failure', () => {
expect(out).toContain('[plan-tune auto-decide]');
expect(out).toMatch(/NOT a failure/i);
// and explicitly: do not fall back to prose on an auto-decide denial
expect(out).toMatch(/Do NOT[\s\S]{0,40}fall back to prose|never prose/i);
});
test('retries the errored call exactly once before degrading', () => {
expect(out).toMatch(/retry the SAME call \*\*once\*\*|retry the same call.*once/i);
// idempotency guard against double-prompting
expect(out).toMatch(/double-prompt|no answer could have surfaced/i);
});
test('branches on SESSION_KIND: spawned / headless / interactive', () => {
expect(out).toContain('SESSION_KIND');
expect(out).toMatch(/`spawned`[\s\S]*auto-choose/);
expect(out).toMatch(/`headless`[\s\S]*BLOCKED/);
expect(out).toMatch(/`interactive`[\s\S]*prose fallback/);
// empty/absent SESSION_KIND degrades to interactive
expect(out).toMatch(/empty\/absent[\s\S]{0,40}interactive/i);
});
// The mandatory triad the user explicitly required for the plain-text output.
test('prose fallback mandates the triad: issue ELI10', () => {
expect(out).toMatch(/ELI10 of the issue itself/i);
});
test('prose fallback mandates the triad: per-choice Completeness score', () => {
expect(out).toMatch(/Completeness scores per choice/i);
expect(out).toMatch(/Completeness: X\/10.*EACH choice|on EACH choice/i);
});
test('prose fallback mandates the triad: recommendation + (recommended) marker', () => {
expect(out).toMatch(/Recommendation: <choice> because/);
expect(out).toMatch(/\(recommended\)`? marker on that choice/);
});
test('prose fallback is one paragraph per choice, not a bare bullet list', () => {
expect(out).toMatch(/ONE paragraph per choice/i);
expect(out).toMatch(/never a bare bullet list/i);
});
test('prose fallback tells the user to reply with a letter, then STOP', () => {
expect(out).toMatch(/reply with a letter/i);
expect(out).toMatch(/STOP and wait/i);
});
// OV2: the former "tool_use, not prose" assertions must carry the qualifier so the
// fallback is not self-contradicting. Guards against the instruction collision
// silently returning on a future edit.
test('OV2: the Format line qualifies "not prose" with the fallback exception', () => {
expect(out).toMatch(/must be sent as tool_use, not prose — unless the documented failure fallback/);
});
test('OV2: the self-check "not writing prose" line carries the fallback qualifier', () => {
expect(out).toMatch(/not writing prose — unless the documented failure fallback applies/);
});
});
describe('CQ2 — cross-file invariant: auto-decide prefix matches the hook', () => {
const out = generateAskUserFormat(makeCtx());
const hookSrc = fs.readFileSync(
path.resolve(__dirname, '..', 'hosts', 'claude', 'hooks', 'question-preference-hook.ts'),
'utf-8',
);
test('the hook actually emits the [plan-tune auto-decide] prefix', () => {
expect(hookSrc).toContain('[plan-tune auto-decide]');
});
test('the resolver references the exact same prefix the hook emits', () => {
// If a future edit reworded the hook reason, this catches the drift: the prose
// fallback would stop recognizing the auto-decide denial as not-a-failure.
const PREFIX = '[plan-tune auto-decide]';
expect(hookSrc.includes(PREFIX) && out.includes(PREFIX)).toBe(true);
});
});
@@ -74,7 +74,7 @@ describeE2E('/plan-ceo-review section-loading E2E (periodic, SDK capture)', () =
'Review the plan in PLAN.md. Hold the current scope (HOLD SCOPE mode) — do not challenge or expand scope. Run the full CEO review and produce the review report.',
requiredSections: REQUIRED_SECTIONS,
reportMarker: /GSTACK REVIEW REPORT|COMPLETION SUMMARY|review/i,
testName: '/plan-ceo-review section-loading',
testName: 'plan-ceo-section-loading',
runId,
});

Some files were not shown because too many files have changed in this diff Show More