From aeea57f96a3f73e13b732f036dca5ed7ed7f7bdf Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 24 Apr 2026 02:11:24 -0700 Subject: [PATCH] v1.12.1.0 fix: remove vestigial plan-mode handshake (#1185) * refactor: remove vestigial plan-mode handshake resolver Delete scripts/resolvers/preamble/generate-plan-mode-handshake.ts and its four question-registry entries. Split the authoritative "Plan Mode Safe Operations" and "Skill Invocation During Plan Mode" sections out of generate-completion-status.ts into a sibling generatePlanModeInfo() export in the same module, wired at preamble position 1 where the handshake used to live. Same text, new position. The vestigial handshake told interactive review skills to emit an A=exit-and-rerun / C=cancel AskUserQuestion before running their interactive STOP-Ask workflow. That contradicted the authoritative rule at the tail of completion-status.ts saying AskUserQuestion satisfies plan mode's end-of-turn requirement. Skills now run directly when invoked in plan mode, with each finding gated by AskUserQuestion just like outside plan mode. Co-Authored-By: Claude Opus 4.7 (1M context) * test: rename plan-mode-handshake-helpers to plan-mode-helpers, strengthen smokes Rename test/helpers/plan-mode-handshake-helpers.ts to test/helpers/plan-mode-helpers.ts. Keep the write-guard helper that asserts no Write/Edit tool call before the first AskUserQuestion (this is what catches silent-bypass regressions the textual smoke can't see). Rename the API: runPlanModeHandshakeTest to runPlanModeSkillTest, assertHandshakeShape to assertNotHandshakeShape. Extend the capture struct with exitPlanModeBeforeAsk. Rewrite the four per-skill E2E tests (plan-ceo, plan-eng, plan-design, plan-devex) as smoke tests that assert the skill's Step 0 question fires first, not an A/C handshake. Each test picks a cheap first answer (HOLD, TRIAGE, numeric score) so the run terminates quickly. 
Keep test/skill-e2e-plan-mode-no-op.test.ts as the outside-plan-mode non-interference regression, per codex outside-voice review: deleting it would lose coverage for "the hoisted section stays quiet when plan mode is absent." Replace the gen-skill-docs.test.ts handshake describe block (lines 2778+) with a plan-mode-info describe block that: - scans every generated SKILL.md under the repo root + every host subdir (.agents, .openclaw, .opencode, .factory, .hermes, .kiro, .cursor, .slate) and asserts "## Plan Mode Handshake" is absent - asserts "## Skill Invocation During Plan Mode" lands in the first 15KB of each of the four review skills' generated SKILL.md Both assertions run on every bun test. A PR that re-introduces the handshake resolver fails CI immediately. Update test/e2e-harness-audit.test.ts to reference the renamed runPlanModeSkillTest. Update test/helpers/touchfiles.ts entries to point at the new resolver owner (generate-completion-status.ts) and the renamed helper, and align per-skill touchfile keys. Co-Authored-By: Claude Opus 4.7 (1M context) * chore: regenerate SKILL.md across all hosts + refresh golden fixtures Run bun run gen:skill-docs for every host to flush the vestigial "## Plan Mode Handshake" section from every generated SKILL.md and emit the hoisted "## Skill Invocation During Plan Mode" section at preamble position 1 instead. Refresh the three golden-fixture snapshots (claude, codex, factory) to match the new position. No behavior change beyond the resolver swap in the prior commit. 
Co-Authored-By: Claude Opus 4.7 (1M context) * chore: bump version and changelog (v1.12.1.0) Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 52 +++++++ SKILL.md | 30 ++-- TODOS.md | 26 +--- VERSION | 2 +- autoplan/SKILL.md | 30 ++-- benchmark-models/SKILL.md | 30 ++-- benchmark/SKILL.md | 30 ++-- browse/SKILL.md | 30 ++-- canary/SKILL.md | 30 ++-- codex/SKILL.md | 30 ++-- context-restore/SKILL.md | 30 ++-- context-save/SKILL.md | 30 ++-- cso/SKILL.md | 30 ++-- design-consultation/SKILL.md | 30 ++-- design-html/SKILL.md | 30 ++-- design-review/SKILL.md | 30 ++-- design-shotgun/SKILL.md | 30 ++-- devex-review/SKILL.md | 30 ++-- document-release/SKILL.md | 30 ++-- health/SKILL.md | 30 ++-- investigate/SKILL.md | 30 ++-- land-and-deploy/SKILL.md | 30 ++-- landing-report/SKILL.md | 30 ++-- learn/SKILL.md | 30 ++-- make-pdf/SKILL.md | 30 ++-- office-hours/SKILL.md | 30 ++-- open-gstack-browser/SKILL.md | 30 ++-- package.json | 2 +- pair-agent/SKILL.md | 30 ++-- plan-ceo-review/SKILL.md | 116 ++------------ plan-design-review/SKILL.md | 116 ++------------ plan-devex-review/SKILL.md | 116 ++------------ plan-eng-review/SKILL.md | 116 ++------------ plan-tune/SKILL.md | 30 ++-- qa-only/SKILL.md | 30 ++-- qa/SKILL.md | 30 ++-- retro/SKILL.md | 30 ++-- review/SKILL.md | 30 ++-- scripts/question-registry.ts | 39 ----- scripts/resolvers/preamble.ts | 19 +-- .../preamble/generate-completion-status.ts | 51 +++++-- .../preamble/generate-plan-mode-handshake.ts | 141 ------------------ setup-browser-cookies/SKILL.md | 30 ++-- setup-deploy/SKILL.md | 30 ++-- setup-gbrain/SKILL.md | 30 ++-- ship/SKILL.md | 30 ++-- test/e2e-harness-audit.test.ts | 4 +- test/fixtures/golden/claude-ship-SKILL.md | 30 ++-- test/fixtures/golden/codex-ship-SKILL.md | 30 ++-- test/fixtures/golden/factory-ship-SKILL.md | 30 ++-- test/gen-skill-docs.test.ts | 114 +++++++------- ...dshake-helpers.ts => plan-mode-helpers.ts} | 120 
++++++++------- test/helpers/touchfiles.ts | 22 +-- test/skill-e2e-plan-ceo-plan-mode.test.ts | 38 +++-- test/skill-e2e-plan-design-plan-mode.test.ts | 23 +-- test/skill-e2e-plan-devex-plan-mode.test.ts | 22 +-- test/skill-e2e-plan-eng-plan-mode.test.ts | 21 +-- test/skill-e2e-plan-mode-no-op.test.ts | 38 ++--- 58 files changed, 917 insertions(+), 1391 deletions(-) delete mode 100644 scripts/resolvers/preamble/generate-plan-mode-handshake.ts rename test/helpers/{plan-mode-handshake-helpers.ts => plan-mode-helpers.ts} (52%) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea5181f3..8fda72d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,57 @@ # Changelog +## [1.12.1.0] - 2026-04-24 + +## **Plan-mode review skills run the review directly, no more "exit and rerun" prompt.** + +Before this release, `/plan-eng-review` (and the three other `interactive: true` review skills) greeted plan-mode users with an A/C handshake asking them to exit plan mode and rerun, or cancel. That handshake was vestigial: the preamble already contains an authoritative "Skill Invocation During Plan Mode" rule saying AskUserQuestion satisfies plan mode's end-of-turn requirement. Two contradictory rules, the bossy one at the top won, the review never ran. This release deletes the bossier rule and hoists the correct one to position 1 of the preamble so skills run straight through. + +### What shipped + +The vestigial `scripts/resolvers/preamble/generate-plan-mode-handshake.ts` resolver is deleted. The "Plan Mode Safe Operations" and "Skill Invocation During Plan Mode" blocks are split out of `generate-completion-status.ts` into a sibling `generatePlanModeInfo()` export in the same module, then wired at preamble position 1 where the handshake used to live. The "you see this first" positioning stays; only the content changes. Four dead plan-mode-handshake question-registry IDs are removed. 
The `interactive: true` frontmatter flag stays on the four review skill templates because `test/e2e-harness-audit.test.ts` reads it to classify which skills must have `canUseTool` coverage, per codex outside-voice review. + +The four per-skill plan-mode E2E tests are rewritten as smoke tests that assert three things: Step 0's actual scope-mode question fires (not an A/C handshake), no Write/Edit happens before the first AskUserQuestion, and `ExitPlanMode` is not called early. The write-guard helper from the old `plan-mode-handshake-helpers.ts` is preserved in the renamed `plan-mode-helpers.ts` so silent-bypass regressions still get caught. `test/skill-e2e-plan-mode-no-op.test.ts` is kept for the opposite coverage case: the plan-mode-info block stays quiet outside plan mode. `test/gen-skill-docs.test.ts` now scans every generated `SKILL.md` under the repo root and all 8 host subdirs (`.agents/`, `.openclaw/`, `.kiro/`, etc.) and asserts `## Plan Mode Handshake` is absent. That's a sub-second unit gate blocking any future PR from re-introducing the resolver. + +### The numbers that matter + +Source: `bun test` on HEAD against the pre-change baseline. 
+ +| Metric | Before | After | Δ | +|---|---|---|---| +| Preamble resolvers | 19 (handshake + completion-status) | 18 (completion-status owns both functions) | -1 module | +| Handshake lines in generated SKILL.md | 92 per skill × 4 skills = 368 | 0 | -368 | +| Question-registry entries | 51 | 47 | -4 dead entries | +| Plan-mode gate-tier tests | 5 handshake-asserting | 5 smoke + no-op + write-guard | same count, stronger assertions | +| Multi-host handshake-absence unit test | none | 1 (scans repo root + 8 host dirs, <1s) | new regression gate | +| `bun test` on changed files | 360 gen-skill-docs pass | 360 gen-skill-docs pass | no regression | + +The hoisted `## Skill Invocation During Plan Mode` section lands at line ~127 of every `plan-*-review/SKILL.md` (first ~15% of the file), before the upgrade check and onboarding gates, so the authoritative plan-mode rule is the first thing the model reads after bash env setup. + +### What this means for plan-mode users + +Invoke `/plan-eng-review` from plan mode. You get the scope-mode question (`SCOPE EXPANSION` / `SELECTIVE EXPANSION` / `HOLD SCOPE` / `SCOPE REDUCTION`) immediately, the review runs, each finding gets its own `AskUserQuestion`, `ExitPlanMode` fires at the end. No two-step "exit and rerun" friction. Same for `/plan-ceo-review`, `/plan-design-review`, `/plan-devex-review`. + +### Itemized changes + +#### Fixed + +- `/plan-eng-review`, `/plan-ceo-review`, `/plan-design-review`, `/plan-devex-review` no longer show an A/C handshake prompt when invoked in plan mode. Each skill runs its interactive review directly, with every finding gated by `AskUserQuestion` just like outside plan mode. + +#### Changed + +- The "Plan Mode Safe Operations" and "Skill Invocation During Plan Mode" preamble sections are now emitted at position 1 (right after the bash env setup) instead of at the tail of the completion-status block. 
All skills see these two sections earlier in the preamble; nothing else changes about the content. +- `test/helpers/plan-mode-handshake-helpers.ts` is renamed to `test/helpers/plan-mode-helpers.ts`. The exported API is renamed from `runPlanModeHandshakeTest` to `runPlanModeSkillTest` and from `assertHandshakeShape` to `assertNotHandshakeShape`. The write-guard detection (no `Write`/`Edit` tool call before the first `AskUserQuestion`) is preserved and extended with `ExitPlanMode`-before-ask detection. + +#### Removed + +- `scripts/resolvers/preamble/generate-plan-mode-handshake.ts` deleted (vestigial, superseded by `generatePlanModeInfo` in `generate-completion-status.ts`). +- Four question-registry entries removed from `scripts/question-registry.ts`: `plan-ceo-review-plan-mode-handshake`, `plan-eng-review-plan-mode-handshake`, `plan-design-review-plan-mode-handshake`, `plan-devex-review-plan-mode-handshake`. These IDs are no longer emitted by any skill; keeping them in the registry was dead weight. + +#### For contributors + +- `test/gen-skill-docs.test.ts` now has a "plan-mode-info resolver" describe block that (a) scans every generated `SKILL.md` under the repo root plus every host subdir (`.agents/`, `.openclaw/`, `.opencode/`, `.factory/`, `.hermes/`, `.kiro/`, `.cursor/`, `.slate/`) and asserts `## Plan Mode Handshake` is absent, and (b) asserts `## Skill Invocation During Plan Mode` lands in the first 15,000 bytes of each of the four review skills' generated `SKILL.md`. Both assertions run on every `bun test`. Any PR that re-introduces the handshake resolver fails CI immediately. +- The `interactive: true` frontmatter flag on the four review skill templates is preserved. It still has a reader: `test/e2e-harness-audit.test.ts` uses it to enforce `canUseTool` coverage on interactive review E2E tests. Removing the flag was part of the initial plan; codex outside-voice review caught the downstream dependency during review and that decision was reversed. 
+ ## [1.12.0.0] - 2026-04-24 ## **`/setup-gbrain` — any coding agent goes from zero to "gbrain is running, and I can call it" in under five minutes.** diff --git a/SKILL.md b/SKILL.md index ec979715..a0fc120e 100644 --- a/SKILL.md +++ b/SKILL.md @@ -111,6 +111,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -554,21 +569,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. 
-## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/TODOS.md b/TODOS.md index 27e7896e..a250cd50 100644 --- a/TODOS.md +++ b/TODOS.md @@ -35,37 +35,21 @@ --- -## P1: Structural STOP-Ask forcing function across all skills (v1.11.1.0 follow-up) +## P1: Structural STOP-Ask forcing function across all skills **What:** Design and implement a structural forcing function that catches when a skill mandates per-issue AskUserQuestion but the model silently substitutes batch-synthesis. Candidate mechanisms: question-count assertion (skill declares expected question count in frontmatter; post-run audit logs if model fired ` in this turn for the literal phrase:** +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. -> `Plan mode is active. 
The user indicated that they do not want you to execute yet` - -If that phrase is **absent**: proceed normally. This section is a no-op. - -If that phrase is **present**, the user is in plan mode. Plan mode's system -reminder says "This supercedes any other instructions you have received," -which conflicts with this skill's interactive STOP-Ask workflow. You MUST -resolve the conflict via AskUserQuestion BEFORE reading any files, running -any bash, or composing any plan content. - -### What to do when plan mode is detected - -Before emitting the AskUserQuestion, run this bash block synchronously to -log that the handshake fired (captures A-exit and C-cancel outcomes that -would terminate the skill before end-of-skill telemetry runs): - -```bash -# PLAN MODE EXCEPTION — ALWAYS RUN (telemetry-only write to ~/.gstack/) -mkdir -p ~/.gstack/analytics -echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"fired","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true -``` - -Then emit exactly **one** AskUserQuestion with `question_id: "${SKILL_NAME}-plan-mode-handshake"` -(e.g., `plan-ceo-review-plan-mode-handshake`, using the current skill's name) -and these two options. The question is classified `door_type: one-way` in -the question registry for every interactive skill, so question-tuning -preferences (`never-ask`, `always-ask`) do NOT apply — this gate always fires. - -**Question body (follow the AskUserQuestion Format section below):** - -> This skill runs an interactive review that stops at every finding to ask -> you a question. Plan mode's default workflow is "read files, write plan, -> exit" — that silently bypasses every STOP gate in this skill. How do you -> want to proceed? -> -> **Recommendation: A** because this skill was designed for back-and-forth. 
-> Each scope call and each per-section finding needs your decision before it -> lands in the plan. Exiting plan mode and running the skill normally is the -> only path that preserves the interactive contract. -> -> *Note: options differ in kind (workflow shape), not coverage — no -> completeness score.* -> -> **A) Exit plan mode and run interactively (recommended)** -> ✅ Every STOP gate in this skill fires as designed — you approve each -> scope call, each per-section finding, each cross-model tension before any -> decision lands in the plan. No silent bypass. -> ✅ Matches the skill's documented workflow. Each AskUserQuestion has a -> clear recommendation, pros/cons, and net line you can skim in ~5 seconds. -> ❌ Two-step: press esc-esc to exit plan mode, then rerun -> `/plan-{skill-name}`. Slight context-switch friction, but the alternative -> is shipping a rubber-stamp review. -> -> **C) Cancel — I meant to run something else** -> ✅ Clean exit, no partial state, no plan file written, no findings -> recorded. Use this if you invoked the skill by mistake. -> ❌ No output at all — no review, no plan file. Fine if that's what you -> want; otherwise pick A. -> -> **Net.** Plan mode is incompatible with this skill's per-finding STOP -> gates. A is the right choice for any real review; C is the bail-out. - -### Routing the user's answer - -**If the user picks A (exit and rerun):** - -1. Append the outcome to the telemetry log (synchronous, before ExitPlanMode): - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"A-exit","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Respond to the user: "Press **esc-esc** to exit plan mode, then rerun - `/{skill-name}`. The skill will run interactively with every STOP gate - firing as designed." -3. 
Call `ExitPlanMode` with an empty plan body (plan mode requires - turn-end via AskUserQuestion or ExitPlanMode; there is no plan to - approve, so ExitPlanMode with an empty message is the correct exit). - -**If the user picks C (cancel):** - -1. Append the outcome: - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"C-cancel","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Tell the user: "Cancelled. No plan written." -3. Call `ExitPlanMode` with an empty message noting the user cancelled. - -**After the handshake completes (either A or C),** do NOT continue with the -rest of this skill's workflow. The handshake is terminal for this turn. +## Skill Invocation During Plan Mode +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly @@ -1113,21 +1034,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. 
-## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index dcf0474b..396df121 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -116,99 +116,20 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` -## Plan Mode Handshake — FIRST, BEFORE ANY ANALYSIS +## Plan Mode Safe Operations -**Check every `` in this turn for the literal phrase:** +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. -> `Plan mode is active. The user indicated that they do not want you to execute yet` - -If that phrase is **absent**: proceed normally. This section is a no-op. - -If that phrase is **present**, the user is in plan mode. 
Plan mode's system -reminder says "This supercedes any other instructions you have received," -which conflicts with this skill's interactive STOP-Ask workflow. You MUST -resolve the conflict via AskUserQuestion BEFORE reading any files, running -any bash, or composing any plan content. - -### What to do when plan mode is detected - -Before emitting the AskUserQuestion, run this bash block synchronously to -log that the handshake fired (captures A-exit and C-cancel outcomes that -would terminate the skill before end-of-skill telemetry runs): - -```bash -# PLAN MODE EXCEPTION — ALWAYS RUN (telemetry-only write to ~/.gstack/) -mkdir -p ~/.gstack/analytics -echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"fired","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true -``` - -Then emit exactly **one** AskUserQuestion with `question_id: "${SKILL_NAME}-plan-mode-handshake"` -(e.g., `plan-ceo-review-plan-mode-handshake`, using the current skill's name) -and these two options. The question is classified `door_type: one-way` in -the question registry for every interactive skill, so question-tuning -preferences (`never-ask`, `always-ask`) do NOT apply — this gate always fires. - -**Question body (follow the AskUserQuestion Format section below):** - -> This skill runs an interactive review that stops at every finding to ask -> you a question. Plan mode's default workflow is "read files, write plan, -> exit" — that silently bypasses every STOP gate in this skill. How do you -> want to proceed? -> -> **Recommendation: A** because this skill was designed for back-and-forth. -> Each scope call and each per-section finding needs your decision before it -> lands in the plan. Exiting plan mode and running the skill normally is the -> only path that preserves the interactive contract. 
-> -> *Note: options differ in kind (workflow shape), not coverage — no -> completeness score.* -> -> **A) Exit plan mode and run interactively (recommended)** -> ✅ Every STOP gate in this skill fires as designed — you approve each -> scope call, each per-section finding, each cross-model tension before any -> decision lands in the plan. No silent bypass. -> ✅ Matches the skill's documented workflow. Each AskUserQuestion has a -> clear recommendation, pros/cons, and net line you can skim in ~5 seconds. -> ❌ Two-step: press esc-esc to exit plan mode, then rerun -> `/plan-{skill-name}`. Slight context-switch friction, but the alternative -> is shipping a rubber-stamp review. -> -> **C) Cancel — I meant to run something else** -> ✅ Clean exit, no partial state, no plan file written, no findings -> recorded. Use this if you invoked the skill by mistake. -> ❌ No output at all — no review, no plan file. Fine if that's what you -> want; otherwise pick A. -> -> **Net.** Plan mode is incompatible with this skill's per-finding STOP -> gates. A is the right choice for any real review; C is the bail-out. - -### Routing the user's answer - -**If the user picks A (exit and rerun):** - -1. Append the outcome to the telemetry log (synchronous, before ExitPlanMode): - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"A-exit","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Respond to the user: "Press **esc-esc** to exit plan mode, then rerun - `/{skill-name}`. The skill will run interactively with every STOP gate - firing as designed." -3. Call `ExitPlanMode` with an empty plan body (plan mode requires - turn-end via AskUserQuestion or ExitPlanMode; there is no plan to - approve, so ExitPlanMode with an empty message is the correct exit). - -**If the user picks C (cancel):** - -1. 
Append the outcome: - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"C-cancel","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Tell the user: "Cancelled. No plan written." -3. Call `ExitPlanMode` with an empty message noting the user cancelled. - -**After the handshake completes (either A or C),** do NOT continue with the -rest of this skill's workflow. The handshake is terminal for this turn. +## Skill Invocation During Plan Mode +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly @@ -1110,21 +1031,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. 
- -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/plan-devex-review/SKILL.md b/plan-devex-review/SKILL.md index e2fccc5d..3869d47d 100644 --- a/plan-devex-review/SKILL.md +++ b/plan-devex-review/SKILL.md @@ -120,99 +120,20 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` -## Plan Mode Handshake — FIRST, BEFORE ANY ANALYSIS +## Plan Mode Safe Operations -**Check every `` in this turn for the literal phrase:** +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. -> `Plan mode is active. The user indicated that they do not want you to execute yet` - -If that phrase is **absent**: proceed normally. This section is a no-op. - -If that phrase is **present**, the user is in plan mode. Plan mode's system -reminder says "This supercedes any other instructions you have received," -which conflicts with this skill's interactive STOP-Ask workflow. You MUST -resolve the conflict via AskUserQuestion BEFORE reading any files, running -any bash, or composing any plan content. 
- -### What to do when plan mode is detected - -Before emitting the AskUserQuestion, run this bash block synchronously to -log that the handshake fired (captures A-exit and C-cancel outcomes that -would terminate the skill before end-of-skill telemetry runs): - -```bash -# PLAN MODE EXCEPTION — ALWAYS RUN (telemetry-only write to ~/.gstack/) -mkdir -p ~/.gstack/analytics -echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"fired","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true -``` - -Then emit exactly **one** AskUserQuestion with `question_id: "${SKILL_NAME}-plan-mode-handshake"` -(e.g., `plan-ceo-review-plan-mode-handshake`, using the current skill's name) -and these two options. The question is classified `door_type: one-way` in -the question registry for every interactive skill, so question-tuning -preferences (`never-ask`, `always-ask`) do NOT apply — this gate always fires. - -**Question body (follow the AskUserQuestion Format section below):** - -> This skill runs an interactive review that stops at every finding to ask -> you a question. Plan mode's default workflow is "read files, write plan, -> exit" — that silently bypasses every STOP gate in this skill. How do you -> want to proceed? -> -> **Recommendation: A** because this skill was designed for back-and-forth. -> Each scope call and each per-section finding needs your decision before it -> lands in the plan. Exiting plan mode and running the skill normally is the -> only path that preserves the interactive contract. 
-> -> *Note: options differ in kind (workflow shape), not coverage — no -> completeness score.* -> -> **A) Exit plan mode and run interactively (recommended)** -> ✅ Every STOP gate in this skill fires as designed — you approve each -> scope call, each per-section finding, each cross-model tension before any -> decision lands in the plan. No silent bypass. -> ✅ Matches the skill's documented workflow. Each AskUserQuestion has a -> clear recommendation, pros/cons, and net line you can skim in ~5 seconds. -> ❌ Two-step: press esc-esc to exit plan mode, then rerun -> `/plan-{skill-name}`. Slight context-switch friction, but the alternative -> is shipping a rubber-stamp review. -> -> **C) Cancel — I meant to run something else** -> ✅ Clean exit, no partial state, no plan file written, no findings -> recorded. Use this if you invoked the skill by mistake. -> ❌ No output at all — no review, no plan file. Fine if that's what you -> want; otherwise pick A. -> -> **Net.** Plan mode is incompatible with this skill's per-finding STOP -> gates. A is the right choice for any real review; C is the bail-out. - -### Routing the user's answer - -**If the user picks A (exit and rerun):** - -1. Append the outcome to the telemetry log (synchronous, before ExitPlanMode): - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"A-exit","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Respond to the user: "Press **esc-esc** to exit plan mode, then rerun - `/{skill-name}`. The skill will run interactively with every STOP gate - firing as designed." -3. Call `ExitPlanMode` with an empty plan body (plan mode requires - turn-end via AskUserQuestion or ExitPlanMode; there is no plan to - approve, so ExitPlanMode with an empty message is the correct exit). - -**If the user picks C (cancel):** - -1. 
Append the outcome: - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"C-cancel","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Tell the user: "Cancelled. No plan written." -3. Call `ExitPlanMode` with an empty message noting the user cancelled. - -**After the handshake completes (either A or C),** do NOT continue with the -rest of this skill's workflow. The handshake is terminal for this turn. +## Skill Invocation During Plan Mode +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly @@ -1114,21 +1035,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. 
- -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index a90314f0..06fbf7b8 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -118,99 +118,20 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` -## Plan Mode Handshake — FIRST, BEFORE ANY ANALYSIS +## Plan Mode Safe Operations -**Check every `` in this turn for the literal phrase:** +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. -> `Plan mode is active. The user indicated that they do not want you to execute yet` - -If that phrase is **absent**: proceed normally. This section is a no-op. - -If that phrase is **present**, the user is in plan mode. Plan mode's system -reminder says "This supercedes any other instructions you have received," -which conflicts with this skill's interactive STOP-Ask workflow. You MUST -resolve the conflict via AskUserQuestion BEFORE reading any files, running -any bash, or composing any plan content. 
- -### What to do when plan mode is detected - -Before emitting the AskUserQuestion, run this bash block synchronously to -log that the handshake fired (captures A-exit and C-cancel outcomes that -would terminate the skill before end-of-skill telemetry runs): - -```bash -# PLAN MODE EXCEPTION — ALWAYS RUN (telemetry-only write to ~/.gstack/) -mkdir -p ~/.gstack/analytics -echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"fired","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true -``` - -Then emit exactly **one** AskUserQuestion with `question_id: "${SKILL_NAME}-plan-mode-handshake"` -(e.g., `plan-ceo-review-plan-mode-handshake`, using the current skill's name) -and these two options. The question is classified `door_type: one-way` in -the question registry for every interactive skill, so question-tuning -preferences (`never-ask`, `always-ask`) do NOT apply — this gate always fires. - -**Question body (follow the AskUserQuestion Format section below):** - -> This skill runs an interactive review that stops at every finding to ask -> you a question. Plan mode's default workflow is "read files, write plan, -> exit" — that silently bypasses every STOP gate in this skill. How do you -> want to proceed? -> -> **Recommendation: A** because this skill was designed for back-and-forth. -> Each scope call and each per-section finding needs your decision before it -> lands in the plan. Exiting plan mode and running the skill normally is the -> only path that preserves the interactive contract. 
-> -> *Note: options differ in kind (workflow shape), not coverage — no -> completeness score.* -> -> **A) Exit plan mode and run interactively (recommended)** -> ✅ Every STOP gate in this skill fires as designed — you approve each -> scope call, each per-section finding, each cross-model tension before any -> decision lands in the plan. No silent bypass. -> ✅ Matches the skill's documented workflow. Each AskUserQuestion has a -> clear recommendation, pros/cons, and net line you can skim in ~5 seconds. -> ❌ Two-step: press esc-esc to exit plan mode, then rerun -> `/plan-{skill-name}`. Slight context-switch friction, but the alternative -> is shipping a rubber-stamp review. -> -> **C) Cancel — I meant to run something else** -> ✅ Clean exit, no partial state, no plan file written, no findings -> recorded. Use this if you invoked the skill by mistake. -> ❌ No output at all — no review, no plan file. Fine if that's what you -> want; otherwise pick A. -> -> **Net.** Plan mode is incompatible with this skill's per-finding STOP -> gates. A is the right choice for any real review; C is the bail-out. - -### Routing the user's answer - -**If the user picks A (exit and rerun):** - -1. Append the outcome to the telemetry log (synchronous, before ExitPlanMode): - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"A-exit","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Respond to the user: "Press **esc-esc** to exit plan mode, then rerun - `/{skill-name}`. The skill will run interactively with every STOP gate - firing as designed." -3. Call `ExitPlanMode` with an empty plan body (plan mode requires - turn-end via AskUserQuestion or ExitPlanMode; there is no plan to - approve, so ExitPlanMode with an empty message is the correct exit). - -**If the user picks C (cancel):** - -1. 
Append the outcome: - ```bash - echo '{"skill":"'"${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"C-cancel","branch":"'"${_BRANCH:-unknown}"'","session":"'"${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - ``` -2. Tell the user: "Cancelled. No plan written." -3. Call `ExitPlanMode` with an empty message noting the user cancelled. - -**After the handshake completes (either A or C),** do NOT continue with the -rest of this skill's workflow. The handshake is terminal for this turn. +## Skill Invocation During Plan Mode +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly @@ -1112,21 +1033,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. 
- -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/plan-tune/SKILL.md b/plan-tune/SKILL.md index 5bd85c96..9e230445 100644 --- a/plan-tune/SKILL.md +++ b/plan-tune/SKILL.md @@ -125,6 +125,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. 
Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -1007,21 +1022,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md index 00c61143..bec8ff9a 100644 --- a/qa-only/SKILL.md +++ b/qa-only/SKILL.md @@ -113,6 +113,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -1013,21 +1028,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. 
-## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/qa/SKILL.md b/qa/SKILL.md index d4be2056..ba1d1e2d 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -119,6 +119,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. 
Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -1019,21 +1034,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/retro/SKILL.md b/retro/SKILL.md index 9a350180..34cb73e3 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -112,6 +112,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -994,21 +1009,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. 
-## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/review/SKILL.md b/review/SKILL.md index 6354a75b..a2092af9 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -116,6 +116,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. 
Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -1016,21 +1031,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/scripts/question-registry.ts b/scripts/question-registry.ts index 3d90222a..bae5950c 100644 --- a/scripts/question-registry.ts +++ b/scripts/question-registry.ts @@ -261,45 +261,6 @@ export const QUESTIONS = { description: "Approve the design doc, revise sections, or start over?", }, - // ----------------------------------------------------------------------- - // Plan-mode handshake — fires at the top of any interactive review skill - // when the user is in plan mode. Safety-critical, always asked regardless - // of user's tuning preferences. See scripts/resolvers/preamble/generate- - // plan-mode-handshake.ts. - // ----------------------------------------------------------------------- - 'plan-ceo-review-plan-mode-handshake': { - id: 'plan-ceo-review-plan-mode-handshake', - skill: 'plan-ceo-review', - category: 'routing', - door_type: 'one-way', - options: ['exit-and-rerun', 'cancel'], - description: "Plan mode detected — exit and rerun interactively, or cancel?", - }, - 'plan-eng-review-plan-mode-handshake': { - id: 'plan-eng-review-plan-mode-handshake', - skill: 'plan-eng-review', - category: 'routing', - door_type: 'one-way', - options: ['exit-and-rerun', 'cancel'], - description: "Plan mode detected — exit and rerun interactively, or cancel?", - }, - 'plan-design-review-plan-mode-handshake': { - id: 'plan-design-review-plan-mode-handshake', - skill: 'plan-design-review', - category: 'routing', - door_type: 'one-way', - options: ['exit-and-rerun', 'cancel'], - description: "Plan mode detected — exit and rerun interactively, or cancel?", - }, - 'plan-devex-review-plan-mode-handshake': { - id: 'plan-devex-review-plan-mode-handshake', - skill: 'plan-devex-review', - category: 'routing', - door_type: 'one-way', - options: ['exit-and-rerun', 'cancel'], - description: "Plan mode detected — exit and rerun interactively, or cancel?", - }, - // 
----------------------------------------------------------------------- // /plan-ceo-review — scope & strategy // ----------------------------------------------------------------------- diff --git a/scripts/resolvers/preamble.ts b/scripts/resolvers/preamble.ts index ac32f4a9..b866e90b 100644 --- a/scripts/resolvers/preamble.ts +++ b/scripts/resolvers/preamble.ts @@ -22,9 +22,11 @@ import { generateQuestionTuning } from './question-tuning'; // Core bootstrap import { generatePreambleBash } from './preamble/generate-preamble-bash'; -import { generatePlanModeHandshake } from './preamble/generate-plan-mode-handshake'; import { generateUpgradeCheck } from './preamble/generate-upgrade-check'; -import { generateCompletionStatus } from './preamble/generate-completion-status'; +import { + generateCompletionStatus, + generatePlanModeInfo, +} from './preamble/generate-completion-status'; // One-time onboarding prompts import { generateLakeIntro } from './preamble/generate-lake-intro'; @@ -79,13 +81,12 @@ export function generatePreamble(ctx: TemplateContext): string { } const sections = [ generatePreambleBash(ctx), - // Plan-mode handshake at position 1: after bash (so _SESSION_ID / _BRANCH / - // _TEL env vars are live for the synchronous telemetry write) and before - // all onboarding AskUserQuestion gates (so fresh-install users in plan mode - // see the handshake first, not drowned in telemetry / proactive / routing - // prompts). Host-scoped to Claude + interactive-frontmatter-scoped inside - // the resolver — no-op for every other skill/host combination. - generatePlanModeHandshake(ctx), + // Plan-mode-skill semantics at position 1: after bash (so _SESSION_ID / + // _BRANCH / _TEL env vars are live) and before all onboarding gates so + // models read the authoritative "AskUserQuestion satisfies plan mode's + // end-of-turn" rule before any other instruction. Renders for all skills + // (not interactive-gated); the text applies universally. 
+ generatePlanModeInfo(ctx), generateUpgradeCheck(ctx), generateWritingStyleMigration(ctx), generateLakeIntro(), diff --git a/scripts/resolvers/preamble/generate-completion-status.ts b/scripts/resolvers/preamble/generate-completion-status.ts index bbaac9c9..11d1c368 100644 --- a/scripts/resolvers/preamble/generate-completion-status.ts +++ b/scripts/resolvers/preamble/generate-completion-status.ts @@ -1,5 +1,41 @@ import type { TemplateContext } from '../types'; +/** + * Plan-mode-skill semantics block. + * + * Lives at the TOP of the preamble (position 1) so models read the authoritative + * plan-mode rule before any other instructions. Replaces the vestigial + * generate-plan-mode-handshake.ts that used to sit at this position and told + * interactive review skills to emit an exit-and-rerun handshake instead of + * running their interactive STOP-Ask workflow. + * + * Text is the same "Plan Mode Safe Operations" + "Skill Invocation During Plan + * Mode" blocks that previously lived at the tail of generateCompletionStatus(). + * Only the position changes. All skills (not just interactive: true) see this. + * + * Composition position: index 1 in scripts/resolvers/preamble.ts — after + * generatePreambleBash (so _SESSION_ID / _BRANCH / _TEL env vars exist before + * any plan-mode-aware telemetry) and before generateUpgradeCheck + onboarding + * gates. See ceo-plan 2026-04-24 "remove vestigial plan-mode handshake" for + * the full rationale. + */ +export function generatePlanModeInfo(_ctx: TemplateContext): string { + return `## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +\`$B\` (browse), \`$D\` (design), \`codex exec\`/\`codex review\`, writes to \`~/.gstack/\`, +writes to the plan file, \`open\` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. 
Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode).`; +} + export function generateCompletionStatus(ctx: TemplateContext): string { return `## Completion Status Protocol @@ -81,21 +117,6 @@ success/error/abort, and \`USED_BROWSE\` with true/false based on whether \`$B\` If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -\`$B\` (browse), \`$D\` (design), \`codex exec\`/\`codex review\`, writes to \`~/.gstack/\`, -writes to the plan file, \`open\` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a \`## GSTACK REVIEW REPORT\` diff --git a/scripts/resolvers/preamble/generate-plan-mode-handshake.ts b/scripts/resolvers/preamble/generate-plan-mode-handshake.ts deleted file mode 100644 index e1b81a05..00000000 --- a/scripts/resolvers/preamble/generate-plan-mode-handshake.ts +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Plan-mode handshake resolver. - * - * Emits a STOP-Ask gate at the very top of the preamble that fires when a user - * invokes an interactive review skill while their Claude Code session is in - * plan mode. Without this gate, plan mode's "This supercedes any other - * instructions you have received" system-reminder wins against the skill's - * interactive STOP-Ask workflow and the skill silently writes a plan file - * instead of running the per-finding AskUserQuestion loop (v1.10.2.0 bug fix). - * - * Host scope - * ---------- - * Only renders for Claude host (ctx.host === 'claude'). Other hosts use - * different plan-mode semantics (Codex, OpenClaw, etc.) and should not see - * Claude-specific ExitPlanMode / esc-esc prose. - * - * Opt-in - * ------ - * Only renders when the consuming skill's frontmatter has `interactive: true`. - * That flag is a generator-only input parsed by scripts/gen-skill-docs.ts - * from the skill's .tmpl frontmatter and passed through TemplateContext. - * Currently used by: plan-ceo-review, plan-eng-review, plan-design-review, - * plan-devex-review. - * - * Composition position - * -------------------- - * Inserted at index 1 in scripts/resolvers/preamble.ts — after - * generatePreambleBash (so _SESSION_ID, _BRANCH, _TEL env vars are live for - * the synchronous telemetry write) and before generateUpgradeCheck and all - * onboarding AskUserQuestion gates (so fresh-install users in plan mode see - * the handshake first, not drowned in telemetry / proactive / routing - * prompts). 
- * - * One-way door - * ------------ - * The handshake question_id `plan-mode-handshake` is classified door_type - * one-way in scripts/question-registry.ts. gstack-question-preference --check - * always returns ASK_NORMALLY for it, so a user who set `never-ask` on - * another question cannot accidentally suppress this safety gate. - */ - -import type { TemplateContext } from '../types'; - -export function generatePlanModeHandshake(ctx: TemplateContext): string { - if (ctx.host !== 'claude') return ''; - if (!ctx.interactive) return ''; - - return `## Plan Mode Handshake — FIRST, BEFORE ANY ANALYSIS - -**Check every \`\` in this turn for the literal phrase:** - -> \`Plan mode is active. The user indicated that they do not want you to execute yet\` - -If that phrase is **absent**: proceed normally. This section is a no-op. - -If that phrase is **present**, the user is in plan mode. Plan mode's system -reminder says "This supercedes any other instructions you have received," -which conflicts with this skill's interactive STOP-Ask workflow. You MUST -resolve the conflict via AskUserQuestion BEFORE reading any files, running -any bash, or composing any plan content. 
- -### What to do when plan mode is detected - -Before emitting the AskUserQuestion, run this bash block synchronously to -log that the handshake fired (captures A-exit and C-cancel outcomes that -would terminate the skill before end-of-skill telemetry runs): - -\`\`\`bash -# PLAN MODE EXCEPTION — ALWAYS RUN (telemetry-only write to ~/.gstack/) -mkdir -p ~/.gstack/analytics -echo '{"skill":"'"\${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"fired","branch":"'"\${_BRANCH:-unknown}"'","session":"'"\${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true -\`\`\` - -Then emit exactly **one** AskUserQuestion with \`question_id: "\${SKILL_NAME}-plan-mode-handshake"\` -(e.g., \`plan-ceo-review-plan-mode-handshake\`, using the current skill's name) -and these two options. The question is classified \`door_type: one-way\` in -the question registry for every interactive skill, so question-tuning -preferences (\`never-ask\`, \`always-ask\`) do NOT apply — this gate always fires. - -**Question body (follow the AskUserQuestion Format section below):** - -> This skill runs an interactive review that stops at every finding to ask -> you a question. Plan mode's default workflow is "read files, write plan, -> exit" — that silently bypasses every STOP gate in this skill. How do you -> want to proceed? -> -> **Recommendation: A** because this skill was designed for back-and-forth. -> Each scope call and each per-section finding needs your decision before it -> lands in the plan. Exiting plan mode and running the skill normally is the -> only path that preserves the interactive contract. 
-> -> *Note: options differ in kind (workflow shape), not coverage — no -> completeness score.* -> -> **A) Exit plan mode and run interactively (recommended)** -> ✅ Every STOP gate in this skill fires as designed — you approve each -> scope call, each per-section finding, each cross-model tension before any -> decision lands in the plan. No silent bypass. -> ✅ Matches the skill's documented workflow. Each AskUserQuestion has a -> clear recommendation, pros/cons, and net line you can skim in ~5 seconds. -> ❌ Two-step: press esc-esc to exit plan mode, then rerun -> \`/plan-{skill-name}\`. Slight context-switch friction, but the alternative -> is shipping a rubber-stamp review. -> -> **C) Cancel — I meant to run something else** -> ✅ Clean exit, no partial state, no plan file written, no findings -> recorded. Use this if you invoked the skill by mistake. -> ❌ No output at all — no review, no plan file. Fine if that's what you -> want; otherwise pick A. -> -> **Net.** Plan mode is incompatible with this skill's per-finding STOP -> gates. A is the right choice for any real review; C is the bail-out. - -### Routing the user's answer - -**If the user picks A (exit and rerun):** - -1. Append the outcome to the telemetry log (synchronous, before ExitPlanMode): - \`\`\`bash - echo '{"skill":"'"\${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"A-exit","branch":"'"\${_BRANCH:-unknown}"'","session":"'"\${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - \`\`\` -2. Respond to the user: "Press **esc-esc** to exit plan mode, then rerun - \`/{skill-name}\`. The skill will run interactively with every STOP gate - firing as designed." -3. Call \`ExitPlanMode\` with an empty plan body (plan mode requires - turn-end via AskUserQuestion or ExitPlanMode; there is no plan to - approve, so ExitPlanMode with an empty message is the correct exit). - -**If the user picks C (cancel):** - -1. 
Append the outcome: - \`\`\`bash - echo '{"skill":"'"\${_SKILL_NAME:-unknown}"'","event":"plan_mode_handshake","outcome":"C-cancel","branch":"'"\${_BRANCH:-unknown}"'","session":"'"\${_SESSION_ID:-unknown}"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true - \`\`\` -2. Tell the user: "Cancelled. No plan written." -3. Call \`ExitPlanMode\` with an empty message noting the user cancelled. - -**After the handshake completes (either A or C),** do NOT continue with the -rest of this skill's workflow. The handshake is terminal for this turn. -`; -} diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index 64890e09..d1fc1616 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -109,6 +109,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
+ If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -552,21 +567,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/setup-deploy/SKILL.md b/setup-deploy/SKILL.md index 519e7af6..256ea858 100644 --- a/setup-deploy/SKILL.md +++ b/setup-deploy/SKILL.md @@ -115,6 +115,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -997,21 +1012,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. 
-## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/setup-gbrain/SKILL.md b/setup-gbrain/SKILL.md index cc1c752e..9db96bd4 100644 --- a/setup-gbrain/SKILL.md +++ b/setup-gbrain/SKILL.md @@ -116,6 +116,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. 
Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -998,21 +1013,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/ship/SKILL.md b/ship/SKILL.md index 173628ff..75484152 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -117,6 +117,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -1017,21 +1032,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. 
-## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/test/e2e-harness-audit.test.ts b/test/e2e-harness-audit.test.ts index b517ef84..ce709cae 100644 --- a/test/e2e-harness-audit.test.ts +++ b/test/e2e-harness-audit.test.ts @@ -78,12 +78,12 @@ function findInteractiveSkills(): string[] { /** * Scan a test file's contents for the canUseTool-via-harness pattern. * Either: direct canUseTool usage in runAgentSdkTest, or usage of the - * shared plan-mode-handshake-helpers that wrap it. + * shared plan-mode-helpers that wrap it. 
*/ function hasCanUseToolCoverage(testFile: string): boolean { const content = fs.readFileSync(testFile, 'utf-8'); if (content.includes('canUseTool')) return true; - if (content.includes('runPlanModeHandshakeTest')) return true; + if (content.includes('runPlanModeSkillTest')) return true; return false; } diff --git a/test/fixtures/golden/claude-ship-SKILL.md b/test/fixtures/golden/claude-ship-SKILL.md index 173628ff..75484152 100644 --- a/test/fixtures/golden/claude-ship-SKILL.md +++ b/test/fixtures/golden/claude-ship-SKILL.md @@ -117,6 +117,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). 
If you would have auto-invoked a skill, instead briefly say: @@ -1017,21 +1032,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/test/fixtures/golden/codex-ship-SKILL.md b/test/fixtures/golden/codex-ship-SKILL.md index f3f4f7e8..99b93054 100644 --- a/test/fixtures/golden/codex-ship-SKILL.md +++ b/test/fixtures/golden/codex-ship-SKILL.md @@ -106,6 +106,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -1006,21 +1021,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. 
-## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). - ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/test/fixtures/golden/factory-ship-SKILL.md b/test/fixtures/golden/factory-ship-SKILL.md index 57241dcc..9a5e09b6 100644 --- a/test/fixtures/golden/factory-ship-SKILL.md +++ b/test/fixtures/golden/factory-ship-SKILL.md @@ -108,6 +108,21 @@ echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH" [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true ``` +## Plan Mode Safe Operations + +In plan mode, these are always allowed (they inform the plan, don't modify source): +`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, +writes to the plan file, `open` for generated artifacts. + +## Skill Invocation During Plan Mode + +If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step +by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. 
At a STOP +point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN +MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted +above or explicitly exception-marked. Call ExitPlanMode only after the skill +workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). + If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not auto-invoke skills based on conversation context. Only run skills the user explicitly types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: @@ -1008,21 +1023,6 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". The local JSONL always logs. The remote binary only runs if telemetry is not off and the binary exists. -## Plan Mode Safe Operations - -In plan mode, these are always allowed (they inform the plan, don't modify source): -`$B` (browse), `$D` (design), `codex exec`/`codex review`, writes to `~/.gstack/`, -writes to the plan file, `open` for generated artifacts. - -## Skill Invocation During Plan Mode - -If the user invokes a skill in plan mode, that skill takes precedence over generic plan mode behavior. Treat it as executable instructions, not reference. Follow step -by step. AskUserQuestion calls satisfy plan mode's end-of-turn requirement. At a STOP -point, stop immediately. Do not continue the workflow past a STOP point and do not call ExitPlanMode there. Commands marked "PLAN -MODE EXCEPTION — ALWAYS RUN" execute. Other writes need to be already permitted -above or explicitly exception-marked. Call ExitPlanMode only after the skill -workflow completes — only then call ExitPlanMode (or if the user tells you to cancel the skill or leave plan mode). 
- ## Plan Status Footer In plan mode, before ExitPlanMode: if the plan file lacks a `## GSTACK REVIEW REPORT` diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 60dc8ad9..8afc7b8e 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -2775,72 +2775,90 @@ describe('voice-triggers processing', () => { }); }); -describe('plan-mode handshake (interactive: true) resolver', () => { - const INTERACTIVE_SKILLS = [ +describe('plan-mode-info resolver (handshake-replacement)', () => { + const REVIEW_SKILLS = [ 'plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review', ]; + // Header for the vestigial handshake that was removed. If it ever reappears, + // someone accidentally re-introduced the resolver. const HANDSHAKE_MARKER = '## Plan Mode Handshake'; + // Header for the new plan-mode-info section (previously lived at the tail + // of completion-status.ts; now hoisted to position 1 of the preamble). + const PLAN_MODE_INFO_MARKER = '## Skill Invocation During Plan Mode'; - test.each(INTERACTIVE_SKILLS)( - '%s (Claude host) SKILL.md contains the handshake section', - (skill) => { - const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8'); - expect(content).toContain(HANDSHAKE_MARKER); - expect(content).toContain( - 'Plan mode is active. The user indicated that they do not want you to execute yet', - ); - }, - ); - - test('handshake is absent from non-interactive Claude skills', () => { - const nonInteractive = ['ship', 'review', 'qa', 'office-hours', 'codex', 'retro', 'cso']; - for (const skill of nonInteractive) { - const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8'); - expect(content).not.toContain(HANDSHAKE_MARKER); + test('vestigial handshake is absent from all generated Claude SKILL.md files', () => { + // Scan every generated SKILL.md under ROOT (top-level directory per skill). 
+ // Using fs.readdirSync + filter instead of a glob so we catch any skill + // that gets added later without updating this list. + const entries = fs.readdirSync(ROOT, { withFileTypes: true }); + let checked = 0; + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const skillMd = path.join(ROOT, entry.name, 'SKILL.md'); + if (!fs.existsSync(skillMd)) continue; + const content = fs.readFileSync(skillMd, 'utf-8'); + expect(content, `handshake marker in ${entry.name}/SKILL.md`).not.toContain(HANDSHAKE_MARKER); + checked++; } + expect(checked).toBeGreaterThan(0); }); - test('handshake is absent from non-Claude host outputs when present on disk', () => { + test('vestigial handshake is absent from non-Claude host outputs when present on disk', () => { // Non-Claude hosts render to hostSubdirs (.agents/, .openclaw/, etc). The - // handshake resolver returns '' when ctx.host !== 'claude', so those - // outputs must not contain the marker. The current gen-skill-docs layout - // prefixes skill names as `gstack-` under the hostSubdir; older - // layouts used `gstack/` (no prefix). Only stable-present paths - // are asserted — older ones may or may not exist per install history. - const candidateOutputs = [ - // Current prefixed layout - path.join(ROOT, '.agents', 'skills', 'gstack-plan-ceo-review', 'SKILL.md'), - path.join(ROOT, '.openclaw', 'skills', 'gstack-plan-ceo-review', 'SKILL.md'), - path.join(ROOT, '.opencode', 'skills', 'gstack-plan-ceo-review', 'SKILL.md'), - path.join(ROOT, '.factory', 'skills', 'gstack-plan-ceo-review', 'SKILL.md'), - path.join(ROOT, '.hermes', 'skills', 'gstack-plan-ceo-review', 'SKILL.md'), - ]; + // plan-mode-info resolver has no host-scoping — all hosts get the new + // section, none get the old handshake. Scan all candidate host dirs. 
+ const hostDirs = ['.agents', '.openclaw', '.opencode', '.factory', '.hermes', '.kiro', '.cursor', '.slate']; let checked = 0; - for (const out of candidateOutputs) { - if (fs.existsSync(out)) { - const content = fs.readFileSync(out, 'utf-8'); - expect(content).not.toContain(HANDSHAKE_MARKER); + for (const host of hostDirs) { + const skillsRoot = path.join(ROOT, host, 'skills'); + if (!fs.existsSync(skillsRoot)) continue; + const entries = fs.readdirSync(skillsRoot, { withFileTypes: true }); + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const skillMd = path.join(skillsRoot, entry.name, 'SKILL.md'); + if (!fs.existsSync(skillMd)) continue; + const content = fs.readFileSync(skillMd, 'utf-8'); + expect(content, `handshake marker in ${host}/skills/${entry.name}/SKILL.md`).not.toContain(HANDSHAKE_MARKER); checked++; } } - // At least one non-Claude host's output should exist after a full gen - // run; this test is meaningful only if we checked something. If no - // non-Claude outputs exist locally, the cross-host guarantee is still - // enforced by the resolver's ctx.host check; this test is belt-and- - // suspenders and becomes a no-op rather than a false positive. if (checked === 0) { // eslint-disable-next-line no-console console.warn( - 'plan-mode handshake: no non-Claude host outputs found for cross-host absence check — ' + + 'plan-mode-info: no non-Claude host outputs found for cross-host absence check — ' + 'run `bun run gen:skill-docs --host all` to populate', ); } }); + test.each(REVIEW_SKILLS)( + '%s/SKILL.md contains the new plan-mode-info section near the top', + (skill) => { + const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8'); + const idx = content.indexOf(PLAN_MODE_INFO_MARKER); + expect(idx).toBeGreaterThan(0); + // Position 1 in preamble composition = within the first ~300 lines. + // Roughly translates to first ~15KB of text. 
+ expect(idx).toBeLessThan(15_000); + }, + ); + + test('plan-mode-info is wired BEFORE generateUpgradeCheck in preamble', () => { + const content = fs.readFileSync( + path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), + 'utf-8', + ); + const planModeIdx = content.indexOf(PLAN_MODE_INFO_MARKER); + const upgradeIdx = content.indexOf('UPGRADE_AVAILABLE'); + expect(planModeIdx).toBeGreaterThan(0); + expect(upgradeIdx).toBeGreaterThan(0); + expect(planModeIdx).toBeLessThan(upgradeIdx); + }); + test('0C-bis STOP block present in plan-ceo-review/SKILL.md', () => { const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8'); const presentIdx = content.indexOf('Present these approach options via AskUserQuestion'); @@ -2851,16 +2869,4 @@ describe('plan-mode handshake (interactive: true) resolver', () => { expect(between).toContain('**STOP.**'); expect(between).toContain('Do NOT proceed to Step 0D or 0F until the user responds to 0C-bis'); }); - - test('handshake resolver is wired BEFORE generateUpgradeCheck in preamble', () => { - const content = fs.readFileSync( - path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), - 'utf-8', - ); - const handshakeIdx = content.indexOf(HANDSHAKE_MARKER); - const upgradeIdx = content.indexOf('UPGRADE_AVAILABLE'); - expect(handshakeIdx).toBeGreaterThan(0); - expect(upgradeIdx).toBeGreaterThan(0); - expect(handshakeIdx).toBeLessThan(upgradeIdx); - }); }); diff --git a/test/helpers/plan-mode-handshake-helpers.ts b/test/helpers/plan-mode-helpers.ts similarity index 52% rename from test/helpers/plan-mode-handshake-helpers.ts rename to test/helpers/plan-mode-helpers.ts index 581932be..cf0025b6 100644 --- a/test/helpers/plan-mode-handshake-helpers.ts +++ b/test/helpers/plan-mode-helpers.ts @@ -1,16 +1,18 @@ /** - * Shared helpers for plan-mode handshake E2E tests. + * Shared helpers for plan-mode E2E tests. 
* - * Four sibling test files (plan-ceo, plan-eng, plan-design, plan-devex) exercise - * the identical handshake contract against different skills. This helper - * centralizes the canUseTool interceptor and the assertion shape so the four - * test files are thin wiring (~40 LOC each) and can't drift out of sync. + * Four sibling per-skill smoke tests (plan-ceo, plan-eng, plan-design, plan-devex) + * plus the no-op regression test use this helper. The goal: run a review skill + * in plan mode, confirm it goes straight to its Step 0 AskUserQuestion without + * writing files or calling ExitPlanMode first (the vestigial handshake + * regression we fixed in ceo-plan 2026-04-24). * - * See scripts/resolvers/preamble/generate-plan-mode-handshake.ts for the - * handshake prose that the tests below assert against. + * This file was renamed from `plan-mode-handshake-helpers.ts` when the + * handshake was removed. The write-guard detection (no Write/Edit before the + * first AskUserQuestion) is the load-bearing piece that catches silent + * regressions a simple "first question text matches" check would miss. */ -import { expect } from 'bun:test'; import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; @@ -26,7 +28,7 @@ import { export const PLAN_MODE_REMINDER = 'Plan mode is active. The user indicated that they do not want you to execute yet'; -export interface HandshakeCaptureResult { +export interface PlanModeCaptureResult { sdkResult: AgentSdkResult; /** Each AskUserQuestion that fired, with its input payload. */ askUserQuestions: Array<{ input: Record; orderIndex: number }>; @@ -34,45 +36,46 @@ export interface HandshakeCaptureResult { toolOrder: string[]; /** Whether any Write or Edit tool fired BEFORE the first AskUserQuestion. */ writeOrEditBeforeAsk: boolean; + /** Whether ExitPlanMode fired BEFORE the first AskUserQuestion. */ + exitPlanModeBeforeAsk: boolean; } /** * Run a skill via the Agent SDK with canUseTool intercepting every tool use. 
- * Inject the plan-mode distinctive phrase into the system prompt and auto- - * answer the handshake with the given answerLabel ("Exit" or "Cancel"). Return - * the captured events for assertion. + * Inject the plan-mode distinctive phrase into the system prompt, auto-answer + * the first AskUserQuestion (so the skill stops cleanly after Step 0), and + * return the captured events for assertion. */ -export async function runPlanModeHandshakeTest(opts: { +export async function runPlanModeSkillTest(opts: { /** Skill name, e.g. 'plan-ceo-review'. */ skillName: string; - /** "Exit" to pick option A (exit-and-rerun) or "Cancel" for option C. */ - answerLabel: 'Exit' | 'Cancel'; + /** + * For the first AskUserQuestion, pick the option whose label contains this + * substring. Pick a "cheap" answer that terminates the skill quickly (e.g. + * "HOLD SCOPE" for plan-ceo-review). + */ + firstAnswerSubstring: string; /** If true, DO NOT inject the reminder — used by the no-op regression test. */ omitPlanModeReminder?: boolean; - /** Max turns for the SDK call (default 4 — handshake + exit should fit easily). */ + /** Max turns for the SDK call (default 4 — Step 0 + answer should fit). */ maxTurns?: number; -}): Promise { - const { skillName, answerLabel, omitPlanModeReminder, maxTurns } = opts; +}): Promise { + const { skillName, firstAnswerSubstring, omitPlanModeReminder, maxTurns } = opts; - const askUserQuestions: HandshakeCaptureResult['askUserQuestions'] = []; + const askUserQuestions: PlanModeCaptureResult['askUserQuestions'] = []; const toolOrder: string[] = []; let toolIndex = 0; let firstAskIndex = -1; const workingDir = fs.mkdtempSync( - path.join(os.tmpdir(), `plan-mode-handshake-${skillName}-`), + path.join(os.tmpdir(), `plan-mode-${skillName}-`), ); - // The SDK requires AskUserQuestion to be in the allowed tools list. The - // harness auto-adds it when canUseTool is supplied, but we also want Read - // so the skill can load its own file if it tries to. 
const binary = resolveClaudeBinary(); try { - // Inject the distinctive phrase into the system prompt by appending it to - // the default Claude Code preset. Claude Code's real plan mode uses an - // injected system-reminder; in SDK tests we use systemPrompt.append which - // the model treats as equally authoritative. + // In real plan mode Claude Code injects a system-reminder; in SDK tests we + // use systemPrompt.append which the model treats as equally authoritative. const reminderAppend = omitPlanModeReminder ? '' : `\n\n\n${PLAN_MODE_REMINDER}. This supercedes any other instructions you have received.\n\n`; @@ -100,9 +103,13 @@ export async function runPlanModeHandshakeTest(opts: { if (firstAskIndex === -1) firstAskIndex = toolIndex; askUserQuestions.push({ input, orderIndex: toolIndex }); toolIndex++; - // Auto-answer with the label the test specified. + // Auto-answer the FIRST question with the configured substring; for + // later questions, pick the first option to keep the run short. const q = (input.questions as Array<{ question: string; options: Array<{ label: string }> }>)[0]; - const matched = q.options.find((o) => o.label.includes(answerLabel)); + const isFirst = askUserQuestions.length === 1; + const matched = isFirst + ? q.options.find((o) => o.label.toLowerCase().includes(firstAnswerSubstring.toLowerCase())) + : undefined; const answer = matched ? 
matched.label : q.options[0]!.label; return { behavior: 'allow', @@ -121,7 +128,17 @@ export async function runPlanModeHandshakeTest(opts: { firstAskIndex > 0 && toolOrder.slice(0, firstAskIndex).some((t) => t === 'Write' || t === 'Edit'); - return { sdkResult, askUserQuestions, toolOrder, writeOrEditBeforeAsk }; + const exitPlanModeBeforeAsk = + firstAskIndex > 0 && + toolOrder.slice(0, firstAskIndex).some((t) => t === 'ExitPlanMode'); + + return { + sdkResult, + askUserQuestions, + toolOrder, + writeOrEditBeforeAsk, + exitPlanModeBeforeAsk, + }; } finally { try { fs.rmSync(workingDir, { recursive: true, force: true }); @@ -129,38 +146,31 @@ export async function runPlanModeHandshakeTest(opts: { } } -/** Assert the shape of a fired handshake AskUserQuestion. */ -export function assertHandshakeShape( +/** + * Assert a captured AskUserQuestion is NOT the old vestigial handshake + * (A=exit-and-rerun / C=cancel). The handshake is gone — if a test ever sees + * one again, that's the regression we're guarding against. + */ +export function assertNotHandshakeShape( aq: { input: Record }, ): void { const questions = aq.input.questions as Array<{ question: string; options: Array<{ label: string }>; }>; - expect(questions).toBeDefined(); - expect(questions.length).toBe(1); + if (!questions || questions.length === 0) return; const q = questions[0]!; - // D8 dropped Option B; handshake has exactly 2 options. - expect(q.options.length).toBe(2); - const labels = q.options.map((o) => o.label); - expect(labels.some((l) => l.includes('Exit'))).toBe(true); - expect(labels.some((l) => l.includes('Cancel'))).toBe(true); -} - -/** Read the skill-usage.jsonl log and return handshake entries. 
*/ -export function readHandshakeLog(): Array> { - const logPath = path.join(os.homedir(), '.gstack', 'analytics', 'skill-usage.jsonl'); - if (!fs.existsSync(logPath)) return []; - const lines = fs.readFileSync(logPath, 'utf-8').split('\n').filter(Boolean); - return lines - .map((line) => { - try { - return JSON.parse(line); - } catch { - return null; - } - }) - .filter((x): x is Record => x !== null && x.event === 'plan_mode_handshake'); + const labels = q.options.map((o) => o.label.toLowerCase()); + const looksLikeHandshake = + labels.some((l) => l.includes('exit') && l.includes('rerun')) && + labels.some((l) => l.includes('cancel')); + if (looksLikeHandshake) { + throw new Error( + `First AskUserQuestion looks like the vestigial plan-mode handshake ` + + `(options: ${labels.join(', ')}). The handshake was removed; skills ` + + `should go straight to their Step 0 question in plan mode.`, + ); + } } export { execSync }; diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index d039e771..abc76023 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -82,16 +82,16 @@ export const E2E_TOUCHFILES: Record = { 'plan-eng-review-artifact': ['plan-eng-review/**'], 'plan-review-report': ['plan-eng-review/**', 'scripts/gen-skill-docs.ts'], - // Plan-mode handshake (v1.10.2.0) — gate-tier safety regression tests. - // Each fires when any of: the interactive skill's template, the resolver, - // preamble composition, the Agent SDK harness, the question registry, or - // the one-way-door classifier changes. 
- 'plan-ceo-review-plan-mode': ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'scripts/resolvers/preamble.ts', 'scripts/question-registry.ts', 'scripts/one-way-doors.ts', 'test/helpers/agent-sdk-runner.ts'], - 'plan-eng-review-plan-mode': ['plan-eng-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'scripts/resolvers/preamble.ts', 'scripts/question-registry.ts', 'scripts/one-way-doors.ts', 'test/helpers/agent-sdk-runner.ts'], - 'plan-design-review-plan-mode-handshake': ['plan-design-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'scripts/resolvers/preamble.ts', 'scripts/question-registry.ts', 'scripts/one-way-doors.ts', 'test/helpers/agent-sdk-runner.ts'], - 'plan-devex-review-plan-mode': ['plan-devex-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'scripts/resolvers/preamble.ts', 'scripts/question-registry.ts', 'scripts/one-way-doors.ts', 'test/helpers/agent-sdk-runner.ts'], - 'plan-mode-no-op': ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts'], - 'e2e-harness-audit': ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-plan-mode-handshake.ts', 'test/helpers/agent-sdk-runner.ts'], + // Plan-mode smoke tests — gate-tier safety regression tests. Each fires when + // any of: the interactive skill's template, the plan-mode resolver + // (completion-status now owns generatePlanModeInfo), preamble composition, + // the Agent SDK harness, or the shared plan-mode-helpers change. 
+ 'plan-ceo-review-plan-mode': ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'], + 'plan-eng-review-plan-mode': ['plan-eng-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'], + 'plan-design-review-plan-mode': ['plan-design-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'], + 'plan-devex-review-plan-mode': ['plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'], + 'plan-mode-no-op': ['plan-ceo-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'scripts/resolvers/preamble.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'], + 'e2e-harness-audit': ['plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**', 'plan-devex-review/**', 'scripts/resolvers/preamble/generate-completion-status.ts', 'test/helpers/agent-sdk-runner.ts', 'test/helpers/plan-mode-helpers.ts'], 'brain-privacy-gate': ['scripts/resolvers/preamble/generate-brain-sync-block.ts', 'scripts/resolvers/preamble.ts', 'bin/gstack-brain-sync', 'bin/gstack-brain-init', 'bin/gstack-config', 'test/helpers/agent-sdk-runner.ts'], // AskUserQuestion format regression (RECOMMENDATION + Completeness: N/10) @@ -332,7 +332,7 @@ export const E2E_TIERS: Record = { // Plan-mode handshake — deterministic safety regression, gate-tier 'plan-ceo-review-plan-mode': 'gate', 'plan-eng-review-plan-mode': 'gate', - 'plan-design-review-plan-mode-handshake': 'gate', + 'plan-design-review-plan-mode': 'gate', 'plan-devex-review-plan-mode': 'gate', 
'plan-mode-no-op': 'gate', 'e2e-harness-audit': 'gate', diff --git a/test/skill-e2e-plan-ceo-plan-mode.test.ts b/test/skill-e2e-plan-ceo-plan-mode.test.ts index 858e07eb..8914967e 100644 --- a/test/skill-e2e-plan-ceo-plan-mode.test.ts +++ b/test/skill-e2e-plan-ceo-plan-mode.test.ts @@ -1,40 +1,38 @@ /** - * plan-ceo-review plan-mode handshake E2E (gate tier, paid). + * plan-ceo-review plan-mode smoke test (gate tier, paid). * * Asserts: when /plan-ceo-review is invoked with the plan-mode distinctive - * phrase in the system reminder, the skill fires AskUserQuestion FIRST - * (before any Write or Edit), the question has exactly 2 options (A exit, - * C cancel), picking "Exit" leads to an orderly exit with no plan file - * written. + * phrase in the system reminder, the skill goes STRAIGHT to its Step 0 + * scope-mode AskUserQuestion. Specifically: + * 1. First AskUserQuestion is NOT the old vestigial handshake + * (A=exit-and-rerun / C=cancel). + * 2. No Write or Edit tool fires before the first AskUserQuestion + * (catches silent plan-file-write bypass). + * 3. ExitPlanMode does not fire before the first AskUserQuestion. * * Cost: ~$0.50–$1.00 per run. Gated: EVALS=1 EVALS_TIER=gate. - * Depends on: scripts/resolvers/preamble/generate-plan-mode-handshake.ts, - * test/helpers/agent-sdk-runner.ts (canUseTool extension). */ import { describe, test, expect } from 'bun:test'; import { - runPlanModeHandshakeTest, - assertHandshakeShape, -} from './helpers/plan-mode-handshake-helpers'; + runPlanModeSkillTest, + assertNotHandshakeShape, +} from './helpers/plan-mode-helpers'; const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate'; const describeE2E = shouldRun ? 
describe : describe.skip; -describeE2E('plan-ceo-review plan-mode handshake (gate)', () => { - test('handshake fires before any Write/Edit when plan mode is detected', async () => { - const result = await runPlanModeHandshakeTest({ +describeE2E('plan-ceo-review plan-mode smoke (gate)', () => { + test('goes straight to scope-mode question, no handshake, no silent writes', async () => { + const result = await runPlanModeSkillTest({ skillName: 'plan-ceo-review', - answerLabel: 'Exit', + // Step 0 asks for review mode; HOLD is the cheapest, most-neutral answer. + firstAnswerSubstring: 'HOLD', }); - // Handshake must have fired at least once. expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1); - // Critically: no Write or Edit fired before the first AskUserQuestion. - // This is the bug v1.10.2.0 fixes — plan mode used to allow silent - // plan-file writes without any interactive gate. + assertNotHandshakeShape(result.askUserQuestions[0]!); expect(result.writeOrEditBeforeAsk).toBe(false); - // Handshake shape: 2 options (Exit/Cancel), Option B dropped per D8. - assertHandshakeShape(result.askUserQuestions[0]!); + expect(result.exitPlanModeBeforeAsk).toBe(false); }, 120_000); }); diff --git a/test/skill-e2e-plan-design-plan-mode.test.ts b/test/skill-e2e-plan-design-plan-mode.test.ts index 1fb7aaf5..98d050e1 100644 --- a/test/skill-e2e-plan-design-plan-mode.test.ts +++ b/test/skill-e2e-plan-design-plan-mode.test.ts @@ -1,28 +1,31 @@ /** - * plan-design-review plan-mode handshake E2E (gate tier, paid). + * plan-design-review plan-mode smoke test (gate tier, paid). * * See test/skill-e2e-plan-ceo-plan-mode.test.ts for the shared assertion - * contract. This file exercises the same handshake against /plan-design-review. + * contract. Exercises the same assertions against /plan-design-review. 
*/ import { describe, test, expect } from 'bun:test'; import { - runPlanModeHandshakeTest, - assertHandshakeShape, -} from './helpers/plan-mode-handshake-helpers'; + runPlanModeSkillTest, + assertNotHandshakeShape, +} from './helpers/plan-mode-helpers'; const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate'; const describeE2E = shouldRun ? describe : describe.skip; -describeE2E('plan-design-review plan-mode handshake (gate)', () => { - test('handshake fires before any Write/Edit when plan mode is detected', async () => { - const result = await runPlanModeHandshakeTest({ +describeE2E('plan-design-review plan-mode smoke (gate)', () => { + test('goes straight to first design question, no handshake, no silent writes', async () => { + const result = await runPlanModeSkillTest({ skillName: 'plan-design-review', - answerLabel: 'Cancel', // exercise the C-cancel branch instead of A-exit + // First question for design review varies; pick any reasonable match. + // The substring match falls back to the first option if no match. + firstAnswerSubstring: '7', }); expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1); + assertNotHandshakeShape(result.askUserQuestions[0]!); expect(result.writeOrEditBeforeAsk).toBe(false); - assertHandshakeShape(result.askUserQuestions[0]!); + expect(result.exitPlanModeBeforeAsk).toBe(false); }, 120_000); }); diff --git a/test/skill-e2e-plan-devex-plan-mode.test.ts b/test/skill-e2e-plan-devex-plan-mode.test.ts index 2ede50e2..64bc447e 100644 --- a/test/skill-e2e-plan-devex-plan-mode.test.ts +++ b/test/skill-e2e-plan-devex-plan-mode.test.ts @@ -1,28 +1,30 @@ /** - * plan-devex-review plan-mode handshake E2E (gate tier, paid). + * plan-devex-review plan-mode smoke test (gate tier, paid). * * See test/skill-e2e-plan-ceo-plan-mode.test.ts for the shared assertion - * contract. This file exercises the same handshake against /plan-devex-review. + * contract. Exercises the same assertions against /plan-devex-review. 
*/ import { describe, test, expect } from 'bun:test'; import { - runPlanModeHandshakeTest, - assertHandshakeShape, -} from './helpers/plan-mode-handshake-helpers'; + runPlanModeSkillTest, + assertNotHandshakeShape, +} from './helpers/plan-mode-helpers'; const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate'; const describeE2E = shouldRun ? describe : describe.skip; -describeE2E('plan-devex-review plan-mode handshake (gate)', () => { - test('handshake fires before any Write/Edit when plan mode is detected', async () => { - const result = await runPlanModeHandshakeTest({ +describeE2E('plan-devex-review plan-mode smoke (gate)', () => { + test('goes straight to DX-mode question, no handshake, no silent writes', async () => { + const result = await runPlanModeSkillTest({ skillName: 'plan-devex-review', - answerLabel: 'Exit', + // Step 0 asks for DX review mode; TRIAGE is the lightest-weight mode. + firstAnswerSubstring: 'TRIAGE', }); expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1); + assertNotHandshakeShape(result.askUserQuestions[0]!); expect(result.writeOrEditBeforeAsk).toBe(false); - assertHandshakeShape(result.askUserQuestions[0]!); + expect(result.exitPlanModeBeforeAsk).toBe(false); }, 120_000); }); diff --git a/test/skill-e2e-plan-eng-plan-mode.test.ts b/test/skill-e2e-plan-eng-plan-mode.test.ts index 16da9d7a..c57bbee4 100644 --- a/test/skill-e2e-plan-eng-plan-mode.test.ts +++ b/test/skill-e2e-plan-eng-plan-mode.test.ts @@ -1,28 +1,29 @@ /** - * plan-eng-review plan-mode handshake E2E (gate tier, paid). + * plan-eng-review plan-mode smoke test (gate tier, paid). * * See test/skill-e2e-plan-ceo-plan-mode.test.ts for the shared assertion - * contract. This file exercises the same handshake against /plan-eng-review. + * contract. This file exercises the same assertions against /plan-eng-review. 
*/ import { describe, test, expect } from 'bun:test'; import { - runPlanModeHandshakeTest, - assertHandshakeShape, -} from './helpers/plan-mode-handshake-helpers'; + runPlanModeSkillTest, + assertNotHandshakeShape, +} from './helpers/plan-mode-helpers'; const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate'; const describeE2E = shouldRun ? describe : describe.skip; -describeE2E('plan-eng-review plan-mode handshake (gate)', () => { - test('handshake fires before any Write/Edit when plan mode is detected', async () => { - const result = await runPlanModeHandshakeTest({ +describeE2E('plan-eng-review plan-mode smoke (gate)', () => { + test('goes straight to scope-mode question, no handshake, no silent writes', async () => { + const result = await runPlanModeSkillTest({ skillName: 'plan-eng-review', - answerLabel: 'Exit', + firstAnswerSubstring: 'HOLD', }); expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1); + assertNotHandshakeShape(result.askUserQuestions[0]!); expect(result.writeOrEditBeforeAsk).toBe(false); - assertHandshakeShape(result.askUserQuestions[0]!); + expect(result.exitPlanModeBeforeAsk).toBe(false); }, 120_000); }); diff --git a/test/skill-e2e-plan-mode-no-op.test.ts b/test/skill-e2e-plan-mode-no-op.test.ts index e222fbff..061e9cd8 100644 --- a/test/skill-e2e-plan-mode-no-op.test.ts +++ b/test/skill-e2e-plan-mode-no-op.test.ts @@ -1,41 +1,45 @@ /** - * Plan-mode handshake negative regression (gate tier, paid). + * Plan-mode-info no-op regression (gate tier, paid). * * Asserts: when /plan-ceo-review is invoked WITHOUT the plan-mode distinctive - * phrase in the system reminder, the handshake does NOT fire. The skill - * should proceed to its normal Step 0 flow. This is the REGRESSION RULE - * guardrail — the handshake must be a no-op outside plan mode or it breaks - * every existing interactive-review session. + * phrase in the system reminder, the plan-mode-info preamble section is a + * no-op. 
The skill should proceed to its normal Step 0 flow with no + * AskUserQuestion echoing or referencing the plan-mode reminder text. + * + * This guardrails the "outside plan mode, this block doesn't interfere" + * case — a different coverage case from the per-skill in-plan-mode smokes. + * If the plan-mode-info section ever starts misfiring for non-plan-mode + * sessions, this test catches it. * * Cost: ~$0.50 per run. Gated: EVALS=1 EVALS_TIER=gate. */ import { describe, test, expect } from 'bun:test'; import { - runPlanModeHandshakeTest, + runPlanModeSkillTest, PLAN_MODE_REMINDER, -} from './helpers/plan-mode-handshake-helpers'; +} from './helpers/plan-mode-helpers'; const shouldRun = !!process.env.EVALS && process.env.EVALS_TIER === 'gate'; const describeE2E = shouldRun ? describe : describe.skip; -describeE2E('plan-mode handshake no-op outside plan mode (gate regression)', () => { - test('handshake does NOT fire when distinctive phrase is absent', async () => { - const result = await runPlanModeHandshakeTest({ +describeE2E('plan-mode-info no-op outside plan mode (gate regression)', () => { + test('no AskUserQuestion echoes the plan-mode reminder when absent', async () => { + const result = await runPlanModeSkillTest({ skillName: 'plan-ceo-review', - answerLabel: 'Exit', // ignored — handshake should never fire + firstAnswerSubstring: 'HOLD', omitPlanModeReminder: true, - maxTurns: 3, // enough to see Step 0 start, but bounded + maxTurns: 3, }); - // The handshake AskUserQuestion should NOT have fired during Step 0 entry. - // Other AskUserQuestions may fire later in the skill (e.g., Step 0C-bis), - // but they will NOT have the handshake's question text. + // Skill should still hit Step 0 normally outside plan mode. + expect(result.askUserQuestions.length).toBeGreaterThanOrEqual(1); + + // No AskUserQuestion should echo the plan-mode distinctive phrase. + // If one does, the plan-mode-info section is leaking outside plan mode. 
for (const aq of result.askUserQuestions) { const questions = aq.input.questions as Array<{ question: string }>; for (const q of questions) { - // The handshake's question mentions the distinctive phrase in its - // prose; a non-handshake AskUserQuestion won't. expect(q.question).not.toContain(PLAN_MODE_REMINDER); } }