diff --git a/design-review/SKILL.md b/design-review/SKILL.md
index b87c509d..bac6b640 100644
--- a/design-review/SKILL.md
+++ b/design-review/SKILL.md
@@ -669,7 +669,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
-Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.**
+Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
diff --git a/qa/SKILL.md b/qa/SKILL.md
index edb475c9..657e0fe7 100644
--- a/qa/SKILL.md
+++ b/qa/SKILL.md
@@ -711,7 +711,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
-Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.**
+Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
diff --git a/scripts/resolvers/review-army.ts b/scripts/resolvers/review-army.ts
index 1240b839..516ce3c8 100644
--- a/scripts/resolvers/review-army.ts
+++ b/scripts/resolvers/review-army.ts
@@ -13,8 +13,8 @@ import type { TemplateContext } from './types';
function generateSpecialistSelection(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship';
- const stepSel = isShip ? '3.55' : '4.5';
- const stepMerge = isShip ? '3.56' : '4.6';
+ const stepSel = isShip ? '9.1' : '4.5';
+ const stepMerge = isShip ? '9.2' : '4.6';
const nextStep = isShip ? 'the Fix-First flow (item 4)' : 'Step 5';
return `## Step ${stepSel}: Review Army — Specialist Dispatch
@@ -134,10 +134,10 @@ CHECKLIST:
function generateFindingsMerge(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship';
- const stepMerge = isShip ? '3.56' : '4.6';
- const stepSel = isShip ? '3.55' : '4.5';
+ const stepMerge = isShip ? '9.2' : '4.6';
+ const stepSel = isShip ? '9.1' : '4.5';
const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First';
- const critPassRef = isShip ? 'the checklist pass (Step 3.5)' : 'the CRITICAL pass findings from Step 4';
+ const critPassRef = isShip ? 'the checklist pass (Step 9)' : 'the CRITICAL pass findings from Step 4';
const persistRef = isShip ? 'the review-log persist' : 'the review-log entry in Step 5.8';
return `### Step ${stepMerge}: Collect and merge findings
@@ -202,7 +202,7 @@ Remember these stats — you will need them for the review-log entry in Step 5.8
function generateRedTeam(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship';
- const stepMerge = isShip ? '3.56' : '4.6';
+ const stepMerge = isShip ? '9.2' : '4.6';
const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First';
return `### Red Team dispatch (conditional)
diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts
index cbc8053c..57c5596c 100644
--- a/scripts/resolvers/review.ts
+++ b/scripts/resolvers/review.ts
@@ -368,7 +368,7 @@ If A: revise the premise and note the revision. If B: proceed (and note that the
export function generateScopeDrift(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship';
- const stepNum = isShip ? '3.48' : '1.5';
+ const stepNum = isShip ? '8.2' : '1.5';
return `## Step ${stepNum}: Scope Drift Detection
@@ -413,7 +413,7 @@ export function generateAdversarialStep(ctx: TemplateContext): string {
if (ctx.host === 'codex') return '';
const isShip = ctx.skillName === 'ship';
- const stepNum = isShip ? '3.8' : '5.7';
+ const stepNum = isShip ? '11' : '5.7';
return `## Step ${stepNum}: Adversarial review (always-on)
@@ -501,7 +501,7 @@ A) Investigate and fix now (recommended)
B) Continue — review will still complete
\`\`\`
-If A: address the findings${isShip ? '. After fixing, re-run tests (Step 3) since code has changed' : ''}. Re-run \`codex review\` to verify.
+If A: address the findings${isShip ? '. After fixing, re-run tests (Step 5) since code has changed' : ''}. Re-run \`codex review\` to verify.
Read stderr for errors (same error handling as Codex adversarial above).
@@ -917,16 +917,16 @@ export function generatePlanCompletionAuditReview(_ctx: TemplateContext): string
// ─── Plan Verification Execution ──────────────────────────────────────
export function generatePlanVerificationExec(_ctx: TemplateContext): string {
- return `## Step 3.47: Plan Verification
+ return `## Step 8.1: Plan Verification
Automatically verify the plan's testing/verification steps using the \`/qa-only\` skill.
### 1. Check for verification section
-Using the plan file already discovered in Step 3.45, look for a verification section. Match any of these headings: \`## Verification\`, \`## Test plan\`, \`## Testing\`, \`## How to test\`, \`## Manual testing\`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test).
+Using the plan file already discovered in Step 8, look for a verification section. Match any of these headings: \`## Verification\`, \`## Test plan\`, \`## Testing\`, \`## How to test\`, \`## Manual testing\`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test).
**If no verification section found:** Skip with "No verification steps found in plan — skipping auto-verification."
-**If no plan file was found in Step 3.45:** Skip (already handled).
+**If no plan file was found in Step 8:** Skip (already handled).
### 2. Check for running dev server
@@ -971,7 +971,7 @@ Follow the /qa-only workflow with these modifications:
### 5. Include in PR body
-Add a \`## Verification Results\` section to the PR body (Step 8):
+Add a \`## Verification Results\` section to the PR body (Step 19):
- If verification ran: summary of results (N PASS, M FAIL, K SKIPPED)
- If skipped: reason for skipping (no plan, no server, no verification section)`;
}
@@ -980,9 +980,9 @@ Add a \`## Verification Results\` section to the PR body (Step 8):
export function generateCrossReviewDedup(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship';
- const stepNum = isShip ? '3.57' : '5.0';
+ const stepNum = isShip ? '9.3' : '5.0';
const findingsRef = isShip
- ? 'the checklist pass (Step 3.5) and specialist review (Step 3.55-3.56)'
+ ? 'the checklist pass (Step 9) and specialist review (Step 9.1-9.2)'
: 'Step 4 critical pass and Step 4.5-4.6 specialists';
return `### Step ${stepNum}: Cross-review finding dedup
diff --git a/scripts/resolvers/testing.ts b/scripts/resolvers/testing.ts
index da1381c2..f372aee1 100644
--- a/scripts/resolvers/testing.ts
+++ b/scripts/resolvers/testing.ts
@@ -28,7 +28,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
-Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.**
+Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
@@ -213,7 +213,7 @@ ls jest.config.* vitest.config.* playwright.config.* cypress.config.* .rspec pyt
ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
\`\`\`
-3. **If no framework detected:**${mode === 'ship' ? ' falls through to the Test Framework Bootstrap step (Step 2.5) which handles full setup.' : ' still produce the coverage diagram, but skip test generation.'}`);
+3. **If no framework detected:**${mode === 'ship' ? ' falls through to the Test Framework Bootstrap step (Step 4) which handles full setup.' : ' still produce the coverage diagram, but skip test generation.'}`);
// ── Before/after count (ship only) ──
if (mode === 'ship') {
@@ -379,7 +379,7 @@ GAPS: 8 paths need tests (2 need E2E, 1 needs eval)
─────────────────────────────────
\`\`\`
-**Fast path:** All paths covered → "${mode === 'ship' ? 'Step 3.4' : mode === 'review' ? 'Step 4.75' : 'Test review'}: All new code paths have test coverage ✓" Continue.`);
+**Fast path:** All paths covered → "${mode === 'ship' ? 'Step 7' : mode === 'review' ? 'Step 4.75' : 'Test review'}: All new code paths have test coverage ✓" Continue.`);
// ── Mode-specific action section ──
if (mode === 'plan') {
@@ -432,7 +432,7 @@ This file is consumed by \`/qa\` and \`/qa-only\` as primary test input. Include
sections.push(`
**5. Generate tests for uncovered paths:**
-If test framework detected (or bootstrapped in Step 2.5):
+If test framework detected (or bootstrapped in Step 4):
- Prioritize error handlers and edge cases first (happy paths are more likely already tested)
- Read 2-3 existing test files to match conventions exactly
- Generate unit tests. Mock all external dependencies (DB, API, Redis).
@@ -446,7 +446,7 @@ Caps: 30 code paths max, 20 tests generated max (code + user flow combined), 2-m
If no test framework AND user declined bootstrap → diagram only, no generation. Note: "Test generation skipped — no test framework configured."
-**Diff is test-only changes:** Skip Step 3.4 entirely: "No new application code paths to audit."
+**Diff is test-only changes:** Skip Step 7 entirely: "No new application code paths to audit."
**6. After-count and coverage summary:**
diff --git a/scripts/resolvers/utility.ts b/scripts/resolvers/utility.ts
index c3e6d690..83934b07 100644
--- a/scripts/resolvers/utility.ts
+++ b/scripts/resolvers/utility.ts
@@ -373,7 +373,7 @@ export function generateCoAuthorTrailer(ctx: TemplateContext): string {
}
export function generateChangelogWorkflow(_ctx: TemplateContext): string {
- return `## CHANGELOG (auto-generate)
+ return `## Step 13: CHANGELOG (auto-generate)
1. Read \`CHANGELOG.md\` header to know the format.
diff --git a/ship/SKILL.md b/ship/SKILL.md
index f3bfd626..a8b6a75e 100644
--- a/ship/SKILL.md
+++ b/ship/SKILL.md
@@ -602,17 +602,17 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
- Merge conflicts that can't be auto-resolved (stop, show conflicts)
- In-branch test failures (pre-existing failures are triaged, not auto-blocking)
- Pre-landing review finds ASK items that need user judgment
-- MINOR or MAJOR version bump needed (ask — see Step 4)
+- MINOR or MAJOR version bump needed (ask — see Step 12)
- Greptile review comments that need user decision (complex fixes, false positives)
-- AI-assessed coverage below minimum threshold (hard gate with user override — see Step 3.4)
-- Plan items NOT DONE with no user override (see Step 3.45)
-- Plan verification failures (see Step 3.47)
-- TODOS.md missing and user wants to create one (ask — see Step 5.5)
-- TODOS.md disorganized and user wants to reorganize (ask — see Step 5.5)
+- AI-assessed coverage below minimum threshold (hard gate with user override — see Step 7)
+- Plan items NOT DONE with no user override (see Step 8)
+- Plan verification failures (see Step 8.1)
+- TODOS.md missing and user wants to create one (ask — see Step 14)
+- TODOS.md disorganized and user wants to reorganize (ask — see Step 14)
**Never stop for:**
- Uncommitted changes (always include them)
-- Version bump choice (auto-pick MICRO or PATCH — see Step 4)
+- Version bump choice (auto-pick MICRO or PATCH — see Step 12)
- CHANGELOG content (auto-generate from diff)
- Commit message approval (auto-commit)
- Multi-file changesets (auto-split into bisectable commits)
@@ -625,9 +625,9 @@ Re-running `/ship` means "run the whole checklist again." Every verification ste
(tests, coverage audit, plan completion, pre-landing review, adversarial review,
VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation.
Only *actions* are idempotent:
-- Step 4: If VERSION already bumped, skip the bump but still read the version
-- Step 7: If already pushed, skip the push command
-- Step 8: If PR exists, update the body instead of creating a new PR
+- Step 12: If VERSION already bumped, skip the bump but still read the version
+- Step 17: If already pushed, skip the push command
+- Step 19: If PR exists, update the body instead of creating a new PR
Never skip a verification step because a prior `/ship` run already performed it.
---
@@ -695,19 +695,19 @@ Display:
If the Eng Review is NOT "CLEAR":
-Print: "No prior eng review found — ship will run its own pre-landing review in Step 3.5."
+Print: "No prior eng review found — ship will run its own pre-landing review in Step 9."
Check diff size: `git diff ...HEAD --stat | tail -1`. If the diff is >200 lines, add: "Note: This is a large diff. Consider running `/plan-eng-review` or `/autoplan` for architecture-level review before shipping."
If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block.
-For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 3.5, but consider running /design-review for a full visual audit post-implementation." Still never block.
+For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 9, but consider running /design-review for a full visual audit post-implementation." Still never block.
-Continue to Step 1.5 — do NOT block or ask. Ship runs its own review in Step 3.5.
+Continue to Step 2 — do NOT block or ask. Ship runs its own review in Step 9.
---
-## Step 1.5: Distribution Pipeline Check
+## Step 2: Distribution Pipeline Check
If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web
service with existing deployment — verify that a distribution pipeline exists.
@@ -735,7 +735,7 @@ service with existing deployment — verify that a distribution pipeline exists.
---
-## Step 2: Merge the base branch (BEFORE tests)
+## Step 3: Merge the base branch (BEFORE tests)
Fetch and merge the base branch into the feature branch so tests run against the merged state:
@@ -749,7 +749,7 @@ git fetch origin && git merge origin/ --no-edit
---
-## Step 2.5: Test Framework Bootstrap
+## Step 4: Test Framework Bootstrap
## Test Framework Bootstrap
@@ -778,7 +778,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
-Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.**
+Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
@@ -907,7 +907,7 @@ Only commit if there are changes. Stage all bootstrap files (config, test direct
---
-## Step 3: Run tests (on merged code)
+## Step 5: Run tests (on merged code)
**Do NOT run `RAILS_ENV=test bin/rails db:migrate`** — `bin/test-lane` already calls
`db:test:prepare` internally, which loads the schema into the correct lane database.
@@ -1029,13 +1029,13 @@ Use AskUserQuestion:
- Continue with the workflow.
- Note in output: "Pre-existing test failure skipped: "
-**After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 3.25.
+**After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 6.
**If all pass:** Continue silently — just note the counts briefly.
---
-## Step 3.25: Eval Suites (conditional)
+## Step 6: Eval Suites (conditional)
Evals are mandatory when prompt-related files change. Skip this step entirely if no prompt files are in the diff.
@@ -1054,7 +1054,7 @@ Match against these patterns (from CLAUDE.md):
- `config/system_prompts/*.txt`
- `test/evals/**/*` (eval infrastructure changes affect all suites)
-**If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 3.5.
+**If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 9.
**2. Identify affected eval suites:**
@@ -1084,9 +1084,9 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
**4. Check results:**
- **If any eval fails:** Show the failures, the cost dashboard, and **STOP**. Do not proceed.
-- **If all pass:** Note pass counts and cost. Continue to Step 3.5.
+- **If all pass:** Note pass counts and cost. Continue to Step 9.
-**5. Save eval output** — include eval results and cost dashboard in the PR body (Step 8).
+**5. Save eval output** — include eval results and cost dashboard in the PR body (Step 19).
**Tier reference (for context — /ship always uses `full`):**
| Tier | When | Speed (cached) | Cost |
@@ -1097,7 +1097,7 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
---
-## Step 3.4: Test Coverage Audit
+## Step 7: Test Coverage Audit
100% coverage is the goal — every untested path is a path where bugs hide and vibe coding becomes yolo coding. Evaluate what was ACTUALLY coded (from the diff), not what was planned.
@@ -1121,7 +1121,7 @@ ls jest.config.* vitest.config.* playwright.config.* cypress.config.* .rspec pyt
ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
```
-3. **If no framework detected:** falls through to the Test Framework Bootstrap step (Step 2.5) which handles full setup.
+3. **If no framework detected:** falls through to the Test Framework Bootstrap step (Step 4) which handles full setup.
**0. Before/after test count:**
@@ -1263,11 +1263,11 @@ GAPS: 8 paths need tests (2 need E2E, 1 needs eval)
─────────────────────────────────
```
-**Fast path:** All paths covered → "Step 3.4: All new code paths have test coverage ✓" Continue.
+**Fast path:** All paths covered → "Step 7: All new code paths have test coverage ✓" Continue.
**5. Generate tests for uncovered paths:**
-If test framework detected (or bootstrapped in Step 2.5):
+If test framework detected (or bootstrapped in Step 4):
- Prioritize error handlers and edge cases first (happy paths are more likely already tested)
- Read 2-3 existing test files to match conventions exactly
- Generate unit tests. Mock all external dependencies (DB, API, Redis).
@@ -1281,7 +1281,7 @@ Caps: 30 code paths max, 20 tests generated max (code + user flow combined), 2-m
If no test framework AND user declined bootstrap → diagram only, no generation. Note: "Test generation skipped — no test framework configured."
-**Diff is test-only changes:** Skip Step 3.4 entirely: "No new application code paths to audit."
+**Diff is test-only changes:** Skip Step 7 entirely: "No new application code paths to audit."
**6. After-count and coverage summary:**
@@ -1359,7 +1359,7 @@ Repo: {owner/repo}
---
-## Step 3.45: Plan Completion Audit
+## Step 8: Plan Completion Audit
### Plan File Discovery
@@ -1480,16 +1480,16 @@ After producing the completion checklist:
---
-## Step 3.47: Plan Verification
+## Step 8.1: Plan Verification
Automatically verify the plan's testing/verification steps using the `/qa-only` skill.
### 1. Check for verification section
-Using the plan file already discovered in Step 3.45, look for a verification section. Match any of these headings: `## Verification`, `## Test plan`, `## Testing`, `## How to test`, `## Manual testing`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test).
+Using the plan file already discovered in Step 8, look for a verification section. Match any of these headings: `## Verification`, `## Test plan`, `## Testing`, `## How to test`, `## Manual testing`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test).
**If no verification section found:** Skip with "No verification steps found in plan — skipping auto-verification."
-**If no plan file was found in Step 3.45:** Skip (already handled).
+**If no plan file was found in Step 8:** Skip (already handled).
### 2. Check for running dev server
@@ -1534,7 +1534,7 @@ Follow the /qa-only workflow with these modifications:
### 5. Include in PR body
-Add a `## Verification Results` section to the PR body (Step 8):
+Add a `## Verification Results` section to the PR body (Step 19):
- If verification ran: summary of results (N PASS, M FAIL, K SKIPPED)
- If skipped: reason for skipping (no plan, no server, no verification section)
@@ -1576,7 +1576,7 @@ matches a past learning, display:
This makes the compounding visible. The user should see that gstack is getting
smarter on their codebase over time.
-## Step 3.48: Scope Drift Detection
+## Step 8.2: Scope Drift Detection
Before reviewing code quality, check: **did they build what was requested — nothing more, nothing less?**
@@ -1613,7 +1613,7 @@ Before reviewing code quality, check: **did they build what was requested — no
---
-## Step 3.5: Pre-Landing Review
+## Step 9: Pre-Landing Review
Review the diff for structural issues that tests don't catch.
@@ -1708,7 +1708,7 @@ Present Codex output under a `CODEX (design):` header, merged with the checklist
Include any design findings alongside the code review findings. They follow the same Fix-First flow below.
-## Step 3.55: Review Army — Specialist Dispatch
+## Step 9.1: Review Army — Specialist Dispatch
### Detect stack and scope
@@ -1825,7 +1825,7 @@ CHECKLIST:
---
-### Step 3.56: Collect and merge findings
+### Step 9.2: Collect and merge findings
After all specialist subagents complete, collect their outputs.
@@ -1871,7 +1871,7 @@ SPECIALIST REVIEW: N findings (X critical, Y informational) from Z specialists
PR Quality Score: X/10
```
-These findings flow into the Fix-First flow (item 4) alongside the checklist pass (Step 3.5).
+These findings flow into the Fix-First flow (item 4) alongside the checklist pass (Step 9).
The Fix-First heuristic applies identically — specialist findings follow the same AUTO-FIX vs ASK classification.
**Compile per-specialist stats:**
@@ -1895,7 +1895,7 @@ If activated, dispatch one more subagent via the Agent tool (foreground, not bac
The Red Team subagent receives:
1. The red-team checklist from `~/.claude/skills/gstack/review/specialists/red-team.md`
-2. The merged specialist findings from Step 3.56 (so it knows what was already caught)
+2. The merged specialist findings from Step 9.2 (so it knows what was already caught)
3. The git diff command
Prompt: "You are a red team reviewer. The code has already been reviewed by N specialists
@@ -1911,7 +1911,7 @@ the Fix-First flow (item 4). Red Team findings are tagged with `"specialist":"re
If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found."
If the Red Team subagent fails or times out, skip silently and continue.
-### Step 3.57: Cross-review finding dedup
+### Step 9.3: Cross-review finding dedup
Before classifying findings, check if any were previously skipped by the user in a prior review on this branch.
@@ -1931,7 +1931,7 @@ If skipped fingerprints exist, get the list of files changed since that review:
git diff --name-only HEAD
```
-For each current finding (from both the checklist pass (Step 3.5) and specialist review (Step 3.55-3.56)), check:
+For each current finding (from both the checklist pass (Step 9) and specialist review (Step 9.1-9.2)), check:
- Does its fingerprint match a previously skipped finding?
- Is the finding's file path NOT in the changed-files set?
@@ -1945,7 +1945,7 @@ If no prior reviews exist or none have a `findings` array, skip this step silent
Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)`
-4. **Classify each finding from both the checklist pass and specialist review (Step 3.55-3.56) as AUTO-FIX or ASK** per the Fix-First Heuristic in
+4. **Classify each finding from both the checklist pass and specialist review (Step 9.1-Step 9.2) as AUTO-FIX or ASK** per the Fix-First Heuristic in
checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX.
5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix:
@@ -1959,7 +1959,7 @@ Output a summary header: `Pre-Landing Review: N issues (X critical, Y informatio
7. **After all fixes (auto + user-approved):**
- If ANY fixes were applied: commit fixed files by name (`git add && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test.
- - If no fixes applied (all ASK items skipped, or no issues found): continue to Step 4.
+ - If no fixes applied (all ASK items skipped, or no issues found): continue to Step 12.
8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)`
@@ -1971,19 +1971,19 @@ Output a summary header: `Pre-Landing Review: N issues (X critical, Y informatio
```
Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise),
and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs.
-- `quality_score` = the PR Quality Score computed in Step 3.56 (e.g., 7.5). If specialists were skipped (small diff), use `10.0`
-- `specialists` = the per-specialist stats object compiled in Step 3.56. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}`
+- `quality_score` = the PR Quality Score computed in Step 9.2 (e.g., 7.5). If specialists were skipped (small diff), use `10.0`
+- `specialists` = the per-specialist stats object compiled in Step 9.2. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}`
- `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip).
-Save the review output — it goes into the PR body in Step 8.
+Save the review output — it goes into the PR body in Step 19.
---
-## Step 3.75: Address Greptile review comments (if PR exists)
+## Step 10: Address Greptile review comments (if PR exists)
Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
-**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 4.
+**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 12.
**If Greptile comments are found:**
@@ -2014,11 +2014,11 @@ For each classified comment:
**SUPPRESSED:** Skip silently — these are known false positives from previous triage.
-**After all comments are resolved:** If any fixes were applied, the tests from Step 3 are now stale. **Re-run tests** (Step 3) before continuing to Step 4. If no fixes were applied, continue to Step 4.
+**After all comments are resolved:** If any fixes were applied, the tests from Step 5 are now stale. **Re-run tests** (Step 5) before continuing to Step 12. If no fixes were applied, continue to Step 12.
---
-## Step 3.8: Adversarial review (always-on)
+## Step 11: Adversarial review (always-on)
Every diff gets adversarial review from both Claude and Codex. LOC is not a proxy for risk — a 5-line auth change can be critical.
@@ -2104,7 +2104,7 @@ A) Investigate and fix now (recommended)
B) Continue — review will still complete
```
-If A: address the findings. After fixing, re-run tests (Step 3) since code has changed. Re-run `codex review` to verify.
+If A: address the findings. After fixing, re-run tests (Step 5) since code has changed. Re-run `codex review` to verify.
Read stderr for errors (same error handling as Codex adversarial above).
@@ -2168,7 +2168,7 @@ staleness detection: if those files are later deleted, the learning can be flagg
**Only log genuine discoveries.** Don't log obvious things. Don't log things the user
already knows. A good test: would this insight save time in a future session? If yes, log it.
-## Step 4: Version bump (auto-decide)
+## Step 12: Version bump (auto-decide)
**Idempotency check:** Before bumping, compare VERSION against the base branch.
@@ -2199,7 +2199,7 @@ If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (pri
---
-## CHANGELOG (auto-generate)
+## Step 13: CHANGELOG (auto-generate)
1. Read `CHANGELOG.md` header to know the format.
@@ -2243,7 +2243,7 @@ If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (pri
---
-## Step 5.5: TODOS.md (auto-update)
+## Step 14: TODOS.md (auto-update)
Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized.
@@ -2255,7 +2255,7 @@ Read `.claude/skills/review/TODOS-format.md` for the canonical format reference.
- Message: "GStack recommends maintaining a TODOS.md organized by skill/component, then priority (P0 at top through P4, then Completed at bottom). See TODOS-format.md for the full format. Would you like to create one?"
- Options: A) Create it now, B) Skip for now
- If A: Create `TODOS.md` with a skeleton (# TODOS heading + ## Completed section). Continue to step 3.
-- If B: Skip the rest of Step 5.5. Continue to Step 6.
+- If B: Skip the rest of Step 14. Continue to Step 15.
**2. Check structure and organization:**
@@ -2294,11 +2294,11 @@ For each TODO item, check if the changes in this PR complete it by:
**6. Defensive:** If TODOS.md cannot be written (permission error, disk full), warn the user and continue. Never stop the ship workflow for a TODOS failure.
-Save this summary — it goes into the PR body in Step 8.
+Save this summary — it goes into the PR body in Step 19.
---
-## Step 6: Commit (bisectable chunks)
+## Step 15: Commit (bisectable chunks)
**Goal:** Create small, logical commits that work well with `git bisect` and help LLMs understand what changed.
@@ -2336,13 +2336,13 @@ EOF
---
-## Step 6.5: Verification Gate
+## Step 16: Verification Gate
**IRON LAW: NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE.**
Before pushing, re-verify if code changed during Steps 4-6:
-1. **Test verification:** If ANY code changed after Step 3's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 3 is NOT acceptable.
+1. **Test verification:** If ANY code changed after Step 5's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 5 is NOT acceptable.
2. **Build verification:** If the project has a build step, run it. Paste output.
@@ -2352,13 +2352,13 @@ Before pushing, re-verify if code changed during Steps 4-6:
- "I already tested earlier" → Code changed since then. Test again.
- "It's a trivial change" → Trivial changes break production.
-**If tests fail here:** STOP. Do not push. Fix the issue and return to Step 3.
+**If tests fail here:** STOP. Do not push. Fix the issue and return to Step 5.
Claiming work is complete without verification is dishonesty, not efficiency.
---
-## Step 7: Push
+## Step 17: Push
**Idempotency check:** Check if the branch is already pushed and up to date.
@@ -2370,7 +2370,7 @@ echo "LOCAL: $LOCAL REMOTE: $REMOTE"
[ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED"
```
-If `ALREADY_PUSHED`, skip the push but continue to Step 8. Otherwise push with upstream tracking:
+If `ALREADY_PUSHED`, skip the push but continue to Step 19. Otherwise push with upstream tracking:
```bash
git push -u origin
@@ -2378,7 +2378,7 @@ git push -u origin
---
-## Step 8: Create PR/MR
+## Step 19: Create PR/MR
**Idempotency check:** Check if a PR/MR already exists for this branch.
@@ -2392,7 +2392,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number):
glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR"
```
-If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 8.5.
+If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 18.
If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0.
@@ -2408,11 +2408,11 @@ must appear in at least one section. If a commit's work isn't reflected in the s
you missed it.>
## Test Coverage
-
-
+
+
## Pre-Landing Review
-
+
## Design Review
@@ -2424,19 +2424,19 @@ you missed it.>
## Greptile Review
-
+
## Scope Drift
## Plan Completion
-
+
## Verification Results
-
+
@@ -2474,11 +2474,11 @@ EOF
**If neither CLI is available:**
Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready.
-**Output the PR/MR URL** — then proceed to Step 8.5.
+**Output the PR/MR URL** — then proceed to Step 18.
---
-## Step 8.5: Auto-invoke /document-release
+## Step 18: Auto-invoke /document-release
After the PR is created, automatically sync project documentation. Read the
`document-release/SKILL.md` skill file (adjacent to this skill's directory) and
@@ -2497,11 +2497,11 @@ execute its full workflow:
This step is automatic. Do not ask the user for confirmation. The goal is zero-friction
doc updates — the user runs `/ship` and documentation stays current without a separate command.
-If Step 8.5 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release.
+If Step 18 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release.
---
-## Step 8.75: Persist ship metrics
+## Step 20: Persist ship metrics
Log coverage and plan completion data so `/retro` can track trends:
@@ -2516,10 +2516,10 @@ echo '{"skill":"ship","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","coverage
```
Substitute from earlier steps:
-- **COVERAGE_PCT**: coverage percentage from Step 3.4 diagram (integer, or -1 if undetermined)
-- **PLAN_TOTAL**: total plan items extracted in Step 3.45 (0 if no plan file)
-- **PLAN_DONE**: count of DONE + CHANGED items from Step 3.45 (0 if no plan file)
-- **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 3.47
+- **COVERAGE_PCT**: coverage percentage from Step 7 diagram (integer, or -1 if undetermined)
+- **PLAN_TOTAL**: total plan items extracted in Step 8 (0 if no plan file)
+- **PLAN_DONE**: count of DONE + CHANGED items from Step 8 (0 if no plan file)
+- **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 8.1
- **VERSION**: from the VERSION file
- **BRANCH**: current branch name
@@ -2538,6 +2538,6 @@ This step is automatic — never skip it, never ask for confirmation.
- **Split commits for bisectability** — each commit = one logical change.
- **TODOS.md completion detection must be conservative.** Only mark items as completed when the diff clearly shows the work is done.
- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence (inline diff, code references, re-rank suggestion). Never post vague replies.
-- **Never push without fresh verification evidence.** If code changed after Step 3 tests, re-run before pushing.
-- **Step 3.4 generates coverage tests.** They must pass before committing. Never commit failing tests.
+- **Never push without fresh verification evidence.** If code changed after Step 5 tests, re-run before pushing.
+- **Step 7 generates coverage tests.** They must pass before committing. Never commit failing tests.
- **The goal is: user says `/ship`, next thing they see is the review + PR URL + auto-synced docs.**
diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl
index 76e4873d..9de170d7 100644
--- a/ship/SKILL.md.tmpl
+++ b/ship/SKILL.md.tmpl
@@ -34,17 +34,17 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
- Merge conflicts that can't be auto-resolved (stop, show conflicts)
- In-branch test failures (pre-existing failures are triaged, not auto-blocking)
- Pre-landing review finds ASK items that need user judgment
-- MINOR or MAJOR version bump needed (ask — see Step 4)
+- MINOR or MAJOR version bump needed (ask — see Step 12)
- Greptile review comments that need user decision (complex fixes, false positives)
-- AI-assessed coverage below minimum threshold (hard gate with user override — see Step 3.4)
-- Plan items NOT DONE with no user override (see Step 3.45)
-- Plan verification failures (see Step 3.47)
-- TODOS.md missing and user wants to create one (ask — see Step 5.5)
-- TODOS.md disorganized and user wants to reorganize (ask — see Step 5.5)
+- AI-assessed coverage below minimum threshold (hard gate with user override — see Step 7)
+- Plan items NOT DONE with no user override (see Step 8)
+- Plan verification failures (see Step 8.1)
+- TODOS.md missing and user wants to create one (ask — see Step 14)
+- TODOS.md disorganized and user wants to reorganize (ask — see Step 14)
**Never stop for:**
- Uncommitted changes (always include them)
-- Version bump choice (auto-pick MICRO or PATCH — see Step 4)
+- Version bump choice (auto-pick MICRO or PATCH — see Step 12)
- CHANGELOG content (auto-generate from diff)
- Commit message approval (auto-commit)
- Multi-file changesets (auto-split into bisectable commits)
@@ -57,9 +57,9 @@ Re-running `/ship` means "run the whole checklist again." Every verification ste
(tests, coverage audit, plan completion, pre-landing review, adversarial review,
VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation.
Only *actions* are idempotent:
-- Step 4: If VERSION already bumped, skip the bump but still read the version
-- Step 7: If already pushed, skip the push command
-- Step 8: If PR exists, update the body instead of creating a new PR
+- Step 12: If VERSION already bumped, skip the bump but still read the version
+- Step 17: If already pushed, skip the push command
+- Step 19: If PR exists, update the body instead of creating a new PR
Never skip a verification step because a prior `/ship` run already performed it.
---
@@ -78,19 +78,19 @@ Never skip a verification step because a prior `/ship` run already performed it.
If the Eng Review is NOT "CLEAR":
-Print: "No prior eng review found — ship will run its own pre-landing review in Step 3.5."
+Print: "No prior eng review found — ship will run its own pre-landing review in Step 9."
Check diff size: `git diff ...HEAD --stat | tail -1`. If the diff is >200 lines, add: "Note: This is a large diff. Consider running `/plan-eng-review` or `/autoplan` for architecture-level review before shipping."
If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block.
-For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 3.5, but consider running /design-review for a full visual audit post-implementation." Still never block.
+For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 9, but consider running /design-review for a full visual audit post-implementation." Still never block.
-Continue to Step 1.5 — do NOT block or ask. Ship runs its own review in Step 3.5.
+Continue to Step 2 — do NOT block or ask. Ship runs its own review in Step 9.
---
-## Step 1.5: Distribution Pipeline Check
+## Step 2: Distribution Pipeline Check
If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web
service with existing deployment — verify that a distribution pipeline exists.
@@ -118,7 +118,7 @@ service with existing deployment — verify that a distribution pipeline exists.
---
-## Step 2: Merge the base branch (BEFORE tests)
+## Step 3: Merge the base branch (BEFORE tests)
Fetch and merge the base branch into the feature branch so tests run against the merged state:
@@ -132,13 +132,13 @@ git fetch origin && git merge origin/ --no-edit
---
-## Step 2.5: Test Framework Bootstrap
+## Step 4: Test Framework Bootstrap
{{TEST_BOOTSTRAP}}
---
-## Step 3: Run tests (on merged code)
+## Step 5: Run tests (on merged code)
**Do NOT run `RAILS_ENV=test bin/rails db:migrate`** — `bin/test-lane` already calls
`db:test:prepare` internally, which loads the schema into the correct lane database.
@@ -158,13 +158,13 @@ After both complete, read the output files and check pass/fail.
{{TEST_FAILURE_TRIAGE}}
-**After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 3.25.
+**After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 6.
**If all pass:** Continue silently — just note the counts briefly.
---
-## Step 3.25: Eval Suites (conditional)
+## Step 6: Eval Suites (conditional)
Evals are mandatory when prompt-related files change. Skip this step entirely if no prompt files are in the diff.
@@ -183,7 +183,7 @@ Match against these patterns (from CLAUDE.md):
- `config/system_prompts/*.txt`
- `test/evals/**/*` (eval infrastructure changes affect all suites)
-**If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 3.5.
+**If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 9.
**2. Identify affected eval suites:**
@@ -213,9 +213,9 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
**4. Check results:**
- **If any eval fails:** Show the failures, the cost dashboard, and **STOP**. Do not proceed.
-- **If all pass:** Note pass counts and cost. Continue to Step 3.5.
+- **If all pass:** Note pass counts and cost. Continue to Step 9.
-**5. Save eval output** — include eval results and cost dashboard in the PR body (Step 8).
+**5. Save eval output** — include eval results and cost dashboard in the PR body (Step 19).
**Tier reference (for context — /ship always uses `full`):**
| Tier | When | Speed (cached) | Cost |
@@ -226,13 +226,13 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
---
-## Step 3.4: Test Coverage Audit
+## Step 7: Test Coverage Audit
{{TEST_COVERAGE_AUDIT_SHIP}}
---
-## Step 3.45: Plan Completion Audit
+## Step 8: Plan Completion Audit
{{PLAN_COMPLETION_AUDIT_SHIP}}
@@ -246,7 +246,7 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
---
-## Step 3.5: Pre-Landing Review
+## Step 9: Pre-Landing Review
Review the diff for structural issues that tests don't catch.
@@ -268,7 +268,7 @@ Review the diff for structural issues that tests don't catch.
{{CROSS_REVIEW_DEDUP}}
-4. **Classify each finding from both the checklist pass and specialist review (Step 3.55-3.56) as AUTO-FIX or ASK** per the Fix-First Heuristic in
+4. **Classify each finding from both the checklist pass and specialist review (Step 9.1-Step 9.2) as AUTO-FIX or ASK** per the Fix-First Heuristic in
checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX.
5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix:
@@ -282,7 +282,7 @@ Review the diff for structural issues that tests don't catch.
7. **After all fixes (auto + user-approved):**
- If ANY fixes were applied: commit fixed files by name (`git add && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test.
- - If no fixes applied (all ASK items skipped, or no issues found): continue to Step 4.
+ - If no fixes applied (all ASK items skipped, or no issues found): continue to Step 12.
8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)`
@@ -294,19 +294,19 @@ Review the diff for structural issues that tests don't catch.
```
Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise),
and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs.
-- `quality_score` = the PR Quality Score computed in Step 3.56 (e.g., 7.5). If specialists were skipped (small diff), use `10.0`
-- `specialists` = the per-specialist stats object compiled in Step 3.56. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}`
+- `quality_score` = the PR Quality Score computed in Step 9.2 (e.g., 7.5). If specialists were skipped (small diff), use `10.0`
+- `specialists` = the per-specialist stats object compiled in Step 9.2. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}`
- `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip).
-Save the review output — it goes into the PR body in Step 8.
+Save the review output — it goes into the PR body in Step 19.
---
-## Step 3.75: Address Greptile review comments (if PR exists)
+## Step 10: Address Greptile review comments (if PR exists)
Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
-**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 4.
+**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 12.
**If Greptile comments are found:**
@@ -337,7 +337,7 @@ For each classified comment:
**SUPPRESSED:** Skip silently — these are known false positives from previous triage.
-**After all comments are resolved:** If any fixes were applied, the tests from Step 3 are now stale. **Re-run tests** (Step 3) before continuing to Step 4. If no fixes were applied, continue to Step 4.
+**After all comments are resolved:** If any fixes were applied, the tests from Step 5 are now stale. **Re-run tests** (Step 5) before continuing to Step 12. If no fixes were applied, continue to Step 12.
---
@@ -345,7 +345,7 @@ For each classified comment:
{{LEARNINGS_LOG}}
-## Step 4: Version bump (auto-decide)
+## Step 12: Version bump (auto-decide)
**Idempotency check:** Before bumping, compare VERSION against the base branch.
@@ -380,7 +380,7 @@ If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (pri
---
-## Step 5.5: TODOS.md (auto-update)
+## Step 14: TODOS.md (auto-update)
Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized.
@@ -392,7 +392,7 @@ Read `.claude/skills/review/TODOS-format.md` for the canonical format reference.
- Message: "GStack recommends maintaining a TODOS.md organized by skill/component, then priority (P0 at top through P4, then Completed at bottom). See TODOS-format.md for the full format. Would you like to create one?"
- Options: A) Create it now, B) Skip for now
- If A: Create `TODOS.md` with a skeleton (# TODOS heading + ## Completed section). Continue to step 3.
-- If B: Skip the rest of Step 5.5. Continue to Step 6.
+- If B: Skip the rest of Step 14. Continue to Step 15.
**2. Check structure and organization:**
@@ -431,11 +431,11 @@ For each TODO item, check if the changes in this PR complete it by:
**6. Defensive:** If TODOS.md cannot be written (permission error, disk full), warn the user and continue. Never stop the ship workflow for a TODOS failure.
-Save this summary — it goes into the PR body in Step 8.
+Save this summary — it goes into the PR body in Step 19.
---
-## Step 6: Commit (bisectable chunks)
+## Step 15: Commit (bisectable chunks)
**Goal:** Create small, logical commits that work well with `git bisect` and help LLMs understand what changed.
@@ -473,13 +473,13 @@ EOF
---
-## Step 6.5: Verification Gate
+## Step 16: Verification Gate
**IRON LAW: NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE.**
Before pushing, re-verify if code changed during Steps 4-6:
-1. **Test verification:** If ANY code changed after Step 3's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 3 is NOT acceptable.
+1. **Test verification:** If ANY code changed after Step 5's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 5 is NOT acceptable.
2. **Build verification:** If the project has a build step, run it. Paste output.
@@ -489,13 +489,13 @@ Before pushing, re-verify if code changed during Steps 4-6:
- "I already tested earlier" → Code changed since then. Test again.
- "It's a trivial change" → Trivial changes break production.
-**If tests fail here:** STOP. Do not push. Fix the issue and return to Step 3.
+**If tests fail here:** STOP. Do not push. Fix the issue and return to Step 5.
Claiming work is complete without verification is dishonesty, not efficiency.
---
-## Step 7: Push
+## Step 17: Push
**Idempotency check:** Check if the branch is already pushed and up to date.
@@ -507,7 +507,7 @@ echo "LOCAL: $LOCAL REMOTE: $REMOTE"
[ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED"
```
-If `ALREADY_PUSHED`, skip the push but continue to Step 8. Otherwise push with upstream tracking:
+If `ALREADY_PUSHED`, skip the push but continue to Step 19. Otherwise push with upstream tracking:
```bash
git push -u origin
@@ -515,7 +515,7 @@ git push -u origin
---
-## Step 8: Create PR/MR
+## Step 19: Create PR/MR
**Idempotency check:** Check if a PR/MR already exists for this branch.
@@ -529,7 +529,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number):
glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR"
```
-If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 8.5.
+If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 18.
If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0.
@@ -545,11 +545,11 @@ must appear in at least one section. If a commit's work isn't reflected in the s
you missed it.>
## Test Coverage
-
-
+
+
## Pre-Landing Review
-
+
## Design Review
@@ -561,19 +561,19 @@ you missed it.>
## Greptile Review
-
+
## Scope Drift
## Plan Completion
-
+
## Verification Results
-
+
@@ -611,11 +611,11 @@ EOF
**If neither CLI is available:**
Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready.
-**Output the PR/MR URL** — then proceed to Step 8.5.
+**Output the PR/MR URL** — then proceed to Step 18.
---
-## Step 8.5: Auto-invoke /document-release
+## Step 18: Auto-invoke /document-release
After the PR is created, automatically sync project documentation. Read the
`document-release/SKILL.md` skill file (adjacent to this skill's directory) and
@@ -634,11 +634,11 @@ execute its full workflow:
This step is automatic. Do not ask the user for confirmation. The goal is zero-friction
doc updates — the user runs `/ship` and documentation stays current without a separate command.
-If Step 8.5 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release.
+If Step 18 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release.
---
-## Step 8.75: Persist ship metrics
+## Step 20: Persist ship metrics
Log coverage and plan completion data so `/retro` can track trends:
@@ -653,10 +653,10 @@ echo '{"skill":"ship","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","coverage
```
Substitute from earlier steps:
-- **COVERAGE_PCT**: coverage percentage from Step 3.4 diagram (integer, or -1 if undetermined)
-- **PLAN_TOTAL**: total plan items extracted in Step 3.45 (0 if no plan file)
-- **PLAN_DONE**: count of DONE + CHANGED items from Step 3.45 (0 if no plan file)
-- **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 3.47
+- **COVERAGE_PCT**: coverage percentage from Step 7 diagram (integer, or -1 if undetermined)
+- **PLAN_TOTAL**: total plan items extracted in Step 8 (0 if no plan file)
+- **PLAN_DONE**: count of DONE + CHANGED items from Step 8 (0 if no plan file)
+- **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 8.1
- **VERSION**: from the VERSION file
- **BRANCH**: current branch name
@@ -675,6 +675,6 @@ This step is automatic — never skip it, never ask for confirmation.
- **Split commits for bisectability** — each commit = one logical change.
- **TODOS.md completion detection must be conservative.** Only mark items as completed when the diff clearly shows the work is done.
- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence (inline diff, code references, re-rank suggestion). Never post vague replies.
-- **Never push without fresh verification evidence.** If code changed after Step 3 tests, re-run before pushing.
-- **Step 3.4 generates coverage tests.** They must pass before committing. Never commit failing tests.
+- **Never push without fresh verification evidence.** If code changed after Step 5 tests, re-run before pushing.
+- **Step 7 generates coverage tests.** They must pass before committing. Never commit failing tests.
- **The goal is: user says `/ship`, next thing they see is the review + PR URL + auto-synced docs.**
diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts
index a555104d..2e0814ae 100644
--- a/test/gen-skill-docs.test.ts
+++ b/test/gen-skill-docs.test.ts
@@ -752,13 +752,13 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
test('ship SKILL.md contains review army specialist dispatch', () => {
expect(shipSkill).toContain('Specialist Dispatch');
- expect(shipSkill).toContain('Step 3.55');
- expect(shipSkill).toContain('Step 3.56');
+ expect(shipSkill).toContain('Step 9.1');
+ expect(shipSkill).toContain('Step 9.2');
});
test('ship SKILL.md contains cross-review finding dedup', () => {
expect(shipSkill).toContain('Cross-review finding dedup');
- expect(shipSkill).toContain('Step 3.57');
+ expect(shipSkill).toContain('Step 9.3');
});
test('ship SKILL.md contains re-run idempotency behavior', () => {
@@ -839,7 +839,7 @@ describe('PLAN_COMPLETION_AUDIT placeholders', () => {
test('ship SKILL.md contains plan completion audit step', () => {
expect(shipSkill).toContain('Plan Completion Audit');
- expect(shipSkill).toContain('Step 3.45');
+ expect(shipSkill).toContain('Step 8');
});
test('review SKILL.md contains plan completion in scope drift', () => {
@@ -888,7 +888,7 @@ describe('PLAN_VERIFICATION_EXEC placeholder', () => {
const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
test('ship SKILL.md contains plan verification step', () => {
- expect(shipSkill).toContain('Step 3.47');
+ expect(shipSkill).toContain('Step 8.1');
expect(shipSkill).toContain('Plan Verification');
});
@@ -946,7 +946,7 @@ describe('Ship metrics logging', () => {
const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
test('ship SKILL.md contains metrics persistence step', () => {
- expect(shipSkill).toContain('Step 8.75');
+ expect(shipSkill).toContain('Step 20');
expect(shipSkill).toContain('coverage_pct');
expect(shipSkill).toContain('plan_items_total');
expect(shipSkill).toContain('plan_items_done');
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index 1da5db6d..96f0405c 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -1004,7 +1004,7 @@ describe('Test Bootstrap ({{TEST_BOOTSTRAP}}) integration', () => {
test('TEST_BOOTSTRAP appears in ship/SKILL.md', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
expect(content).toContain('Test Framework Bootstrap');
- expect(content).toContain('Step 2.5');
+ expect(content).toContain('Step 4');
});
test('TEST_BOOTSTRAP appears in design-review/SKILL.md', () => {
@@ -1099,9 +1099,9 @@ describe('Phase 8e.5 regression test generation', () => {
// --- Step 3.4 coverage audit validation ---
describe('Step 3.4 test coverage audit', () => {
- test('ship/SKILL.md contains Step 3.4', () => {
+ test('ship/SKILL.md contains Step 7', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
- expect(content).toContain('Step 3.4: Test Coverage Audit');
+ expect(content).toContain('Step 7: Test Coverage Audit');
expect(content).toContain('CODE PATH COVERAGE');
});
@@ -1126,7 +1126,7 @@ describe('Step 3.4 test coverage audit', () => {
test('ship rules include test generation rule', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
- expect(content).toContain('Step 3.4 generates coverage tests');
+ expect(content).toContain('Step 7 generates coverage tests');
expect(content).toContain('Never commit failing tests');
});