refactor: renumber /ship steps to clean integers (1-20)

Replaces fractional step numbers (1.5, 2.5, 3.25, 3.4, 3.45, 3.47, 3.48,
3.5, 3.55, 3.56, 3.57, 3.75, 3.8, 5.5, 6.5, 8.5, 8.75) with clean
integers 1 through 20, plus allowed resolver sub-steps 8.1, 8.2,
9.1, 9.2, 9.3. Fractional numbering signaled "optional appendix" and
contributed to /ship's habit of skipping late-stage steps.

Affects:
- ship/SKILL.md.tmpl (all headings + ~30 cross-references)
- scripts/resolvers/review.ts (ship-side 3.47/3.48/3.57/3.8 conditionals)
- scripts/resolvers/review-army.ts (ship-side 3.55/3.56 conditionals)
- scripts/resolvers/testing.ts (ship-side 2.5/3.4 references, 5 sites)
- scripts/resolvers/utility.ts (CHANGELOG heading gets Step 13 prefix)
- test/gen-skill-docs.test.ts (5 step-number assertions updated)
- test/skill-validation.test.ts (3 step-number assertions updated)

/review step numbering (1.5, 2.5, 4.5, 5.5-5.8) intentionally unchanged —
only the ship-side of each isShip conditional was updated.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-17 06:02:20 +08:00
parent dbd7aee5b6
commit 566d42c259
10 changed files with 174 additions and 174 deletions
+1 -1
View File
@@ -669,7 +669,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found): **If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap." Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns). Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.** Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.** **If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
+1 -1
View File
@@ -711,7 +711,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found): **If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap." Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns). Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.** Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.** **If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
+6 -6
View File
@@ -13,8 +13,8 @@ import type { TemplateContext } from './types';
function generateSpecialistSelection(ctx: TemplateContext): string { function generateSpecialistSelection(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship'; const isShip = ctx.skillName === 'ship';
const stepSel = isShip ? '3.55' : '4.5'; const stepSel = isShip ? '9.1' : '4.5';
const stepMerge = isShip ? '3.56' : '4.6'; const stepMerge = isShip ? '9.2' : '4.6';
const nextStep = isShip ? 'the Fix-First flow (item 4)' : 'Step 5'; const nextStep = isShip ? 'the Fix-First flow (item 4)' : 'Step 5';
return `## Step ${stepSel}: Review Army — Specialist Dispatch return `## Step ${stepSel}: Review Army — Specialist Dispatch
@@ -134,10 +134,10 @@ CHECKLIST:
function generateFindingsMerge(ctx: TemplateContext): string { function generateFindingsMerge(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship'; const isShip = ctx.skillName === 'ship';
const stepMerge = isShip ? '3.56' : '4.6'; const stepMerge = isShip ? '9.2' : '4.6';
const stepSel = isShip ? '3.55' : '4.5'; const stepSel = isShip ? '9.1' : '4.5';
const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First'; const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First';
const critPassRef = isShip ? 'the checklist pass (Step 3.5)' : 'the CRITICAL pass findings from Step 4'; const critPassRef = isShip ? 'the checklist pass (Step 9)' : 'the CRITICAL pass findings from Step 4';
const persistRef = isShip ? 'the review-log persist' : 'the review-log entry in Step 5.8'; const persistRef = isShip ? 'the review-log persist' : 'the review-log entry in Step 5.8';
return `### Step ${stepMerge}: Collect and merge findings return `### Step ${stepMerge}: Collect and merge findings
@@ -202,7 +202,7 @@ Remember these stats — you will need them for the review-log entry in Step 5.8
function generateRedTeam(ctx: TemplateContext): string { function generateRedTeam(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship'; const isShip = ctx.skillName === 'ship';
const stepMerge = isShip ? '3.56' : '4.6'; const stepMerge = isShip ? '9.2' : '4.6';
const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First'; const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First';
return `### Red Team dispatch (conditional) return `### Red Team dispatch (conditional)
+9 -9
View File
@@ -368,7 +368,7 @@ If A: revise the premise and note the revision. If B: proceed (and note that the
export function generateScopeDrift(ctx: TemplateContext): string { export function generateScopeDrift(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship'; const isShip = ctx.skillName === 'ship';
const stepNum = isShip ? '3.48' : '1.5'; const stepNum = isShip ? '8.2' : '1.5';
return `## Step ${stepNum}: Scope Drift Detection return `## Step ${stepNum}: Scope Drift Detection
@@ -413,7 +413,7 @@ export function generateAdversarialStep(ctx: TemplateContext): string {
if (ctx.host === 'codex') return ''; if (ctx.host === 'codex') return '';
const isShip = ctx.skillName === 'ship'; const isShip = ctx.skillName === 'ship';
const stepNum = isShip ? '3.8' : '5.7'; const stepNum = isShip ? '11' : '5.7';
return `## Step ${stepNum}: Adversarial review (always-on) return `## Step ${stepNum}: Adversarial review (always-on)
@@ -501,7 +501,7 @@ A) Investigate and fix now (recommended)
B) Continue review will still complete B) Continue review will still complete
\`\`\` \`\`\`
If A: address the findings${isShip ? '. After fixing, re-run tests (Step 3) since code has changed' : ''}. Re-run \`codex review\` to verify. If A: address the findings${isShip ? '. After fixing, re-run tests (Step 5) since code has changed' : ''}. Re-run \`codex review\` to verify.
Read stderr for errors (same error handling as Codex adversarial above). Read stderr for errors (same error handling as Codex adversarial above).
@@ -917,16 +917,16 @@ export function generatePlanCompletionAuditReview(_ctx: TemplateContext): string
// ─── Plan Verification Execution ────────────────────────────────────── // ─── Plan Verification Execution ──────────────────────────────────────
export function generatePlanVerificationExec(_ctx: TemplateContext): string { export function generatePlanVerificationExec(_ctx: TemplateContext): string {
return `## Step 3.47: Plan Verification return `## Step 8.1: Plan Verification
Automatically verify the plan's testing/verification steps using the \`/qa-only\` skill. Automatically verify the plan's testing/verification steps using the \`/qa-only\` skill.
### 1. Check for verification section ### 1. Check for verification section
Using the plan file already discovered in Step 3.45, look for a verification section. Match any of these headings: \`## Verification\`, \`## Test plan\`, \`## Testing\`, \`## How to test\`, \`## Manual testing\`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test). Using the plan file already discovered in Step 8, look for a verification section. Match any of these headings: \`## Verification\`, \`## Test plan\`, \`## Testing\`, \`## How to test\`, \`## Manual testing\`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test).
**If no verification section found:** Skip with "No verification steps found in plan — skipping auto-verification." **If no verification section found:** Skip with "No verification steps found in plan — skipping auto-verification."
**If no plan file was found in Step 3.45:** Skip (already handled). **If no plan file was found in Step 8:** Skip (already handled).
### 2. Check for running dev server ### 2. Check for running dev server
@@ -971,7 +971,7 @@ Follow the /qa-only workflow with these modifications:
### 5. Include in PR body ### 5. Include in PR body
Add a \`## Verification Results\` section to the PR body (Step 8): Add a \`## Verification Results\` section to the PR body (Step 19):
- If verification ran: summary of results (N PASS, M FAIL, K SKIPPED) - If verification ran: summary of results (N PASS, M FAIL, K SKIPPED)
- If skipped: reason for skipping (no plan, no server, no verification section)`; - If skipped: reason for skipping (no plan, no server, no verification section)`;
} }
@@ -980,9 +980,9 @@ Add a \`## Verification Results\` section to the PR body (Step 8):
export function generateCrossReviewDedup(ctx: TemplateContext): string { export function generateCrossReviewDedup(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship'; const isShip = ctx.skillName === 'ship';
const stepNum = isShip ? '3.57' : '5.0'; const stepNum = isShip ? '9.3' : '5.0';
const findingsRef = isShip const findingsRef = isShip
? 'the checklist pass (Step 3.5) and specialist review (Step 3.55-3.56)' ? 'the checklist pass (Step 9) and specialist review (Step 9.1-9.2)'
: 'Step 4 critical pass and Step 4.5-4.6 specialists'; : 'Step 4 critical pass and Step 4.5-4.6 specialists';
return `### Step ${stepNum}: Cross-review finding dedup return `### Step ${stepNum}: Cross-review finding dedup
+5 -5
View File
@@ -28,7 +28,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found): **If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap." Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns). Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.** Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.** **If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
@@ -213,7 +213,7 @@ ls jest.config.* vitest.config.* playwright.config.* cypress.config.* .rspec pyt
ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
\`\`\` \`\`\`
3. **If no framework detected:**${mode === 'ship' ? ' falls through to the Test Framework Bootstrap step (Step 2.5) which handles full setup.' : ' still produce the coverage diagram, but skip test generation.'}`); 3. **If no framework detected:**${mode === 'ship' ? ' falls through to the Test Framework Bootstrap step (Step 4) which handles full setup.' : ' still produce the coverage diagram, but skip test generation.'}`);
// ── Before/after count (ship only) ── // ── Before/after count (ship only) ──
if (mode === 'ship') { if (mode === 'ship') {
@@ -379,7 +379,7 @@ GAPS: 8 paths need tests (2 need E2E, 1 needs eval)
\`\`\` \`\`\`
**Fast path:** All paths covered "${mode === 'ship' ? 'Step 3.4' : mode === 'review' ? 'Step 4.75' : 'Test review'}: All new code paths have test coverage ✓" Continue.`); **Fast path:** All paths covered "${mode === 'ship' ? 'Step 7' : mode === 'review' ? 'Step 4.75' : 'Test review'}: All new code paths have test coverage ✓" Continue.`);
// ── Mode-specific action section ── // ── Mode-specific action section ──
if (mode === 'plan') { if (mode === 'plan') {
@@ -432,7 +432,7 @@ This file is consumed by \`/qa\` and \`/qa-only\` as primary test input. Include
sections.push(` sections.push(`
**5. Generate tests for uncovered paths:** **5. Generate tests for uncovered paths:**
If test framework detected (or bootstrapped in Step 2.5): If test framework detected (or bootstrapped in Step 4):
- Prioritize error handlers and edge cases first (happy paths are more likely already tested) - Prioritize error handlers and edge cases first (happy paths are more likely already tested)
- Read 2-3 existing test files to match conventions exactly - Read 2-3 existing test files to match conventions exactly
- Generate unit tests. Mock all external dependencies (DB, API, Redis). - Generate unit tests. Mock all external dependencies (DB, API, Redis).
@@ -446,7 +446,7 @@ Caps: 30 code paths max, 20 tests generated max (code + user flow combined), 2-m
If no test framework AND user declined bootstrap diagram only, no generation. Note: "Test generation skipped — no test framework configured." If no test framework AND user declined bootstrap diagram only, no generation. Note: "Test generation skipped — no test framework configured."
**Diff is test-only changes:** Skip Step 3.4 entirely: "No new application code paths to audit." **Diff is test-only changes:** Skip Step 7 entirely: "No new application code paths to audit."
**6. After-count and coverage summary:** **6. After-count and coverage summary:**
+1 -1
View File
@@ -373,7 +373,7 @@ export function generateCoAuthorTrailer(ctx: TemplateContext): string {
} }
export function generateChangelogWorkflow(_ctx: TemplateContext): string { export function generateChangelogWorkflow(_ctx: TemplateContext): string {
return `## CHANGELOG (auto-generate) return `## Step 13: CHANGELOG (auto-generate)
1. Read \`CHANGELOG.md\` header to know the format. 1. Read \`CHANGELOG.md\` header to know the format.
+80 -80
View File
@@ -602,17 +602,17 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
- Merge conflicts that can't be auto-resolved (stop, show conflicts) - Merge conflicts that can't be auto-resolved (stop, show conflicts)
- In-branch test failures (pre-existing failures are triaged, not auto-blocking) - In-branch test failures (pre-existing failures are triaged, not auto-blocking)
- Pre-landing review finds ASK items that need user judgment - Pre-landing review finds ASK items that need user judgment
- MINOR or MAJOR version bump needed (ask — see Step 4) - MINOR or MAJOR version bump needed (ask — see Step 12)
- Greptile review comments that need user decision (complex fixes, false positives) - Greptile review comments that need user decision (complex fixes, false positives)
- AI-assessed coverage below minimum threshold (hard gate with user override — see Step 3.4) - AI-assessed coverage below minimum threshold (hard gate with user override — see Step 7)
- Plan items NOT DONE with no user override (see Step 3.45) - Plan items NOT DONE with no user override (see Step 8)
- Plan verification failures (see Step 3.47) - Plan verification failures (see Step 8.1)
- TODOS.md missing and user wants to create one (ask — see Step 5.5) - TODOS.md missing and user wants to create one (ask — see Step 14)
- TODOS.md disorganized and user wants to reorganize (ask — see Step 5.5) - TODOS.md disorganized and user wants to reorganize (ask — see Step 14)
**Never stop for:** **Never stop for:**
- Uncommitted changes (always include them) - Uncommitted changes (always include them)
- Version bump choice (auto-pick MICRO or PATCH — see Step 4) - Version bump choice (auto-pick MICRO or PATCH — see Step 12)
- CHANGELOG content (auto-generate from diff) - CHANGELOG content (auto-generate from diff)
- Commit message approval (auto-commit) - Commit message approval (auto-commit)
- Multi-file changesets (auto-split into bisectable commits) - Multi-file changesets (auto-split into bisectable commits)
@@ -625,9 +625,9 @@ Re-running `/ship` means "run the whole checklist again." Every verification ste
(tests, coverage audit, plan completion, pre-landing review, adversarial review, (tests, coverage audit, plan completion, pre-landing review, adversarial review,
VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation. VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation.
Only *actions* are idempotent: Only *actions* are idempotent:
- Step 4: If VERSION already bumped, skip the bump but still read the version - Step 12: If VERSION already bumped, skip the bump but still read the version
- Step 7: If already pushed, skip the push command - Step 17: If already pushed, skip the push command
- Step 8: If PR exists, update the body instead of creating a new PR - Step 19: If PR exists, update the body instead of creating a new PR
Never skip a verification step because a prior `/ship` run already performed it. Never skip a verification step because a prior `/ship` run already performed it.
--- ---
@@ -695,19 +695,19 @@ Display:
If the Eng Review is NOT "CLEAR": If the Eng Review is NOT "CLEAR":
Print: "No prior eng review found — ship will run its own pre-landing review in Step 3.5." Print: "No prior eng review found — ship will run its own pre-landing review in Step 9."
Check diff size: `git diff <base>...HEAD --stat | tail -1`. If the diff is >200 lines, add: "Note: This is a large diff. Consider running `/plan-eng-review` or `/autoplan` for architecture-level review before shipping." Check diff size: `git diff <base>...HEAD --stat | tail -1`. If the diff is >200 lines, add: "Note: This is a large diff. Consider running `/plan-eng-review` or `/autoplan` for architecture-level review before shipping."
If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block. If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block.
For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 3.5, but consider running /design-review for a full visual audit post-implementation." Still never block. For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 9, but consider running /design-review for a full visual audit post-implementation." Still never block.
Continue to Step 1.5 — do NOT block or ask. Ship runs its own review in Step 3.5. Continue to Step 2 — do NOT block or ask. Ship runs its own review in Step 9.
--- ---
## Step 1.5: Distribution Pipeline Check ## Step 2: Distribution Pipeline Check
If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web
service with existing deployment — verify that a distribution pipeline exists. service with existing deployment — verify that a distribution pipeline exists.
@@ -735,7 +735,7 @@ service with existing deployment — verify that a distribution pipeline exists.
--- ---
## Step 2: Merge the base branch (BEFORE tests) ## Step 3: Merge the base branch (BEFORE tests)
Fetch and merge the base branch into the feature branch so tests run against the merged state: Fetch and merge the base branch into the feature branch so tests run against the merged state:
@@ -749,7 +749,7 @@ git fetch origin <base> && git merge origin/<base> --no-edit
--- ---
## Step 2.5: Test Framework Bootstrap ## Step 4: Test Framework Bootstrap
## Test Framework Bootstrap ## Test Framework Bootstrap
@@ -778,7 +778,7 @@ ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
**If test framework detected** (config files or test directories found): **If test framework detected** (config files or test directories found):
Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap." Print "Test framework detected: {name} ({N} existing tests). Skipping bootstrap."
Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns). Read 2-3 existing test files to learn conventions (naming, imports, assertion style, setup patterns).
Store conventions as prose context for use in Phase 8e.5 or Step 3.4. **Skip the rest of bootstrap.** Store conventions as prose context for use in Phase 8e.5 or Step 7. **Skip the rest of bootstrap.**
**If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.** **If BOOTSTRAP_DECLINED** appears: Print "Test bootstrap previously declined — skipping." **Skip the rest of bootstrap.**
@@ -907,7 +907,7 @@ Only commit if there are changes. Stage all bootstrap files (config, test direct
--- ---
## Step 3: Run tests (on merged code) ## Step 5: Run tests (on merged code)
**Do NOT run `RAILS_ENV=test bin/rails db:migrate`** — `bin/test-lane` already calls **Do NOT run `RAILS_ENV=test bin/rails db:migrate`** — `bin/test-lane` already calls
`db:test:prepare` internally, which loads the schema into the correct lane database. `db:test:prepare` internally, which loads the schema into the correct lane database.
@@ -1029,13 +1029,13 @@ Use AskUserQuestion:
- Continue with the workflow. - Continue with the workflow.
- Note in output: "Pre-existing test failure skipped: <test-name>" - Note in output: "Pre-existing test failure skipped: <test-name>"
**After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 3.25. **After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 6.
**If all pass:** Continue silently — just note the counts briefly. **If all pass:** Continue silently — just note the counts briefly.
--- ---
## Step 3.25: Eval Suites (conditional) ## Step 6: Eval Suites (conditional)
Evals are mandatory when prompt-related files change. Skip this step entirely if no prompt files are in the diff. Evals are mandatory when prompt-related files change. Skip this step entirely if no prompt files are in the diff.
@@ -1054,7 +1054,7 @@ Match against these patterns (from CLAUDE.md):
- `config/system_prompts/*.txt` - `config/system_prompts/*.txt`
- `test/evals/**/*` (eval infrastructure changes affect all suites) - `test/evals/**/*` (eval infrastructure changes affect all suites)
**If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 3.5. **If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 9.
**2. Identify affected eval suites:** **2. Identify affected eval suites:**
@@ -1084,9 +1084,9 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
**4. Check results:** **4. Check results:**
- **If any eval fails:** Show the failures, the cost dashboard, and **STOP**. Do not proceed. - **If any eval fails:** Show the failures, the cost dashboard, and **STOP**. Do not proceed.
- **If all pass:** Note pass counts and cost. Continue to Step 3.5. - **If all pass:** Note pass counts and cost. Continue to Step 9.
**5. Save eval output** — include eval results and cost dashboard in the PR body (Step 8). **5. Save eval output** — include eval results and cost dashboard in the PR body (Step 19).
**Tier reference (for context — /ship always uses `full`):** **Tier reference (for context — /ship always uses `full`):**
| Tier | When | Speed (cached) | Cost | | Tier | When | Speed (cached) | Cost |
@@ -1097,7 +1097,7 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
--- ---
## Step 3.4: Test Coverage Audit ## Step 7: Test Coverage Audit
100% coverage is the goal — every untested path is a path where bugs hide and vibe coding becomes yolo coding. Evaluate what was ACTUALLY coded (from the diff), not what was planned. 100% coverage is the goal — every untested path is a path where bugs hide and vibe coding becomes yolo coding. Evaluate what was ACTUALLY coded (from the diff), not what was planned.
@@ -1121,7 +1121,7 @@ ls jest.config.* vitest.config.* playwright.config.* cypress.config.* .rspec pyt
ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null ls -d test/ tests/ spec/ __tests__/ cypress/ e2e/ 2>/dev/null
``` ```
3. **If no framework detected:** falls through to the Test Framework Bootstrap step (Step 2.5) which handles full setup. 3. **If no framework detected:** falls through to the Test Framework Bootstrap step (Step 4) which handles full setup.
**0. Before/after test count:** **0. Before/after test count:**
@@ -1263,11 +1263,11 @@ GAPS: 8 paths need tests (2 need E2E, 1 needs eval)
───────────────────────────────── ─────────────────────────────────
``` ```
**Fast path:** All paths covered → "Step 3.4: All new code paths have test coverage ✓" Continue. **Fast path:** All paths covered → "Step 7: All new code paths have test coverage ✓" Continue.
**5. Generate tests for uncovered paths:** **5. Generate tests for uncovered paths:**
If test framework detected (or bootstrapped in Step 2.5): If test framework detected (or bootstrapped in Step 4):
- Prioritize error handlers and edge cases first (happy paths are more likely already tested) - Prioritize error handlers and edge cases first (happy paths are more likely already tested)
- Read 2-3 existing test files to match conventions exactly - Read 2-3 existing test files to match conventions exactly
- Generate unit tests. Mock all external dependencies (DB, API, Redis). - Generate unit tests. Mock all external dependencies (DB, API, Redis).
@@ -1281,7 +1281,7 @@ Caps: 30 code paths max, 20 tests generated max (code + user flow combined), 2-m
If no test framework AND user declined bootstrap → diagram only, no generation. Note: "Test generation skipped — no test framework configured." If no test framework AND user declined bootstrap → diagram only, no generation. Note: "Test generation skipped — no test framework configured."
**Diff is test-only changes:** Skip Step 3.4 entirely: "No new application code paths to audit." **Diff is test-only changes:** Skip Step 7 entirely: "No new application code paths to audit."
**6. After-count and coverage summary:** **6. After-count and coverage summary:**
@@ -1359,7 +1359,7 @@ Repo: {owner/repo}
--- ---
## Step 3.45: Plan Completion Audit ## Step 8: Plan Completion Audit
### Plan File Discovery ### Plan File Discovery
@@ -1480,16 +1480,16 @@ After producing the completion checklist:
--- ---
## Step 3.47: Plan Verification ## Step 8.1: Plan Verification
Automatically verify the plan's testing/verification steps using the `/qa-only` skill. Automatically verify the plan's testing/verification steps using the `/qa-only` skill.
### 1. Check for verification section ### 1. Check for verification section
Using the plan file already discovered in Step 3.45, look for a verification section. Match any of these headings: `## Verification`, `## Test plan`, `## Testing`, `## How to test`, `## Manual testing`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test). Using the plan file already discovered in Step 8, look for a verification section. Match any of these headings: `## Verification`, `## Test plan`, `## Testing`, `## How to test`, `## Manual testing`, or any section with verification-flavored items (URLs to visit, things to check visually, interactions to test).
**If no verification section found:** Skip with "No verification steps found in plan — skipping auto-verification." **If no verification section found:** Skip with "No verification steps found in plan — skipping auto-verification."
**If no plan file was found in Step 3.45:** Skip (already handled). **If no plan file was found in Step 8:** Skip (already handled).
### 2. Check for running dev server ### 2. Check for running dev server
@@ -1534,7 +1534,7 @@ Follow the /qa-only workflow with these modifications:
### 5. Include in PR body ### 5. Include in PR body
Add a `## Verification Results` section to the PR body (Step 8): Add a `## Verification Results` section to the PR body (Step 19):
- If verification ran: summary of results (N PASS, M FAIL, K SKIPPED) - If verification ran: summary of results (N PASS, M FAIL, K SKIPPED)
- If skipped: reason for skipping (no plan, no server, no verification section) - If skipped: reason for skipping (no plan, no server, no verification section)
@@ -1576,7 +1576,7 @@ matches a past learning, display:
This makes the compounding visible. The user should see that gstack is getting This makes the compounding visible. The user should see that gstack is getting
smarter on their codebase over time. smarter on their codebase over time.
## Step 3.48: Scope Drift Detection ## Step 8.2: Scope Drift Detection
Before reviewing code quality, check: **did they build what was requested — nothing more, nothing less?** Before reviewing code quality, check: **did they build what was requested — nothing more, nothing less?**
@@ -1613,7 +1613,7 @@ Before reviewing code quality, check: **did they build what was requested — no
--- ---
## Step 3.5: Pre-Landing Review ## Step 9: Pre-Landing Review
Review the diff for structural issues that tests don't catch. Review the diff for structural issues that tests don't catch.
@@ -1708,7 +1708,7 @@ Present Codex output under a `CODEX (design):` header, merged with the checklist
Include any design findings alongside the code review findings. They follow the same Fix-First flow below. Include any design findings alongside the code review findings. They follow the same Fix-First flow below.
## Step 3.55: Review Army — Specialist Dispatch ## Step 9.1: Review Army — Specialist Dispatch
### Detect stack and scope ### Detect stack and scope
@@ -1825,7 +1825,7 @@ CHECKLIST:
--- ---
### Step 3.56: Collect and merge findings ### Step 9.2: Collect and merge findings
After all specialist subagents complete, collect their outputs. After all specialist subagents complete, collect their outputs.
@@ -1871,7 +1871,7 @@ SPECIALIST REVIEW: N findings (X critical, Y informational) from Z specialists
PR Quality Score: X/10 PR Quality Score: X/10
``` ```
These findings flow into the Fix-First flow (item 4) alongside the checklist pass (Step 3.5). These findings flow into the Fix-First flow (item 4) alongside the checklist pass (Step 9).
The Fix-First heuristic applies identically — specialist findings follow the same AUTO-FIX vs ASK classification. The Fix-First heuristic applies identically — specialist findings follow the same AUTO-FIX vs ASK classification.
**Compile per-specialist stats:** **Compile per-specialist stats:**
@@ -1895,7 +1895,7 @@ If activated, dispatch one more subagent via the Agent tool (foreground, not bac
The Red Team subagent receives: The Red Team subagent receives:
1. The red-team checklist from `~/.claude/skills/gstack/review/specialists/red-team.md` 1. The red-team checklist from `~/.claude/skills/gstack/review/specialists/red-team.md`
2. The merged specialist findings from Step 3.56 (so it knows what was already caught) 2. The merged specialist findings from Step 9.2 (so it knows what was already caught)
3. The git diff command 3. The git diff command
Prompt: "You are a red team reviewer. The code has already been reviewed by N specialists Prompt: "You are a red team reviewer. The code has already been reviewed by N specialists
@@ -1911,7 +1911,7 @@ the Fix-First flow (item 4). Red Team findings are tagged with `"specialist":"re
If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found." If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found."
If the Red Team subagent fails or times out, skip silently and continue. If the Red Team subagent fails or times out, skip silently and continue.
### Step 3.57: Cross-review finding dedup ### Step 9.3: Cross-review finding dedup
Before classifying findings, check if any were previously skipped by the user in a prior review on this branch. Before classifying findings, check if any were previously skipped by the user in a prior review on this branch.
@@ -1931,7 +1931,7 @@ If skipped fingerprints exist, get the list of files changed since that review:
git diff --name-only <prior-review-commit> HEAD git diff --name-only <prior-review-commit> HEAD
``` ```
For each current finding (from both the checklist pass (Step 3.5) and specialist review (Step 3.55-3.56)), check: For each current finding (from both the checklist pass (Step 9) and specialist review (Step 9.1-9.2)), check:
- Does its fingerprint match a previously skipped finding? - Does its fingerprint match a previously skipped finding?
- Is the finding's file path NOT in the changed-files set? - Is the finding's file path NOT in the changed-files set?
@@ -1945,7 +1945,7 @@ If no prior reviews exist or none have a `findings` array, skip this step silent
Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)` Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)`
4. **Classify each finding from both the checklist pass and specialist review (Step 3.55-3.56) as AUTO-FIX or ASK** per the Fix-First Heuristic in 4. **Classify each finding from both the checklist pass and specialist review (Step 9.1-Step 9.2) as AUTO-FIX or ASK** per the Fix-First Heuristic in
checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX. checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX.
5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix: 5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix:
@@ -1959,7 +1959,7 @@ Output a summary header: `Pre-Landing Review: N issues (X critical, Y informatio
7. **After all fixes (auto + user-approved):** 7. **After all fixes (auto + user-approved):**
- If ANY fixes were applied: commit fixed files by name (`git add <fixed-files> && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test. - If ANY fixes were applied: commit fixed files by name (`git add <fixed-files> && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test.
- If no fixes applied (all ASK items skipped, or no issues found): continue to Step 4. - If no fixes applied (all ASK items skipped, or no issues found): continue to Step 12.
8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)` 8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)`
@@ -1971,19 +1971,19 @@ Output a summary header: `Pre-Landing Review: N issues (X critical, Y informatio
``` ```
Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise), Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise),
and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs. and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs.
- `quality_score` = the PR Quality Score computed in Step 3.56 (e.g., 7.5). If specialists were skipped (small diff), use `10.0` - `quality_score` = the PR Quality Score computed in Step 9.2 (e.g., 7.5). If specialists were skipped (small diff), use `10.0`
- `specialists` = the per-specialist stats object compiled in Step 3.56. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}` - `specialists` = the per-specialist stats object compiled in Step 9.2. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}`
- `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip). - `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip).
Save the review output — it goes into the PR body in Step 8. Save the review output — it goes into the PR body in Step 19.
--- ---
## Step 3.75: Address Greptile review comments (if PR exists) ## Step 10: Address Greptile review comments (if PR exists)
Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps. Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 4. **If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 12.
**If Greptile comments are found:** **If Greptile comments are found:**
@@ -2014,11 +2014,11 @@ For each classified comment:
**SUPPRESSED:** Skip silently — these are known false positives from previous triage. **SUPPRESSED:** Skip silently — these are known false positives from previous triage.
**After all comments are resolved:** If any fixes were applied, the tests from Step 3 are now stale. **Re-run tests** (Step 3) before continuing to Step 4. If no fixes were applied, continue to Step 4. **After all comments are resolved:** If any fixes were applied, the tests from Step 5 are now stale. **Re-run tests** (Step 5) before continuing to Step 12. If no fixes were applied, continue to Step 12.
--- ---
## Step 3.8: Adversarial review (always-on) ## Step 11: Adversarial review (always-on)
Every diff gets adversarial review from both Claude and Codex. LOC is not a proxy for risk — a 5-line auth change can be critical. Every diff gets adversarial review from both Claude and Codex. LOC is not a proxy for risk — a 5-line auth change can be critical.
@@ -2104,7 +2104,7 @@ A) Investigate and fix now (recommended)
B) Continue — review will still complete B) Continue — review will still complete
``` ```
If A: address the findings. After fixing, re-run tests (Step 3) since code has changed. Re-run `codex review` to verify. If A: address the findings. After fixing, re-run tests (Step 5) since code has changed. Re-run `codex review` to verify.
Read stderr for errors (same error handling as Codex adversarial above). Read stderr for errors (same error handling as Codex adversarial above).
@@ -2168,7 +2168,7 @@ staleness detection: if those files are later deleted, the learning can be flagg
**Only log genuine discoveries.** Don't log obvious things. Don't log things the user **Only log genuine discoveries.** Don't log obvious things. Don't log things the user
already knows. A good test: would this insight save time in a future session? If yes, log it. already knows. A good test: would this insight save time in a future session? If yes, log it.
## Step 4: Version bump (auto-decide) ## Step 12: Version bump (auto-decide)
**Idempotency check:** Before bumping, compare VERSION against the base branch. **Idempotency check:** Before bumping, compare VERSION against the base branch.
@@ -2199,7 +2199,7 @@ If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (pri
--- ---
## CHANGELOG (auto-generate) ## Step 13: CHANGELOG (auto-generate)
1. Read `CHANGELOG.md` header to know the format. 1. Read `CHANGELOG.md` header to know the format.
@@ -2243,7 +2243,7 @@ If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (pri
--- ---
## Step 5.5: TODOS.md (auto-update) ## Step 14: TODOS.md (auto-update)
Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized. Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized.
@@ -2255,7 +2255,7 @@ Read `.claude/skills/review/TODOS-format.md` for the canonical format reference.
- Message: "GStack recommends maintaining a TODOS.md organized by skill/component, then priority (P0 at top through P4, then Completed at bottom). See TODOS-format.md for the full format. Would you like to create one?" - Message: "GStack recommends maintaining a TODOS.md organized by skill/component, then priority (P0 at top through P4, then Completed at bottom). See TODOS-format.md for the full format. Would you like to create one?"
- Options: A) Create it now, B) Skip for now - Options: A) Create it now, B) Skip for now
- If A: Create `TODOS.md` with a skeleton (# TODOS heading + ## Completed section). Continue to step 3. - If A: Create `TODOS.md` with a skeleton (# TODOS heading + ## Completed section). Continue to step 3.
- If B: Skip the rest of Step 5.5. Continue to Step 6. - If B: Skip the rest of Step 14. Continue to Step 15.
**2. Check structure and organization:** **2. Check structure and organization:**
@@ -2294,11 +2294,11 @@ For each TODO item, check if the changes in this PR complete it by:
**6. Defensive:** If TODOS.md cannot be written (permission error, disk full), warn the user and continue. Never stop the ship workflow for a TODOS failure. **6. Defensive:** If TODOS.md cannot be written (permission error, disk full), warn the user and continue. Never stop the ship workflow for a TODOS failure.
Save this summary — it goes into the PR body in Step 8. Save this summary — it goes into the PR body in Step 19.
--- ---
## Step 6: Commit (bisectable chunks) ## Step 15: Commit (bisectable chunks)
**Goal:** Create small, logical commits that work well with `git bisect` and help LLMs understand what changed. **Goal:** Create small, logical commits that work well with `git bisect` and help LLMs understand what changed.
@@ -2336,13 +2336,13 @@ EOF
--- ---
## Step 6.5: Verification Gate ## Step 16: Verification Gate
**IRON LAW: NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE.** **IRON LAW: NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE.**
Before pushing, re-verify if code changed during Steps 4-6: Before pushing, re-verify if code changed during Steps 4-6:
1. **Test verification:** If ANY code changed after Step 3's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 3 is NOT acceptable. 1. **Test verification:** If ANY code changed after Step 5's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 5 is NOT acceptable.
2. **Build verification:** If the project has a build step, run it. Paste output. 2. **Build verification:** If the project has a build step, run it. Paste output.
@@ -2352,13 +2352,13 @@ Before pushing, re-verify if code changed during Steps 4-6:
- "I already tested earlier" → Code changed since then. Test again. - "I already tested earlier" → Code changed since then. Test again.
- "It's a trivial change" → Trivial changes break production. - "It's a trivial change" → Trivial changes break production.
**If tests fail here:** STOP. Do not push. Fix the issue and return to Step 3. **If tests fail here:** STOP. Do not push. Fix the issue and return to Step 5.
Claiming work is complete without verification is dishonesty, not efficiency. Claiming work is complete without verification is dishonesty, not efficiency.
--- ---
## Step 7: Push ## Step 17: Push
**Idempotency check:** Check if the branch is already pushed and up to date. **Idempotency check:** Check if the branch is already pushed and up to date.
@@ -2370,7 +2370,7 @@ echo "LOCAL: $LOCAL REMOTE: $REMOTE"
[ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED" [ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED"
``` ```
If `ALREADY_PUSHED`, skip the push but continue to Step 8. Otherwise push with upstream tracking: If `ALREADY_PUSHED`, skip the push but continue to Step 19. Otherwise push with upstream tracking:
```bash ```bash
git push -u origin <branch-name> git push -u origin <branch-name>
@@ -2378,7 +2378,7 @@ git push -u origin <branch-name>
--- ---
## Step 8: Create PR/MR ## Step 19: Create PR/MR
**Idempotency check:** Check if a PR/MR already exists for this branch. **Idempotency check:** Check if a PR/MR already exists for this branch.
@@ -2392,7 +2392,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number):
glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR" glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR"
``` ```
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 8.5. If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 18.
If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0. If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0.
@@ -2408,11 +2408,11 @@ must appear in at least one section. If a commit's work isn't reflected in the s
you missed it.> you missed it.>
## Test Coverage ## Test Coverage
<coverage diagram from Step 3.4, or "All new code paths have test coverage."> <coverage diagram from Step 7, or "All new code paths have test coverage.">
<If Step 3.4 ran: "Tests: {before} → {after} (+{delta} new)"> <If Step 7 ran: "Tests: {before} → {after} (+{delta} new)">
## Pre-Landing Review ## Pre-Landing Review
<findings from Step 3.5 code review, or "No issues found."> <findings from Step 9 code review, or "No issues found.">
## Design Review ## Design Review
<If design review ran: "Design Review (lite): N findings — M auto-fixed, K skipped. AI Slop: clean/N issues."> <If design review ran: "Design Review (lite): N findings — M auto-fixed, K skipped. AI Slop: clean/N issues.">
@@ -2424,19 +2424,19 @@ you missed it.>
## Greptile Review ## Greptile Review
<If Greptile comments were found: bullet list with [FIXED] / [FALSE POSITIVE] / [ALREADY FIXED] tag + one-line summary per comment> <If Greptile comments were found: bullet list with [FIXED] / [FALSE POSITIVE] / [ALREADY FIXED] tag + one-line summary per comment>
<If no Greptile comments found: "No Greptile comments."> <If no Greptile comments found: "No Greptile comments.">
<If no PR existed during Step 3.75: omit this section entirely> <If no PR existed during Step 10: omit this section entirely>
## Scope Drift ## Scope Drift
<If scope drift ran: "Scope Check: CLEAN" or list of drift/creep findings> <If scope drift ran: "Scope Check: CLEAN" or list of drift/creep findings>
<If no scope drift: omit this section> <If no scope drift: omit this section>
## Plan Completion ## Plan Completion
<If plan file found: completion checklist summary from Step 3.45> <If plan file found: completion checklist summary from Step 8>
<If no plan file: "No plan file detected."> <If no plan file: "No plan file detected.">
<If plan items deferred: list deferred items> <If plan items deferred: list deferred items>
## Verification Results ## Verification Results
<If verification ran: summary from Step 3.47 (N PASS, M FAIL, K SKIPPED)> <If verification ran: summary from Step 8.1 (N PASS, M FAIL, K SKIPPED)>
<If skipped: reason (no plan, no server, no verification section)> <If skipped: reason (no plan, no server, no verification section)>
<If not applicable: omit this section> <If not applicable: omit this section>
@@ -2474,11 +2474,11 @@ EOF
**If neither CLI is available:** **If neither CLI is available:**
Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready. Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready.
**Output the PR/MR URL** — then proceed to Step 8.5. **Output the PR/MR URL** — then proceed to Step 18.
--- ---
## Step 8.5: Auto-invoke /document-release ## Step 18: Auto-invoke /document-release
After the PR is created, automatically sync project documentation. Read the After the PR is created, automatically sync project documentation. Read the
`document-release/SKILL.md` skill file (adjacent to this skill's directory) and `document-release/SKILL.md` skill file (adjacent to this skill's directory) and
@@ -2497,11 +2497,11 @@ execute its full workflow:
This step is automatic. Do not ask the user for confirmation. The goal is zero-friction This step is automatic. Do not ask the user for confirmation. The goal is zero-friction
doc updates — the user runs `/ship` and documentation stays current without a separate command. doc updates — the user runs `/ship` and documentation stays current without a separate command.
If Step 8.5 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release. If Step 18 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release.
--- ---
## Step 8.75: Persist ship metrics ## Step 20: Persist ship metrics
Log coverage and plan completion data so `/retro` can track trends: Log coverage and plan completion data so `/retro` can track trends:
@@ -2516,10 +2516,10 @@ echo '{"skill":"ship","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","coverage
``` ```
Substitute from earlier steps: Substitute from earlier steps:
- **COVERAGE_PCT**: coverage percentage from Step 3.4 diagram (integer, or -1 if undetermined) - **COVERAGE_PCT**: coverage percentage from Step 7 diagram (integer, or -1 if undetermined)
- **PLAN_TOTAL**: total plan items extracted in Step 3.45 (0 if no plan file) - **PLAN_TOTAL**: total plan items extracted in Step 8 (0 if no plan file)
- **PLAN_DONE**: count of DONE + CHANGED items from Step 3.45 (0 if no plan file) - **PLAN_DONE**: count of DONE + CHANGED items from Step 8 (0 if no plan file)
- **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 3.47 - **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 8.1
- **VERSION**: from the VERSION file - **VERSION**: from the VERSION file
- **BRANCH**: current branch name - **BRANCH**: current branch name
@@ -2538,6 +2538,6 @@ This step is automatic — never skip it, never ask for confirmation.
- **Split commits for bisectability** — each commit = one logical change. - **Split commits for bisectability** — each commit = one logical change.
- **TODOS.md completion detection must be conservative.** Only mark items as completed when the diff clearly shows the work is done. - **TODOS.md completion detection must be conservative.** Only mark items as completed when the diff clearly shows the work is done.
- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence (inline diff, code references, re-rank suggestion). Never post vague replies. - **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence (inline diff, code references, re-rank suggestion). Never post vague replies.
- **Never push without fresh verification evidence.** If code changed after Step 3 tests, re-run before pushing. - **Never push without fresh verification evidence.** If code changed after Step 5 tests, re-run before pushing.
- **Step 3.4 generates coverage tests.** They must pass before committing. Never commit failing tests. - **Step 7 generates coverage tests.** They must pass before committing. Never commit failing tests.
- **The goal is: user says `/ship`, next thing they see is the review + PR URL + auto-synced docs.** - **The goal is: user says `/ship`, next thing they see is the review + PR URL + auto-synced docs.**
+61 -61
View File
@@ -34,17 +34,17 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat
- Merge conflicts that can't be auto-resolved (stop, show conflicts) - Merge conflicts that can't be auto-resolved (stop, show conflicts)
- In-branch test failures (pre-existing failures are triaged, not auto-blocking) - In-branch test failures (pre-existing failures are triaged, not auto-blocking)
- Pre-landing review finds ASK items that need user judgment - Pre-landing review finds ASK items that need user judgment
- MINOR or MAJOR version bump needed (ask — see Step 4) - MINOR or MAJOR version bump needed (ask — see Step 12)
- Greptile review comments that need user decision (complex fixes, false positives) - Greptile review comments that need user decision (complex fixes, false positives)
- AI-assessed coverage below minimum threshold (hard gate with user override — see Step 3.4) - AI-assessed coverage below minimum threshold (hard gate with user override — see Step 7)
- Plan items NOT DONE with no user override (see Step 3.45) - Plan items NOT DONE with no user override (see Step 8)
- Plan verification failures (see Step 3.47) - Plan verification failures (see Step 8.1)
- TODOS.md missing and user wants to create one (ask — see Step 5.5) - TODOS.md missing and user wants to create one (ask — see Step 14)
- TODOS.md disorganized and user wants to reorganize (ask — see Step 5.5) - TODOS.md disorganized and user wants to reorganize (ask — see Step 14)
**Never stop for:** **Never stop for:**
- Uncommitted changes (always include them) - Uncommitted changes (always include them)
- Version bump choice (auto-pick MICRO or PATCH — see Step 4) - Version bump choice (auto-pick MICRO or PATCH — see Step 12)
- CHANGELOG content (auto-generate from diff) - CHANGELOG content (auto-generate from diff)
- Commit message approval (auto-commit) - Commit message approval (auto-commit)
- Multi-file changesets (auto-split into bisectable commits) - Multi-file changesets (auto-split into bisectable commits)
@@ -57,9 +57,9 @@ Re-running `/ship` means "run the whole checklist again." Every verification ste
(tests, coverage audit, plan completion, pre-landing review, adversarial review, (tests, coverage audit, plan completion, pre-landing review, adversarial review,
VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation. VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation.
Only *actions* are idempotent: Only *actions* are idempotent:
- Step 4: If VERSION already bumped, skip the bump but still read the version - Step 12: If VERSION already bumped, skip the bump but still read the version
- Step 7: If already pushed, skip the push command - Step 17: If already pushed, skip the push command
- Step 8: If PR exists, update the body instead of creating a new PR - Step 19: If PR exists, update the body instead of creating a new PR
Never skip a verification step because a prior `/ship` run already performed it. Never skip a verification step because a prior `/ship` run already performed it.
--- ---
@@ -78,19 +78,19 @@ Never skip a verification step because a prior `/ship` run already performed it.
If the Eng Review is NOT "CLEAR": If the Eng Review is NOT "CLEAR":
Print: "No prior eng review found — ship will run its own pre-landing review in Step 3.5." Print: "No prior eng review found — ship will run its own pre-landing review in Step 9."
Check diff size: `git diff <base>...HEAD --stat | tail -1`. If the diff is >200 lines, add: "Note: This is a large diff. Consider running `/plan-eng-review` or `/autoplan` for architecture-level review before shipping." Check diff size: `git diff <base>...HEAD --stat | tail -1`. If the diff is >200 lines, add: "Note: This is a large diff. Consider running `/plan-eng-review` or `/autoplan` for architecture-level review before shipping."
If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block. If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block.
For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 3.5, but consider running /design-review for a full visual audit post-implementation." Still never block. For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope <base> 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 9, but consider running /design-review for a full visual audit post-implementation." Still never block.
Continue to Step 1.5 — do NOT block or ask. Ship runs its own review in Step 3.5. Continue to Step 2 — do NOT block or ask. Ship runs its own review in Step 9.
--- ---
## Step 1.5: Distribution Pipeline Check ## Step 2: Distribution Pipeline Check
If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web If the diff introduces a new standalone artifact (CLI binary, library package, tool) — not a web
service with existing deployment — verify that a distribution pipeline exists. service with existing deployment — verify that a distribution pipeline exists.
@@ -118,7 +118,7 @@ service with existing deployment — verify that a distribution pipeline exists.
--- ---
## Step 2: Merge the base branch (BEFORE tests) ## Step 3: Merge the base branch (BEFORE tests)
Fetch and merge the base branch into the feature branch so tests run against the merged state: Fetch and merge the base branch into the feature branch so tests run against the merged state:
@@ -132,13 +132,13 @@ git fetch origin <base> && git merge origin/<base> --no-edit
--- ---
## Step 2.5: Test Framework Bootstrap ## Step 4: Test Framework Bootstrap
{{TEST_BOOTSTRAP}} {{TEST_BOOTSTRAP}}
--- ---
## Step 3: Run tests (on merged code) ## Step 5: Run tests (on merged code)
**Do NOT run `RAILS_ENV=test bin/rails db:migrate`** — `bin/test-lane` already calls **Do NOT run `RAILS_ENV=test bin/rails db:migrate`** — `bin/test-lane` already calls
`db:test:prepare` internally, which loads the schema into the correct lane database. `db:test:prepare` internally, which loads the schema into the correct lane database.
@@ -158,13 +158,13 @@ After both complete, read the output files and check pass/fail.
{{TEST_FAILURE_TRIAGE}} {{TEST_FAILURE_TRIAGE}}
**After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 3.25. **After triage:** If any in-branch failures remain unfixed, **STOP**. Do not proceed. If all failures were pre-existing and handled (fixed, TODOed, assigned, or skipped), continue to Step 6.
**If all pass:** Continue silently — just note the counts briefly. **If all pass:** Continue silently — just note the counts briefly.
--- ---
## Step 3.25: Eval Suites (conditional) ## Step 6: Eval Suites (conditional)
Evals are mandatory when prompt-related files change. Skip this step entirely if no prompt files are in the diff. Evals are mandatory when prompt-related files change. Skip this step entirely if no prompt files are in the diff.
@@ -183,7 +183,7 @@ Match against these patterns (from CLAUDE.md):
- `config/system_prompts/*.txt` - `config/system_prompts/*.txt`
- `test/evals/**/*` (eval infrastructure changes affect all suites) - `test/evals/**/*` (eval infrastructure changes affect all suites)
**If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 3.5. **If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 9.
**2. Identify affected eval suites:** **2. Identify affected eval suites:**
@@ -213,9 +213,9 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
**4. Check results:** **4. Check results:**
- **If any eval fails:** Show the failures, the cost dashboard, and **STOP**. Do not proceed. - **If any eval fails:** Show the failures, the cost dashboard, and **STOP**. Do not proceed.
- **If all pass:** Note pass counts and cost. Continue to Step 3.5. - **If all pass:** Note pass counts and cost. Continue to Step 9.
**5. Save eval output** — include eval results and cost dashboard in the PR body (Step 8). **5. Save eval output** — include eval results and cost dashboard in the PR body (Step 19).
**Tier reference (for context — /ship always uses `full`):** **Tier reference (for context — /ship always uses `full`):**
| Tier | When | Speed (cached) | Cost | | Tier | When | Speed (cached) | Cost |
@@ -226,13 +226,13 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
--- ---
## Step 3.4: Test Coverage Audit ## Step 7: Test Coverage Audit
{{TEST_COVERAGE_AUDIT_SHIP}} {{TEST_COVERAGE_AUDIT_SHIP}}
--- ---
## Step 3.45: Plan Completion Audit ## Step 8: Plan Completion Audit
{{PLAN_COMPLETION_AUDIT_SHIP}} {{PLAN_COMPLETION_AUDIT_SHIP}}
@@ -246,7 +246,7 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
--- ---
## Step 3.5: Pre-Landing Review ## Step 9: Pre-Landing Review
Review the diff for structural issues that tests don't catch. Review the diff for structural issues that tests don't catch.
@@ -268,7 +268,7 @@ Review the diff for structural issues that tests don't catch.
{{CROSS_REVIEW_DEDUP}} {{CROSS_REVIEW_DEDUP}}
4. **Classify each finding from both the checklist pass and specialist review (Step 3.55-3.56) as AUTO-FIX or ASK** per the Fix-First Heuristic in 4. **Classify each finding from both the checklist pass and specialist review (Step 9.1-Step 9.2) as AUTO-FIX or ASK** per the Fix-First Heuristic in
checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX. checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX.
5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix: 5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix:
@@ -282,7 +282,7 @@ Review the diff for structural issues that tests don't catch.
7. **After all fixes (auto + user-approved):** 7. **After all fixes (auto + user-approved):**
- If ANY fixes were applied: commit fixed files by name (`git add <fixed-files> && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test. - If ANY fixes were applied: commit fixed files by name (`git add <fixed-files> && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `/ship` again to re-test.
- If no fixes applied (all ASK items skipped, or no issues found): continue to Step 4. - If no fixes applied (all ASK items skipped, or no issues found): continue to Step 12.
8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)` 8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)`
@@ -294,19 +294,19 @@ Review the diff for structural issues that tests don't catch.
``` ```
Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise), Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise),
and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs. and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs.
- `quality_score` = the PR Quality Score computed in Step 3.56 (e.g., 7.5). If specialists were skipped (small diff), use `10.0` - `quality_score` = the PR Quality Score computed in Step 9.2 (e.g., 7.5). If specialists were skipped (small diff), use `10.0`
- `specialists` = the per-specialist stats object compiled in Step 3.56. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}` - `specialists` = the per-specialist stats object compiled in Step 9.2. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}`
- `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip). - `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip).
Save the review output — it goes into the PR body in Step 8. Save the review output — it goes into the PR body in Step 19.
--- ---
## Step 3.75: Address Greptile review comments (if PR exists) ## Step 10: Address Greptile review comments (if PR exists)
Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps. Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 4. **If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 12.
**If Greptile comments are found:** **If Greptile comments are found:**
@@ -337,7 +337,7 @@ For each classified comment:
**SUPPRESSED:** Skip silently — these are known false positives from previous triage. **SUPPRESSED:** Skip silently — these are known false positives from previous triage.
**After all comments are resolved:** If any fixes were applied, the tests from Step 3 are now stale. **Re-run tests** (Step 3) before continuing to Step 4. If no fixes were applied, continue to Step 4. **After all comments are resolved:** If any fixes were applied, the tests from Step 5 are now stale. **Re-run tests** (Step 5) before continuing to Step 12. If no fixes were applied, continue to Step 12.
--- ---
@@ -345,7 +345,7 @@ For each classified comment:
{{LEARNINGS_LOG}} {{LEARNINGS_LOG}}
## Step 4: Version bump (auto-decide) ## Step 12: Version bump (auto-decide)
**Idempotency check:** Before bumping, compare VERSION against the base branch. **Idempotency check:** Before bumping, compare VERSION against the base branch.
@@ -380,7 +380,7 @@ If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (pri
--- ---
## Step 5.5: TODOS.md (auto-update) ## Step 14: TODOS.md (auto-update)
Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized. Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized.
@@ -392,7 +392,7 @@ Read `.claude/skills/review/TODOS-format.md` for the canonical format reference.
- Message: "GStack recommends maintaining a TODOS.md organized by skill/component, then priority (P0 at top through P4, then Completed at bottom). See TODOS-format.md for the full format. Would you like to create one?" - Message: "GStack recommends maintaining a TODOS.md organized by skill/component, then priority (P0 at top through P4, then Completed at bottom). See TODOS-format.md for the full format. Would you like to create one?"
- Options: A) Create it now, B) Skip for now - Options: A) Create it now, B) Skip for now
- If A: Create `TODOS.md` with a skeleton (# TODOS heading + ## Completed section). Continue to step 3. - If A: Create `TODOS.md` with a skeleton (# TODOS heading + ## Completed section). Continue to step 3.
- If B: Skip the rest of Step 5.5. Continue to Step 6. - If B: Skip the rest of Step 14. Continue to Step 15.
**2. Check structure and organization:** **2. Check structure and organization:**
@@ -431,11 +431,11 @@ For each TODO item, check if the changes in this PR complete it by:
**6. Defensive:** If TODOS.md cannot be written (permission error, disk full), warn the user and continue. Never stop the ship workflow for a TODOS failure. **6. Defensive:** If TODOS.md cannot be written (permission error, disk full), warn the user and continue. Never stop the ship workflow for a TODOS failure.
Save this summary — it goes into the PR body in Step 8. Save this summary — it goes into the PR body in Step 19.
--- ---
## Step 6: Commit (bisectable chunks) ## Step 15: Commit (bisectable chunks)
**Goal:** Create small, logical commits that work well with `git bisect` and help LLMs understand what changed. **Goal:** Create small, logical commits that work well with `git bisect` and help LLMs understand what changed.
@@ -473,13 +473,13 @@ EOF
--- ---
## Step 6.5: Verification Gate ## Step 16: Verification Gate
**IRON LAW: NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE.** **IRON LAW: NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE.**
Before pushing, re-verify if code changed during Steps 4-6: Before pushing, re-verify if code changed during Steps 4-6:
1. **Test verification:** If ANY code changed after Step 3's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 3 is NOT acceptable. 1. **Test verification:** If ANY code changed after Step 5's test run (fixes from review findings, CHANGELOG edits don't count), re-run the test suite. Paste fresh output. Stale output from Step 5 is NOT acceptable.
2. **Build verification:** If the project has a build step, run it. Paste output. 2. **Build verification:** If the project has a build step, run it. Paste output.
@@ -489,13 +489,13 @@ Before pushing, re-verify if code changed during Steps 4-6:
- "I already tested earlier" → Code changed since then. Test again. - "I already tested earlier" → Code changed since then. Test again.
- "It's a trivial change" → Trivial changes break production. - "It's a trivial change" → Trivial changes break production.
**If tests fail here:** STOP. Do not push. Fix the issue and return to Step 3. **If tests fail here:** STOP. Do not push. Fix the issue and return to Step 5.
Claiming work is complete without verification is dishonesty, not efficiency. Claiming work is complete without verification is dishonesty, not efficiency.
--- ---
## Step 7: Push ## Step 17: Push
**Idempotency check:** Check if the branch is already pushed and up to date. **Idempotency check:** Check if the branch is already pushed and up to date.
@@ -507,7 +507,7 @@ echo "LOCAL: $LOCAL REMOTE: $REMOTE"
[ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED" [ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED"
``` ```
If `ALREADY_PUSHED`, skip the push but continue to Step 8. Otherwise push with upstream tracking: If `ALREADY_PUSHED`, skip the push but continue to Step 19. Otherwise push with upstream tracking:
```bash ```bash
git push -u origin <branch-name> git push -u origin <branch-name>
@@ -515,7 +515,7 @@ git push -u origin <branch-name>
--- ---
## Step 8: Create PR/MR ## Step 19: Create PR/MR
**Idempotency check:** Check if a PR/MR already exists for this branch. **Idempotency check:** Check if a PR/MR already exists for this branch.
@@ -529,7 +529,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number):
glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR" glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR"
``` ```
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 8.5. If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 18.
If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0. If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0.
@@ -545,11 +545,11 @@ must appear in at least one section. If a commit's work isn't reflected in the s
you missed it.> you missed it.>
## Test Coverage ## Test Coverage
<coverage diagram from Step 3.4, or "All new code paths have test coverage."> <coverage diagram from Step 7, or "All new code paths have test coverage.">
<If Step 3.4 ran: "Tests: {before} → {after} (+{delta} new)"> <If Step 7 ran: "Tests: {before} → {after} (+{delta} new)">
## Pre-Landing Review ## Pre-Landing Review
<findings from Step 3.5 code review, or "No issues found."> <findings from Step 9 code review, or "No issues found.">
## Design Review ## Design Review
<If design review ran: "Design Review (lite): N findings — M auto-fixed, K skipped. AI Slop: clean/N issues."> <If design review ran: "Design Review (lite): N findings — M auto-fixed, K skipped. AI Slop: clean/N issues.">
@@ -561,19 +561,19 @@ you missed it.>
## Greptile Review ## Greptile Review
<If Greptile comments were found: bullet list with [FIXED] / [FALSE POSITIVE] / [ALREADY FIXED] tag + one-line summary per comment> <If Greptile comments were found: bullet list with [FIXED] / [FALSE POSITIVE] / [ALREADY FIXED] tag + one-line summary per comment>
<If no Greptile comments found: "No Greptile comments."> <If no Greptile comments found: "No Greptile comments.">
<If no PR existed during Step 3.75: omit this section entirely> <If no PR existed during Step 10: omit this section entirely>
## Scope Drift ## Scope Drift
<If scope drift ran: "Scope Check: CLEAN" or list of drift/creep findings> <If scope drift ran: "Scope Check: CLEAN" or list of drift/creep findings>
<If no scope drift: omit this section> <If no scope drift: omit this section>
## Plan Completion ## Plan Completion
<If plan file found: completion checklist summary from Step 3.45> <If plan file found: completion checklist summary from Step 8>
<If no plan file: "No plan file detected."> <If no plan file: "No plan file detected.">
<If plan items deferred: list deferred items> <If plan items deferred: list deferred items>
## Verification Results ## Verification Results
<If verification ran: summary from Step 3.47 (N PASS, M FAIL, K SKIPPED)> <If verification ran: summary from Step 8.1 (N PASS, M FAIL, K SKIPPED)>
<If skipped: reason (no plan, no server, no verification section)> <If skipped: reason (no plan, no server, no verification section)>
<If not applicable: omit this section> <If not applicable: omit this section>
@@ -611,11 +611,11 @@ EOF
**If neither CLI is available:** **If neither CLI is available:**
Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready. Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready.
**Output the PR/MR URL** — then proceed to Step 8.5. **Output the PR/MR URL** — then proceed to Step 18.
--- ---
## Step 8.5: Auto-invoke /document-release ## Step 18: Auto-invoke /document-release
After the PR is created, automatically sync project documentation. Read the After the PR is created, automatically sync project documentation. Read the
`document-release/SKILL.md` skill file (adjacent to this skill's directory) and `document-release/SKILL.md` skill file (adjacent to this skill's directory) and
@@ -634,11 +634,11 @@ execute its full workflow:
This step is automatic. Do not ask the user for confirmation. The goal is zero-friction This step is automatic. Do not ask the user for confirmation. The goal is zero-friction
doc updates — the user runs `/ship` and documentation stays current without a separate command. doc updates — the user runs `/ship` and documentation stays current without a separate command.
If Step 8.5 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release. If Step 18 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release.
--- ---
## Step 8.75: Persist ship metrics ## Step 20: Persist ship metrics
Log coverage and plan completion data so `/retro` can track trends: Log coverage and plan completion data so `/retro` can track trends:
@@ -653,10 +653,10 @@ echo '{"skill":"ship","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","coverage
``` ```
Substitute from earlier steps: Substitute from earlier steps:
- **COVERAGE_PCT**: coverage percentage from Step 3.4 diagram (integer, or -1 if undetermined) - **COVERAGE_PCT**: coverage percentage from Step 7 diagram (integer, or -1 if undetermined)
- **PLAN_TOTAL**: total plan items extracted in Step 3.45 (0 if no plan file) - **PLAN_TOTAL**: total plan items extracted in Step 8 (0 if no plan file)
- **PLAN_DONE**: count of DONE + CHANGED items from Step 3.45 (0 if no plan file) - **PLAN_DONE**: count of DONE + CHANGED items from Step 8 (0 if no plan file)
- **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 3.47 - **VERIFY_RESULT**: "pass", "fail", or "skipped" from Step 8.1
- **VERSION**: from the VERSION file - **VERSION**: from the VERSION file
- **BRANCH**: current branch name - **BRANCH**: current branch name
@@ -675,6 +675,6 @@ This step is automatic — never skip it, never ask for confirmation.
- **Split commits for bisectability** — each commit = one logical change. - **Split commits for bisectability** — each commit = one logical change.
- **TODOS.md completion detection must be conservative.** Only mark items as completed when the diff clearly shows the work is done. - **TODOS.md completion detection must be conservative.** Only mark items as completed when the diff clearly shows the work is done.
- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence (inline diff, code references, re-rank suggestion). Never post vague replies. - **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence (inline diff, code references, re-rank suggestion). Never post vague replies.
- **Never push without fresh verification evidence.** If code changed after Step 3 tests, re-run before pushing. - **Never push without fresh verification evidence.** If code changed after Step 5 tests, re-run before pushing.
- **Step 3.4 generates coverage tests.** They must pass before committing. Never commit failing tests. - **Step 7 generates coverage tests.** They must pass before committing. Never commit failing tests.
- **The goal is: user says `/ship`, next thing they see is the review + PR URL + auto-synced docs.** - **The goal is: user says `/ship`, next thing they see is the review + PR URL + auto-synced docs.**
+6 -6
View File
@@ -752,13 +752,13 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
test('ship SKILL.md contains review army specialist dispatch', () => { test('ship SKILL.md contains review army specialist dispatch', () => {
expect(shipSkill).toContain('Specialist Dispatch'); expect(shipSkill).toContain('Specialist Dispatch');
expect(shipSkill).toContain('Step 3.55'); expect(shipSkill).toContain('Step 9.1');
expect(shipSkill).toContain('Step 3.56'); expect(shipSkill).toContain('Step 9.2');
}); });
test('ship SKILL.md contains cross-review finding dedup', () => { test('ship SKILL.md contains cross-review finding dedup', () => {
expect(shipSkill).toContain('Cross-review finding dedup'); expect(shipSkill).toContain('Cross-review finding dedup');
expect(shipSkill).toContain('Step 3.57'); expect(shipSkill).toContain('Step 9.3');
}); });
test('ship SKILL.md contains re-run idempotency behavior', () => { test('ship SKILL.md contains re-run idempotency behavior', () => {
@@ -839,7 +839,7 @@ describe('PLAN_COMPLETION_AUDIT placeholders', () => {
test('ship SKILL.md contains plan completion audit step', () => { test('ship SKILL.md contains plan completion audit step', () => {
expect(shipSkill).toContain('Plan Completion Audit'); expect(shipSkill).toContain('Plan Completion Audit');
expect(shipSkill).toContain('Step 3.45'); expect(shipSkill).toContain('Step 8');
}); });
test('review SKILL.md contains plan completion in scope drift', () => { test('review SKILL.md contains plan completion in scope drift', () => {
@@ -888,7 +888,7 @@ describe('PLAN_VERIFICATION_EXEC placeholder', () => {
const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
test('ship SKILL.md contains plan verification step', () => { test('ship SKILL.md contains plan verification step', () => {
expect(shipSkill).toContain('Step 3.47'); expect(shipSkill).toContain('Step 8.1');
expect(shipSkill).toContain('Plan Verification'); expect(shipSkill).toContain('Plan Verification');
}); });
@@ -946,7 +946,7 @@ describe('Ship metrics logging', () => {
const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
test('ship SKILL.md contains metrics persistence step', () => { test('ship SKILL.md contains metrics persistence step', () => {
expect(shipSkill).toContain('Step 8.75'); expect(shipSkill).toContain('Step 20');
expect(shipSkill).toContain('coverage_pct'); expect(shipSkill).toContain('coverage_pct');
expect(shipSkill).toContain('plan_items_total'); expect(shipSkill).toContain('plan_items_total');
expect(shipSkill).toContain('plan_items_done'); expect(shipSkill).toContain('plan_items_done');
+4 -4
View File
@@ -1004,7 +1004,7 @@ describe('Test Bootstrap ({{TEST_BOOTSTRAP}}) integration', () => {
test('TEST_BOOTSTRAP appears in ship/SKILL.md', () => { test('TEST_BOOTSTRAP appears in ship/SKILL.md', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
expect(content).toContain('Test Framework Bootstrap'); expect(content).toContain('Test Framework Bootstrap');
expect(content).toContain('Step 2.5'); expect(content).toContain('Step 4');
}); });
test('TEST_BOOTSTRAP appears in design-review/SKILL.md', () => { test('TEST_BOOTSTRAP appears in design-review/SKILL.md', () => {
@@ -1099,9 +1099,9 @@ describe('Phase 8e.5 regression test generation', () => {
// --- Step 3.4 coverage audit validation --- // --- Step 3.4 coverage audit validation ---
describe('Step 3.4 test coverage audit', () => { describe('Step 3.4 test coverage audit', () => {
test('ship/SKILL.md contains Step 3.4', () => { test('ship/SKILL.md contains Step 7', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
expect(content).toContain('Step 3.4: Test Coverage Audit'); expect(content).toContain('Step 7: Test Coverage Audit');
expect(content).toContain('CODE PATH COVERAGE'); expect(content).toContain('CODE PATH COVERAGE');
}); });
@@ -1126,7 +1126,7 @@ describe('Step 3.4 test coverage audit', () => {
test('ship rules include test generation rule', () => { test('ship rules include test generation rule', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
expect(content).toContain('Step 3.4 generates coverage tests'); expect(content).toContain('Step 7 generates coverage tests');
expect(content).toContain('Never commit failing tests'); expect(content).toContain('Never commit failing tests');
}); });