From 403ea84343aff2e1bc14f41a18ebd24e8644f37b Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 21 Mar 2026 12:14:03 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20plan=20file=20review=20report=20?= =?UTF-8?q?=E2=80=94=20markdown=20table=20appended=20to=20plan=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds {{PLAN_FILE_REVIEW_REPORT}} template resolver that instructs review skills to write a structured markdown table (with Review/Trigger/Why/Runs/Status/Findings columns) to the plan file itself, so review status is visible to anyone reading the plan — not just in conversation output. Integrated into plan-ceo-review, plan-eng-review, plan-design-review, and codex skill templates. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../skills/gstack-plan-ceo-review/SKILL.md | 62 ++++++++++++++++++ .../skills/gstack-plan-design-review/SKILL.md | 62 ++++++++++++++++++ .../skills/gstack-plan-eng-review/SKILL.md | 62 ++++++++++++++++++ codex/SKILL.md | 62 ++++++++++++++++++ codex/SKILL.md.tmpl | 2 + plan-ceo-review/SKILL.md | 62 ++++++++++++++++++ plan-ceo-review/SKILL.md.tmpl | 2 + plan-design-review/SKILL.md | 62 ++++++++++++++++++ plan-design-review/SKILL.md.tmpl | 2 + plan-eng-review/SKILL.md | 62 ++++++++++++++++++ plan-eng-review/SKILL.md.tmpl | 2 + scripts/gen-skill-docs.ts | 65 +++++++++++++++++++ test/gen-skill-docs.test.ts | 24 +++++++ 13 files changed, 531 insertions(+) diff --git a/.agents/skills/gstack-plan-ceo-review/SKILL.md b/.agents/skills/gstack-plan-ceo-review/SKILL.md index b43b8e0a..4f7fda21 100644 --- a/.agents/skills/gstack-plan-ceo-review/SKILL.md +++ b/.agents/skills/gstack-plan-ceo-review/SKILL.md @@ -1074,6 +1074,68 @@ Parse the output. 
Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes +## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. Each skill logs different fields: + +- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \`status\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \`status\`, \`gate\`, \`findings\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. 
+ +Produce this markdown table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | +\`\`\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". + +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \`## GSTACK REVIEW REPORT\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \`## GSTACK REVIEW REPORT\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. +- Always place it as the very last section in the plan file. + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this CEO review discovered. 
Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/.agents/skills/gstack-plan-design-review/SKILL.md b/.agents/skills/gstack-plan-design-review/SKILL.md index e431d72c..86ea0705 100644 --- a/.agents/skills/gstack-plan-design-review/SKILL.md +++ b/.agents/skills/gstack-plan-design-review/SKILL.md @@ -542,6 +542,68 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes +## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. 
Each skill logs different fields: + +- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \`status\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \`status\`, \`gate\`, \`findings\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. + +Produce this markdown table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | +\`\`\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". 
+ +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \`## GSTACK REVIEW REPORT\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \`## GSTACK REVIEW REPORT\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. +- Always place it as the very last section in the plan file. + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this design review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/.agents/skills/gstack-plan-eng-review/SKILL.md b/.agents/skills/gstack-plan-eng-review/SKILL.md index 01233079..18eab2ab 100644 --- a/.agents/skills/gstack-plan-eng-review/SKILL.md +++ b/.agents/skills/gstack-plan-eng-review/SKILL.md @@ -531,6 +531,68 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes +## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. 
If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. Each skill logs different fields: + +- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \`status\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \`status\`, \`gate\`, \`findings\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. 
+ +Produce this markdown table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | +\`\`\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". + +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \`## GSTACK REVIEW REPORT\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \`## GSTACK REVIEW REPORT\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. +- Always place it as the very last section in the plan file. + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, check if additional reviews would be valuable. Read the dashboard output to see which reviews have already been run and whether they are stale. 
diff --git a/codex/SKILL.md b/codex/SKILL.md index 77705f7e..604c364e 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -358,6 +358,68 @@ GATE ("pass" or "fail"), findings (count of [P1] + [P2] markers). rm -f "$TMPERR" ``` +## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. Each skill logs different fields: + +- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \`status\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \`status\`, \`gate\`, \`findings\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. 
+ +Produce this markdown table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | +\`\`\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". + +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \`## GSTACK REVIEW REPORT\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \`## GSTACK REVIEW REPORT\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. +- Always place it as the very last section in the plan file. 
+ --- ## Step 2B: Challenge (Adversarial) Mode diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index 30b603ee..2961e4c1 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -137,6 +137,8 @@ GATE ("pass" or "fail"), findings (count of [P1] + [P2] markers). rm -f "$TMPERR" ``` +{{PLAN_FILE_REVIEW_REPORT}} + --- ## Step 2B: Challenge (Adversarial) Mode diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index be25485a..47df0bc4 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -1082,6 +1082,68 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes +## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. 
Each skill logs different fields: + +- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \`status\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \`status\`, \`gate\`, \`findings\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. + +Produce this markdown table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | +\`\`\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". 
+ +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \`## GSTACK REVIEW REPORT\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \`## GSTACK REVIEW REPORT\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. +- Always place it as the very last section in the plan file. + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this CEO review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/plan-ceo-review/SKILL.md.tmpl b/plan-ceo-review/SKILL.md.tmpl index fea6879c..984e10c0 100644 --- a/plan-ceo-review/SKILL.md.tmpl +++ b/plan-ceo-review/SKILL.md.tmpl @@ -743,6 +743,8 @@ Before running this command, substitute the placeholder values from the Completi {{REVIEW_DASHBOARD}} +{{PLAN_FILE_REVIEW_REPORT}} + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this CEO review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index 1483e6e8..9ca22aea 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -550,6 +550,68 @@ Parse the output. 
Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes +## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. Each skill logs different fields: + +- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \`status\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \`status\`, \`gate\`, \`findings\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. 
+ +Produce this markdown table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | +\`\`\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". + +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \`## GSTACK REVIEW REPORT\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \`## GSTACK REVIEW REPORT\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. +- Always place it as the very last section in the plan file. + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this design review discovered. 
Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/plan-design-review/SKILL.md.tmpl b/plan-design-review/SKILL.md.tmpl index 597ff6a7..96ddaa29 100644 --- a/plan-design-review/SKILL.md.tmpl +++ b/plan-design-review/SKILL.md.tmpl @@ -288,6 +288,8 @@ Substitute values from the Completion Summary: {{REVIEW_DASHBOARD}} +{{PLAN_FILE_REVIEW_REPORT}} + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this design review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index 63fda40b..ce62e4eb 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -540,6 +540,68 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes +## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. 
Each skill logs different fields: + +- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`commit\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \`status\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \`status\`, \`gate\`, \`findings\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. + +Produce this markdown table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | +\`\`\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". 
+ +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \`## GSTACK REVIEW REPORT\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \`## GSTACK REVIEW REPORT\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. +- Always place it as the very last section in the plan file. + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, check if additional reviews would be valuable. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/plan-eng-review/SKILL.md.tmpl b/plan-eng-review/SKILL.md.tmpl index 09782a9d..be75341b 100644 --- a/plan-eng-review/SKILL.md.tmpl +++ b/plan-eng-review/SKILL.md.tmpl @@ -278,6 +278,8 @@ Substitute values from the Completion Summary: {{REVIEW_DASHBOARD}} +{{PLAN_FILE_REVIEW_REPORT}} + ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, check if additional reviews would be valuable. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 8bb16bf9..d102dfe1 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -1107,6 +1107,70 @@ Parse the output. 
Find the most recent entry for each skill (plan-ceo-review, pl - If all reviews match the current HEAD, do not display any staleness notes`; } +function generatePlanFileReviewReport(_ctx: TemplateContext): string { + return `## Plan File Review Report + +After displaying the Review Readiness Dashboard in conversation output, also update the +**plan file** itself so review status is visible to anyone reading the plan. + +### Detect the plan file + +1. Check if there is an active plan file in this conversation (the host provides plan file + paths in system messages — look for plan file references in the conversation context). +2. If not found, skip this section silently — not every review runs in plan mode. + +### Generate the report + +Read the review log output you already have from the Review Readiness Dashboard step above. +Parse each JSONL entry. Each skill logs different fields: + +- **plan-ceo-review**: \\\`status\\\`, \\\`unresolved\\\`, \\\`critical_gaps\\\`, \\\`mode\\\`, \\\`commit\\\` + → Findings: "{N} proposals, {M} accepted, {K} deferred" (from your Completion Summary) +- **plan-eng-review**: \\\`status\\\`, \\\`unresolved\\\`, \\\`critical_gaps\\\`, \\\`mode\\\`, \\\`commit\\\` + → Findings: "{N} issues, {M} critical gaps, mode: {MODE}" +- **plan-design-review**: \\\`status\\\`, \\\`overall_score\\\`, \\\`unresolved\\\`, \\\`decisions_made\\\`, \\\`commit\\\` + → Findings: "score: {N}/10 → {M}/10, {K} decisions made" +- **codex-review**: \\\`status\\\`, \\\`gate\\\`, \\\`findings\\\` + → Findings: "{N} findings, {M}/{N} fixed" + +For the review you just completed, use details from your own Completion Summary (richer +than the JSONL). For prior reviews, use the JSONL fields to reconstruct a summary. 
+ +Produce this markdown table: + +\\\`\\\`\\\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \\\`/plan-ceo-review\\\` | Scope & strategy | {runs} | {status} | {findings} | +| Codex Review | \\\`/codex review\\\` | Independent 2nd opinion | {runs} | {status} | {findings} | +| Eng Review | \\\`/plan-eng-review\\\` | Architecture & tests (required) | {runs} | {status} | {findings} | +| Design Review | \\\`/plan-design-review\\\` | UI/UX gaps | {runs} | {status} | {findings} | +\\\`\\\`\\\` + +Below the table, add these lines (omit any that are empty/not applicable): + +- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes +- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis +- **UNRESOLVED:** total unresolved decisions across all reviews +- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). + If Eng Review is not CLEAR and not skipped globally, append "eng review required". + +### Write to the plan file + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +- If a \\\`## GSTACK REVIEW REPORT\\\` section already exists at the end of the plan file, + **replace it** entirely using the Edit tool (match from \\\`## GSTACK REVIEW REPORT\\\` to + the end of the file). If the Edit fails (e.g., concurrent edit changed the content), + re-read the plan file and retry once with the updated content. +- If no such section exists, **append it** to the end of the plan file. 
+- Always place it as the very last section in the plan file.`; +} + function generateTestBootstrap(_ctx: TemplateContext): string { return `## Test Framework Bootstrap @@ -1555,6 +1619,7 @@ const RESOLVERS: Record<string, (ctx: TemplateContext) => string> = { DESIGN_METHODOLOGY: generateDesignMethodology, DESIGN_REVIEW_LITE: generateDesignReviewLite, REVIEW_DASHBOARD: generateReviewDashboard, + PLAN_FILE_REVIEW_REPORT: generatePlanFileReviewReport, TEST_BOOTSTRAP: generateTestBootstrap, SPEC_REVIEW_LOOP: generateSpecReviewLoop, DESIGN_SKETCH: generateDesignSketch, diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 64b39118..cc75da65 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -416,6 +416,30 @@ describe('REVIEW_DASHBOARD resolver', () => { }); }); +// --- {{PLAN_FILE_REVIEW_REPORT}} resolver tests --- + +describe('PLAN_FILE_REVIEW_REPORT resolver', () => { + const REVIEW_SKILLS = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'codex']; + + for (const skill of REVIEW_SKILLS) { + test(`plan file review report appears in ${skill} generated file`, () => { + const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8'); + expect(content).toContain('GSTACK REVIEW REPORT'); + }); + } + + test('resolver output contains key report elements', () => { + const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8'); + expect(content).toContain('Trigger'); + expect(content).toContain('Findings'); + expect(content).toContain('VERDICT'); + expect(content).toContain('/plan-ceo-review'); + expect(content).toContain('/plan-eng-review'); + expect(content).toContain('/plan-design-review'); + expect(content).toContain('/codex review'); + }); +}); + // --- {{SPEC_REVIEW_LOOP}} resolver tests --- describe('SPEC_REVIEW_LOOP resolver', () => {