From e4313440786833c47b7df3a6c1db43636c4835f8 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Thu, 26 Mar 2026 22:27:02 -0600 Subject: [PATCH] =?UTF-8?q?fix:=20address=20adversarial=20review=20finding?= =?UTF-8?q?s=20=E2=80=94=20codex=20review=20cwd,=20test=20scope,=20fail-lo?= =?UTF-8?q?ud?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. codex review commands now cd to $_REPO_ROOT (review doesn't support -C) 2. Autoplan codex commands converted from prose "Prerequisite" to fenced bash blocks 3. || pwd fallback replaced with hard fail — silent wrong-dir is worse than error 4. Regression test now scans all resolver .ts files + generated SKILL.md files Co-Authored-By: Claude Opus 4.6 (1M context) --- autoplan/SKILL.md | 24 ++++++++++++++--------- autoplan/SKILL.md.tmpl | 24 ++++++++++++++--------- codex/SKILL.md | 10 +++++++--- codex/SKILL.md.tmpl | 10 +++++++--- design-consultation/SKILL.md | 2 +- design-review/SKILL.md | 2 +- office-hours/SKILL.md | 4 ++-- plan-ceo-review/SKILL.md | 2 +- plan-design-review/SKILL.md | 2 +- plan-eng-review/SKILL.md | 2 +- review/SKILL.md | 6 ++++-- scripts/resolvers/design.ts | 6 +++--- scripts/resolvers/review.ts | 8 +++++--- ship/SKILL.md | 6 ++++-- test/gen-skill-docs.test.ts | 38 ++++++++++++++++++++++++++++++------ 15 files changed, 99 insertions(+), 47 deletions(-) diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index a608d00f..5f8b5013 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -587,14 +587,16 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. Run them simultaneously (Agent tool for subagent, Bash for Codex). **Codex CEO voice** (via Bash): - Command: `codex exec "You are a CEO/founder advisor reviewing a development plan. + ```bash + _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } + codex exec "You are a CEO/founder advisor reviewing a development plan. Challenge the strategic foundations: Are the premises valid or assumed? Is this the right problem to solve, or is there a reframing that would be 10x more impactful? What alternatives were dismissed too quickly? What competitive or market risks are unaddressed? What scope decisions will look foolish in 6 months? Be adversarial. No compliments. Just the strategic blind spots. - File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached` - Prerequisite: resolve `_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)` at the start of the same bash call. + File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached + ``` Timeout: 10 minutes **Claude CEO subagent** (via Agent tool): @@ -693,7 +695,9 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. - Dual voices: always run BOTH Claude subagent AND Codex if available (P6). **Codex design voice** (via Bash): - Command: `codex exec "Read the plan file at . Evaluate this plan's + ```bash + _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } + codex exec "Read the plan file at . Evaluate this plan's UI/UX design decisions. Also consider these findings from the CEO review phase: @@ -705,8 +709,8 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. accessibility requirements (keyboard nav, contrast, touch targets) specified or aspirational? Does the plan describe specific UI decisions or generic patterns? What design decisions will haunt the implementer if left ambiguous? - Be opinionated. No hedging." -C "$_REPO_ROOT" -s read-only --enable web_search_cached` - Prerequisite: resolve `_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)` at the start of the same bash call. + Be opinionated. No hedging." -C "$_REPO_ROOT" -s read-only --enable web_search_cached + ``` Timeout: 10 minutes **Claude design subagent** (via Agent tool): @@ -764,15 +768,17 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. - Dual voices: always run BOTH Claude subagent AND Codex if available (P6). **Codex eng voice** (via Bash): - Command: `codex exec "Review this plan for architectural issues, missing edge cases, + ```bash + _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } + codex exec "Review this plan for architectural issues, missing edge cases, and hidden complexity. Be adversarial. Also consider these findings from prior review phases: CEO: Design: - File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached` - Prerequisite: resolve `_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)` at the start of the same bash call. + File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached + ``` Timeout: 10 minutes **Claude eng subagent** (via Agent tool): diff --git a/autoplan/SKILL.md.tmpl b/autoplan/SKILL.md.tmpl index 7283396a..3fb94483 100644 --- a/autoplan/SKILL.md.tmpl +++ b/autoplan/SKILL.md.tmpl @@ -198,14 +198,16 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. Run them simultaneously (Agent tool for subagent, Bash for Codex). **Codex CEO voice** (via Bash): - Command: `codex exec "You are a CEO/founder advisor reviewing a development plan. + ```bash + _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } + codex exec "You are a CEO/founder advisor reviewing a development plan. Challenge the strategic foundations: Are the premises valid or assumed? Is this the right problem to solve, or is there a reframing that would be 10x more impactful? What alternatives were dismissed too quickly? What competitive or market risks are unaddressed? What scope decisions will look foolish in 6 months? Be adversarial. No compliments. Just the strategic blind spots. - File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached` - Prerequisite: resolve `_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)` at the start of the same bash call. + File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached + ``` Timeout: 10 minutes **Claude CEO subagent** (via Agent tool): @@ -304,7 +306,9 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. - Dual voices: always run BOTH Claude subagent AND Codex if available (P6). **Codex design voice** (via Bash): - Command: `codex exec "Read the plan file at . Evaluate this plan's + ```bash + _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } + codex exec "Read the plan file at . Evaluate this plan's UI/UX design decisions. Also consider these findings from the CEO review phase: @@ -316,8 +320,8 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. accessibility requirements (keyboard nav, contrast, touch targets) specified or aspirational? Does the plan describe specific UI decisions or generic patterns? What design decisions will haunt the implementer if left ambiguous? - Be opinionated. No hedging." -C "$_REPO_ROOT" -s read-only --enable web_search_cached` - Prerequisite: resolve `_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)` at the start of the same bash call. + Be opinionated. No hedging." -C "$_REPO_ROOT" -s read-only --enable web_search_cached + ``` Timeout: 10 minutes **Claude design subagent** (via Agent tool): @@ -375,15 +379,17 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. - Dual voices: always run BOTH Claude subagent AND Codex if available (P6). **Codex eng voice** (via Bash): - Command: `codex exec "Review this plan for architectural issues, missing edge cases, + ```bash + _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } + codex exec "Review this plan for architectural issues, missing edge cases, and hidden complexity. Be adversarial. Also consider these findings from prior review phases: CEO: Design: - File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached` - Prerequisite: resolve `_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)` at the start of the same bash call. + File: " -C "$_REPO_ROOT" -s read-only --enable web_search_cached + ``` Timeout: 10 minutes **Claude eng subagent** (via Agent tool): diff --git a/codex/SKILL.md b/codex/SKILL.md index 88405fab..47128037 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -428,6 +428,8 @@ TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt) 2. Run the review (5-minute timeout): ```bash +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } +cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` @@ -436,6 +438,8 @@ If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. Use `timeout: 300000` on the Bash call. If the user provided custom instructions (e.g., `/codex review focus on security`), pass them as the prompt argument: ```bash +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } +cd "$_REPO_ROOT" codex review "focus on security" --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` @@ -577,7 +581,7 @@ With focus (e.g., "security"): If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json 2>/dev/null | PYTHONUNBUFFERED=1 python3 -u -c " import sys, json for line in sys.stdin: @@ -677,7 +681,7 @@ If the user passed `--xhigh`, use `"xhigh"` instead of `"medium"`. For a **new session:** ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " import sys, json for line in sys.stdin: @@ -711,7 +715,7 @@ for line in sys.stdin: For a **resumed session** (user chose "Continue"): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec resume "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " " diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index 7111b9cd..60247abd 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -88,6 +88,8 @@ TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt) 2. Run the review (5-minute timeout): ```bash +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } +cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` @@ -96,6 +98,8 @@ If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. Use `timeout: 300000` on the Bash call. If the user provided custom instructions (e.g., `/codex review focus on security`), pass them as the prompt argument: ```bash +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } +cd "$_REPO_ROOT" codex review "focus on security" --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` @@ -172,7 +176,7 @@ With focus (e.g., "security"): If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`. ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json 2>/dev/null | PYTHONUNBUFFERED=1 python3 -u -c " import sys, json for line in sys.stdin: @@ -272,7 +276,7 @@ If the user passed `--xhigh`, use `"xhigh"` instead of `"medium"`. For a **new session:** ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " import sys, json for line in sys.stdin: @@ -306,7 +310,7 @@ for line in sys.stdin: For a **resumed session** (user chose "Continue"): ```bash -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec resume "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached --json 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c " " diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index 3273d90a..52cef88a 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -472,7 +472,7 @@ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" 1. **Codex design voice** (via Bash): ```bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Given this product context, propose a complete design direction: - Visual thesis: one sentence describing mood, material, and energy - Typography: specific font names (not defaults — no Inter/Roboto/Arial/system) + hex colors diff --git a/design-review/SKILL.md b/design-review/SKILL.md index 395bdd33..2f64917c 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -997,7 +997,7 @@ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" 1. **Codex design voice** (via Bash): ```bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the frontend source code in this repo. Evaluate against these design hard rules: - Spacing: systematic (design tokens / CSS variables) or magic numbers? - Typography: expressive purposeful fonts or default stacks? diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index a84fcf8a..bbee02fe 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -714,7 +714,7 @@ Write the full prompt (context block + instructions) to this file. Use the mode- ```bash TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" ``` @@ -866,7 +866,7 @@ If user chooses A, launch both voices simultaneously: 1. **Codex** (via Bash, `model_reasoning_effort="medium"`): ```bash TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" ``` Use a 5-minute timeout (`timeout: 300000`). After completion: `cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"` diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index 6059d05e..675487a2 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -1091,7 +1091,7 @@ THE PLAN: ```bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" ``` diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index de83956d..31389bbc 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -489,7 +489,7 @@ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" 1. **Codex design voice** (via Bash): ```bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Read the plan file at [plan-file-path]. Evaluate this plan's UI/UX design against these criteria. HARD REJECTION — flag if ANY apply: diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index f1f999da..41a29f2b 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -749,7 +749,7 @@ THE PLAN: ```bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" ``` diff --git a/review/SKILL.md b/review/SKILL.md index 46c2de19..05df971d 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -621,7 +621,7 @@ If Codex is available, run a lightweight design check on the diff: ```bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" ``` @@ -980,7 +980,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal ```bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" ``` @@ -1026,6 +1026,8 @@ Claude's structured review already ran. Now run **all three remaining passes** f **1. Codex structured review (if available):** ```bash TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } +cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` diff --git a/scripts/resolvers/design.ts b/scripts/resolvers/design.ts index abd8fbb7..a59f516f 100644 --- a/scripts/resolvers/design.ts +++ b/scripts/resolvers/design.ts @@ -17,7 +17,7 @@ If Codex is available, run a lightweight design check on the diff: \`\`\`bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" \`\`\` @@ -468,7 +468,7 @@ If user chooses A, launch both voices simultaneously: 1. **Codex** (via Bash, \`model_reasoning_effort="medium"\`): \`\`\`bash TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After completion: \`cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"\` @@ -638,7 +638,7 @@ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" 1. **Codex design voice** (via Bash): \`\`\`bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "${escapedCodexPrompt}" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index 8845b239..a4963b13 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -292,7 +292,7 @@ Write the full prompt (context block + instructions) to this file. Use the mode- \`\`\`bash TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" \`\`\` @@ -377,7 +377,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal \`\`\`bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" \`\`\` @@ -423,6 +423,8 @@ Claude's structured review already ran. Now run **all three remaining passes** f **1. Codex structured review (if available):** \`\`\`bash TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } +cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" \`\`\` @@ -533,7 +535,7 @@ THE PLAN: \`\`\`bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" \`\`\` diff --git a/ship/SKILL.md b/ship/SKILL.md index d7895c3f..6192c50b 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -1347,7 +1347,7 @@ If Codex is available, run a lightweight design check on the diff: ```bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" ``` @@ -1470,7 +1470,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal ```bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -_REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" ``` @@ -1516,6 +1516,8 @@ Claude's structured review already ran. Now run **all three remaining passes** f **1. Codex structured review (if available):** ```bash TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) +_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } +cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 55ee4c5f..71dc80d6 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -1648,17 +1648,20 @@ describe('telemetry', () => { }); }); -describe('codex exec -C must use $_REPO_ROOT, not inline $(git rev-parse)', () => { +describe('codex commands must not use inline $(git rev-parse --show-toplevel) for cwd', () => { // Regression test: inline $(git rev-parse --show-toplevel) in codex exec -C - // evaluates in whatever cwd the background shell inherits, which may be a - // different project in Conductor workspaces. The fix is to resolve _REPO_ROOT - // eagerly at the top of each bash block. + // or codex review without cd evaluates in whatever cwd the background shell + // inherits, which may be a different project in Conductor workspaces. + // The fix is to resolve _REPO_ROOT eagerly at the top of each bash block. + // Scan all source files that could contain codex commands const sourceFiles = [ ...fs.readdirSync(ROOT, { recursive: true }) .filter((f): f is string => typeof f === 'string' && f.endsWith('.tmpl') && !f.includes('node_modules')), - 'scripts/resolvers/review.ts', - 'scripts/resolvers/design.ts', + ...fs.readdirSync(path.join(ROOT, 'scripts/resolvers')) + .filter(f => f.endsWith('.ts')) + .map(f => `scripts/resolvers/${f}`), + 'scripts/gen-skill-docs.ts', ]; test('no codex exec command uses inline $(git rev-parse --show-toplevel) in -C flag', () => { @@ -1677,4 +1680,27 @@ describe('codex exec -C must use $_REPO_ROOT, not inline $(git rev-parse)', () = } expect(violations).toEqual([]); }); + + test('no generated SKILL.md has codex exec with inline $(git rev-parse --show-toplevel) in -C flag', () => { + const violations: string[] = []; + const skillMdFiles = fs.readdirSync(ROOT, { recursive: true }) + .filter((f): f is string => + typeof f === 'string' && + f.endsWith('SKILL.md') && + !f.includes('node_modules') && + !f.includes('.tmpl')); + for (const rel of skillMdFiles) { + const abs = path.join(ROOT, rel); + if (!fs.existsSync(abs)) continue; + const content = fs.readFileSync(abs, 'utf-8'); + const lines = content.split('\n'); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line.includes('codex exec') && line.includes('-C') && line.includes('$(git rev-parse --show-toplevel)')) { + violations.push(`${rel}:${i + 1}`); + } + } + } + expect(violations).toEqual([]); + }); });