diff --git a/SKILL.md b/SKILL.md index dada1e75..f6d2831e 100644 --- a/SKILL.md +++ b/SKILL.md @@ -297,6 +297,28 @@ If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during Only run skills the user explicitly invokes. This preference persists across sessions via `gstack-config`. +If `PROACTIVE` is `true` (default): suggest adjacent gstack skills when relevant to the +user's workflow stage: +- Brainstorming → /office-hours +- Strategy → /plan-ceo-review +- Architecture → /plan-eng-review +- Design → /plan-design-review or /design-consultation +- Auto-review → /autoplan +- Debugging → /investigate +- QA → /qa +- Code review → /review +- Visual audit → /design-review +- Shipping → /ship +- Docs → /document-release +- Retro → /retro +- Second opinion → /codex +- Prod safety → /careful or /guard +- Scoped edits → /freeze or /unfreeze +- Upgrades → /gstack-upgrade + +If the user opts out of suggestions, run `gstack-config set proactive false`. +If they opt back in, run `gstack-config set proactive true`. + # gstack browse: QA Testing & Dogfooding Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command. diff --git a/SKILL.md.tmpl b/SKILL.md.tmpl index fca8fa60..31bd2837 100644 --- a/SKILL.md.tmpl +++ b/SKILL.md.tmpl @@ -20,6 +20,28 @@ If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during Only run skills the user explicitly invokes. This preference persists across sessions via `gstack-config`. +If `PROACTIVE` is `true` (default): suggest adjacent gstack skills when relevant to the +user's workflow stage: +- Brainstorming → /office-hours +- Strategy → /plan-ceo-review +- Architecture → /plan-eng-review +- Design → /plan-design-review or /design-consultation +- Auto-review → /autoplan +- Debugging → /investigate +- QA → /qa +- Code review → /review +- Visual audit → /design-review +- Shipping → /ship +- Docs → /document-release +- Retro → /retro +- Second opinion → /codex +- Prod safety → /careful or /guard +- Scoped edits → /freeze or /unfreeze +- Upgrades → /gstack-upgrade + +If the user opts out of suggestions, run `gstack-config set proactive false`. +If they opt back in, run `gstack-config set proactive true`. + # gstack browse: QA Testing & Dogfooding Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command. diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index e9161eab..14874900 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -547,7 +547,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. What alternatives were dismissed too quickly? What competitive or market risks are unaddressed? What scope decisions will look foolish in 6 months? Be adversarial. No compliments. Just the strategic blind spots. - File: " -s read-only --enable web_search_cached` + File: " -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached` Timeout: 10 minutes **Claude CEO subagent** (via Agent tool): @@ -658,7 +658,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. accessibility requirements (keyboard nav, contrast, touch targets) specified or aspirational? Does the plan describe specific UI decisions or generic patterns? What design decisions will haunt the implementer if left ambiguous? - Be opinionated. No hedging." -s read-only --enable web_search_cached` + Be opinionated. No hedging." -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached` Timeout: 10 minutes **Claude design subagent** (via Agent tool): @@ -723,7 +723,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. CEO: Design: - File: " -s read-only --enable web_search_cached` + File: " -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached` Timeout: 10 minutes **Claude eng subagent** (via Agent tool): diff --git a/autoplan/SKILL.md.tmpl b/autoplan/SKILL.md.tmpl index b3e0a340..661e8fb0 100644 --- a/autoplan/SKILL.md.tmpl +++ b/autoplan/SKILL.md.tmpl @@ -204,7 +204,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. What alternatives were dismissed too quickly? What competitive or market risks are unaddressed? What scope decisions will look foolish in 6 months? Be adversarial. No compliments. Just the strategic blind spots. - File: " -s read-only --enable web_search_cached` + File: " -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached` Timeout: 10 minutes **Claude CEO subagent** (via Agent tool): @@ -315,7 +315,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. accessibility requirements (keyboard nav, contrast, touch targets) specified or aspirational? Does the plan describe specific UI decisions or generic patterns? What design decisions will haunt the implementer if left ambiguous? - Be opinionated. No hedging." -s read-only --enable web_search_cached` + Be opinionated. No hedging." -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached` Timeout: 10 minutes **Claude design subagent** (via Agent tool): @@ -380,7 +380,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles. CEO: Design: - File: " -s read-only --enable web_search_cached` + File: " -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached` Timeout: 10 minutes **Claude eng subagent** (via Agent tool): diff --git a/codex/SKILL.md b/codex/SKILL.md index f34b8db4..8bce22e5 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -518,7 +518,7 @@ With focus (e.g., "security"): 2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout): ```bash -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c " +codex exec "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c " import sys, json for line in sys.stdin: line = line.strip() @@ -603,7 +603,7 @@ THE PLAN: For a **new session:** ```bash -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " +codex exec "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " import sys, json for line in sys.stdin: line = line.strip() @@ -636,7 +636,7 @@ for line in sys.stdin: For a **resumed session** (user chose "Continue"): ```bash -codex exec resume "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " +codex exec resume "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " " ``` diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index c0b7adb1..338df93b 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -159,7 +159,7 @@ With focus (e.g., "security"): 2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout): ```bash -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c " +codex exec "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c " import sys, json for line in sys.stdin: line = line.strip() @@ -244,7 +244,7 @@ THE PLAN: For a **new session:** ```bash -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " +codex exec "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " import sys, json for line in sys.stdin: line = line.strip() @@ -277,7 +277,7 @@ for line in sys.stdin: For a **resumed session** (user chose "Continue"): ```bash -codex exec resume "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " +codex exec resume "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " " ``` diff --git a/scripts/resolvers/design.ts b/scripts/resolvers/design.ts index 30b1fe2c..c4926112 100644 --- a/scripts/resolvers/design.ts +++ b/scripts/resolvers/design.ts @@ -17,7 +17,7 @@ If Codex is available, run a lightweight design check on the diff: \`\`\`bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" +codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: @@ -467,7 +467,7 @@ If user chooses A, launch both voices simultaneously: 1. **Codex** (via Bash, \`model_reasoning_effort="medium"\`): \`\`\`bash TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) -codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" +codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After completion: \`cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"\` @@ -636,7 +636,7 @@ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" 1. **Codex design voice** (via Bash): \`\`\`bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) -codex exec "${escapedCodexPrompt}" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN" +codex exec "${escapedCodexPrompt}" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: \`\`\`bash diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index 1831e098..2b83f36d 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -286,7 +286,7 @@ Write the full prompt (context block + instructions) to this file. Use the mode- \`\`\`bash TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) -codex exec "$(cat "$CODEX_PROMPT_FILE")" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH" +codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: @@ -370,7 +370,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal \`\`\`bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV" +codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV" \`\`\` Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. After the command completes, read stderr: @@ -525,7 +525,7 @@ THE PLAN: \`\`\`bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV" +codex exec "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index d8a071a1..c26bb64b 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -152,6 +152,24 @@ describe('gen-skill-docs', () => { } }); + test('every Codex SKILL.md description stays under 900-char warning threshold', () => { + const WARN_THRESHOLD = 900; + const agentsDir = path.join(ROOT, '.agents', 'skills'); + if (!fs.existsSync(agentsDir)) return; + const violations: string[] = []; + for (const entry of fs.readdirSync(agentsDir, { withFileTypes: true })) { + if (!entry.isDirectory()) continue; + const skillMd = path.join(agentsDir, entry.name, 'SKILL.md'); + if (!fs.existsSync(skillMd)) continue; + const content = fs.readFileSync(skillMd, 'utf-8'); + const description = extractDescription(content); + if (description.length > WARN_THRESHOLD) { + violations.push(`${entry.name}: ${description.length} chars (limit ${MAX_SKILL_DESCRIPTION_LENGTH}, ${MAX_SKILL_DESCRIPTION_LENGTH - description.length} remaining)`); + } + } + expect(violations).toEqual([]); + }); + test('package.json version matches VERSION file', () => { const pkg = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf-8')); const version = fs.readFileSync(path.join(ROOT, 'VERSION'), 'utf-8').trim();