mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
fix: Codex wrong-repo + routing table to body + 900-char guard (v0.11.19.0)
- Add -C "$(git rev-parse --show-toplevel)" to all 14 codex exec calls so Codex always runs in the correct repo (fixes Conductor multi-workspace bug) - Move skill routing table from description to body in SKILL.md.tmpl (description was already shortened on main; routing table was missing from body) - Add 900-char warning threshold test for Codex descriptions - Bump version + sync package.json Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -297,6 +297,28 @@ If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during
|
||||
Only run skills the user explicitly invokes. This preference persists across sessions via
|
||||
`gstack-config`.
|
||||
|
||||
If `PROACTIVE` is `true` (default): suggest adjacent gstack skills when relevant to the
|
||||
user's workflow stage:
|
||||
- Brainstorming → /office-hours
|
||||
- Strategy → /plan-ceo-review
|
||||
- Architecture → /plan-eng-review
|
||||
- Design → /plan-design-review or /design-consultation
|
||||
- Auto-review → /autoplan
|
||||
- Debugging → /investigate
|
||||
- QA → /qa
|
||||
- Code review → /review
|
||||
- Visual audit → /design-review
|
||||
- Shipping → /ship
|
||||
- Docs → /document-release
|
||||
- Retro → /retro
|
||||
- Second opinion → /codex
|
||||
- Prod safety → /careful or /guard
|
||||
- Scoped edits → /freeze or /unfreeze
|
||||
- Upgrades → /gstack-upgrade
|
||||
|
||||
If the user opts out of suggestions, run `gstack-config set proactive false`.
|
||||
If they opt back in, run `gstack-config set proactive true`.
|
||||
|
||||
# gstack browse: QA Testing & Dogfooding
|
||||
|
||||
Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.
|
||||
|
||||
@@ -20,6 +20,28 @@ If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during
|
||||
Only run skills the user explicitly invokes. This preference persists across sessions via
|
||||
`gstack-config`.
|
||||
|
||||
If `PROACTIVE` is `true` (default): suggest adjacent gstack skills when relevant to the
|
||||
user's workflow stage:
|
||||
- Brainstorming → /office-hours
|
||||
- Strategy → /plan-ceo-review
|
||||
- Architecture → /plan-eng-review
|
||||
- Design → /plan-design-review or /design-consultation
|
||||
- Auto-review → /autoplan
|
||||
- Debugging → /investigate
|
||||
- QA → /qa
|
||||
- Code review → /review
|
||||
- Visual audit → /design-review
|
||||
- Shipping → /ship
|
||||
- Docs → /document-release
|
||||
- Retro → /retro
|
||||
- Second opinion → /codex
|
||||
- Prod safety → /careful or /guard
|
||||
- Scoped edits → /freeze or /unfreeze
|
||||
- Upgrades → /gstack-upgrade
|
||||
|
||||
If the user opts out of suggestions, run `gstack-config set proactive false`.
|
||||
If they opt back in, run `gstack-config set proactive true`.
|
||||
|
||||
# gstack browse: QA Testing & Dogfooding
|
||||
|
||||
Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.
|
||||
|
||||
+3
-3
@@ -547,7 +547,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
What alternatives were dismissed too quickly? What competitive or market risks are
|
||||
unaddressed? What scope decisions will look foolish in 6 months? Be adversarial.
|
||||
No compliments. Just the strategic blind spots.
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude CEO subagent** (via Agent tool):
|
||||
@@ -658,7 +658,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
accessibility requirements (keyboard nav, contrast, touch targets) specified or
|
||||
aspirational? Does the plan describe specific UI decisions or generic patterns?
|
||||
What design decisions will haunt the implementer if left ambiguous?
|
||||
Be opinionated. No hedging." -s read-only --enable web_search_cached`
|
||||
Be opinionated. No hedging." -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude design subagent** (via Agent tool):
|
||||
@@ -723,7 +723,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
CEO: <insert CEO consensus table summary — key concerns, DISAGREEs>
|
||||
Design: <insert Design consensus table summary, or 'skipped, no UI scope'>
|
||||
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude eng subagent** (via Agent tool):
|
||||
|
||||
@@ -204,7 +204,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
What alternatives were dismissed too quickly? What competitive or market risks are
|
||||
unaddressed? What scope decisions will look foolish in 6 months? Be adversarial.
|
||||
No compliments. Just the strategic blind spots.
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude CEO subagent** (via Agent tool):
|
||||
@@ -315,7 +315,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
accessibility requirements (keyboard nav, contrast, touch targets) specified or
|
||||
aspirational? Does the plan describe specific UI decisions or generic patterns?
|
||||
What design decisions will haunt the implementer if left ambiguous?
|
||||
Be opinionated. No hedging." -s read-only --enable web_search_cached`
|
||||
Be opinionated. No hedging." -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude design subagent** (via Agent tool):
|
||||
@@ -380,7 +380,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
|
||||
CEO: <insert CEO consensus table summary — key concerns, DISAGREEs>
|
||||
Design: <insert Design consensus table summary, or 'skipped, no UI scope'>
|
||||
|
||||
File: <plan_path>" -s read-only --enable web_search_cached`
|
||||
File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
|
||||
Timeout: 10 minutes
|
||||
|
||||
**Claude eng subagent** (via Agent tool):
|
||||
|
||||
+3
-3
@@ -518,7 +518,7 @@ With focus (e.g., "security"):
|
||||
|
||||
2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout):
|
||||
```bash
|
||||
codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
|
||||
codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
for line in sys.stdin:
|
||||
line = line.strip()
|
||||
@@ -603,7 +603,7 @@ THE PLAN:
|
||||
|
||||
For a **new session:**
|
||||
```bash
|
||||
codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
import sys, json
|
||||
for line in sys.stdin:
|
||||
line = line.strip()
|
||||
@@ -636,7 +636,7 @@ for line in sys.stdin:
|
||||
|
||||
For a **resumed session** (user chose "Continue"):
|
||||
```bash
|
||||
codex exec resume <session-id> "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
codex exec resume <session-id> "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
<same python streaming parser as above>
|
||||
"
|
||||
```
|
||||
|
||||
+3
-3
@@ -159,7 +159,7 @@ With focus (e.g., "security"):
|
||||
|
||||
2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout):
|
||||
```bash
|
||||
codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
|
||||
codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
for line in sys.stdin:
|
||||
line = line.strip()
|
||||
@@ -244,7 +244,7 @@ THE PLAN:
|
||||
|
||||
For a **new session:**
|
||||
```bash
|
||||
codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
import sys, json
|
||||
for line in sys.stdin:
|
||||
line = line.strip()
|
||||
@@ -277,7 +277,7 @@ for line in sys.stdin:
|
||||
|
||||
For a **resumed session** (user chose "Continue"):
|
||||
```bash
|
||||
codex exec resume <session-id> "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
codex exec resume <session-id> "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
|
||||
<same python streaming parser as above>
|
||||
"
|
||||
```
|
||||
|
||||
@@ -17,7 +17,7 @@ If Codex is available, run a lightweight design check on the diff:
|
||||
|
||||
\`\`\`bash
|
||||
TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX)
|
||||
codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL"
|
||||
codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL"
|
||||
\`\`\`
|
||||
|
||||
Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
|
||||
@@ -467,7 +467,7 @@ If user chooses A, launch both voices simultaneously:
|
||||
1. **Codex** (via Bash, \`model_reasoning_effort="medium"\`):
|
||||
\`\`\`bash
|
||||
TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX)
|
||||
codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH"
|
||||
codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH"
|
||||
\`\`\`
|
||||
Use a 5-minute timeout (\`timeout: 300000\`). After completion: \`cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"\`
|
||||
|
||||
@@ -636,7 +636,7 @@ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
|
||||
1. **Codex design voice** (via Bash):
|
||||
\`\`\`bash
|
||||
TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX)
|
||||
codex exec "${escapedCodexPrompt}" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN"
|
||||
codex exec "${escapedCodexPrompt}" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN"
|
||||
\`\`\`
|
||||
Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
|
||||
\`\`\`bash
|
||||
|
||||
@@ -286,7 +286,7 @@ Write the full prompt (context block + instructions) to this file. Use the mode-
|
||||
|
||||
\`\`\`bash
|
||||
TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX)
|
||||
codex exec "$(cat "$CODEX_PROMPT_FILE")" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH"
|
||||
codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH"
|
||||
\`\`\`
|
||||
|
||||
Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
|
||||
@@ -370,7 +370,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal
|
||||
|
||||
\`\`\`bash
|
||||
TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX)
|
||||
codex exec "Review the changes on this branch against the base branch. Run git diff origin/<base> to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV"
|
||||
codex exec "Review the changes on this branch against the base branch. Run git diff origin/<base> to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV"
|
||||
\`\`\`
|
||||
|
||||
Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. After the command completes, read stderr:
|
||||
@@ -525,7 +525,7 @@ THE PLAN:
|
||||
|
||||
\`\`\`bash
|
||||
TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX)
|
||||
codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV"
|
||||
codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV"
|
||||
\`\`\`
|
||||
|
||||
Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
|
||||
|
||||
@@ -152,6 +152,24 @@ describe('gen-skill-docs', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('every Codex SKILL.md description stays under 900-char warning threshold', () => {
|
||||
const WARN_THRESHOLD = 900;
|
||||
const agentsDir = path.join(ROOT, '.agents', 'skills');
|
||||
if (!fs.existsSync(agentsDir)) return;
|
||||
const violations: string[] = [];
|
||||
for (const entry of fs.readdirSync(agentsDir, { withFileTypes: true })) {
|
||||
if (!entry.isDirectory()) continue;
|
||||
const skillMd = path.join(agentsDir, entry.name, 'SKILL.md');
|
||||
if (!fs.existsSync(skillMd)) continue;
|
||||
const content = fs.readFileSync(skillMd, 'utf-8');
|
||||
const description = extractDescription(content);
|
||||
if (description.length > WARN_THRESHOLD) {
|
||||
violations.push(`${entry.name}: ${description.length} chars (limit ${MAX_SKILL_DESCRIPTION_LENGTH}, ${MAX_SKILL_DESCRIPTION_LENGTH - description.length} remaining)`);
|
||||
}
|
||||
}
|
||||
expect(violations).toEqual([]);
|
||||
});
|
||||
|
||||
test('package.json version matches VERSION file', () => {
|
||||
const pkg = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf-8'));
|
||||
const version = fs.readFileSync(path.join(ROOT, 'VERSION'), 'utf-8').trim();
|
||||
|
||||
Reference in New Issue
Block a user