mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-18 07:40:09 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/trunk-land-skill
# Conflicts: # CHANGELOG.md # VERSION # package.json
This commit is contained in:
@@ -50,6 +50,24 @@ function regenerateAndValidate() {
|
||||
console.log(` [check] \u2705 ${output} — ${totalValid} commands, all valid`);
|
||||
}
|
||||
}
|
||||
|
||||
// Dev workspace render isolation: the default in-place regen above keeps the
|
||||
// worktree canonical. If bin/dev-setup set up an untracked brain-aware render
|
||||
// (.claude/gstack-rendered), refresh it too so live template edits reflect at
|
||||
// this workspace's runtime. Only runs when the render dir already exists — we
|
||||
// never create it during plain template dev.
|
||||
const RENDER_DIR = path.join(ROOT, '.claude', 'gstack-rendered');
|
||||
if (fs.existsSync(RENDER_DIR)) {
|
||||
try {
|
||||
execSync(
|
||||
`bun run scripts/gen-skill-docs.ts --respect-detection --host claude --out-dir ${JSON.stringify(RENDER_DIR)}`,
|
||||
{ cwd: ROOT, stdio: 'pipe' },
|
||||
);
|
||||
console.log(' [render] refreshed .claude/gstack-rendered (brain-aware workspace copy)');
|
||||
} catch (err: any) {
|
||||
console.log(` [render] ERROR: ${err.stderr?.toString().trim() || err.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initial run
|
||||
|
||||
@@ -137,6 +137,39 @@ const EXPLAIN_LEVEL: 'default' | 'terse' = (() => {
|
||||
return val;
|
||||
})();
|
||||
|
||||
// ─── Out-dir (dev workspace render isolation) ───────────────
// --out-dir <abs-dir> redirects Claude SKILL.md + section output to a separate
// (untracked) directory instead of writing in place, AND rewrites the literal
// section-base path (`~/.claude/skills/gstack/<skill>/sections/`) inside the
// generated content to point at the out-dir, so section Reads resolve to the
// rendered copy rather than the global install. Used by bin/dev-setup to render
// the gbrain `:user` variant for a Conductor workspace without dirtying tracked
// source. Default (unset) = in-place, behavior unchanged. Claude host only.

/**
 * Extract the `--out-dir` value from an argv-style list.
 *
 * Accepts both `--out-dir <dir>` and `--out-dir=<dir>`. Only the exact flag
 * name is recognised, so an unrelated flag that merely shares the prefix
 * (e.g. `--out-dirty`) is ignored.
 *
 * @param argv Command-line tokens, e.g. `process.argv`.
 * @returns The absolute (resolved) directory, or `null` when the flag is absent.
 * @throws Error when the flag is present but has no usable value: nothing
 *   follows it, the `=` form is empty, or the next token is another `--flag`.
 */
function parseOutDirArg(argv: readonly string[]): string | null {
  const flag = '--out-dir';
  const idx = argv.findIndex(a => a === flag || a.startsWith(`${flag}=`));
  if (idx === -1) return null;

  const arg = argv[idx] ?? '';
  let val: string | undefined;
  if (arg === flag) {
    // Space-separated form: the value is the next token, unless that token is
    // itself a flag (which means the directory was omitted).
    const next = argv[idx + 1];
    val = next !== undefined && !next.startsWith('--') ? next : undefined;
  } else {
    // `=` form: keep everything after the FIRST `=` so paths that contain `=`
    // are not truncated.
    val = arg.slice(flag.length + 1);
  }

  if (!val) throw new Error('--out-dir requires a directory path');
  return path.resolve(val);
}

// Raw argv token carrying the flag (undefined when --out-dir was not passed).
const OUT_DIR_ARG = process.argv.find(a => a === '--out-dir' || a.startsWith('--out-dir='));
// Absolute out-dir, or null for the default in-place generation.
const OUT_DIR: string | null = parseOutDirArg(process.argv);
|
||||
|
||||
/**
 * When rendering to an out-dir, repoint the literal section-base path at the
 * out-dir so section Reads resolve to the rendered copy, not the global install.
 * Surgical: ONLY paths containing `/sections/` are rewritten — bin/, browse/,
 * docs/ references keep pointing at `~/.claude/skills/gstack` (the global
 * install, which still works). No-op when --out-dir is unset.
 *
 * @param content Generated document text to scan.
 * @returns `content` with every `~/.claude/skills/gstack/<skill>/sections/`
 *   prefix replaced by `<OUT_DIR>/<skill>/sections/`; unchanged when OUT_DIR
 *   is not set.
 */
function rewriteSectionBase(content: string): string {
  if (!OUT_DIR) return content;
  const outDir = OUT_DIR;
  return content.replace(
    /~\/\.claude\/skills\/gstack\/([^\s)`"'*]+\/sections\/)/g,
    // Use a replacer FUNCTION, not a replacement string: in a string, `$&`,
    // `$1`, `$$` etc. are substitution patterns, so an out-dir path containing
    // `$` would be mangled. A function's return value is inserted verbatim.
    (_match: string, sectionPath: string) => `${outDir}/${sectionPath}`,
  );
}
|
||||
|
||||
// HostPaths, HOST_PATHS, and TemplateContext imported from ./resolvers/types (line 7-8)
|
||||
// Design constants (AI_SLOP_BLACKLIST, OPENAI_HARD_REJECTIONS, OPENAI_LITMUS_CHECKS)
|
||||
// live in ./resolvers/constants and are consumed by resolvers directly.
|
||||
@@ -768,6 +801,12 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
|
||||
// Determine skill directory relative to ROOT
|
||||
const skillDir = path.relative(ROOT, path.dirname(tmplPath));
|
||||
|
||||
// --out-dir (Claude only): mirror the skill tree into the out-dir instead of
|
||||
// writing in place. External hosts compute their own paths below.
|
||||
if (OUT_DIR && host === 'claude') {
|
||||
outputPath = path.join(OUT_DIR, skillDir, path.basename(tmplPath).replace(/\.tmpl$/, ''));
|
||||
}
|
||||
|
||||
// Extract name/description: name drives external skill naming + setup symlinks
|
||||
// (and TemplateContext.skillName via buildContext); description feeds external
|
||||
// host metadata. When frontmatter name: differs from directory name (e.g.
|
||||
@@ -822,6 +861,9 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
|
||||
}
|
||||
}
|
||||
|
||||
// --out-dir: repoint section-base paths to the out-dir (no-op otherwise).
|
||||
if (host === 'claude') content = rewriteSectionBase(content);
|
||||
|
||||
return { outputPath, content, symlinkLoop, catalogParts };
|
||||
}
|
||||
|
||||
@@ -860,6 +902,10 @@ function processSectionTemplate(
|
||||
// External hosts: rewrite cross-reference paths/tools (no frontmatter to transform).
|
||||
if (host !== 'claude') {
|
||||
content = applyHostRewrites(content, hostConfig);
|
||||
} else {
|
||||
// --out-dir: a section may cross-reference another section by absolute path;
|
||||
// repoint those to the out-dir too (no-op when --out-dir is unset).
|
||||
content = rewriteSectionBase(content);
|
||||
}
|
||||
|
||||
// Plain generated header (no frontmatter to insert after).
|
||||
@@ -868,7 +914,7 @@ function processSectionTemplate(
|
||||
const fileName = path.basename(sectionTmplPath).replace(/\.tmpl$/, '');
|
||||
let outputPath: string;
|
||||
if (host === 'claude') {
|
||||
outputPath = path.join(ROOT, skillDir, 'sections', fileName);
|
||||
outputPath = path.join(OUT_DIR || ROOT, skillDir, 'sections', fileName);
|
||||
} else {
|
||||
const externalName = externalSkillName(skillDir, parentName);
|
||||
outputPath = path.join(ROOT, hostConfig.hostSubdir, 'skills', externalName, 'sections', fileName);
|
||||
@@ -933,7 +979,7 @@ for (const currentHost of hostsToRun) {
|
||||
voice_line: catalogParts.voiceLine,
|
||||
};
|
||||
}
|
||||
const relOutput = path.relative(ROOT, outputPath);
|
||||
const relOutput = path.relative(OUT_DIR || ROOT, outputPath);
|
||||
|
||||
if (symlinkLoop) {
|
||||
console.log(`SKIPPED (symlink loop): ${relOutput}`);
|
||||
@@ -946,6 +992,9 @@ for (const currentHost of hostsToRun) {
|
||||
console.log(`FRESH: ${relOutput}`);
|
||||
}
|
||||
} else {
|
||||
// In-place writes land in existing dirs; --out-dir needs the mirrored
|
||||
// skill dir created first.
|
||||
if (OUT_DIR) fs.mkdirSync(path.dirname(outputPath), { recursive: true });
|
||||
fs.writeFileSync(outputPath, content);
|
||||
console.log(`GENERATED: ${relOutput}`);
|
||||
}
|
||||
@@ -982,7 +1031,7 @@ for (const currentHost of hostsToRun) {
|
||||
currentHostConfig.generation.skipSkills.includes(sec.skillDir)) continue;
|
||||
|
||||
const { outputPath, content } = processSectionTemplate(path.join(ROOT, sec.tmpl), sec.skillDir, currentHost);
|
||||
const relOutput = path.relative(ROOT, outputPath);
|
||||
const relOutput = path.relative(OUT_DIR || ROOT, outputPath);
|
||||
|
||||
if (DRY_RUN) {
|
||||
const existing = fs.existsSync(outputPath) ? fs.readFileSync(outputPath, 'utf-8') : '';
|
||||
@@ -1079,7 +1128,9 @@ The orchestrator will persist the plan link to its own memory/knowledge store.
|
||||
// No timestamp field — keeps the file content-deterministic across runs so
|
||||
// CI dry-run freshness checks don't flap on regen. If a per-run timestamp
|
||||
// is ever needed for debugging, write it to a separate `.gen-stamp` file.
|
||||
if (currentHost === 'claude' && CATALOG_MODE === 'trim' && Object.keys(proactiveAggregate).length > 0 && !DRY_RUN) {
|
||||
// Skip the global proactive-suggestions.json in --out-dir mode: it lives at
|
||||
// a repo path (scripts/) and the dev workspace render doesn't need it.
|
||||
if (currentHost === 'claude' && CATALOG_MODE === 'trim' && Object.keys(proactiveAggregate).length > 0 && !DRY_RUN && !OUT_DIR) {
|
||||
const proactivePath = path.join(ROOT, 'scripts', 'proactive-suggestions.json');
|
||||
// Sort keys alphabetically so the serialized JSON is identical across
|
||||
// machines regardless of filesystem-iteration order. Without this, CI
|
||||
|
||||
@@ -78,6 +78,11 @@
|
||||
"routing": "Uses the browse tool to actually TEST the\ndeveloper experience: navigates docs, tries the getting started flow, times\nTTHW, screenshots error messages, evaluates CLI help text. Produces a DX\nscorecard with evidence. Compares against /plan-devex-review scores if they\nexist (the boomerang: plan said 3 minutes, reality says 8). Use when asked to\n\"test the DX\", \"DX audit\", \"developer experience test\", or \"try the\nonboarding\". Proactively suggest after shipping a developer-facing feature.",
|
||||
"voice_line": "Voice triggers (speech-to-text aliases): \"dx audit\", \"test the developer experience\", \"try the onboarding\", \"developer experience test\"."
|
||||
},
|
||||
"diagram": {
|
||||
"lead": "Turn an English description (or mermaid source) into a diagram triplet: the source, an editable .excalidraw file you can open",
|
||||
"routing": "on excalidraw.com,\nand rendered SVG + PNG (clean mermaid style; the .excalidraw carries the\nhand-drawn aesthetic). Fully offline.\nUse when asked to \"make a diagram\", \"draw the architecture\", \"create a\nflowchart\", \"diagram this\", or \"visualize this flow\".",
|
||||
"voice_line": null
|
||||
},
|
||||
"document-generate": {
|
||||
"lead": "Generate missing documentation from scratch for a feature, module, or entire project.",
|
||||
"routing": "Uses the Diataxis framework (tutorial / how-to / reference / explanation) to produce\ncomplete, structured documentation. Can be invoked standalone or called by\n/document-release when it finds coverage gaps. Use when asked to \"write docs\",\n\"generate documentation\", \"document this feature\", \"create a tutorial\", or\n\"explain this module\".",
|
||||
|
||||
@@ -56,3 +56,61 @@ export function codexErrorHandling(feature: string): string {
|
||||
- Empty response: note and skip
|
||||
On any error: continue — ${feature} is informational, not a gate.`;
|
||||
}
|
||||
|
||||
/**
 * Shared Codex preflight bash block — the single source of truth for deciding
 * whether a Codex review pass should run. Used by ADVERSARIAL_STEP,
 * CODEX_PLAN_REVIEW, and CODEX_DOC_REVIEW so install/auth/config detection
 * lives in exactly one place.
 *
 * Emits ONE self-contained bash block (the caller must place it in a single
 * fenced block — CLAUDE.md: each block is a fresh shell, so functions sourced
 * here do NOT persist to later blocks). It:
 *   1. reads the `codex_reviews` master switch,
 *   2. sources `gstack-codex-probe`,
 *   3. runs `command -v codex` (literal — keeps the e2e substring assertion),
 *      then `_gstack_codex_auth_probe`, then `_gstack_codex_version_check`,
 *   4. logs the relevant `_gstack_codex_log_event` for each non-ready outcome,
 *   5. sets ONE canonical mode var and echoes `CODEX_MODE: <mode>` so the agent
 *      gates later blocks on the echoed value.
 *
 * Mode values: `disabled` (config off) | `not_installed` | `not_authed` | `ready`.
 * The path is host-rewritten at gen-skill-docs time (pathRewrites), so the
 * literal `~/.claude/skills/gstack` is correct here and becomes `$GSTACK_ROOT`
 * etc. for non-Claude hosts.
 *
 * `disabledBehavior` controls the `disabled`-mode interpretation, which is the
 * one branch that legitimately differs per caller (D1):
 *   - `skip-all` (plan / doc reviews): disabled means no extra review step at
 *     all — skip the section, no Claude fallback.
 *   - `codex-only` (diff adversarial): disabled gates only the Codex passes; the
 *     free Claude adversarial subagent still runs.
 *
 * @param opts.modeVar Name of the bash variable that receives the mode
 *   (default `_CODEX_MODE`). Must be a valid shell identifier.
 * @param opts.disabledBehavior How the prose instructs the agent to treat the
 *   `disabled` mode (see above).
 * @returns Markdown: the fenced bash preflight followed by the per-mode
 *   branching instructions.
 * @throws Error when `modeVar` is not a valid shell identifier.
 */
export function codexPreflight(opts: { modeVar?: string; disabledBehavior: 'skip-all' | 'codex-only' }): string {
  const m = opts.modeVar ?? '_CODEX_MODE';
  // `m` is emitted both as an assignment target (`NAME="..."`) and as `$NAME`.
  // Anything that is not a plain identifier would produce broken bash (the
  // "assignment" would be parsed as a command), so fail at generation time.
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(m)) {
    throw new Error(`codexPreflight: modeVar must be a valid shell identifier, got ${JSON.stringify(m)}`);
  }
  const disabledLine = opts.disabledBehavior === 'codex-only'
    ? 'Skip the Codex passes only; the Claude adversarial subagent below STILL runs (it is free and fast). Print: "Codex passes skipped (codex_reviews disabled) — running Claude adversarial only."'
    : 'Skip this section entirely; do NOT fall back to a Claude subagent — disabled means no extra review step. Print: "Codex review skipped (codex_reviews disabled). Re-enable: `gstack-config set codex_reviews enabled`."';
  // NOTE: `$${m}` below is a literal `$` followed by the interpolated variable
  // name, i.e. it renders as `$_CODEX_MODE` in the emitted bash.
  return `\`\`\`bash
# Codex preflight: one block (functions sourced here don't persist to later blocks).
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || echo off)
_CODEX_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || echo enabled)
source ~/.claude/skills/gstack/bin/gstack-codex-probe 2>/dev/null || true
if [ "$_CODEX_CFG" = "disabled" ]; then
${m}="disabled"
elif ! command -v codex >/dev/null 2>&1; then
${m}="not_installed"; _gstack_codex_log_event "codex_cli_missing" 2>/dev/null || true
elif ! _gstack_codex_auth_probe >/dev/null 2>&1; then
${m}="not_authed"; _gstack_codex_log_event "codex_auth_failed" 2>/dev/null || true
else
${m}="ready"; _gstack_codex_version_check 2>/dev/null || true
fi
echo "CODEX_MODE: $${m}"
\`\`\`

Branch on the echoed \`CODEX_MODE\`:
- **\`disabled\`** — the user turned Codex reviews off (\`codex_reviews=disabled\`). ${disabledLine}
- **\`not_installed\`** — Codex CLI absent. Print: "Codex not installed — using Claude subagent. Install for cross-model coverage: \`npm install -g @openai/codex\`." Fall back to the Claude subagent path.
- **\`not_authed\`** — installed but no credentials. Print: "Codex installed but not authenticated — using Claude subagent. Run \`codex login\` or set \`$CODEX_API_KEY\`." Fall back to the Claude subagent path.
- **\`ready\`** — run the Codex pass below.`;
}
|
||||
|
||||
@@ -22,7 +22,7 @@ import { generateTestFailureTriage } from './preamble';
|
||||
import { generateCommandReference, generateSnapshotFlags, generateBrowseSetup } from './browse';
|
||||
import { generateDesignMethodology, generateDesignHardRules, generateDesignOutsideVoices, generateDesignReviewLite, generateDesignSketch, generateDesignSetup, generateDesignMockup, generateDesignShotgunLoop, generateTasteProfile, generateUXPrinciples } from './design';
|
||||
import { generateTestBootstrap, generateTestCoverageAuditPlan, generateTestCoverageAuditShip, generateTestCoverageAuditReview } from './testing';
|
||||
import { generateReviewDashboard, generatePlanFileReviewReport, generateExitPlanModeGate, generateAntiShortcutClause, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec, generateScopeDrift, generateCrossReviewDedup } from './review';
|
||||
import { generateReviewDashboard, generatePlanFileReviewReport, generateExitPlanModeGate, generateAntiShortcutClause, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generateCodexDocReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec, generateScopeDrift, generateCrossReviewDedup } from './review';
|
||||
import { generateSlugEval, generateSlugSetup, generateBaseBranchDetect, generateDeployBootstrap, generateQAMethodology, generateCoAuthorTrailer, generateChangelogWorkflow } from './utility';
|
||||
import { generateLearningsSearch, generateLearningsLog } from './learnings';
|
||||
import { generateConfidenceCalibration } from './confidence';
|
||||
@@ -75,6 +75,7 @@ export const RESOLVERS: Record<string, ResolverValue> = {
|
||||
SCOPE_DRIFT: generateScopeDrift,
|
||||
DEPLOY_BOOTSTRAP: generateDeployBootstrap,
|
||||
CODEX_PLAN_REVIEW: generateCodexPlanReview,
|
||||
CODEX_DOC_REVIEW: generateCodexDocReview,
|
||||
PLAN_COMPLETION_AUDIT_SHIP: generatePlanCompletionAuditShip,
|
||||
PLAN_COMPLETION_AUDIT_REVIEW: generatePlanCompletionAuditReview,
|
||||
PLAN_VERIFICATION_EXEC: generatePlanVerificationExec,
|
||||
|
||||
@@ -7,7 +7,9 @@ export function generateAskUserFormat(_ctx: TemplateContext): string {
|
||||
|
||||
"AskUserQuestion" can resolve to two tools at runtime: the **host MCP variant** (e.g. \`mcp__conductor__AskUserQuestion\` — appears in your tool list when the host registers it) or the **native** Claude Code tool.
|
||||
|
||||
**Rule:** if any \`mcp__*__AskUserQuestion\` variant is in your tool list, prefer it. Hosts may disable native AUQ via \`--disallowedTools AskUserQuestion\` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
|
||||
**Conductor rule (read before the MCP rule):** if \`CONDUCTOR_SESSION: true\` was echoed by the preamble, do NOT call AskUserQuestion at all — neither native nor any \`mcp__*__AskUserQuestion\` variant. Render EVERY decision brief as the **prose form** below and STOP. This is proactive, not a reaction to a failure: Conductor disables native AUQ and its MCP variant is flaky (it returns \`[Tool result missing due to internal error]\`), so prose is the reliable path. **Auto-decide preferences still apply first:** if a \`[plan-tune auto-decide] <id> → <option>\` result has already surfaced for a question, proceed with that option (no prose). Because in Conductor you go straight to prose without ever calling the tool, this auto-decide-first ordering is enforced HERE, not only by the PreToolUse hook. When you render a Conductor prose brief, also capture it with \`bin/gstack-question-log\` (the PostToolUse capture hook never fires on a prose path, so \`/plan-tune\` history/learning depends on this call).
|
||||
|
||||
**Rule (non-Conductor):** if any \`mcp__*__AskUserQuestion\` variant is in your tool list, prefer it. Hosts may disable native AUQ via \`--disallowedTools AskUserQuestion\` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
|
||||
|
||||
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
|
||||
|
||||
@@ -29,7 +31,11 @@ Tell three outcomes apart:
|
||||
2. **Completeness scores per choice** — explicit \`Completeness: X/10\` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
|
||||
3. **The recommendation and why** — a \`Recommendation: <choice> because <reason>\` line plus the \`(recommended)\` marker on that choice.
|
||||
|
||||
Layout: a \`D<N>\` title + a one-line note that AskUserQuestion failed and to reply with a letter; the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its \`(recommended)\` marker, its \`Completeness: X/10\`, and 2-4 sentences of reasoning — never a bare bullet list; a closing \`Net:\` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
|
||||
Layout: a \`D<N>\` title + a one-line note to reply with a letter (in Conductor this is the normal path; elsewhere it means AskUserQuestion was unavailable or errored); the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its \`(recommended)\` marker, its \`Completeness: X/10\`, and 2-4 sentences of reasoning — never a bare bullet list; a closing \`Net:\` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
|
||||
|
||||
**Continuation — mapping a typed reply back to a brief.** Each brief carries a stable label (\`D<N>\`, or \`D<N>.k\` in a split chain). The user references it (e.g. "3.2: B"). A bare letter maps to the single most-recent UNANSWERED brief; if more than one is open (a split chain), do NOT guess — ask which \`D<N>.k\` it answers. Never apply a bare letter ambiguously across a chain.
|
||||
|
||||
**One-way / destructive confirmations in prose.** When the decision is a one-way door (irreversible or destructive — delete, force-push, drop, overwrite), prose is a WEAKER gate than the tool, so make it stronger: require an explicit typed confirmation (the exact option letter or word), state plainly what is irreversible, and NEVER proceed on a vague, partial, or ambiguous reply — re-ask instead. Treat silence or "ok"/"sure" without the explicit choice as not-yet-confirmed.
|
||||
|
||||
### Format
|
||||
|
||||
@@ -113,7 +119,7 @@ Before calling AskUserQuestion, verify:
|
||||
- [ ] (recommended) label on one option (even for neutral-posture)
|
||||
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
|
||||
- [ ] Net line closes the decision
|
||||
- [ ] You are calling the tool, not writing prose — unless the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + \`(recommended)\` — and a "reply with a letter" instruction, then STOP)
|
||||
- [ ] You are calling the tool, not writing prose — unless \`CONDUCTOR_SESSION: true\` (then prose is the DEFAULT, not the tool) OR the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + \`(recommended)\` — and a "reply with a letter" instruction, then STOP)
|
||||
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \\u-escaped
|
||||
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
|
||||
- [ ] If you split, you checked dependencies between options before firing the chain
|
||||
|
||||
@@ -36,6 +36,13 @@ echo "REPO_MODE: $REPO_MODE"
|
||||
_SESSION_KIND=$(${ctx.paths.binDir}/gstack-session-kind 2>/dev/null || echo "interactive")
|
||||
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
|
||||
echo "SESSION_KIND: $_SESSION_KIND"
|
||||
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
|
||||
# variant flaky), so skills render decisions as prose instead of calling the
|
||||
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
|
||||
# still BLOCKs rather than rendering prose to nobody.
|
||||
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "\${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "\${CONDUCTOR_PORT:-}" ]; }; then
|
||||
echo "CONDUCTOR_SESSION: true"
|
||||
fi
|
||||
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
|
||||
echo "LAKE_INTRO: $_LAKE_SEEN"
|
||||
_TEL=$(${ctx.paths.binDir}/gstack-config get telemetry 2>/dev/null || true)
|
||||
|
||||
+133
-45
@@ -14,6 +14,7 @@
|
||||
*/
|
||||
import type { TemplateContext } from './types';
|
||||
import { generateInvokeSkill } from './composition';
|
||||
import { codexPreflight, codexErrorHandling } from './constants';
|
||||
|
||||
const CODEX_BOUNDARY = 'IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\\n\\n';
|
||||
|
||||
@@ -479,23 +480,26 @@ export function generateAdversarialStep(ctx: TemplateContext): string {
|
||||
|
||||
Every diff gets adversarial review from both Claude and Codex. LOC is not a proxy for risk — a 5-line auth change can be critical.
|
||||
|
||||
**Detect diff size and tool availability:**
|
||||
**Detect diff size:**
|
||||
|
||||
\`\`\`bash
|
||||
DIFF_BASE=$(git merge-base origin/<base> HEAD)
|
||||
DIFF_INS=$(git diff "$DIFF_BASE" --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")
|
||||
DIFF_DEL=$(git diff "$DIFF_BASE" --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0")
|
||||
DIFF_TOTAL=$((DIFF_INS + DIFF_DEL))
|
||||
command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
|
||||
# Legacy opt-out — only gates Codex passes, Claude always runs
|
||||
OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true)
|
||||
echo "DIFF_SIZE: $DIFF_TOTAL"
|
||||
echo "OLD_CFG: \${OLD_CFG:-not_set}"
|
||||
\`\`\`
|
||||
|
||||
If \`OLD_CFG\` is \`disabled\`: skip Codex passes only. Claude adversarial subagent still runs (it's free and fast). Jump to the "Claude adversarial subagent" section.
|
||||
**Detect the Codex master switch + tool availability:**
|
||||
|
||||
**User override:** If the user explicitly requested "full review", "structured review", or "P1 gate", also run the Codex structured review regardless of diff size.
|
||||
${codexPreflight({ disabledBehavior: 'codex-only' })}
|
||||
|
||||
For this diff-review path, \`CODEX_MODE: disabled\` means skip the Codex passes ONLY — the
|
||||
Claude adversarial subagent below still runs (it's free and fast). \`ready\` runs the Codex
|
||||
passes; \`not_installed\` / \`not_authed\` skip them with the printed note and continue with
|
||||
Claude only.
|
||||
|
||||
**User override:** If the user explicitly requested "full review", "structured review", or "P1 gate", also run the Codex structured review regardless of diff size (still requires \`CODEX_MODE: ready\`).
|
||||
|
||||
---
|
||||
|
||||
@@ -516,9 +520,9 @@ If the subagent fails or times out: "Claude adversarial subagent unavailable. Co
|
||||
|
||||
---
|
||||
|
||||
### Codex adversarial challenge (always runs when available)
|
||||
### Codex adversarial challenge (runs whenever \`CODEX_MODE: ready\`)
|
||||
|
||||
If Codex is available AND \`OLD_CFG\` is NOT \`disabled\`:
|
||||
If \`CODEX_MODE\` is \`ready\`:
|
||||
|
||||
\`\`\`bash
|
||||
TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX)
|
||||
@@ -540,13 +544,13 @@ Present the full output verbatim. This is informational — it never blocks ship
|
||||
|
||||
**Cleanup:** Run \`rm -f "$TMPERR_ADV"\` after processing.
|
||||
|
||||
If Codex is NOT available: "Codex CLI not found — running Claude adversarial only. Install Codex for cross-model coverage: \`npm install -g @openai/codex\`"
|
||||
If \`CODEX_MODE\` is \`not_installed\` / \`not_authed\` / \`disabled\`: the preflight already printed the reason; run Claude adversarial only.
|
||||
|
||||
---
|
||||
|
||||
### Codex structured review (large diffs only, 200+ lines)
|
||||
|
||||
If \`DIFF_TOTAL >= 200\` AND Codex is available AND \`OLD_CFG\` is NOT \`disabled\`:
|
||||
If \`DIFF_TOTAL >= 200\` AND \`CODEX_MODE\` is \`ready\`:
|
||||
|
||||
\`\`\`bash
|
||||
TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX)
|
||||
@@ -610,38 +614,25 @@ export function generateCodexPlanReview(ctx: TemplateContext): string {
|
||||
// Codex host: strip entirely — Codex should never invoke itself
|
||||
if (ctx.host === 'codex') return '';
|
||||
|
||||
return `## Outside Voice — Independent Plan Challenge (optional, recommended)
|
||||
return `## Outside Voice — Independent Plan Challenge (default-on)
|
||||
|
||||
After all review sections are complete, offer an independent second opinion from a
|
||||
different AI system. Two models agreeing on a plan is stronger signal than one model's
|
||||
thorough review.
|
||||
After all review sections are complete, run an independent second opinion from a
|
||||
different AI system automatically — it is a standard part of plan review, not an
|
||||
opt-in. Two models agreeing on a plan is stronger signal than one model's thorough
|
||||
review. The user turns this off only by asking explicitly
|
||||
(\`gstack-config set codex_reviews disabled\`).
|
||||
|
||||
**Check tool availability:**
|
||||
**Preflight — decide whether and how the outside voice runs:**
|
||||
|
||||
\`\`\`bash
|
||||
command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
|
||||
\`\`\`
|
||||
${codexPreflight({ disabledBehavior: 'skip-all' })}
|
||||
|
||||
Use AskUserQuestion:
|
||||
When the mode is \`ready\`, \`not_installed\`, or \`not_authed\`, print one line so the off-switch
|
||||
stays discoverable: "Running the outside voice automatically (standard step). Disable: \`gstack-config set codex_reviews disabled\`."
|
||||
|
||||
> "All review sections are complete. Want an outside voice? A different AI system can
|
||||
> give a brutally honest, independent challenge of this plan — logical gaps, feasibility
|
||||
> risks, and blind spots that are hard to catch from inside the review. Takes about 2
|
||||
> minutes."
|
||||
>
|
||||
> RECOMMENDATION: Choose A — an independent second opinion catches structural blind
|
||||
> spots. Two different AI models agreeing on a plan is stronger signal than one model's
|
||||
> thorough review. Completeness: A=9/10, B=7/10.
|
||||
|
||||
Options:
|
||||
- A) Get the outside voice (recommended)
|
||||
- B) Skip — proceed to outputs
|
||||
|
||||
**If B:** Print "Skipping outside voice." and continue to the next section.
|
||||
|
||||
**If A:** Construct the plan review prompt. Read the plan file being reviewed (the file
|
||||
the user pointed this review at, or the branch diff scope). If a CEO plan document
|
||||
was written in Step 0D-POST, read that too — it contains the scope decisions and vision.
|
||||
**Construct the plan review prompt** (for \`ready\`, \`not_installed\`, and \`not_authed\` — skip only on \`disabled\`).
|
||||
Read the plan file being reviewed (the file the user pointed this review at, or the branch
|
||||
diff scope). If a CEO plan document was written in Step 0D-POST, read that too — it contains
|
||||
the scope decisions and vision.
|
||||
|
||||
Construct this prompt (substitute the actual plan content — if plan content exceeds 30KB,
|
||||
truncate to the first 30KB and note "Plan truncated for size"). **Always start with the
|
||||
@@ -659,7 +650,7 @@ compliments. Just the problems.
|
||||
THE PLAN:
|
||||
<plan content>"
|
||||
|
||||
**If CODEX_AVAILABLE:**
|
||||
**If \`CODEX_MODE: ready\` — run Codex:**
|
||||
|
||||
\`\`\`bash
|
||||
TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX)
|
||||
@@ -682,15 +673,15 @@ CODEX SAYS (plan review — outside voice):
|
||||
\`\`\`
|
||||
|
||||
**Error handling:** All errors are non-blocking — the outside voice is informational.
|
||||
- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \\\`codex login\\\` to authenticate."
|
||||
- Timeout: "Codex timed out after 5 minutes."
|
||||
- Empty response: "Codex returned no response."
|
||||
- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \\\`codex login\\\` to authenticate." Fall back to the Claude subagent below.
|
||||
- Timeout: "Codex timed out after 5 minutes." Fall back to the Claude subagent below.
|
||||
- Empty response: "Codex returned no response." Fall back to the Claude subagent below.
|
||||
|
||||
On any Codex error, fall back to the Claude adversarial subagent.
|
||||
|
||||
**If CODEX_NOT_AVAILABLE (or Codex errored):**
|
||||
**If \`CODEX_MODE: not_installed\` or \`not_authed\` (or Codex errored at runtime):**
|
||||
|
||||
Dispatch via the Agent tool. The subagent has fresh context — genuine independence.
|
||||
Bound it the same way as Codex: cap the dispatch at a 5-minute timeout so "never blocking"
|
||||
is also "never hanging."
|
||||
|
||||
Subagent prompt: same plan review prompt as above.
|
||||
|
||||
@@ -698,6 +689,8 @@ Present findings under an \`OUTSIDE VOICE (Claude subagent):\` header.
|
||||
|
||||
If the subagent fails or times out: "Outside voice unavailable. Continuing to outputs."
|
||||
|
||||
(On \`CODEX_MODE: disabled\` you already skipped this section per the preflight — do not reach here.)
|
||||
|
||||
**Cross-model tension:**
|
||||
|
||||
After presenting the outside voice findings, note any points where the outside voice
|
||||
@@ -747,6 +740,101 @@ SOURCE = "codex" if Codex ran, "claude" if subagent ran.
|
||||
---`;
|
||||
}
|
||||
|
||||
/**
 * Render the "Codex Documentation Review" section of a generated skill doc.
 *
 * The section instructs the orchestrating agent to run an independent doc-vs-code
 * review after documentation updates are written: with Codex when the preflight
 * reports `CODEX_MODE: ready`, otherwise with a Claude subagent, and then to ask
 * the user once how to handle any findings and log the result.
 *
 * @param ctx - Template context; only `ctx.host` is read here.
 * @returns The Markdown section, or an empty string when the host is `codex`
 *   (Codex should never invoke itself).
 */
export function generateCodexDocReview(ctx: TemplateContext): string {
  // Codex host: strip entirely — Codex should never invoke itself
  if (ctx.host === 'codex') return '';

  // NOTE: the prompt below uses a <DOC_DIFF_BASE> placeholder rather than a
  // literal $DOC_DIFF_BASE. The variable is only set in the orchestrating
  // agent's Bash block; Codex runs the diff in its own shell, where an
  // unexpanded variable would be empty and the range would collapse to
  // `...HEAD` (an empty diff).
  return `## Codex Documentation Review (default-on)

After the documentation updates above are written, run an independent cross-model pass that
checks the docs against what actually shipped. This is a standard part of /document-release,
not an opt-in. The user turns it off only by asking explicitly
(\`gstack-config set codex_reviews disabled\`).

**Preflight — decide whether and how the doc review runs:**

${codexPreflight({ disabledBehavior: 'skip-all' })}

When the mode is \`ready\`, \`not_installed\`, or \`not_authed\`, print one line so the off-switch
stays discoverable: "Running the Codex doc review automatically (standard step). Disable: \`gstack-config set codex_reviews disabled\`."

**Determine the release diff range (D3 — reuse the method, do not invent one).**
Recompute the SAME range document-release used in its pre-flight / diff analysis, with the
documented merge-base method:

\`\`\`bash
DOC_DIFF_BASE=$(git merge-base origin/<base> HEAD 2>/dev/null || echo "<base>")
echo "DOC_DIFF_BASE: $DOC_DIFF_BASE"
\`\`\`

Do NOT rely on an in-memory variable from an earlier step — shell vars do not survive across
blocks. Recompute it here.

**Construct the doc-review prompt** (for \`ready\`, \`not_installed\`, and \`not_authed\` — skip only on \`disabled\`).
Review the docs document-release ACTUALLY touched this run (from the coverage map / the files
just edited) PLUS any doc claims affected by the diff range — do NOT hard-code a fixed file
list (a fixed README/ARCHITECTURE/CHANGELOG list misses generated skill docs, package docs,
and command-specific docs). Substitute the DOC_DIFF_BASE value echoed above for
\`<DOC_DIFF_BASE>\` — the reviewer runs in its own shell, so an unexpanded \`$DOC_DIFF_BASE\`
would be empty there. **Always start with the filesystem boundary instruction:**

"${CODEX_BOUNDARY}You are reviewing documentation changes against the code that shipped on this
branch. Run \\\`git diff <DOC_DIFF_BASE>...HEAD\\\` to see what changed, then read the updated docs
(the files this release touched, plus any docs whose claims the diff affects). Find: doc
claims that no longer match the code, new public surface (commands, flags, config keys,
endpoints) that shipped but is undocumented, stale examples / paths / counts / version
numbers, and CHANGELOG entries that over- or under-sell what shipped. Be terse. Just the gaps.

THE DOCS AND DIFF: <list the touched doc paths>"

**If \`CODEX_MODE: ready\` — run Codex:**

\`\`\`bash
TMPERR_DOC=$(mktemp /tmp/codex-docreview-XXXXXXXX)
_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
codex exec "<prompt>" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR_DOC"
\`\`\`

Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
\`\`\`bash
cat "$TMPERR_DOC"
\`\`\`

Present the full output verbatim under \`CODEX SAYS (documentation review):\`.

${codexErrorHandling('documentation review')}

**If \`CODEX_MODE: not_installed\` or \`not_authed\` (or Codex errored at runtime):**

Dispatch via the Agent tool with the same prompt. Bound it at a 5-minute timeout.
Present findings under \`DOCUMENTATION REVIEW (Claude subagent):\`. If it fails: "Doc review unavailable. Continuing."

**Apply decision (T3B — informational, never auto-edit, but findings don't evaporate).**
If there are zero findings, say "Docs match what shipped — no gaps." and continue. Otherwise
present the findings, then use AskUserQuestion ONCE:

> "The doc review found N gaps between the docs and what shipped. How do you want to handle them?"
>
> RECOMMENDATION: Choose A if the gaps are concrete doc fixes (stale path, missing flag). The
> doc review only reports; nothing is edited without your say-so. Completeness: A=9/10, B=4/10, C=8/10.

Options:
- A) Apply all the doc fixes now
- B) Skip — leave docs as-is
- C) Decide per-finding

On A or per-finding approvals, make the approved edits yourself (the tool never silently
rewrites docs). On B, note the gaps in the output so they're visible.

**Persist the result:**
\`\`\`bash
~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"codex-doc-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","commit":"'"$(git rev-parse --short HEAD)"'"}'
\`\`\`
Substitute: STATUS = "clean" if no gaps, "issues_found" if gaps exist. SOURCE = "codex" if Codex ran, "claude" if the subagent ran.

**Cleanup:** Run \`rm -f "$TMPERR_DOC"\` after processing (if Codex was used).

---`;
}
|
||||
|
||||
// ─── Plan File Discovery (shared helper) ──────────────────────────────
|
||||
|
||||
function generatePlanFileDiscovery(): string {
|
||||
|
||||
Reference in New Issue
Block a user