Merge remote-tracking branch 'origin/main' into garrytan/askuserquestion-split-on-overflow

2026-06-17 15:20:11 +02:00 · 2026-05-26 22:27:54 -07:00
parent d0d8cb2db6 f8bb59094d
commit e08e5fa8aa
107 changed files with 10060 additions and 3885 deletions
@@ -0,0 +1,54 @@
+#!/usr/bin/env bun
+/**
+ * CLI for capturing a parity baseline snapshot.
+ *
+ * Usage:
+ *   bun run scripts/capture-baseline.ts                            # default path
+ *   bun run scripts/capture-baseline.ts --tag v1.44.1              # tag the snapshot
+ *   bun run scripts/capture-baseline.ts --out path/to/baseline.json
+ *
+ * The default output path is test/fixtures/parity-baseline-<tag>.json,
+ * or test/fixtures/parity-baseline-current.json when no tag is given.
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import { captureBaseline } from '../test/helpers/capture-parity-baseline';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+
+/**
+ * Read the value of a command-line flag from `process.argv`.
+ *
+ * Accepts both `--flag value` and `--flag=value`. The first occurrence of the
+ * flag (in either form) wins.
+ *
+ * @param name Flag name including its leading dashes, e.g. `--tag`.
+ * @returns The flag's value, or `undefined` when the flag is absent, has an
+ *   empty inline value, is the last argument, or is followed by another
+ *   `--flag` (so `--tag --out x` no longer yields the tag "--out").
+ */
+function arg(name: string): string | undefined {
+  const inlinePrefix = `${name}=`;
+  const argv = process.argv;
+  for (let i = 0; i < argv.length; i++) {
+    const token = argv[i];
+    if (token.startsWith(inlinePrefix)) {
+      return token.slice(inlinePrefix.length) || undefined;
+    }
+    if (token === name) {
+      const next = argv[i + 1];
+      // A following flag means this one was given without a value.
+      return next === undefined || next.startsWith('--') ? undefined : next;
+    }
+  }
+  return undefined;
+}
+
+const tag = arg('--tag');
+const outOverride = arg('--out');
+// Without --out, the snapshot lands in test/fixtures, named after the tag
+// ("current" stands in when no tag was given).
+const defaultOut = path.join(ROOT, 'test', 'fixtures', `parity-baseline-${tag ?? 'current'}.json`);
+const outPath = outOverride ? path.resolve(outOverride) : defaultOut;
+
+const baseline = captureBaseline({ repoRoot: ROOT, tag });
+
+// Create the destination directory on demand, then write pretty-printed JSON
+// terminated by a newline.
+fs.mkdirSync(path.dirname(outPath), { recursive: true });
+fs.writeFileSync(outPath, JSON.stringify(baseline, null, 2) + '\n');
+
+// Human-readable summary of what was captured.
+const totalKB = Math.round(baseline.totalCorpusBytes / 1024);
+const summaryLines = [
+  `Parity baseline captured: ${outPath}`,
+  `  tag:           ${baseline.tag}`,
+  `  commit:        ${baseline.capturedFromCommit}`,
+  `  branch:        ${baseline.capturedFromBranch}`,
+  `  skills:        ${baseline.totalSkills}`,
+  `  total corpus:  ${totalKB} KB`,
+  `  catalog tokens: ~${baseline.estTotalCatalogTokens}`,
+  `  top 3 heaviest:`,
+];
+for (const line of summaryLines) {
+  console.log(line);
+}
+for (const entry of baseline.topHeaviest.slice(0, 3)) {
+  const kb = Math.round(entry.skillMdBytes / 1024);
+  console.log(`    ${entry.skill.padEnd(28)} ${kb} KB (${entry.skillMdLines} lines, ~${entry.estTokens} tokens)`);
+}
@@ -16,7 +16,7 @@ import { writeLlmsTxt } from './gen-llms-txt';
 import * as fs from 'fs';
 import * as path from 'path';
 import type { Host, TemplateContext } from './resolvers/types';
-import { HOST_PATHS } from './resolvers/types';
+import { HOST_PATHS, unwrapResolver } from './resolvers/types';
 import { RESOLVERS } from './resolvers/index';
 import { externalSkillName, extractHookSafetyProse as _extractHookSafetyProse, extractNameAndDescription as _extractNameAndDescription, condenseOpenAIShortDescription as _condenseOpenAIShortDescription, generateOpenAIYaml as _generateOpenAIYaml } from './resolvers/codex-helpers';
 import { generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec } from './resolvers/review';
@@ -59,6 +59,41 @@ const MODEL_ARG_VAL: Model = (() => {
  return resolved;
 })();

+// ─── Catalog Mode (v1.45.0.0 T4) ────────────────────────────
+// 'trim' (default): shorten frontmatter description to lead sentence,
+// move routing/voice prose into a "## When to invoke" body section, and
+// emit scripts/proactive-suggestions.json (single file across all skills).
+// 'full': legacy v1.44 behavior — full description stays in frontmatter.
+const CATALOG_MODE_ARG = process.argv.find((token) => token.startsWith('--catalog-mode'));
+const CATALOG_MODE: 'trim' | 'full' = (() => {
+  // Flag not passed: catalog trim is the default.
+  if (!CATALOG_MODE_ARG) return 'trim';
+
+  // Value comes either inline (`--catalog-mode=full`) or as the next argv
+  // entry (`--catalog-mode full`).
+  let requested: string | undefined;
+  if (CATALOG_MODE_ARG.includes('=')) {
+    requested = CATALOG_MODE_ARG.split('=')[1];
+  } else {
+    const flagIndex = process.argv.indexOf(CATALOG_MODE_ARG);
+    requested = process.argv[flagIndex + 1];
+  }
+
+  if (requested === 'trim' || requested === 'full') return requested;
+  throw new Error(`Unknown catalog mode: ${requested}. Use 'trim' (default) or 'full'.`);
+})();
+
+// ─── Explain-level Overlay ──────────────────────────────────
+// --explain-level=terse compresses preamble prose (writing-style, completeness,
+// confusion-protocol, context-health) to a single pointer line at gen time.
+// Default keeps the runtime-conditional behavior (sections render unconditionally,
+// the model skips them when EXPLAIN_LEVEL: terse appears in the preamble echo).
+// Opt-in via the build flag so most users get the runtime-flexible default.
+const EXPLAIN_LEVEL_ARG = process.argv.find((token) => token.startsWith('--explain-level'));
+const EXPLAIN_LEVEL: 'default' | 'terse' = (() => {
+  // Flag not passed: keep the runtime-conditional default.
+  if (!EXPLAIN_LEVEL_ARG) return 'default';
+
+  // Value comes either inline (`--explain-level=terse`) or as the next argv
+  // entry (`--explain-level terse`).
+  let requested: string | undefined;
+  if (EXPLAIN_LEVEL_ARG.includes('=')) {
+    requested = EXPLAIN_LEVEL_ARG.split('=')[1];
+  } else {
+    const flagIndex = process.argv.indexOf(EXPLAIN_LEVEL_ARG);
+    requested = process.argv[flagIndex + 1];
+  }
+
+  if (requested === 'default' || requested === 'terse') return requested;
+  throw new Error(`Unknown explain level: ${requested}. Use 'default' or 'terse'.`);
+})();
+
 // HostPaths, HOST_PATHS, and TemplateContext imported from ./resolvers/types (line 7-8)
 // Design constants (AI_SLOP_BLACKLIST, OPENAI_HARD_REJECTIONS, OPENAI_LITMUS_CHECKS)
 // live in ./resolvers/constants and are consumed by resolvers directly.
@@ -172,6 +207,169 @@ function processVoiceTriggers(content: string): string {
 // Export for testing
 export { extractVoiceTriggers, processVoiceTriggers };

+// ─── Catalog Trim (v1.45.0.0 T4) ─────────────────────────────
+//
+// Frontmatter `description:` blocks today pack: a one-line outcome, "Use when
+// asked to..." voice triggers, "Proactively..." routing guidance, and a
+// "(gstack)" tag. This pile is the always-loaded catalog surface — every
+// session pays for the full text. The catalog trim splits the description
+// into a one-line catalog entry (lead sentence + "(gstack)") that stays in
+// the frontmatter, and a "## When to invoke" body section that holds the
+// routing/voice triggers prose for in-skill discovery. A registry written
+// to scripts/proactive-suggestions.json (one entry per skill) makes routing
+// available to agents that need it without paying the always-loaded cost.
+//
+// Opt-out: `--catalog-mode=full` keeps v1.44 behavior (no trim, full
+// description in frontmatter). Use when debugging routing regressions or
+// when shipping skills to hosts that depend on the legacy fat catalog.
+
+/**
+ * Pieces of a skill's frontmatter description after the catalog split, as
+ * produced by splitCatalogDescription.
+ */
+export interface CatalogParts {
+  lead: string;            // First sentence — kept in catalog (shortened with a trailing "..." when over 200 chars)
+  routingProse: string;    // "Use when asked to...", "Proactively..." paragraphs
+  voiceLine: string | null; // "Voice triggers (speech-to-text aliases): ..." line if present
+  // True when the description contained a "(gstack)" tag; the tag itself is
+  // stripped from lead/routingProse and re-appended by buildTrimmedDescription.
+  hasGstackTag: boolean;
+}
+
+/**
+ * Split a skill's frontmatter description into its catalog lead and the
+ * routing / voice-trigger remainder.
+ *
+ * @param description Full description text; may span several lines.
+ * @returns
+ *  - `lead`: the first sentence, shortened to at most 200 characters (with a
+ *    trailing "...") when longer. If no sentence terminator is found, the
+ *    first 20 words are used instead.
+ *  - `routingProse`: everything after the first sentence, with the original
+ *    line breaks preserved (empty string when nothing follows).
+ *  - `voiceLine`: the "Voice triggers (speech-to-text aliases): ..." line, or
+ *    null when absent.
+ *  - `hasGstackTag`: whether a "(gstack)" tag was present (and removed).
+ */
+export function splitCatalogDescription(description: string): CatalogParts {
+  // Voice triggers line (folded in by processVoiceTriggers earlier). Pull it
+  // out first so it never lands in the lead or the routing prose.
+  const voiceMatch = description.match(/Voice triggers \(speech-to-text aliases\):[^\n]+/);
+  const voiceLine = voiceMatch ? voiceMatch[0] : null;
+  let working = voiceLine ? description.replace(voiceLine, '').trim() : description.trim();
+
+  // Strip the first "(gstack)" tag; callers re-append it from hasGstackTag.
+  const hasGstackTag = /\(gstack\)/.test(working);
+  if (hasGstackTag) working = working.replace(/\(gstack\)/, '').trim();
+
+  // Sentence detection runs on a single-line view of the text.
+  // A terminator (. ! ?) only ends the sentence when it is followed by
+  // whitespace or end-of-text, so embedded periods (URLs, "v1.45.0.0") are
+  // skipped. The lazy `.*?` is what allows scanning past them: a negated class
+  // such as `[^.!?]*` stops at the first embedded period and the whole match
+  // fails, dropping to the 20-word fallback.
+  const collapsed = working.replace(/\s+/g, ' ').trim();
+  const sentenceMatch = collapsed.match(/^(.*?[.!?])(?:\s|$)/);
+  // sentenceLead is the FULL first sentence (no truncation). The routing
+  // boundary is computed from it; only the displayed lead is truncated.
+  // Truncating first and then locating the boundary loses the entire tail of
+  // the description whenever the first sentence exceeds 200 chars.
+  const sentenceLead = sentenceMatch
+    ? sentenceMatch[1].trim()
+    : collapsed.split(/\s/).slice(0, 20).join(' ');
+
+  // Displayed lead: cut at a word boundary when one exists past column 60,
+  // otherwise hard-cut at 197 chars, then add "..." (result is <= 200 chars).
+  // NOTE(review): the part of the first sentence removed by this truncation
+  // is not carried into routingProse — confirm that is intended.
+  let lead = sentenceLead;
+  if (lead.length > 200) {
+    const trunc = lead.slice(0, 197);
+    const lastSpace = trunc.lastIndexOf(' ');
+    lead = (lastSpace > 60 ? trunc.slice(0, lastSpace) : trunc) + '...';
+  }
+
+  // Map the end of sentenceLead (a prefix of `collapsed`) back onto `working`
+  // so the routing prose keeps its original line breaks. Each run of
+  // whitespace in `working` corresponds to exactly one space in `collapsed`,
+  // so only the first character of a run is counted.
+  const whitespace = /\s/;
+  let consumed = 0;
+  let cut = 0;
+  while (cut < working.length && consumed < sentenceLead.length) {
+    const continuesRun =
+      cut > 0 && whitespace.test(working[cut]) && whitespace.test(working[cut - 1]);
+    if (!continuesRun) consumed += 1;
+    cut += 1;
+  }
+  const routingProse = working.slice(cut).trim();
+
+  return { lead, routingProse, voiceLine, hasGstackTag };
+}
+
+/**
+ * Compose the one-line catalog `description:` value: the trimmed lead,
+ * followed by " (gstack)" when the original description carried that tag.
+ */
+export function buildTrimmedDescription(parts: CatalogParts): string {
+  const tag = parts.hasGstackTag ? ' (gstack)' : '';
+  return parts.lead.trim() + tag;
+}
+
+/**
+ * Render the "## When to invoke this skill" body section: the heading, then
+ * the routing prose and the voice-trigger line (each only when present), with
+ * a blank line after every element.
+ */
+export function buildWhenToInvokeSection(parts: CatalogParts): string {
+  const out: string[] = ['## When to invoke this skill', ''];
+  for (const chunk of [parts.routingProse, parts.voiceLine]) {
+    // Empty routing prose and a null voice line are both skipped.
+    if (chunk) out.push(chunk, '');
+  }
+  return out.join('\n');
+}
+
+/**
+ * Apply catalog trim to a SKILL.md body:
+ *  - shorten frontmatter `description:` to lead + (gstack)
+ *  - insert a "## When to invoke this skill" body section AFTER the
+ *    frontmatter and any leading HTML-comment header lines (so it lands near
+ *    the top of body content, where routing guidance belongs)
+ *
+ * @param content Full SKILL.md text; must start with a `---` frontmatter fence.
+ * @param skillName Currently unused; kept so existing callers keep compiling.
+ * @returns The rewritten content plus the parts (used for proactive-suggestions
+ *   JSON aggregation at the end of the run), or null when nothing is trimmed:
+ *   no frontmatter, no `description:` field, a description under 120 chars, or
+ *   a trim that would save fewer than ~20 chars.
+ *
+ * NOTE(review): the new description is written as an unquoted single-line
+ * value. Leads containing ": " (e.g. "Design shotgun: generate ...") may need
+ * quoting for strict YAML parsers — confirm against the frontmatter consumer.
+ */
+export function applyCatalogTrim(content: string, skillName: string): { content: string; parts: CatalogParts } | null {
+  // Locate the frontmatter: it must open on the first line and have a closing fence.
+  if (!content.startsWith('---\n')) return null;
+  const fmEnd = content.indexOf('\n---', 4);
+  if (fmEnd === -1) return null;
+  const frontmatter = content.slice(4, fmEnd);
+
+  // Match `description: |` block + indented body lines, else a single-line value.
+  const descMatch = frontmatter.match(/^description:\s*\|?\s*\n((?:\s{2,}.*(?:\n|$))+)/m)
+                    || frontmatter.match(/^description:\s+(.+)$/m);
+  if (!descMatch) return null;
+
+  // Extract full description text; block scalars lose their 2-space indent.
+  const isBlockScalar = /^description:\s*\|/.test(descMatch[0]);
+  const descText = isBlockScalar
+    ? descMatch[1].split('\n').map(l => l.replace(/^\s{2}/, '')).join('\n').trim()
+    : descMatch[1].trim();
+
+  // Skip skills with very short descriptions (already trimmed or no routing prose).
+  // Below ~120 chars, splitting adds no value.
+  if (descText.length < 120) return null;
+
+  const parts = splitCatalogDescription(descText);
+  // If lead + (gstack) is already most of the text, no trim needed.
+  const newDesc = buildTrimmedDescription(parts);
+  if (newDesc.length >= descText.length - 20) return null;
+
+  // Replace the description by splicing at the matched position. A string
+  // replace() would (a) interpret `$&`, `$'`, `` $` `` and `$$` inside the new
+  // description and (b) hit the first literal occurrence rather than the match.
+  // The trailing newline keeps the next YAML field off the description line.
+  const descStart = descMatch.index ?? 0;
+  const newFrontmatter =
+    frontmatter.slice(0, descStart) +
+    `description: ${newDesc}\n` +
+    frontmatter.slice(descStart + descMatch[0].length);
+  let newContent = '---\n' + newFrontmatter + content.slice(fmEnd);
+
+  // The closing fence sits right after the rewritten frontmatter. The body
+  // starts on the line after it; when the fence is the last line of the file
+  // (no trailing newline) the body is empty and the section is appended.
+  // Searching for a literal "\n---\n" instead would miss that case (and CRLF
+  // fences) and place the section inside the frontmatter.
+  const closeFenceIdx = 4 + newFrontmatter.length;
+  const fenceLineEnd = newContent.indexOf('\n', closeFenceIdx + 1);
+  const bodyStart = fenceLineEnd === -1 ? newContent.length : fenceLineEnd + 1;
+
+  const whenToInvoke = '\n' + buildWhenToInvokeSection(parts).trim() + '\n';
+  // Skip past the generated header if present (HTML comment lines right after
+  // the frontmatter close).
+  const headerMatch = newContent.slice(bodyStart).match(/^(<!--[^>]*-->\s*\n)+/);
+  const insertAt = bodyStart + (headerMatch ? headerMatch[0].length : 0);
+  newContent = newContent.slice(0, insertAt) + whenToInvoke + '\n' + newContent.slice(insertAt);
+
+  return { content: newContent, parts };
+}
+
 const OPENAI_SHORT_DESCRIPTION_LIMIT = 120;

 function condenseOpenAIShortDescription(description: string): string {
@@ -401,7 +599,7 @@ function processExternalHost(
  return { content: result, outputPath, outputDir, symlinkLoop };
 }

-function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath: string; content: string; symlinkLoop?: boolean } {
+function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath: string; content: string; symlinkLoop?: boolean; catalogParts?: CatalogParts | null } {
  const tmplContent = fs.readFileSync(tmplPath, 'utf-8');
  const relTmplPath = path.relative(ROOT, tmplPath);
  let outputPath = tmplPath.replace(/\.tmpl$/, '');
@@ -430,7 +628,7 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
  const interactiveMatch = tmplContent.match(/^interactive:\s*(true|false)\s*$/m);
  const interactive = interactiveMatch ? interactiveMatch[1] === 'true' : undefined;

-  const ctx: TemplateContext = { skillName, tmplPath, benefitsFrom, host, paths: HOST_PATHS[host], preambleTier, model: MODEL_ARG_VAL, interactive };
+  const ctx: TemplateContext = { skillName, tmplPath, benefitsFrom, host, paths: HOST_PATHS[host], preambleTier, model: MODEL_ARG_VAL, interactive, explainLevel: EXPLAIN_LEVEL };

  // Replace placeholders (supports parameterized: {{NAME:arg1:arg2}})
  // Config-driven: suppressedResolvers return empty string for this host
@@ -441,9 +639,11 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
    const resolverName = parts[0];
    const args = parts.slice(1);
    if (suppressed.has(resolverName)) return '';
-    const resolver = RESOLVERS[resolverName];
-    if (!resolver) throw new Error(`Unknown placeholder {{${resolverName}}} in ${relTmplPath}`);
-    return args.length > 0 ? resolver(ctx, args) : resolver(ctx);
+    const entry = RESOLVERS[resolverName];
+    if (!entry) throw new Error(`Unknown placeholder {{${resolverName}}} in ${relTmplPath}`);
+    const { resolve, appliesTo } = unwrapResolver(entry);
+    if (appliesTo && !appliesTo(ctx)) return '';
+    return args.length > 0 ? resolve(ctx, args) : resolve(ctx);
  });

  // Check for any remaining unresolved placeholders
@@ -483,7 +683,17 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
    content = header + content;
  }

-  return { outputPath, content, symlinkLoop };
+  // Catalog trim (Claude only — external hosts have their own frontmatter shapes)
+  let catalogParts: CatalogParts | null = null;
+  if (host === 'claude' && CATALOG_MODE === 'trim') {
+    const trimmed = applyCatalogTrim(content, skillName);
+    if (trimmed) {
+      content = trimmed.content;
+      catalogParts = trimmed.parts;
+    }
+  }
+
+  return { outputPath, content, symlinkLoop, catalogParts };
 }

 // ─── Main ───────────────────────────────────────────────────
@@ -503,6 +713,14 @@ for (const currentHost of hostsToRun) {
    let hasChanges = false;
    const tokenBudget: Array<{ skill: string; lines: number; tokens: number }> = [];

+    // T4 catalog trim: collect routing/voice parts across all Claude skills,
+    // then write scripts/proactive-suggestions.json once per gen-skill-docs run.
+    const proactiveAggregate: Record<string, {
+      lead: string;
+      routing: string;
+      voice_line: string | null;
+    }> = {};
+
    const currentHostConfig = getHostConfig(currentHost);
    for (const tmplPath of findTemplates()) {
      const dir = path.basename(path.dirname(tmplPath));
@@ -516,7 +734,24 @@ for (const currentHost of hostsToRun) {
        if (currentHostConfig.generation.skipSkills.includes(dir)) continue;
      }

-      const { outputPath, content, symlinkLoop } = processTemplate(tmplPath, currentHost);
+      const { outputPath, content, symlinkLoop, catalogParts } = processTemplate(tmplPath, currentHost);
+      if (catalogParts) {
+        // Root-skill detection: when the template lives at ROOT/SKILL.md.tmpl,
+        // path.basename(path.dirname(tmplPath)) returns the repo's directory
+        // name (e.g. "seville-v3" in a Conductor worktree, "gstack" on CI).
+        // That's non-deterministic across machines and breaks CI freshness
+        // checks, so the root skill is keyed under the fixed string 'gstack',
+        // mirroring the `name: gstack` that the root SKILL.md.tmpl declares.
+        // All other skills are keyed by `dir`, which by convention matches
+        // both the directory name and the frontmatter name.
+        const isRoot = path.dirname(tmplPath) === ROOT;
+        const key = isRoot ? 'gstack' : dir;
+        proactiveAggregate[key] = {
+          lead: catalogParts.lead,
+          routing: catalogParts.routingProse,
+          voice_line: catalogParts.voiceLine,
+        };
+      }
      const relOutput = path.relative(ROOT, outputPath);

      if (symlinkLoop) {
@@ -620,6 +855,40 @@ The orchestrator will persist the plan link to its own memory/knowledge store.
      failures.push({ host: currentHost, error: new Error('Stale files detected') });
    }

+    // T4 catalog trim: write aggregated proactive-suggestions.json (Claude only).
+    // The JSON registry lets agents pull voice triggers / routing prose for any
+    // skill on demand instead of paying for it always-loaded in the catalog.
+    //
+    // No timestamp field — keeps the file content-deterministic across runs so
+    // CI dry-run freshness checks don't flap on regen. If a per-run timestamp
+    // is ever needed for debugging, write it to a separate `.gen-stamp` file.
+    if (currentHost === 'claude' && CATALOG_MODE === 'trim' && Object.keys(proactiveAggregate).length > 0 && !DRY_RUN) {
+      const proactivePath = path.join(ROOT, 'scripts', 'proactive-suggestions.json');
+      // Sort keys alphabetically so the serialized JSON is identical across
+      // machines regardless of filesystem-iteration order. Without this, CI
+      // freshness checks fail when the local dev machine and CI runner
+      // discover templates in different orders.
+      const sortedSkills: typeof proactiveAggregate = {};
+      for (const key of Object.keys(proactiveAggregate).sort()) {
+        sortedSkills[key] = proactiveAggregate[key];
+      }
+      const payload = {
+        $schema: 'https://gstack.dev/schemas/proactive-suggestions.json',
+        catalog_mode: 'trim',
+        note: 'Routing / voice-trigger prose extracted from SKILL.md frontmatter descriptions during catalog trim. Loaded on demand when routing guidance is needed.',
+        skills: sortedSkills,
+      };
+      const serialized = JSON.stringify(payload, null, 2) + '\n';
+      // Only write if content actually changed — prevents needless touches that
+      // would flap CI freshness checks. Read existing file, compare, skip write
+      // when identical.
+      let existing = '';
+      try { existing = fs.readFileSync(proactivePath, 'utf-8'); } catch { /* first run */ }
+      if (existing !== serialized) {
+        fs.writeFileSync(proactivePath, serialized);
+      }
+    }
+
    // Print token budget summary
    if (!DRY_RUN && tokenBudget.length > 0) {
      tokenBudget.sort((a, b) => b.lines - a.lines);
@@ -0,0 +1,272 @@
+{
+  "$schema": "https://gstack.dev/schemas/proactive-suggestions.json",
+  "catalog_mode": "trim",
+  "note": "Routing / voice-trigger prose extracted from SKILL.md frontmatter descriptions during catalog trim. Loaded on demand when routing guidance is needed.",
+  "skills": {
+    "autoplan": {
+      "lead": "Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk and runs them sequentially with auto-decisions using 6 decision principles.",
+      "routing": "Surfaces\ntaste decisions (close approaches, borderline scope, codex disagreements) at a final\napproval gate. One command, fully reviewed plan out.\nUse when asked to \"auto review\", \"autoplan\", \"run all reviews\", \"review this plan\nautomatically\", or \"make the decisions for me\".\nProactively suggest when the user has a plan file and wants to run the full review\ngauntlet without answering 15-30 intermediate questions.",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"auto plan\", \"automatic review\"."
+    },
+    "benchmark": {
+      "lead": "Performance regression detection using the browse daemon.",
+      "routing": "Establishes\nbaselines for page load times, Core Web Vitals, and resource sizes.\nCompares before/after on every PR. Tracks performance trends over time.\nUse when: \"performance\", \"benchmark\", \"page speed\", \"lighthouse\", \"web vitals\",\n\"bundle size\", \"load time\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"speed test\", \"check performance\"."
+    },
+    "benchmark-models": {
+      "lead": "Cross-model benchmark for gstack skills.",
+      "routing": "Runs the same prompt through Claude,\nGPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,\nand optionally quality via LLM judge. Answers \"which model is actually best\nfor this skill?\" with data instead of vibes. Separate from /benchmark, which\nmeasures web page performance. Use when: \"benchmark models\", \"compare models\",\n\"which model is best for X\", \"cross-model comparison\", \"model shootout\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"compare models\", \"model shootout\", \"which model is best\"."
+    },
+    "browse": {
+      "lead": "Fast headless browser for QA testing and site dogfooding.",
+      "routing": "Navigate any URL, interact with\nelements, verify page state, diff before/after actions, take annotated screenshots, check\nresponsive layouts, test forms and uploads, handle dialogs, and assert element states.\n~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a\nuser flow, or file a bug with evidence. Use when asked to \"open in browser\", \"test the\nsite\", \"take a screenshot\", or \"dogfood this\".",
+      "voice_line": null
+    },
+    "canary": {
+      "lead": "Post-deploy canary monitoring.",
+      "routing": "Watches the live app for console errors,\nperformance regressions, and page failures using the browse daemon. Takes\nperiodic screenshots, compares against pre-deploy baselines, and alerts\non anomalies. Use when: \"monitor deploy\", \"canary\", \"post-deploy check\",\n\"watch production\", \"verify deploy\".",
+      "voice_line": null
+    },
+    "careful": {
+      "lead": "Safety guardrails for destructive commands.",
+      "routing": "Warns before rm -rf, DROP TABLE,\nforce-push, git reset --hard, kubectl delete, and similar destructive operations.\nUser can override each warning. Use when touching prod, debugging live systems,\nor working in a shared environment. Use when asked to \"be careful\", \"safety mode\",\n\"prod mode\", or \"careful mode\".",
+      "voice_line": null
+    },
+    "codex": {
+      "lead": "OpenAI Codex CLI wrapper — three modes.",
+      "routing": "Code review: independent diff review via\ncodex review with pass/fail gate. Challenge: adversarial mode that tries to break\nyour code. Consult: ask codex anything with session continuity for follow-ups.\nThe \"200 IQ autistic developer\" second opinion. Use when asked to \"codex review\",\n\"codex challenge\", \"ask codex\", \"second opinion\", or \"consult codex\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"code x\", \"code ex\", \"get another opinion\"."
+    },
+    "context-restore": {
+      "lead": "Restore working context saved earlier by /context-save.",
+      "routing": "Loads the most recent\nsaved state (across all branches by default) so you can pick up where you\nleft off — even across Conductor workspace handoffs.\nUse when asked to \"resume\", \"restore context\", \"where was I\", or\n\"pick up where I left off\". Pair with /context-save.\nFormerly /checkpoint resume — renamed because Claude Code treats /checkpoint\nas a native rewind alias in current environments.",
+      "voice_line": null
+    },
+    "context-save": {
+      "lead": "Save working context.",
+      "routing": "Captures git state, decisions made, and remaining work\nso any future session can pick up without losing a beat.\nUse when asked to \"save progress\", \"save state\", \"context save\", or\n\"save my work\". Pair with /context-restore to resume later.\nFormerly /checkpoint — renamed because Claude Code treats /checkpoint as a\nnative rewind alias in current environments, which was shadowing this skill.",
+      "voice_line": null
+    },
+    "cso": {
+      "lead": "Chief Security Officer mode.",
+      "routing": "Infrastructure-first security audit: secrets archaeology,\ndependency supply chain, CI/CD pipeline security, LLM/AI security, skill supply chain\nscanning, plus OWASP Top 10, STRIDE threat modeling, and active verification.\nTwo modes: daily (zero-noise, 8/10 confidence gate) and comprehensive (monthly deep\nscan, 2/10 bar). Trend tracking across audit runs.\nUse when: \"security audit\", \"threat model\", \"pentest review\", \"OWASP\", \"CSO review\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"see-so\", \"see so\", \"security review\", \"security check\", \"vulnerability scan\", \"run security\"."
+    },
+    "design-consultation": {
+      "lead": "Design consultation: understands your product, researches the landscape, proposes a complete design system (aesthetic, typography, color, layout, spacing, motion), and generates font+color preview...",
+      "routing": "Creates DESIGN.md as your project's design source\nof truth. For existing sites, use /plan-design-review to infer the system instead.\nUse when asked to \"design system\", \"brand guidelines\", or \"create DESIGN.md\".\nProactively suggest when starting a new project's UI with no existing\ndesign system or DESIGN.md.",
+      "voice_line": null
+    },
+    "design-html": {
+      "lead": "Design finalization: generates production-quality Pretext-native HTML/CSS.",
+      "routing": "Works with approved mockups from /design-shotgun, CEO plans from /plan-ceo-review,\ndesign review context from /plan-design-review, or from scratch with a user\ndescription. Text actually reflows, heights are computed, layouts are dynamic.\n30KB overhead, zero deps. Smart API routing: picks the right Pretext patterns\nfor each design type. Use when: \"finalize this design\", \"turn this into HTML\",\n\"build me a page\", \"implement this design\", or after any planning skill.\nProactively suggest when user has approved a design or has a plan ready.",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"build the design\", \"code the mockup\", \"make it real\"."
+    },
+    "design-review": {
+      "lead": "Designer's eye QA: finds visual inconsistency, spacing issues, hierarchy problems, AI slop patterns, and slow interactions — then fixes them.",
+      "routing": "Iteratively fixes issues\nin source code, committing each fix atomically and re-verifying with before/after\nscreenshots. For plan-mode design review (before implementation), use /plan-design-review.\nUse when asked to \"audit the design\", \"visual QA\", \"check if it looks good\", or \"design polish\".\nProactively suggest when the user mentions visual inconsistencies or\nwants to polish the look of a live site.",
+      "voice_line": null
+    },
+    "design-shotgun": {
+      "lead": "Design shotgun: generate multiple AI design variants, open a comparison board, collect structured feedback, and iterate.",
+      "routing": "Standalone design exploration you can\nrun anytime. Use when: \"explore designs\", \"show me options\", \"design variants\",\n\"visual brainstorm\", or \"I don't like how this looks\".\nProactively suggest when the user describes a UI feature but hasn't seen\nwhat it could look like.",
+      "voice_line": null
+    },
+    "devex-review": {
+      "lead": "Live developer experience audit.",
+      "routing": "Uses the browse tool to actually TEST the\ndeveloper experience: navigates docs, tries the getting started flow, times\nTTHW, screenshots error messages, evaluates CLI help text. Produces a DX\nscorecard with evidence. Compares against /plan-devex-review scores if they\nexist (the boomerang: plan said 3 minutes, reality says 8). Use when asked to\n\"test the DX\", \"DX audit\", \"developer experience test\", or \"try the\nonboarding\". Proactively suggest after shipping a developer-facing feature.",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"dx audit\", \"test the developer experience\", \"try the onboarding\", \"developer experience test\"."
+    },
+    "document-generate": {
+      "lead": "Generate missing documentation from scratch for a feature, module, or entire project.",
+      "routing": "Uses the Diataxis framework (tutorial / how-to / reference / explanation) to produce\ncomplete, structured documentation. Can be invoked standalone or called by\n/document-release when it finds coverage gaps. Use when asked to \"write docs\",\n\"generate documentation\", \"document this feature\", \"create a tutorial\", or\n\"explain this module\".",
+      "voice_line": null
+    },
+    "document-release": {
+      "lead": "Post-ship documentation update.",
+      "routing": "Reads all project docs, cross-references the\ndiff, builds a Diataxis coverage map (reference/how-to/tutorial/explanation),\nupdates README/ARCHITECTURE/CONTRIBUTING/CLAUDE.md to match what shipped,\ndetects architecture diagram drift, polishes CHANGELOG voice with a sell-test\nrubric, cleans up TODOS, and optionally bumps VERSION. Surfaces documentation\ndebt in the PR body. Use when asked to \"update the docs\", \"sync documentation\",\nor \"post-ship docs\". Proactively suggest after a PR is merged or code is shipped.",
+      "voice_line": null
+    },
+    "freeze": {
+      "lead": "Restrict file edits to a specific directory for the session.",
+      "routing": "Blocks Edit and\nWrite outside the allowed path. Use when debugging to prevent accidentally\n\"fixing\" unrelated code, or when you want to scope changes to one module.\nUse when asked to \"freeze\", \"restrict edits\", \"only edit this folder\",\nor \"lock down edits\".",
+      "voice_line": null
+    },
+    "gstack": {
+      "lead": "Fast headless browser for QA testing and site dogfooding.",
+      "routing": "Navigate pages, interact with\nelements, verify state, diff before/after, take annotated screenshots, test responsive\nlayouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or\ntest a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.",
+      "voice_line": null
+    },
+    "gstack-upgrade": {
+      "lead": "Upgrade gstack to the latest version.",
+      "routing": "Detects global vs vendored install,\nruns the upgrade, and shows what's new. Use when asked to \"upgrade gstack\",\n\"update gstack\", or \"get latest version\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"upgrade the tools\", \"update the tools\", \"gee stack upgrade\", \"g stack upgrade\"."
+    },
+    "guard": {
+      "lead": "Full safety mode: destructive command warnings + directory-scoped edits.",
+      "routing": "Combines /careful (warns before rm -rf, DROP TABLE, force-push, etc.) with\n/freeze (blocks edits outside a specified directory). Use for maximum safety\nwhen touching prod or debugging live systems. Use when asked to \"guard mode\",\n\"full safety\", \"lock it down\", or \"maximum safety\".",
+      "voice_line": null
+    },
+    "health": {
+      "lead": "Code quality dashboard.",
+      "routing": "Wraps existing project tools (type checker, linter,\ntest runner, dead code detector, shell linter), computes a weighted composite\n0-10 score, and tracks trends over time. Use when: \"health check\",\n\"code quality\", \"how healthy is the codebase\", \"run all checks\",\n\"quality score\".",
+      "voice_line": null
+    },
+    "investigate": {
+      "lead": "Systematic debugging with root cause investigation.",
+      "routing": "Four phases: investigate,\nanalyze, hypothesize, implement. Iron Law: no fixes without root cause.\nUse when asked to \"debug this\", \"fix this bug\", \"why is this broken\",\n\"investigate this error\", or \"root cause analysis\".\nProactively invoke this skill (do NOT debug directly) when the user reports\nerrors, 500 errors, stack traces, unexpected behavior, \"it was working\nyesterday\", or is troubleshooting why something stopped working.",
+      "voice_line": null
+    },
+    "ios-clean": {
+      "lead": "Remove the DebugBridge SPM package and all #if DEBUG wiring from an iOS app.",
+      "routing": "Cleans up StateServer, DebugOverlay, accessor codegen output, and\napp-side hooks installed by /ios-qa. This is a convenience wrapper —\nthe structural Release-build guard (Package.swift conditional + CI\nswift build -c release check) is the safety-critical path.\nUse when asked to \"clean the iOS debug bridge\", \"remove DebugBridge\",\nor \"strip the gstack iOS instrumentation\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"clean the iOS debug bridge\", \"remove DebugBridge\", \"strip the gstack iOS instrumentation\"."
+    },
+    "ios-design-review": {
+      "lead": "Visual design audit for iOS apps on real hardware.",
+      "routing": "Connects to a real\niPhone via the same StateServer as /ios-qa, screenshots every screen,\nevaluates against Apple HIG, DESIGN.md, and design best practices. Scores\neach dimension 0-10 with \"what would make it a 10\" framing — mirrors\n/plan-design-review for browser. For plan-stage design review (before\nimplementation), use /plan-design-review. For live web visual audits, use\n/design-review.\nUse when asked to \"review the iOS design\", \"audit the iPhone app's\nvisuals\", or \"design QA the iOS app\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"review the iOS design\", \"audit the iPhone app's visuals\", \"design QA the iPhone app\"."
+    },
+    "ios-fix": {
+      "lead": "Autonomous iOS bug fixer.",
+      "routing": "Takes a bug found by /ios-qa, reads the source,\nwrites the fix, rebuilds, redeploys, and verifies the fix on the real\ndevice. Closes the loop: find bug → fix bug → confirm fix — zero human\nintervention. Captures the pre-bug state snapshot as a regression test\nfixture, so the bug can never recur silently.\nUse when /ios-qa reports a bug and you want it fixed automatically, or\nwhen asked to \"fix this iOS bug\", \"patch the iPhone app\", or \"auto-fix\nthe iOS issue\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"fix the iOS bug\", \"patch the iPhone app\", \"auto-fix the iOS issue\"."
+    },
+    "ios-qa": {
+      "lead": "Live-device iOS QA for SwiftUI apps.",
+      "routing": "Connects to a real iPhone via USB\nCoreDevice IPv6 tunnel, reads Swift source to understand every screen, then\nruns a vision-driven agent loop: screenshot → analyze → decide → act →\nverify → repeat. All interaction happens via HTTP to an embedded\nStateServer in the app under test. Optionally exposes the device over\nTailscale so remote agents (OpenClaw, Codex, any HTTP-capable agent) can\nrun iOS QA from anywhere without touching the hardware.\nUse when asked to \"ios qa\", \"test my iPhone app\", \"find bugs on the device\",\nor \"qa the iOS app\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"iOS quality check\", \"test the iPhone app\", \"run iOS QA\"."
+    },
+    "ios-sync": {
+      "lead": "Regenerate the iOS debug bridge against the latest upstream gstack templates.",
+      "routing": "Updates StateServer.swift, DebugOverlay.swift, Package.swift,\nand the typed @Observable state accessors. Use after you upgrade gstack\nor add new ViewModels/properties that need accessor coverage.\nUse when asked to \"resync the iOS debug bridge\", \"regenerate iOS\naccessors\", or \"update the gstack iOS instrumentation\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"resync the iOS debug bridge\", \"regenerate iOS accessors\", \"update the gstack iOS instrumentation\"."
+    },
+    "land-and-deploy": {
+      "lead": "Land and deploy workflow.",
+      "routing": "Merges the PR, waits for CI and deploy,\nverifies production health via canary checks. Takes over after /ship\ncreates the PR. Use when: \"merge\", \"land\", \"deploy\", \"merge and verify\",\n\"land it\", \"ship it to production\".",
+      "voice_line": null
+    },
+    "landing-report": {
+      "lead": "Read-only queue dashboard for workspace-aware ship.",
+      "routing": "Shows which VERSION slots\nare currently claimed by open PRs, which sibling Conductor workspaces have\nWIP work likely to ship soon, and what slot /ship would pick next. No\nmutations — just a snapshot. Use when asked to \"landing report\", \"what's in\nthe queue\", \"show me open PRs\", or \"which version do I claim next\".",
+      "voice_line": null
+    },
+    "learn": {
+      "lead": "Manage project learnings.",
+      "routing": "Review, search, prune, and export what gstack\nhas learned across sessions. Use when asked to \"what have we learned\",\n\"show learnings\", \"prune stale learnings\", or \"export learnings\".\nProactively suggest when the user asks about past patterns or wonders\n\"didn't we fix this before?\"",
+      "voice_line": null
+    },
+    "make-pdf": {
+      "lead": "Turn any markdown file into a publication-quality PDF.",
+      "routing": "Proper 1in margins,\nintelligent page breaks, page numbers, cover pages, running headers, curly\nquotes and em dashes, clickable TOC, diagonal DRAFT watermark. Not a draft\nartifact — a finished artifact. Use when asked to \"make a PDF\", \"export to\nPDF\", \"turn this markdown into a PDF\", or \"generate a document\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"make this a pdf\", \"make it a pdf\", \"export to pdf\", \"turn this into a pdf\", \"turn this markdown into a pdf\", \"generate a pdf\", \"make a pdf from\", \"pdf this markdown\"."
+    },
+    "office-hours": {
+      "lead": "YC Office Hours — two modes.",
+      "routing": "Startup mode: six forcing questions that expose\ndemand reality, status quo, desperate specificity, narrowest wedge, observation,\nand future-fit. Builder mode: design thinking brainstorming for side projects,\nhackathons, learning, and open source. Saves a design doc.\nUse when asked to \"brainstorm this\", \"I have an idea\", \"help me think through\nthis\", \"office hours\", or \"is this worth building\".\nProactively invoke this skill (do NOT answer directly) when the user describes\na new product idea, asks whether something is worth building, wants to think\nthrough design decisions for something that doesn't exist yet, or is exploring\na concept before any code is written.\nUse before /plan-ceo-review or /plan-eng-review.",
+      "voice_line": null
+    },
+    "open-gstack-browser": {
+      "lead": "Launch GStack Browser — AI-controlled Chromium with the sidebar extension baked in.",
+      "routing": "Opens a visible browser window where you can watch every action in real time.\nThe sidebar shows a live activity feed and chat. Anti-bot stealth built in.\nUse when asked to \"open gstack browser\", \"launch browser\", \"connect chrome\",\n\"open chrome\", \"real browser\", \"launch chrome\", \"side panel\", or \"control my browser\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"show me the browser\"."
+    },
+    "pair-agent": {
+      "lead": "Pair a remote AI agent with your browser.",
+      "routing": "One command generates a setup key and\nprints instructions the other agent can follow to connect. Works with OpenClaw,\nHermes, Codex, Cursor, or any agent that can make HTTP requests. The remote agent\ngets its own tab with scoped access (read+write by default, admin on request).\nUse when asked to \"pair agent\", \"connect agent\", \"share browser\", \"remote browser\",\n\"let another agent use my browser\", or \"give browser access\".",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"pair agent\", \"connect agent\", \"share my browser\", \"remote browser access\"."
+    },
+    "plan-ceo-review": {
+      "lead": "CEO/founder-mode plan review.",
+      "routing": "Rethink the problem, find the 10-star product,\nchallenge premises, expand scope when it creates a better product. Four modes:\nSCOPE EXPANSION (dream big), SELECTIVE EXPANSION (hold scope + cherry-pick\nexpansions), HOLD SCOPE (maximum rigor), SCOPE REDUCTION (strip to essentials).\nUse when asked to \"think bigger\", \"expand scope\", \"strategy review\", \"rethink this\",\nor \"is this ambitious enough\".\nProactively suggest when the user is questioning scope or ambition of a plan,\nor when the plan feels like it could be thinking bigger.",
+      "voice_line": null
+    },
+    "plan-design-review": {
+      "lead": "Designer's eye plan review — interactive, like CEO and Eng review.",
+      "routing": "Rates each design dimension 0-10, explains what would make it a 10,\nthen fixes the plan to get there. Works in plan mode. For live site\nvisual audits, use /design-review. Use when asked to \"review the design plan\"\nor \"design critique\".\nProactively suggest when the user has a plan with UI/UX components that\nshould be reviewed before implementation.",
+      "voice_line": null
+    },
+    "plan-devex-review": {
+      "lead": "Interactive developer experience plan review.",
+      "routing": "Explores developer personas,\nbenchmarks against competitors, designs magical moments, and traces friction\npoints before scoring. Three modes: DX EXPANSION (competitive advantage),\nDX POLISH (bulletproof every touchpoint), DX TRIAGE (critical gaps only).\nUse when asked to \"DX review\", \"developer experience audit\", \"devex review\",\nor \"API design review\".\nProactively suggest when the user has a plan for developer-facing products\n(APIs, CLIs, SDKs, libraries, platforms, docs).",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"dx review\", \"developer experience review\", \"devex review\", \"devex audit\", \"API design review\", \"onboarding review\"."
+    },
+    "plan-eng-review": {
+      "lead": "Eng manager-mode plan review.",
+      "routing": "Lock in the execution plan — architecture,\ndata flow, diagrams, edge cases, test coverage, performance. Walks through\nissues interactively with opinionated recommendations. Use when asked to\n\"review the architecture\", \"engineering review\", or \"lock in the plan\".\nProactively suggest when the user has a plan or design doc and is about to\nstart coding — to catch architecture issues before implementation.",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"tech review\", \"technical review\", \"plan engineering review\"."
+    },
+    "plan-tune": {
+      "lead": "Self-tuning question sensitivity + developer psychographic for gstack (v1: observational).",
+      "routing": "Review which AskUserQuestion prompts fire across gstack skills, set per-question preferences\n(never-ask / always-ask / ask-only-for-one-way), inspect the dual-track\nprofile (what you declared vs what your behavior suggests), and enable/disable\nquestion tuning. Conversational interface — no CLI syntax required.\n\nUse when asked to \"tune questions\", \"stop asking me that\", \"too many questions\",\n\"show my profile\", \"what questions have I been asked\", \"show my vibe\",\n\"developer profile\", or \"turn off question tuning\". \n\nProactively suggest when the user says the same gstack question has come up before,\nor when they explicitly override a recommendation for the Nth time.",
+      "voice_line": null
+    },
+    "qa": {
+      "lead": "Systematically QA test a web application and fix bugs found.",
+      "routing": "Runs QA testing,\nthen iteratively fixes bugs in source code, committing each fix atomically and\nre-verifying. Use when asked to \"qa\", \"QA\", \"test this site\", \"find bugs\",\n\"test and fix\", or \"fix what's broken\".\nProactively suggest when the user says a feature is ready for testing\nor asks \"does this work?\". Three tiers: Quick (critical/high only),\nStandard (+ medium), Exhaustive (+ cosmetic). Produces before/after health scores,\nfix evidence, and a ship-readiness summary. For report-only mode, use /qa-only.",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"quality check\", \"test the app\", \"run QA\"."
+    },
+    "qa-only": {
+      "lead": "Report-only QA testing.",
+      "routing": "Systematically tests a web application and produces a\nstructured report with health score, screenshots, and repro steps — but never\nfixes anything. Use when asked to \"just report bugs\", \"qa report only\", or\n\"test but don't fix\". For the full test-fix-verify loop, use /qa instead.\nProactively suggest when the user wants a bug report without any code changes.",
+      "voice_line": "Voice triggers (speech-to-text aliases): \"bug report\", \"just check for bugs\"."
+    },
+    "retro": {
+      "lead": "Weekly engineering retrospective.",
+      "routing": "Analyzes commit history, work patterns,\nand code quality metrics with persistent history and trend tracking.\nTeam-aware: breaks down per-person contributions with praise and growth areas.\nUse when asked to \"weekly retro\", \"what did we ship\", or \"engineering retrospective\".\nProactively suggest at the end of a work week or sprint.",
+      "voice_line": null
+    },
+    "review": {
+      "lead": "Pre-landing PR review.",
+      "routing": "Analyzes diff against the base branch for SQL safety, LLM trust\nboundary violations, conditional side effects, and other structural issues. Use when\nasked to \"review this PR\", \"code review\", \"pre-landing review\", or \"check my diff\".\nProactively suggest when the user is about to merge or land code changes.",
+      "voice_line": null
+    },
+    "scrape": {
+      "lead": "Pull data from a web page.",
+      "routing": "First call on a new intent prototypes the flow\nvia $B primitives and returns JSON. Subsequent calls on a matching intent\nroute to a codified browser-skill and return in ~200ms. Read-only — for\nmutating flows (form fills, clicks, submissions), use /automate.\nUse when asked to \"scrape\", \"get data from\", \"pull\", \"extract from\", or\n\"what's on\" a page.",
+      "voice_line": null
+    },
+    "setup-browser-cookies": {
+      "lead": "Import cookies from your real Chromium browser into the headless browse session.",
+      "routing": "Opens an interactive picker UI where you select which cookie domains to import.\nUse before QA testing authenticated pages. Use when asked to \"import cookies\",\n\"login to the site\", or \"authenticate the browser\".",
+      "voice_line": null
+    },
+    "setup-deploy": {
+      "lead": "Configure deployment settings for /land-and-deploy.",
+      "routing": "Detects your deploy\nplatform (Fly.io, Render, Vercel, Netlify, Heroku, GitHub Actions, custom),\nproduction URL, health check endpoints, and deploy status commands. Writes\nthe configuration to CLAUDE.md so all future deploys are automatic.\nUse when: \"setup deploy\", \"configure deployment\", \"set up land-and-deploy\",\n\"how do I deploy with gstack\", \"add deploy config\".",
+      "voice_line": null
+    },
+    "setup-gbrain": {
+      "lead": "Set up gbrain for this coding agent: install the CLI, initialize a local PGLite or Supabase brain, register MCP, capture per-remote trust policy.",
+      "routing": "One command from zero to \"gbrain is running, and this agent\ncan call it.\" Use when: \"setup gbrain\", \"connect gbrain\", \"start\ngbrain\", \"install gbrain\", \"configure gbrain for this machine\".",
+      "voice_line": null
+    },
+    "ship": {
+      "lead": "Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION, update CHANGELOG, commit, push, create PR.",
+      "routing": "Use when asked to \"ship\", \"deploy\",\n\"push to main\", \"create a PR\", \"merge and push\", or \"get it deployed\".\nProactively invoke this skill (do NOT push/PR directly) when the user says code\nis ready, asks about deploying, wants to push code up, or asks to create a PR.",
+      "voice_line": null
+    },
+    "skillify": {
+      "lead": "Codify the most recent successful /scrape flow into a permanent browser-skill on disk.",
+      "routing": "Future /scrape calls with the same intent run\nthe codified script in ~200ms instead of re-driving the page. Walks\nback through the conversation, synthesizes script.ts + script.test.ts\n+ fixture, runs the test in a temp dir, and asks before committing.\nUse when asked to \"skillify\", \"codify\", \"save this scrape\", or\n\"make this permanent\".",
+      "voice_line": null
+    },
+    "spec": {
+      "lead": "Turn vague intent into a precise, executable spec in five phases.",
+      "routing": "Files the issue,\noptionally spawns a Claude Code agent in a fresh worktree, and lets /ship close\nthe source issue on merge. Use when asked to \"spec this out\", \"file an issue\",\n\"write up a ticket\", \"make this a GitHub issue\", or \"turn this into a backlog item\".",
+      "voice_line": null
+    },
+    "sync-gbrain": {
+      "lead": "Keep gbrain current with this repo's code and refresh agent search guidance in CLAUDE.md. Wraps the gstack-gbrain-sync orchestrator with state",
+      "routing": "probing, native code-surface registration, capability checks,\nand a verdict block. Re-runnable, idempotent. Use when: \"sync gbrain\",\n\"refresh gbrain\", \"re-index this repo\", \"gbrain search isn't finding\nthings\".",
+      "voice_line": null
+    },
+    "unfreeze": {
+      "lead": "Clear the freeze boundary set by /freeze, allowing edits to all directories again.",
+      "routing": "Use when you want to widen edit scope without ending the session.\nUse when asked to \"unfreeze\", \"unlock edits\", \"remove freeze\", or\n\"allow all edits\".",
+      "voice_line": null
+    }
+  }
+}
@@ -1,9 +1,20 @@
 /**
- * RESOLVERS record — maps {{PLACEHOLDER}} names to generator functions.
+ * RESOLVERS record — maps {{PLACEHOLDER}} names to generator functions
+ * or gated entries.
+ *
 * Each resolver takes a TemplateContext and returns the replacement string.
+ * Resolvers may be either a bare function (always fires) or a gated entry
+ * ({ resolve, appliesTo }) where appliesTo can return false to skip the
+ * resolver for a given skill. See ./types.ts: ResolverEntry.
+ *
+ * Most resolvers don't need a gate — the {{NAME}} placeholder system is
+ * already conditional at the template level (the resolver only fires for
+ * skills that reference it). Use a gate when you want a structural
+ * guardrail that says "this placeholder is meaningful only in skills X, Y, Z"
+ * even if someone later adds {{NAME}} to skill W.
 */

-import type { TemplateContext, ResolverFn } from './types';
+import type { TemplateContext, ResolverFn, ResolverValue } from './types';

 // Domain modules
 import { generatePreamble } from './preamble';
@@ -24,7 +35,7 @@ import { generateQuestionPreferenceCheck, generateQuestionLog, generateInlineTun
 import { generateMakePdfSetup } from './make-pdf';
 import { generateTasksSectionEmit, generateTasksSectionAggregate } from './tasks-section';

-export const RESOLVERS: Record<string, ResolverFn> = {
+export const RESOLVERS: Record<string, ResolverValue> = {
  SLUG_EVAL: generateSlugEval,
  SLUG_SETUP: generateSlugSetup,
  COMMAND_REFERENCE: generateCommandReference,
@@ -109,10 +109,10 @@ export function generatePreamble(ctx: TemplateContext): string {
    ...(tier >= 2 ? [
      generateContextRecovery(ctx),
      generateWritingStyle(ctx),
-      generateCompletenessSection(),
-      generateConfusionProtocol(),
+      generateCompletenessSection(ctx),
+      generateConfusionProtocol(ctx),
      generateContinuousCheckpoint(),
-      generateContextHealth(),
+      generateContextHealth(ctx),
      generateQuestionTuning(ctx),
    ] : []),
    ...(tier >= 3 ? [generateRepoModeSection(), generateSearchBeforeBuildingSection(ctx)] : []),
@@ -1,6 +1,7 @@
+import type { TemplateContext } from '../types';

-
-export function generateCompletenessSection(): string {
+export function generateCompletenessSection(ctx?: TemplateContext): string {
+  if (ctx?.explainLevel === 'terse') return '';
  return `## Completeness Principle — Boil the Lake

 AI makes completeness cheap. Recommend complete lakes (tests, edge cases, error paths); flag oceans (rewrites, multi-quarter migrations).
@@ -1,4 +1,7 @@
-export function generateConfusionProtocol(): string {
+import type { TemplateContext } from '../types';
+
+export function generateConfusionProtocol(ctx?: TemplateContext): string {
+  if (ctx?.explainLevel === 'terse') return '';
  return `## Confusion Protocol

 For high-stakes ambiguity (architecture, data model, destructive scope, missing context), STOP. Name it in one sentence, present 2-3 options with tradeoffs, and ask. Do not use for routine coding or obvious changes.`;
@@ -1,6 +1,7 @@
+import type { TemplateContext } from '../types';

-
-export function generateContextHealth(): string {
+export function generateContextHealth(ctx?: TemplateContext): string {
+  if (ctx?.explainLevel === 'terse') return '';
  return `## Context Health (soft directive)

 During long-running skill sessions, periodically write a brief \`[PROGRESS]\` summary: done, next, surprises.
@@ -90,6 +90,19 @@ _CHECKPOINT_MODE=$(${ctx.paths.binDir}/gstack-config get checkpoint_mode 2>/dev/
 _CHECKPOINT_PUSH=$(${ctx.paths.binDir}/gstack-config get checkpoint_push 2>/dev/null || echo "false")
 echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
 echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
+# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
+# Claude Code exposes plan mode via system reminders; we detect best-effort
+# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
+# fall back to "inactive". Codex hosts and Claude execution mode both end up
+# inactive, which is the safe default (defaults to file+execute pipeline).
+if [ -n "\${CLAUDE_PLAN_FILE:-}\${GSTACK_PLAN_MODE_FORCE:-}" ]; then
+  export GSTACK_PLAN_MODE="active"
+elif [ "\${GSTACK_PLAN_MODE:-}" = "active" ]; then
+  export GSTACK_PLAN_MODE="active"
+else
+  export GSTACK_PLAN_MODE="inactive"
+fi
+echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
 [ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true${ctx.host === 'gbrain' || ctx.host === 'hermes' ? `
 if command -v gbrain &>/dev/null; then
  _BRAIN_JSON=$(gbrain doctor --fast --json 2>/dev/null || echo '{}')
@@ -33,6 +33,7 @@ Key routing rules:
 - Ship/deploy/PR → invoke /ship or /land-and-deploy
 - Save progress → invoke /context-save
 - Resume context → invoke /context-restore
+- Author a backlog-ready spec/issue → invoke /spec
 \`\`\`

 Then commit the change: \`git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"\`
@@ -1,25 +1,24 @@
-import * as fs from 'fs';
-import * as path from 'path';
 import type { TemplateContext } from '../types';

-function loadJargonList(): string[] {
-  const jargonPath = path.join(__dirname, '..', '..', 'jargon-list.json');
-  try {
-    const raw = fs.readFileSync(jargonPath, 'utf-8');
-    const data = JSON.parse(raw);
-    if (Array.isArray(data?.terms)) return data.terms.filter((t: unknown): t is string => typeof t === 'string');
-  } catch {
-    // Missing or malformed: fall back to empty list. Writing Style block still fires,
-    // but with no terms to gloss — graceful degradation.
+/**
+ * Writing Style preamble section.
+ *
+ * v1.45.0.0 changes (T3):
+ * - Jargon list is referenced by path, not inlined. The 80-term list was
+ *   duplicated into every tier-2+ skill (~1.5-2 KB × 48 skills = ~80 KB
+ *   across the corpus). The pointer asks the agent to Read the JSON on
+ *   first jargon term encountered — one extra Read per session, but the
+ *   per-skill payload drops to a one-sentence pointer (a few hundred bytes).
+ * - When `ctx.explainLevel === 'terse'`, the entire section is replaced
+ *   with a one-line pointer. Saves ~1.5 KB per tier-2+ skill in the
+ *   opt-in terse build.
+ */
+export function generateWritingStyle(ctx: TemplateContext): string {
+  if (ctx.explainLevel === 'terse') {
+    return `## Writing Style\n\nTerse mode (build-time): skip jargon glossing, outcome-framing layer, and decision-impact closers. Lead with the answer.\n`;
  }
-  return [];
-}

-export function generateWritingStyle(_ctx: TemplateContext): string {
-  const terms = loadJargonList();
-  const jargonBlock = terms.length > 0
-    ? `Jargon list, gloss on first use if the term appears:\n${terms.map(t => `- ${t}`).join('\n')}`
-    : `Jargon list unavailable. Skip jargon glossing until \`scripts/jargon-list.json\` is restored.`;
+  const jargonPath = `${ctx.paths.skillRoot}/scripts/jargon-list.json`;

  return `## Writing Style (skip entirely if \`EXPLAIN_LEVEL: terse\` appears in the preamble echo OR the user's current message explicitly requests terse / no-explanations output)

@@ -32,6 +31,6 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
 - User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
 - Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.

-${jargonBlock}
+Curated jargon list lives at \`${jargonPath}\` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the \`terms\` array as the canonical list. The list is repo-owned and may grow between releases.
 `;
 }
@@ -62,7 +62,56 @@ export interface TemplateContext {
  preambleTier?: number;  // 1-4, controls which preamble sections are included
  model?: Model;  // model family for behavioral overlay. Omitted/undefined → no overlay.
  interactive?: boolean;  // true → emit plan-mode handshake in preamble. Generator-only, not written to SKILL.md.
+  /**
+   * Build-time compression mode. Defaults to 'default'.
+   *
+   * - 'default': full preamble prose ships as today (writing style, completeness,
+   *   confusion protocol, context health are all present).
+   * - 'terse': writing-style is compressed to a one-line pointer; completeness,
+   *   confusion-protocol, and context-health are omitted at gen time. Saves ~3-5 KB
+   *   per tier-2+ skill. Opt-in via `--explain-level=terse` build flag for
+   *   users who want shipped skills to match their runtime preference and
+   *   avoid the per-session terse-mode prose.
+   *
+   * Default builds keep the runtime-conditional behavior intact (Writing Style
+   * section says "skip entirely if EXPLAIN_LEVEL: terse appears in preamble echo").
+   * Terse builds make the compression structural — bytes never ship in the first place.
+   */
+  explainLevel?: 'default' | 'terse';
 }

 /** Resolver function signature. args is populated for parameterized placeholders like {{INVOKE_SKILL:name}}. */
 export type ResolverFn = (ctx: TemplateContext, args?: string[]) => string;
+
+/**
+ * Optional gated resolver. When the gate returns false, the resolver is
+ * skipped (substituted with empty string) — same effect as the placeholder
+ * not being referenced. Use when a resolver's output is only meaningful for
+ * a known subset of skills, so future template authors get a structural
+ * guardrail instead of relying on social knowledge.
+ *
+ * Most resolvers don't need this — the {{NAME}} placeholder system is
+ * already conditional at the template level. Use only when a resolver
+ * lives inside another resolver (e.g. via preamble composition) AND must
+ * be conditionalized, or when a top-level resolver has a small, well-defined
+ * audience.
+ */
+export interface ResolverEntry {
+  resolve: ResolverFn;
+  appliesTo?: (ctx: TemplateContext) => boolean;
+}
+
+/** Anything the RESOLVERS map accepts — either a bare function or a gated entry. */
+export type ResolverValue = ResolverFn | ResolverEntry;
+
+/**
+ * Type-narrowing helper for the gen-skill-docs lookup.
+ * Returns { resolve, appliesTo } so callers can check appliesTo?.(ctx) before invoking resolve.
+ */
+export function unwrapResolver(entry: ResolverValue): {
+  resolve: ResolverFn;
+  appliesTo?: (ctx: TemplateContext) => boolean;
+} {
+  if (typeof entry === 'function') return { resolve: entry };
+  return { resolve: entry.resolve, appliesTo: entry.appliesTo };
+}