Merge remote-tracking branch 'origin/main' into garrytan/plan-review-regressions

2026-05-05 05:05:08 +02:00 · 2026-04-22 12:29:35 -07:00
parent 5fe1814310 656df0e37e
commit 00e8a8599c
81 changed files with 4209 additions and 857 deletions
@@ -13,6 +13,7 @@

 export const ALL_MODEL_NAMES = [
  'claude',
+  'opus-4-7',
  'gpt',
  'gpt-5.4',
  'gemini',
@@ -51,6 +52,7 @@ export function resolveModel(input: string): Model | null {
  if (/^gpt-5\.4(-|$)/.test(s)) return 'gpt-5.4';
  if (/^gpt(-|$)/.test(s)) return 'gpt';
  if (/^o[0-9]+(-|$)/.test(s)) return 'o-series';
+  if (/^claude-opus-4-7(-|$)/.test(s)) return 'opus-4-7';
  if (/^claude(-|$)/.test(s)) return 'claude';
  if (/^gemini(-|$)/.test(s)) return 'gemini';

@@ -20,23 +20,44 @@ If A: Append this section to the end of CLAUDE.md:

 ## Skill routing

-When the user's request matches an available skill, ALWAYS invoke it using the Skill
-tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.
-The skill has specialized workflows that produce better results than ad-hoc answers.
+When the user's request matches an available skill, invoke it via the Skill tool. The
+skill has multi-step workflows, checklists, and quality gates that produce better
+results than an ad-hoc answer. When in doubt, invoke the skill. A false positive is
+cheaper than a false negative.

 Key routing rules:
- Product ideas, "is this worth building", brainstorming → invoke office-hours
- Bugs, errors, "why is this broken", 500 errors → invoke investigate
- Ship, deploy, push, create PR → invoke ship
- QA, test the site, find bugs → invoke qa
- Code review, check my diff → invoke review
- Update docs after shipping → invoke document-release
- Weekly retro → invoke retro
- Design system, brand → invoke design-consultation
- Visual audit, design polish → invoke design-review
- Architecture review → invoke plan-eng-review
- Save progress, checkpoint, resume → invoke checkpoint
- Code quality, health check → invoke health
+- Product ideas, "is this worth building", brainstorming → invoke /office-hours
+- Strategy, scope, "think bigger", "what should we build" → invoke /plan-ceo-review
+- Architecture, "does this design make sense" → invoke /plan-eng-review
+- Design system, brand, "how should this look" → invoke /design-consultation
+- Design review of a plan → invoke /plan-design-review
+- Developer experience of a plan → invoke /plan-devex-review
+- "Review everything", full review pipeline → invoke /autoplan
+- Bugs, errors, "why is this broken", "wtf", "this doesn't work" → invoke /investigate
+- Test the site, find bugs, "does this work" → invoke /qa (or /qa-only for report only)
+- Code review, check the diff, "look at my changes" → invoke /review
+- Visual polish, design audit, "this looks off" → invoke /design-review
+- Developer experience audit, try onboarding → invoke /devex-review
+- Ship, deploy, create a PR, "send it" → invoke /ship
+- Merge + deploy + verify → invoke /land-and-deploy
+- Configure deployment → invoke /setup-deploy
+- Post-deploy monitoring → invoke /canary
+- Update docs after shipping → invoke /document-release
+- Weekly retro, "how'd we do" → invoke /retro
+- Second opinion, codex review → invoke /codex
+- Safety mode, careful mode, lock it down → invoke /careful or /guard
+- Restrict edits to a directory → invoke /freeze or /unfreeze
+- Upgrade gstack → invoke /gstack-upgrade
+- Save progress, "save my work" → invoke /context-save
+- Resume, restore, "where was I" → invoke /context-restore
+- Security audit, OWASP, "is this secure" → invoke /cso
+- Make a PDF, document, publication → invoke /make-pdf
+- Launch real browser for QA → invoke /open-gstack-browser
+- Import cookies for authenticated testing → invoke /setup-browser-cookies
+- Performance regression, page speed, benchmarks → invoke /benchmark
+- Review what gstack has learned → invoke /learn
+- Tune question sensitivity → invoke /plan-tune
+- Code quality dashboard → invoke /health
 \`\`\`

 Then commit the change: \`git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"\`
@@ -46,4 +67,3 @@ Say "No problem. You can add routing rules later by running \`gstack-config set

 This only happens once per project. If \`HAS_ROUTING\` is \`yes\` or \`ROUTING_DECLINED\` is \`true\`, skip this entirely.`;
 }
-
@@ -55,6 +55,10 @@ Avoid filler, throat-clearing, generic optimism, founder cosplay, and unsupporte
 - Stay curious, not lecturing. "What's interesting here is..." beats "It is important to understand..."
 - End with what to do. Give the action.

+**Example of the right voice:**
+"auth.ts:47 returns undefined when the session cookie expires. Your users hit a white screen. Fix: add a null check and redirect to /login. Two lines. Want me to fix it?"
+Not: "I've identified a potential issue in the authentication flow that may cause problems for some users under certain conditions. Let me explain the approach I'd recommend..."
+
 **Final test:** does this sound like a real cross-functional builder who wants to help someone make something people want, ship it, and make it actually work?`;
 }

@@ -369,7 +369,7 @@ Minimum 0 per category.
 export function generateCoAuthorTrailer(ctx: TemplateContext): string {
  const { getHostConfig } = require('../../hosts/index');
  const hostConfig = getHostConfig(ctx.host);
-  return hostConfig.coAuthorTrailer || 'Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>';
+  return hostConfig.coAuthorTrailer || 'Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>';
 }

 export function generateChangelogWorkflow(_ctx: TemplateContext): string {
@@ -11,48 +11,55 @@
 *   bun run slop:diff origin/release  # diff against another base
 */

-import { spawnSync } from 'child_process';
-import * as fs from 'fs';
-import * as os from 'os';
-import * as path from 'path';
+import { spawnSync } from "child_process";
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";

-const base = process.argv[2] || 'main';
+const base = process.argv[2] || "main";

 // 1. Find changed files
-const diffResult = spawnSync('git', ['diff', '--name-only', `${base}...HEAD`], {
-  encoding: 'utf-8', timeout: 10000,
+const diffResult = spawnSync("git", ["diff", "--name-only", `${base}...HEAD`], {
+  encoding: "utf-8",
+  timeout: 10000,
 });
 const changedFiles = new Set(
-  (diffResult.stdout || '').trim().split('\n').filter(Boolean)
+  (diffResult.stdout || "").trim().split("\n").filter(Boolean),
 );
 if (changedFiles.size === 0) {
-  console.log('No files changed vs', base, '— nothing to check.');
+  console.log("No files changed vs", base, "— nothing to check.");
  process.exit(0);
 }

 // 2. Run slop-scan on HEAD
-const scanHead = spawnSync('npx', ['slop-scan', 'scan', '.', '--json'], {
-  encoding: 'utf-8', timeout: 120000, shell: true,
+const scanHead = spawnSync("npx", ["slop-scan", "scan", ".", "--json"], {
+  encoding: "utf-8",
+  timeout: 120000,
+  shell: process.platform === "win32",
 });
 if (!scanHead.stdout) {
-  console.log('slop-scan not available. Install: npm i -g slop-scan');
+  console.log("slop-scan not available. Install: npm i -g slop-scan");
  process.exit(0);
 }
 let headReport: any;
-try { headReport = JSON.parse(scanHead.stdout); } catch {
-  console.log('slop-scan returned invalid JSON.'); process.exit(0);
+try {
+  headReport = JSON.parse(scanHead.stdout);
+} catch {
+  console.log("slop-scan returned invalid JSON.");
+  process.exit(0);
 }

 // 3. Get base branch findings by scanning a temporary git worktree
 //    Check out the merge-base into a detached worktree, scan it, then remove it
-const mergeBase = spawnSync('git', ['merge-base', base, 'HEAD'], {
-  encoding: 'utf-8', timeout: 5000,
+const mergeBase = spawnSync("git", ["merge-base", base, "HEAD"], {
+  encoding: "utf-8",
+  timeout: 5000,
 }).stdout?.trim();

 // Fingerprint: strip line numbers so shifting code doesn't create false positives
 // "line 142: empty catch, boundary=none" -> "empty catch, boundary=none"
 function stripLineNum(evidence: string): string {
-  return evidence.replace(/^line \d+: /, '').replace(/ at line \d+ /, ' ');
+  return evidence.replace(/^line \d+: /, "").replace(/ at line \d+ /, " ");
 }

 // Count evidence items per (rule, file, stripped-evidence) for the base
@@ -61,27 +68,40 @@ const baseCounts = new Map<string, number>();
 if (mergeBase) {
  // Create temp worktree for base scan
  const tmpWorktree = path.join(os.tmpdir(), `slop-base-${Date.now()}`);
-  const wtResult = spawnSync('git', ['worktree', 'add', '--detach', tmpWorktree, mergeBase], {
-    encoding: 'utf-8', timeout: 30000,
-  });
+  const wtResult = spawnSync(
+    "git",
+    ["worktree", "add", "--detach", tmpWorktree, mergeBase],
+    {
+      encoding: "utf-8",
+      timeout: 30000,
+    },
+  );

  if (wtResult.status === 0) {
    // Copy slop-scan config if it exists
-    const configFile = 'slop-scan.config.json';
+    const configFile = "slop-scan.config.json";
    if (fs.existsSync(configFile)) {
-      try { fs.copyFileSync(configFile, path.join(tmpWorktree, configFile)); } catch {}
+      try {
+        fs.copyFileSync(configFile, path.join(tmpWorktree, configFile));
+      } catch {}
    }

-    const scanBase = spawnSync('npx', ['slop-scan', 'scan', tmpWorktree, '--json'], {
-      encoding: 'utf-8', timeout: 120000, shell: true,
-    });
+    const scanBase = spawnSync(
+      "npx",
+      ["slop-scan", "scan", tmpWorktree, "--json"],
+      {
+        encoding: "utf-8",
+        timeout: 120000,
+        shell: process.platform === "win32",
+      },
+    );

    if (scanBase.stdout) {
      try {
        const baseReport = JSON.parse(scanBase.stdout);
        for (const f of baseReport.findings) {
          // Remap worktree paths back to repo-relative
-          const realPath = f.path.replace(tmpWorktree + '/', '');
+          const realPath = f.path.replace(tmpWorktree + "/", "");
          if (!changedFiles.has(realPath)) continue;
          for (const ev of f.evidence || []) {
            const key = `${f.ruleId}|${realPath}|${stripLineNum(ev)}`;
@@ -92,7 +112,7 @@ if (mergeBase) {
    }

    // Clean up worktree
-    spawnSync('git', ['worktree', 'remove', '--force', tmpWorktree], {
+    spawnSync("git", ["worktree", "remove", "--force", tmpWorktree], {
      timeout: 10000,
    });
  }
@@ -102,7 +122,9 @@ if (mergeBase) {
 //    For each evidence item on HEAD, check if the base had the same (rule, file, stripped-evidence).
 //    Use counts to handle duplicates: if base had 2 and HEAD has 3, that's 1 new.
 const headCounts = new Map<string, { count: number; evidence: string[] }>();
-const headFindings = headReport.findings.filter((f: any) => changedFiles.has(f.path));
+const headFindings = headReport.findings.filter((f: any) =>
+  changedFiles.has(f.path),
+);

 for (const f of headFindings) {
  for (const ev of f.evidence || []) {
@@ -123,7 +145,7 @@ for (const [key, entry] of headCounts) {
  const baseCount = baseCounts.get(key) || 0;
  const netNew = entry.count - baseCount;
  if (netNew > 0) {
-    const [ruleId, filePath] = key.split('|');
+    const [ruleId, filePath] = key.split("|");
    // Take the last N evidence items as the "new" ones
    for (const ev of entry.evidence.slice(-netNew)) {
      newFindings.push({ ruleId, filePath, evidence: ev });
@@ -139,14 +161,20 @@ for (const [key, baseCount] of baseCounts) {
 // 5. Print results
 if (newFindings.length === 0) {
  if (removedCount > 0) {
-    console.log(`\n  slop-scan: no new findings. Removed ${removedCount} pre-existing findings.\n`);
+    console.log(
+      `\n  slop-scan: no new findings. Removed ${removedCount} pre-existing findings.\n`,
+    );
  } else {
-    console.log(`\n  slop-scan: no new findings in ${changedFiles.size} changed files.\n`);
+    console.log(
+      `\n  slop-scan: no new findings in ${changedFiles.size} changed files.\n`,
+    );
  }
  process.exit(0);
 }

-console.log(`\n── slop-scan: ${newFindings.length} new findings (+${newFindings.length} / -${removedCount}) ──\n`);
+console.log(
+  `\n── slop-scan: ${newFindings.length} new findings (+${newFindings.length} / -${removedCount}) ──\n`,
+);

 // Group by file, then by rule
 const grouped = new Map<string, Map<string, string[]>>();