v1.26.3.0 feat: /sync-gbrain skill + native code-surface orchestrator (#1314)

* feat: native gbrain code-surface orchestrator + ensureSourceRegistered helper

Replaces gbrain import (markdown only) with gbrain sources add + sync
--strategy code (or reindex-code on --full). Adds lib/gbrain-sources.ts
exporting ensureSourceRegistered/probeSource/sourcePageCount, plus lock
file + tmp-rename atomicity + dry-run write skip in the orchestrator.
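The lib/gbrain-sources.ts helpers named above only appear in this diff through their synchronous mirror. A minimal sketch of what sourcePageCount plausibly looks like, assuming `gbrain sources list --json` returns a `{ sources: [...] }` shape with a `page_count` field (the real helper is not shown in full here):

```typescript
// Hypothetical sketch only: the real helper lives in lib/gbrain-sources.ts.
import { execFileSync } from "child_process";

export function sourcePageCount(id: string): number | null {
  try {
    const out = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 10_000,
    });
    const parsed = JSON.parse(out) as {
      sources?: Array<{ id?: string; page_count?: number }>;
    };
    const match = (parsed.sources || []).find((s) => s.id === id);
    return match?.page_count ?? null;
  } catch {
    // Missing CLI, timeout, or non-JSON output all degrade to "unknown".
    return null;
  }
}
```

Returning `null` rather than throwing keeps the orchestrator's "every stage failure is non-fatal" contract: a missing page count degrades the summary line, nothing else.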

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* feat: setup-gbrain Step 8 writes ## GBrain Search Guidance after smoke test

Extends Step 8 to write a machine-agnostic guidance block that teaches
the agent when to prefer gbrain CLI (search/query/code-def/code-refs/
code-callers/code-callees) over Grep. Gated on smoke test pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* feat: /sync-gbrain skill — keep gbrain current and refresh agent guidance

New top-level skill that wraps gstack-gbrain-sync with state probing,
capability check (write+search round-trip, not gbrain doctor), CLAUDE.md
guidance lifecycle (write iff healthy, remove iff broken), and a
per-source verdict block. Re-runnable, idempotent.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* feat: preamble emits gbrain-availability block when capability ok

Extends generate-brain-sync-block.ts to emit Variant A (steady-state, 4
lines) when cwd page_count > 0 or Variant B (empty-corpus emergency, 3
lines) when 0; empty string otherwise. Reads cached page_count from
.gbrain-sync-state.json (handles pretty + compact JSON). Refreshes ship
golden fixtures and bumps the plan-review preamble byte budget to 35K
to absorb the new block.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* docs: register /sync-gbrain in AGENTS.md and docs/skills.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* chore: regenerate SKILL.md across all hosts (gen:skill-docs)

Mechanical regeneration after preamble + setup-gbrain template + new
sync-gbrain skill. Run via: bun run gen:skill-docs --host all.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* chore: bump version and changelog (v1.26.3.0)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* docs: add /sync-gbrain to README skills table and gbrain section

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Commit db9447c333 (parent 30fe6bb11c), authored by Garry Tan on 2026-05-04 09:29:48 -07:00, committed by GitHub.
59 changed files with 3412 additions and 79 deletions.
@@ -4,29 +4,38 @@
*
* Orchestrates three storage tiers per plan §"Storage tiering":
*
* 1. Code (current repo) → `gbrain sources add` (idempotent via
* lib/gbrain-sources.ts) + `gbrain sync
* --strategy code` (incremental) or
* `gbrain reindex-code --yes` (--full).
* NEVER `gbrain import` (markdown only).
* 2. Transcripts + curated memory → gstack-memory-ingest (typed put_page)
* 3. Curated artifacts to git → gstack-brain-sync (existing pipeline)
*
* Modes:
* --incremental (default) — mtime fast-path; runs all 3 stages with cache hits
* --full — first-run; full walk + reindex; honest budget per ED2
* --dry-run — preview what would sync; no writes anywhere (incl. state file)
*
* Concurrency safety per /plan-eng-review D1:
* - Lock file at ~/.gstack/.sync-gbrain.lock (PID + start ts).
* - Stale-lock takeover after 5 min (process death).
* - State file written via tmp+rename for atomicity.
* - Lock released in finally; SIGINT/SIGTERM trapped for cleanup.
*
* Cross-repo TODO (V1.5): when gbrain CLI ships `put_file` + `restore-from-sync`,
* this helper picks them up via version probe (Codex F6 + D9) and routes
* code/transcripts to Supabase Storage instead of put_page.
* --watch (V1.5 P0 TODO): file-watcher daemon. NOTE: gbrain v0.25.1 already
* ships `gbrain sync --watch [--interval N]` and `gbrain sync --install-cron`;
* when revisited, /sync-gbrain --watch wires through to the gbrain CLI rather
* than building a gstack-side daemon.
*/
import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs";
import { join, dirname } from "path";
import { execSync, execFileSync, spawnSync } from "child_process";
import { homedir } from "os";
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
import { sourcePageCount } from "../lib/gbrain-sources";
// ── Types ──────────────────────────────────────────────────────────────────
@@ -41,12 +50,22 @@ interface CliArgs {
codeOnly: boolean;
}
interface CodeStageDetail {
source_id?: string;
source_path?: string;
page_count?: number | null;
last_imported?: string;
status?: "ok" | "skipped" | "failed";
}
interface StageResult {
name: string;
ran: boolean;
ok: boolean;
duration_ms: number;
summary: string;
/** Stage-specific structured detail. Code stage carries source_id + page_count. */
detail?: CodeStageDetail;
}
// ── Constants ──────────────────────────────────────────────────────────────
@@ -54,6 +73,8 @@ interface StageResult {
const HOME = homedir();
const GSTACK_HOME = process.env.GSTACK_HOME || join(HOME, ".gstack");
const STATE_PATH = join(GSTACK_HOME, ".gbrain-sync-state.json");
const LOCK_PATH = join(GSTACK_HOME, ".sync-gbrain.lock");
const STALE_LOCK_MS = 5 * 60 * 1000;
// ── CLI ────────────────────────────────────────────────────────────────────
@@ -62,18 +83,18 @@ function printUsage(): void {
Modes:
--incremental Default. mtime fast-path; ~50ms steady-state.
--full First-run; full walk + reindex. Honest ~25-35 min for big Macs (ED2).
--dry-run Preview what would sync; no writes anywhere.
Options:
--quiet Suppress per-stage output.
--no-code Skip the cwd code-import stage.
--no-memory Skip the gstack-memory-ingest stage (transcripts + artifacts).
--no-brain-sync Skip the gstack-brain-sync git pipeline stage.
--code-only Only run the code-import stage (alias for --no-memory --no-brain-sync).
--help This text.
Stages run in order: code → memory ingest → curated git push.
Each stage failure is non-fatal; subsequent stages still run.
`);
}
@@ -116,7 +137,7 @@ function parseArgs(): CliArgs {
return { mode, quiet, noCode, noMemory, noBrainSync, codeOnly };
}
// ── Helpers ────────────────────────────────────────────────────────────────
function repoRoot(): string | null {
try {
@@ -127,6 +148,32 @@ function repoRoot(): string | null {
}
}
function originUrl(): string | null {
try {
const out = execSync("git remote get-url origin", { encoding: "utf-8", timeout: 2000 });
return out.trim();
} catch {
return null;
}
}
/**
* Derive a stable source id for the cwd code corpus. Pattern: `gstack-code-<slug>`,
* where <slug> comes from canonicalizeRemote() with `/` and `.` mapped to `-` (e.g.,
* `github.com/garrytan/gstack` → `gstack-code-github-com-garrytan-gstack`).
*
* Falls back to `gstack-code-<basename(repo)>` when there is no origin (local repo).
*/
function deriveCodeSourceId(repoPath: string): string {
const remote = canonicalizeRemote(originUrl());
if (remote) {
return `gstack-code-${remote.replace(/[\/.\s]+/g, "-").replace(/-+/g, "-")}`;
}
// Fallback for repos without a remote.
const base = repoPath.split("/").pop() || "repo";
return `gstack-code-${base.toLowerCase().replace(/[^a-z0-9-]+/g, "-").replace(/-+/g, "-")}`;
}
function gbrainAvailable(): boolean {
try {
execSync("command -v gbrain", { stdio: "ignore" });
@@ -136,6 +183,55 @@ function gbrainAvailable(): boolean {
}
}
// ── Lock file (D1) ─────────────────────────────────────────────────────────
interface LockInfo {
pid: number;
started_at: string;
}
function acquireLock(): boolean {
mkdirSync(GSTACK_HOME, { recursive: true });
if (existsSync(LOCK_PATH)) {
// Check if stale.
try {
const stat = statSync(LOCK_PATH);
const ageMs = Date.now() - stat.mtimeMs;
if (ageMs > STALE_LOCK_MS) {
// Stale; take over.
unlinkSync(LOCK_PATH);
} else {
return false;
}
} catch {
// Cannot stat; bail conservatively.
return false;
}
}
const info: LockInfo = { pid: process.pid, started_at: new Date().toISOString() };
try {
writeFileSync(LOCK_PATH, JSON.stringify(info), { encoding: "utf-8", flag: "wx" });
return true;
} catch {
return false;
}
}
function releaseLock(): void {
try {
if (!existsSync(LOCK_PATH)) return;
const raw = readFileSync(LOCK_PATH, "utf-8");
const info = JSON.parse(raw) as LockInfo;
if (info.pid === process.pid) {
unlinkSync(LOCK_PATH);
}
} catch {
// Best-effort cleanup.
}
}
// ── Stage runners ──────────────────────────────────────────────────────────
function runCodeImport(args: CliArgs): StageResult {
const t0 = Date.now();
const root = repoRoot();
@@ -145,42 +241,135 @@ function runCodeImport(args: CliArgs): StageResult {
if (!gbrainAvailable()) {
return { name: "code", ran: false, ok: false, duration_ms: 0, summary: "skipped (gbrain CLI not in PATH)" };
}
const sourceId = deriveCodeSourceId(root);
if (args.mode === "dry-run") {
return {
name: "code",
ran: false,
ok: true,
duration_ms: 0,
summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}`,
detail: { source_id: sourceId, source_path: root, status: "skipped" },
};
}
// Step 1: Ensure source registered (idempotent).
let registered = false;
try {
// Stage runners are synchronous per the orchestrator contract, while the
// lib/gbrain-sources.ts ensureSourceRegistered helper is async; call the
// synchronous execFileSync-based mirror defined below instead.
registered = ensureSourceRegisteredSync(sourceId, root);
} catch (err) {
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `source registration failed: ${(err as Error).message}`,
detail: { source_id: sourceId, source_path: root, status: "failed" },
};
}
// Step 2: Run sync or reindex.
const syncArgs = args.mode === "full"
? ["reindex-code", "--source", sourceId, "--yes"]
: ["sync", "--strategy", "code", "--source", sourceId];
const syncResult = spawnSync("gbrain", syncArgs, {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: 35 * 60 * 1000,
});
if (syncResult.status !== 0) {
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `gbrain ${syncArgs.join(" ")} exited ${syncResult.status}`,
detail: { source_id: sourceId, source_path: root, status: "failed" },
};
}
// Step 3: Read page_count from gbrain sources list.
const pageCount = sourcePageCount(sourceId);
return {
name: "code",
ran: true,
ok: true,
duration_ms: Date.now() - t0,
summary: `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"})`,
detail: {
source_id: sourceId,
source_path: root,
page_count: pageCount,
last_imported: new Date().toISOString(),
status: "ok",
},
};
}
/**
* Synchronous mirror of ensureSourceRegistered for use inside the synchronous
* stage runner. Returns true if registration changed (added or re-added).
*/
function ensureSourceRegisteredSync(id: string, path: string): boolean {
// Probe.
let probeOut: string;
try {
probeOut = execFileSync("gbrain", ["sources", "list", "--json"], {
encoding: "utf-8",
timeout: 10_000,
stdio: ["ignore", "pipe", "pipe"],
});
} catch (err) {
const e = err as NodeJS.ErrnoException & { stderr?: Buffer };
const stderr = e.stderr?.toString() || "";
if (e.code === "ENOENT") throw new Error("gbrain CLI not on PATH");
if (stderr.includes("Cannot connect to database") || stderr.includes("config.json")) {
throw new Error("gbrain not configured (run /setup-gbrain)");
}
throw err;
}
let parsed: { sources?: Array<{ id?: string; local_path?: string }> };
try {
parsed = JSON.parse(probeOut);
} catch (err) {
throw new Error(`gbrain sources list returned non-JSON: ${(err as Error).message}`);
}
const sources = parsed.sources || [];
const match = sources.find((s) => s.id === id);
if (match && match.local_path === path) {
return false; // no-op
}
if (match && match.local_path !== path) {
const rm = spawnSync("gbrain", ["sources", "remove", id, "--yes"], {
encoding: "utf-8",
timeout: 30_000,
});
if (rm.status !== 0) {
throw new Error(`gbrain sources remove ${id} failed: ${rm.stderr || rm.stdout || `exit ${rm.status}`}`);
}
}
const add = spawnSync("gbrain", ["sources", "add", id, "--path", path, "--federated"], {
encoding: "utf-8",
timeout: 30_000,
});
if (add.status !== 0) {
throw new Error(`gbrain sources add ${id} failed: ${add.stderr || add.stdout || `exit ${add.status}`}`);
}
return true;
}
function runMemoryIngest(args: CliArgs): StageResult {
@@ -198,7 +387,7 @@ function runMemoryIngest(args: CliArgs): StageResult {
const result = spawnSync("bun", ingestArgs, {
encoding: "utf-8",
timeout: 35 * 60 * 1000,
});
const summary = (result.stderr || "").split("\n").filter((l) => l.includes("[memory-ingest]")).slice(-1)[0] || "ingest pass complete";
@@ -224,7 +413,6 @@ function runBrainSyncPush(args: CliArgs): StageResult {
return { name: "brain-sync", ran: false, ok: true, duration_ms: 0, summary: "skipped (gstack-brain-sync not installed)" };
}
spawnSync(brainSyncPath, ["--discover-new"], {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: 60 * 1000,
@@ -243,7 +431,7 @@ function runBrainSyncPush(args: CliArgs): StageResult {
};
}
// ── State file ─────────────────────────────────────────────────────────────
interface SyncState {
schema_version: 1;
@@ -266,10 +454,16 @@ function loadSyncState(): SyncState {
return { schema_version: 1, last_writer: "gstack-gbrain-sync" };
}
/**
* Atomic state file write per /plan-eng-review D1: write tmp file then rename.
* rename(2) is atomic on POSIX filesystems.
*/
function saveSyncState(state: SyncState): void {
try {
mkdirSync(dirname(STATE_PATH), { recursive: true });
const tmp = `${STATE_PATH}.tmp.${process.pid}`;
writeFileSync(tmp, JSON.stringify(state, null, 2), "utf-8");
renameSync(tmp, STATE_PATH);
} catch {
// non-fatal
}
@@ -293,40 +487,67 @@ async function main(): Promise<void> {
console.error(`[gbrain-sync] mode=${args.mode} engine=${engine.engine}`);
}
// Acquire lock (skip on dry-run since dry-run never writes).
const needsLock = args.mode !== "dry-run";
let haveLock = false;
if (needsLock) {
haveLock = acquireLock();
if (!haveLock) {
console.error(
`[gbrain-sync] another /sync-gbrain is running (lock at ${LOCK_PATH}). ` +
`If that process died, the lock auto-clears after 5 min, or remove it manually.`
);
process.exit(2);
}
}
const cleanup = () => {
if (haveLock) releaseLock();
};
process.on("SIGINT", () => { cleanup(); process.exit(130); });
process.on("SIGTERM", () => { cleanup(); process.exit(143); });
let exitCode = 0;
try {
const state = loadSyncState();
const stages: StageResult[] = [];
if (!args.noCode) {
stages.push(await withErrorContext("sync:code", () => runCodeImport(args), "gstack-gbrain-sync"));
}
if (!args.noMemory) {
stages.push(await withErrorContext("sync:memory", () => runMemoryIngest(args), "gstack-gbrain-sync"));
}
if (!args.noBrainSync) {
stages.push(await withErrorContext("sync:brain-sync", () => runBrainSyncPush(args), "gstack-gbrain-sync"));
}
if (args.mode !== "dry-run") {
state.last_sync = new Date().toISOString();
if (args.mode === "full") state.last_full_sync = state.last_sync;
state.last_stages = stages;
saveSyncState(state);
}
if (!args.quiet || args.mode === "dry-run") {
console.log(`\ngstack-gbrain-sync (${args.mode}):`);
for (const s of stages) console.log(formatStage(s));
const okCount = stages.filter((s) => s.ok).length;
const errCount = stages.filter((s) => !s.ok && s.ran).length;
console.log(`\n ${okCount} ok, ${errCount} error, ${stages.length - okCount - errCount} skipped`);
}
const anyError = stages.some((s) => s.ran && !s.ok);
exitCode = anyError ? 1 : 0;
} finally {
cleanup();
}
process.exit(exitCode);
}
main().catch((err) => {
console.error(`gstack-gbrain-sync fatal: ${err instanceof Error ? err.message : String(err)}`);
releaseLock();
process.exit(1);
});