v1.29.0.0 feat: worktree-aware gbrain code sources via path-hash IDs and CWD pin (#1382)

* feat: worktree-aware gbrain code sources via path-hash IDs and CWD pin

Conductor sibling worktrees of the same repo no longer collide on a shared
gstack-code-<slug> source ID. /sync-gbrain now derives a path-hashed source
ID per worktree, runs gbrain sources attach to write .gbrain-source in the
worktree root, and removes the legacy unsuffixed source on first new-format
sync to prevent orphan accumulation.

Bug fixes surfaced by /codex during /ship:
- Silent attach failure now treated as stage failure (no more ok:true while
  pin is missing → unqualified code-def hits wrong source).
- Startup preamble checks .gbrain-source in the cwd worktree, not global
  state, so an unsynced worktree no longer claims "indexed" because a
  sibling synced.
- Code stage no longer skipped on remote-MCP (Path 4); the early-exit was
  in the SKILL template, not the orchestrator.
- Source registration routes through lib/gbrain-sources.ts only; deleted
  the near-duplicate ensureSourceRegisteredSync from the orchestrator.

Requires gbrain v0.30.0+ (uses sources attach). Phase 0 spike report:
~/.gstack/projects/garrytan-gstack/2026-05-08-gbrain-split-engine-spike.md

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* chore: bump version and changelog (v1.29.0.0)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-05-08 12:46:15 -07:00
committed by GitHub
parent 443bde054c
commit 06605477e2
49 changed files with 928 additions and 721 deletions
+96 -76
View File
@@ -31,12 +31,12 @@
import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs";
import { join, dirname } from "path";
import { execSync, execFileSync, spawnSync } from "child_process";
import { execSync, spawnSync } from "child_process";
import { homedir } from "os";
import { createHash } from "crypto";
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
import { sourcePageCount } from "../lib/gbrain-sources";
import { ensureSourceRegistered, sourcePageCount } from "../lib/gbrain-sources";
// ── Types ──────────────────────────────────────────────────────────────────
@@ -159,17 +159,43 @@ function originUrl(): string | null {
}
/**
* Derive a stable source id for the cwd code corpus. Pattern: `gstack-code-<slug>`.
* Derive a worktree-aware source id for the cwd code corpus.
*
* gbrain enforces source ids to be 1-32 lowercase alnum chars with optional interior
* hyphens. We use the last two segments of the canonical remote (org/repo) and skip
* the host — `github.com` etc. is the same for nearly every user and just eats budget.
* If the resulting id still exceeds 32 chars, we keep the tail (most distinctive end)
* and append a 6-char hash of the full slug for collision resistance.
* Pattern: `gstack-code-<slug>-<pathhash8>` where slug comes from origin
* (org/repo) and pathhash8 is the first 8 hex chars of sha1(absolute repo
* path). The pathhash8 is what makes Conductor worktrees of the same repo
* coexist as separate sources in the same gbrain DB instead of stomping on
* each other.
*
* Falls back to the repo basename when there is no origin (local repo).
*
* gbrain enforces source ids to be 1-32 lowercase alnum chars with
* optional interior hyphens. `constrainSourceId` handles the 32-char cap
* with a hashed-tail fallback when the combined slug exceeds budget.
*/
/**
 * Derive a worktree-aware source id for the cwd code corpus.
 *
 * Pattern: `gstack-code-<slug>-<pathhash8>`. The slug comes from the last two
 * segments of the canonical origin remote (org/repo), or the repo basename for
 * a local-only repo; the 8-hex-char sha1 of the absolute repo path is what
 * keeps sibling worktrees of the same repo from colliding on one source id.
 * `constrainSourceId` enforces gbrain's 32-char id cap.
 */
function deriveCodeSourceId(repoPath: string): string {
  // First 8 hex chars of sha1(absolute path) — distinct per worktree checkout.
  const pathHash = createHash("sha1").update(repoPath).digest("hex").slice(0, 8);
  const remote = canonicalizeRemote(originUrl());
  let slug: string;
  if (remote) {
    // Keep only org/repo — the host segment is near-constant noise that
    // would just eat id budget.
    slug = remote.split("/").filter(Boolean).slice(-2).join("-");
  } else {
    // No origin remote (local repo): fall back to the directory basename.
    slug = repoPath.split("/").pop() || "repo";
  }
  return constrainSourceId("gstack-code", `${slug}-${pathHash}`);
}
/**
* Pre-pathhash source id, kept for orphan detection only.
*
* Earlier /sync-gbrain versions registered `gstack-code-<slug>` (no pathhash
* suffix). On a multi-worktree repo, those collapsed onto a single source id
* with last-sync-wins semantics. The new path-keyed id leaves the legacy
* source orphaned in the brain — federated cross-source search would return
* stale duplicate hits. We remove the legacy id once, on the first new-format
* sync from any worktree of this repo, so users don't accumulate orphans.
*/
function deriveLegacyCodeSourceId(repoPath: string): string {
const remote = canonicalizeRemote(originUrl());
if (remote) {
const segs = remote.split("/").filter(Boolean);
@@ -264,7 +290,7 @@ function releaseLock(): void {
// ── Stage runners ──────────────────────────────────────────────────────────
function runCodeImport(args: CliArgs): StageResult {
async function runCodeImport(args: CliArgs): Promise<StageResult> {
const t0 = Date.now();
const root = repoRoot();
if (!root) {
@@ -282,21 +308,37 @@ function runCodeImport(args: CliArgs): StageResult {
ran: false,
ok: true,
duration_ms: 0,
summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}`,
summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}; gbrain sources attach ${sourceId}`,
detail: { source_id: sourceId, source_path: root, status: "skipped" },
};
}
// Step 1: Ensure source registered (idempotent).
// Step 0: Best-effort cleanup of pre-pathhash legacy source.
// Earlier /sync-gbrain versions registered `gstack-code-<slug>` (no path
// suffix). On a multi-worktree repo, those collapsed onto a single id
// with last-sync-wins. Federated search would return stale duplicate
// hits forever if we left the orphan in place. Remove the legacy id once
// here so users don't accumulate orphans.
// Failure is non-fatal — we still register the new id below.
const legacyId = deriveLegacyCodeSourceId(root);
let legacyRemoved = false;
if (legacyId !== sourceId) {
const rm = spawnSync("gbrain", ["sources", "remove", legacyId, "--confirm-destructive"], {
encoding: "utf-8",
timeout: 30_000,
stdio: ["ignore", "pipe", "pipe"],
});
// Treat absent-source as success (clean state). gbrain emits "not found" on
// missing id; treat any non-zero exit without "not found" as a soft fail.
if (rm.status === 0) legacyRemoved = true;
}
// Step 1: Ensure source registered (idempotent). Single source of truth in lib —
// no synchronous duplicate here (per /codex review #12).
let registered = false;
try {
// ensureSourceRegistered is async — but we're in a sync stage runner. Use a deasync pattern.
// Bun supports top-level await in main(), but stage runners are sync per orchestrator contract.
// Workaround: run as a child Bun script for the registration probe.
// Simpler: call gbrain CLI directly via the sync helpers in lib/gbrain-sources.ts probeSource.
// For symmetry, we duplicate the small ensureSourceRegistered logic synchronously here using
// execFileSync. (The lib helper is preferred for async callers; sync helpers below.)
registered = ensureSourceRegisteredSync(sourceId, root);
const result = await ensureSourceRegistered(sourceId, root, { federated: true });
registered = result.changed;
} catch (err) {
return {
name: "code",
@@ -329,15 +371,49 @@ function runCodeImport(args: CliArgs): StageResult {
};
}
// Step 3: Read page_count from gbrain sources list.
// Step 3: Pin this worktree's CWD to the source via .gbrain-source. Subsequent
// gbrain code-def / code-refs / code-callers calls from anywhere under <root>
// route to this source by default — no --source flag needed.
//
// If attach fails the whole flow has a silent correctness problem: sync
// succeeded but unqualified `gbrain code-def` from this worktree will hit
// the wrong/default source. Treat it as a stage failure (ok=false) so the
// verdict block surfaces ERR and the user knows to retry rather than
// trusting stale results.
const attach = spawnSync("gbrain", ["sources", "attach", sourceId], {
encoding: "utf-8",
timeout: 10_000,
cwd: root,
stdio: ["ignore", "pipe", "pipe"],
});
const pageCount = sourcePageCount(sourceId);
const legacyNote = legacyRemoved ? `, removed legacy ${legacyId}` : "";
const baseSummary = `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"}${legacyNote})`;
if (attach.status !== 0) {
const reason = (attach.stderr || attach.stdout || "").trim().split("\n").pop() || `exit ${attach.status}`;
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `${baseSummary}; attach FAILED (${reason}) — code-def queries from this worktree will hit the default source until /sync-gbrain succeeds`,
detail: {
source_id: sourceId,
source_path: root,
page_count: pageCount,
last_imported: new Date().toISOString(),
status: "failed",
},
};
}
return {
name: "code",
ran: true,
ok: true,
duration_ms: Date.now() - t0,
summary: `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"})`,
summary: baseSummary,
detail: {
source_id: sourceId,
source_path: root,
@@ -348,62 +424,6 @@ function runCodeImport(args: CliArgs): StageResult {
};
}
/**
* Synchronous mirror of ensureSourceRegistered for use inside the synchronous
* stage runner. Returns true if registration changed (added or re-added).
*/
/**
 * Synchronous mirror of ensureSourceRegistered for use inside the synchronous
 * stage runner.
 *
 * Probes `gbrain sources list --json`; when the source is already registered
 * at `path` this is a no-op. A registration pointing at a different path is
 * removed first, then the source is (re-)added with `--federated`.
 *
 * @param id   gbrain source id to ensure.
 * @param path absolute local path the source must point at.
 * @returns true if registration changed (added or re-added), false on no-op.
 * @throws when the gbrain CLI is absent, unconfigured, returns non-JSON, or a
 *         remove/add subcommand exits non-zero.
 */
function ensureSourceRegisteredSync(id: string, path: string): boolean {
  // Probe current registrations.
  let rawList: string;
  try {
    rawList = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 10_000,
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch (err) {
    const failure = err as NodeJS.ErrnoException & { stderr?: Buffer };
    // Binary missing entirely vs. present-but-unconfigured get distinct,
    // actionable messages; anything else propagates as-is.
    if (failure.code === "ENOENT") throw new Error("gbrain CLI not on PATH");
    const errText = failure.stderr?.toString() || "";
    if (errText.includes("Cannot connect to database") || errText.includes("config.json")) {
      throw new Error("gbrain not configured (run /setup-gbrain)");
    }
    throw err;
  }

  let listing: { sources?: Array<{ id?: string; local_path?: string }> };
  try {
    listing = JSON.parse(rawList);
  } catch (err) {
    throw new Error(`gbrain sources list returned non-JSON: ${(err as Error).message}`);
  }

  const existing = (listing.sources || []).find((entry) => entry.id === id);

  // Already registered at the right path — nothing to do.
  if (existing && existing.local_path === path) return false;

  // Registered but pointing elsewhere — drop the stale entry before re-adding.
  if (existing && existing.local_path !== path) {
    const rm = spawnSync("gbrain", ["sources", "remove", id, "--yes"], {
      encoding: "utf-8",
      timeout: 30_000,
    });
    if (rm.status !== 0) {
      throw new Error(`gbrain sources remove ${id} failed: ${rm.stderr || rm.stdout || `exit ${rm.status}`}`);
    }
  }

  const add = spawnSync("gbrain", ["sources", "add", id, "--path", path, "--federated"], {
    encoding: "utf-8",
    timeout: 30_000,
  });
  if (add.status !== 0) {
    throw new Error(`gbrain sources add ${id} failed: ${add.stderr || add.stdout || `exit ${add.status}`}`);
  }
  return true;
}
function runMemoryIngest(args: CliArgs): StageResult {
const t0 = Date.now();