v1.29.0.0 feat: worktree-aware gbrain code sources via path-hash IDs and CWD pin (#1382)

* feat: worktree-aware gbrain code sources via path-hash IDs and CWD pin

Conductor sibling worktrees of the same repo no longer collide on a shared
gstack-code-<slug> source ID. /sync-gbrain now derives a path-hashed source
ID per worktree, runs gbrain sources attach to write .gbrain-source in the
worktree root, and removes the legacy unsuffixed source on first new-format
sync to prevent orphan accumulation.

Bug fixes surfaced by /codex during /ship:
- Silent attach failure now treated as stage failure (no more ok:true while
  pin is missing → unqualified code-def hits wrong source).
- Startup preamble checks .gbrain-source in the cwd worktree, not global
  state, so an unsynced worktree no longer claims "indexed" because a
  sibling synced.
- Code stage no longer skipped on remote-MCP (Path 4); the early-exit was
  in the SKILL template, not the orchestrator.
- Source registration routes through lib/gbrain-sources.ts only; deleted
  the near-duplicate ensureSourceRegisteredSync from the orchestrator.

Requires gbrain v0.30.0+ (uses sources attach). Phase 0 spike report:
~/.gstack/projects/garrytan-gstack/2026-05-08-gbrain-split-engine-spike.md

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* chore: bump version and changelog (v1.29.0.0)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-05-08 12:46:15 -07:00
committed by GitHub
parent 443bde054c
commit 06605477e2
49 changed files with 928 additions and 721 deletions
+96 -76
View File
@@ -31,12 +31,12 @@
import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs";
import { join, dirname } from "path";
import { execSync, execFileSync, spawnSync } from "child_process";
import { execSync, spawnSync } from "child_process";
import { homedir } from "os";
import { createHash } from "crypto";
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
import { sourcePageCount } from "../lib/gbrain-sources";
import { ensureSourceRegistered, sourcePageCount } from "../lib/gbrain-sources";
// ── Types ──────────────────────────────────────────────────────────────────
@@ -159,17 +159,43 @@ function originUrl(): string | null {
}
/**
* Derive a stable source id for the cwd code corpus. Pattern: `gstack-code-<slug>`.
* Derive a worktree-aware source id for the cwd code corpus.
*
* gbrain enforces source ids to be 1-32 lowercase alnum chars with optional interior
* hyphens. We use the last two segments of the canonical remote (org/repo) and skip
* the host — `github.com` etc. is the same for nearly every user and just eats budget.
* If the resulting id still exceeds 32 chars, we keep the tail (most distinctive end)
* and append a 6-char hash of the full slug for collision resistance.
* Pattern: `gstack-code-<slug>-<pathhash8>` where slug comes from origin
* (org/repo) and pathhash8 is the first 8 hex chars of sha1(absolute repo
* path). The pathhash8 is what makes Conductor worktrees of the same repo
* coexist as separate sources in the same gbrain DB instead of stomping on
* each other.
*
* Falls back to the repo basename when there is no origin (local repo).
*
* gbrain enforces source ids to be 1-32 lowercase alnum chars with
* optional interior hyphens. `constrainSourceId` handles the 32-char cap
* with a hashed-tail fallback when the combined slug exceeds budget.
*/
/**
 * Derive a worktree-aware source id for the cwd code corpus.
 *
 * Pattern: `gstack-code-<slug>-<pathhash8>`. The slug comes from the last two
 * segments of the canonical origin remote (org/repo), or the repo basename for
 * a local-only repo; the 8-hex-char sha1 of the absolute repo path is what
 * keeps sibling worktrees of the same repo from colliding on one source id.
 * `constrainSourceId` enforces gbrain's 32-char id cap.
 */
function deriveCodeSourceId(repoPath: string): string {
  // First 8 hex chars of sha1(absolute path) — distinct per worktree checkout.
  const pathHash = createHash("sha1").update(repoPath).digest("hex").slice(0, 8);
  const remote = canonicalizeRemote(originUrl());
  let slug: string;
  if (remote) {
    // Keep only org/repo — the host segment is near-constant noise that
    // would just eat id budget.
    slug = remote.split("/").filter(Boolean).slice(-2).join("-");
  } else {
    // No origin remote (local repo): fall back to the directory basename.
    slug = repoPath.split("/").pop() || "repo";
  }
  return constrainSourceId("gstack-code", `${slug}-${pathHash}`);
}
/**
* Pre-pathhash source id, kept for orphan detection only.
*
* Earlier /sync-gbrain versions registered `gstack-code-<slug>` (no pathhash
* suffix). On a multi-worktree repo, those collapsed onto a single source id
* with last-sync-wins semantics. The new path-keyed id leaves the legacy
* source orphaned in the brain — federated cross-source search would return
* stale duplicate hits. We remove the legacy id once, on the first new-format
* sync from any worktree of this repo, so users don't accumulate orphans.
*/
function deriveLegacyCodeSourceId(repoPath: string): string {
const remote = canonicalizeRemote(originUrl());
if (remote) {
const segs = remote.split("/").filter(Boolean);
@@ -264,7 +290,7 @@ function releaseLock(): void {
// ── Stage runners ──────────────────────────────────────────────────────────
function runCodeImport(args: CliArgs): StageResult {
async function runCodeImport(args: CliArgs): Promise<StageResult> {
const t0 = Date.now();
const root = repoRoot();
if (!root) {
@@ -282,21 +308,37 @@ function runCodeImport(args: CliArgs): StageResult {
ran: false,
ok: true,
duration_ms: 0,
summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}`,
summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}; gbrain sources attach ${sourceId}`,
detail: { source_id: sourceId, source_path: root, status: "skipped" },
};
}
// Step 1: Ensure source registered (idempotent).
// Step 0: Best-effort cleanup of pre-pathhash legacy source.
// Earlier /sync-gbrain versions registered `gstack-code-<slug>` (no path
// suffix). On a multi-worktree repo, those collapsed onto a single id
// with last-sync-wins. Federated search would return stale duplicate
// hits forever if we left the orphan in place. Remove the legacy id once
// here so users don't accumulate orphans.
// Failure is non-fatal — we still register the new id below.
const legacyId = deriveLegacyCodeSourceId(root);
let legacyRemoved = false;
if (legacyId !== sourceId) {
const rm = spawnSync("gbrain", ["sources", "remove", legacyId, "--confirm-destructive"], {
encoding: "utf-8",
timeout: 30_000,
stdio: ["ignore", "pipe", "pipe"],
});
// Treat absent-source as success (clean state). gbrain emits "not found" on
// missing id; treat any non-zero exit without "not found" as a soft fail.
if (rm.status === 0) legacyRemoved = true;
}
// Step 1: Ensure source registered (idempotent). Single source of truth in lib —
// no synchronous duplicate here (per /codex review #12).
let registered = false;
try {
// ensureSourceRegistered is async — but we're in a sync stage runner. Use a deasync pattern.
// Bun supports top-level await in main(), but stage runners are sync per orchestrator contract.
// Workaround: run as a child Bun script for the registration probe.
// Simpler: call gbrain CLI directly via the sync helpers in lib/gbrain-sources.ts probeSource.
// For symmetry, we duplicate the small ensureSourceRegistered logic synchronously here using
// execFileSync. (The lib helper is preferred for async callers; sync helpers below.)
registered = ensureSourceRegisteredSync(sourceId, root);
const result = await ensureSourceRegistered(sourceId, root, { federated: true });
registered = result.changed;
} catch (err) {
return {
name: "code",
@@ -329,15 +371,49 @@ function runCodeImport(args: CliArgs): StageResult {
};
}
// Step 3: Read page_count from gbrain sources list.
// Step 3: Pin this worktree's CWD to the source via .gbrain-source. Subsequent
// gbrain code-def / code-refs / code-callers calls from anywhere under <root>
// route to this source by default — no --source flag needed.
//
// If attach fails the whole flow has a silent correctness problem: sync
// succeeded but unqualified `gbrain code-def` from this worktree will hit
// the wrong/default source. Treat it as a stage failure (ok=false) so the
// verdict block surfaces ERR and the user knows to retry rather than
// trusting stale results.
const attach = spawnSync("gbrain", ["sources", "attach", sourceId], {
encoding: "utf-8",
timeout: 10_000,
cwd: root,
stdio: ["ignore", "pipe", "pipe"],
});
const pageCount = sourcePageCount(sourceId);
const legacyNote = legacyRemoved ? `, removed legacy ${legacyId}` : "";
const baseSummary = `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"}${legacyNote})`;
if (attach.status !== 0) {
const reason = (attach.stderr || attach.stdout || "").trim().split("\n").pop() || `exit ${attach.status}`;
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `${baseSummary}; attach FAILED (${reason}) — code-def queries from this worktree will hit the default source until /sync-gbrain succeeds`,
detail: {
source_id: sourceId,
source_path: root,
page_count: pageCount,
last_imported: new Date().toISOString(),
status: "failed",
},
};
}
return {
name: "code",
ran: true,
ok: true,
duration_ms: Date.now() - t0,
summary: `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"})`,
summary: baseSummary,
detail: {
source_id: sourceId,
source_path: root,
@@ -348,62 +424,6 @@ function runCodeImport(args: CliArgs): StageResult {
};
}
/**
* Synchronous mirror of ensureSourceRegistered for use inside the synchronous
* stage runner. Returns true if registration changed (added or re-added).
*/
/**
 * Synchronous mirror of ensureSourceRegistered for use inside the synchronous
 * stage runner.
 *
 * Probes `gbrain sources list --json`; when the source is already registered
 * at `path` this is a no-op. A registration pointing at a different path is
 * removed first, then the source is (re-)added with `--federated`.
 *
 * @param id   gbrain source id to ensure.
 * @param path absolute local path the source must point at.
 * @returns true if registration changed (added or re-added), false on no-op.
 * @throws when the gbrain CLI is absent, unconfigured, returns non-JSON, or a
 *         remove/add subcommand exits non-zero.
 */
function ensureSourceRegisteredSync(id: string, path: string): boolean {
  // Probe current registrations.
  let rawList: string;
  try {
    rawList = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 10_000,
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch (err) {
    const failure = err as NodeJS.ErrnoException & { stderr?: Buffer };
    // Binary missing entirely vs. present-but-unconfigured get distinct,
    // actionable messages; anything else propagates as-is.
    if (failure.code === "ENOENT") throw new Error("gbrain CLI not on PATH");
    const errText = failure.stderr?.toString() || "";
    if (errText.includes("Cannot connect to database") || errText.includes("config.json")) {
      throw new Error("gbrain not configured (run /setup-gbrain)");
    }
    throw err;
  }

  let listing: { sources?: Array<{ id?: string; local_path?: string }> };
  try {
    listing = JSON.parse(rawList);
  } catch (err) {
    throw new Error(`gbrain sources list returned non-JSON: ${(err as Error).message}`);
  }

  const existing = (listing.sources || []).find((entry) => entry.id === id);

  // Already registered at the right path — nothing to do.
  if (existing && existing.local_path === path) return false;

  // Registered but pointing elsewhere — drop the stale entry before re-adding.
  if (existing && existing.local_path !== path) {
    const rm = spawnSync("gbrain", ["sources", "remove", id, "--yes"], {
      encoding: "utf-8",
      timeout: 30_000,
    });
    if (rm.status !== 0) {
      throw new Error(`gbrain sources remove ${id} failed: ${rm.stderr || rm.stdout || `exit ${rm.status}`}`);
    }
  }

  const add = spawnSync("gbrain", ["sources", "add", id, "--path", path, "--federated"], {
    encoding: "utf-8",
    timeout: 30_000,
  });
  if (add.status !== 0) {
    throw new Error(`gbrain sources add ${id} failed: ${add.stderr || add.stdout || `exit ${add.status}`}`);
  }
  return true;
}
function runMemoryIngest(args: CliArgs): StageResult {
const t0 = Date.now();