From f35afb7b1f2c4b2b90820131293cc7b865a707f6 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 13 May 2026 11:30:19 -0700 Subject: [PATCH] feat(gbrain): orchestrator SKIP when local engine not ok + remote-http transcripts via artifacts pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes in the sync orchestrator, both per plan D11/D12: 1. bin/gstack-gbrain-sync.ts: runCodeImport + runMemoryIngest call localEngineStatus() (shared classifier from lib/gbrain-local-status.ts). When status is not 'ok', return a SKIP stage result with a clear reason instead of crashing with "source registration failed: gbrain not configured". Brain-sync stage runs regardless — it doesn't depend on local engine. dry-run preview path is gated above the check so it continues to show would-do steps even when the engine is broken. 2. bin/gstack-memory-ingest.ts: when gbrain MCP is registered as remote-http (Path 4), persist staged transcripts to ~/.gstack/transcripts/run--/ instead of the ephemeral ~/.gstack/.staging-ingest--/ tmp dir, and SKIP the local `gbrain import` call entirely. The artifacts pipeline (gstack-brain-sync push to git, brain admin pulls and indexes) handles routing to the remote brain. Local PGLite (when present via Step 4.5) stays code-only. State recording still happens — prepared pages get their mtime+sha256 stamped under remote-http mode so the next /sync-gbrain doesn't re-stage them. Cleanup is skipped intentionally so the persisted dir survives until gstack-brain-sync moves it. Adds test/gbrain-sync-skip.test.ts covering 5 SKIP scenarios (broken-db, broken-config, no-cli, missing-config, ok pass-through). All 25 sync-related unit tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/gstack-gbrain-sync.ts | 60 +++++++++++ bin/gstack-memory-ingest.ts | 124 +++++++++++++++++++++- test/gbrain-sync-skip.test.ts | 191 ++++++++++++++++++++++++++++++++++ 3 files changed, 370 insertions(+), 5 deletions(-) create mode 100644 test/gbrain-sync-skip.test.ts diff --git a/bin/gstack-gbrain-sync.ts b/bin/gstack-gbrain-sync.ts index 36b265e42..732ee430c 100644 --- a/bin/gstack-gbrain-sync.ts +++ b/bin/gstack-gbrain-sync.ts @@ -37,6 +37,7 @@ import { createHash } from "crypto"; import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers"; import { ensureSourceRegistered, sourcePageCount } from "../lib/gbrain-sources"; +import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status"; // ── Types ────────────────────────────────────────────────────────────────── @@ -290,6 +291,42 @@ function releaseLock(): void { // ── Stage runners ────────────────────────────────────────────────────────── +/** + * Build a SKIP result for the code/memory stage when the local engine is + * not in 'ok' state (per plan D12). Surface the status verbatim so the + * verdict block tells the user exactly what's wrong without re-probing. + * + * Reasons mapped to user-actionable summaries: + * no-cli → "gbrain CLI not on PATH; install via /setup-gbrain" + * missing-config → "no local engine; run /setup-gbrain to add local PGLite" + * broken-config → "config file at ~/.gbrain/config.json is malformed; see /setup-gbrain Step 1.5" + * broken-db → "config points at unreachable DB; see /setup-gbrain Step 1.5" + */ +function skipStageForLocalStatus( + stage: "code" | "memory", + status: LocalEngineStatus, + t0: number, +): StageResult { + const reasons: Record, string> = { + "no-cli": "gbrain CLI not on PATH; install via /setup-gbrain", + "missing-config": + "no local engine; run /setup-gbrain to add local PGLite for code search", + "broken-config": + "config at ~/.gbrain/config.json is malformed; see /setup-gbrain Step 1.5", + "broken-db": + "config points at unreachable DB; see /setup-gbrain Step 1.5", + }; + const reason = reasons[status as Exclude]; + return { + name: stage, + ran: false, + ok: true, // SKIP (per D12) — not a stage failure, just an unsatisfied prerequisite + duration_ms: Date.now() - t0, + summary: `skipped — local engine ${status} — ${reason}`, + }; +} + + async function runCodeImport(args: CliArgs): Promise { const t0 = Date.now(); const root = repoRoot(); @@ -302,6 +339,9 @@ async function runCodeImport(args: CliArgs): Promise { const sourceId = deriveCodeSourceId(root); + // dry-run preview always shows the would-do steps, regardless of local + // engine state. Useful for "what would /sync-gbrain do" without probing + // the engine. if (args.mode === "dry-run") { return { name: "code", @@ -313,6 +353,17 @@ async function runCodeImport(args: CliArgs): Promise { }; } + // Split-engine pre-flight (per plan D12): when local engine is not ok, SKIP + // code stage cleanly. Brain-sync stage still runs because it doesn't depend + // on local engine. The /sync-gbrain Step 1.5 pre-flight surfaces the user + // remediation message; this skip just keeps the orchestrator from crashing + // when the local DB is dead. Skipped on --dry-run (above) since dry-run + // never actually probes anything. + const localStatus = localEngineStatus({ noCache: false }); + if (localStatus !== "ok") { + return skipStageForLocalStatus("code", localStatus, t0); + } + // Step 0: Best-effort cleanup of pre-pathhash legacy source. // Earlier /sync-gbrain versions registered `gstack-code-` (no path // suffix). On a multi-worktree repo, those collapsed onto a single id @@ -431,6 +482,15 @@ function runMemoryIngest(args: CliArgs): StageResult { return { name: "memory", ran: false, ok: true, duration_ms: 0, summary: "would: gstack-memory-ingest --probe" }; } + // Split-engine pre-flight (per plan D12). gstack-memory-ingest shells out + // to `gbrain import` which targets the LOCAL engine. When that engine is + // not ok, SKIP cleanly so brain-sync (the only stage that doesn't depend + // on local engine) still runs. + const localStatus = localEngineStatus({ noCache: false }); + if (localStatus !== "ok") { + return skipStageForLocalStatus("memory", localStatus, t0); + } + const ingestPath = join(import.meta.dir, "gstack-memory-ingest.ts"); const ingestArgs = ["run", ingestPath]; if (args.mode === "full") ingestArgs.push("--bulk"); diff --git a/bin/gstack-memory-ingest.ts b/bin/gstack-memory-ingest.ts index c6227341d..b1169ae69 100644 --- a/bin/gstack-memory-ingest.ts +++ b/bin/gstack-memory-ingest.ts @@ -1202,6 +1202,57 @@ function makeStagingDir(): string { return dir; } +/** + * Persistent staging dir used in remote-http MCP mode (split-engine D11). + * + * Instead of staging to ~/.gstack/.staging-ingest--/ and cleaning up + * after `gbrain import`, remote-http users get a stable path that survives. + * gstack-brain-sync's allowlist pushes ~/.gstack/transcripts/** to the + * artifacts repo; the brain admin's pull job indexes them into the remote + * brain. Local PGLite (if present) stays code-only. + * + * Path: ~/.gstack/transcripts// (run-id pid+ts so concurrent passes + * stay separate; brain-sync push doesn't care about subdir naming). + */ +function makePersistentTranscriptDir(): string { + const dir = join( + GSTACK_HOME, + "transcripts", + `run-${process.pid}-${Date.now()}`, + ); + mkdirSync(dir, { recursive: true }); + return dir; +} + +/** + * Detect whether the gbrain MCP is remote-http (Path 4) — and therefore we + * should NOT call `gbrain import` because we don't want the local PGLite + * polluted with transcripts (per plan D11). + * + * Reads ~/.claude.json directly (same fallback chain as gstack-gbrain-detect + * Tier 3). Cheap: one fs read, no fork-exec. + */ +function isRemoteHttpMcpMode(): boolean { + const home = process.env.HOME || homedir(); + const claudeJsonPath = join(home, ".claude.json"); + if (!existsSync(claudeJsonPath)) return false; + try { + const parsed = JSON.parse(readFileSync(claudeJsonPath, "utf-8")) as { + mcpServers?: { + gbrain?: { type?: string; transport?: string; url?: string }; + }; + }; + const entry = parsed.mcpServers?.gbrain; + if (!entry) return false; + const mtype = entry.type || entry.transport || ""; + if (mtype === "url" || mtype === "http" || mtype === "sse") return true; + if (entry.url) return true; + return false; + } catch { + return false; + } +} + /** * Best-effort recursive cleanup. Failures swallowed — at worst we leak a * staging dir to disk; the next run uses a new one and they age out via @@ -1387,12 +1438,24 @@ async function ingestPass(args: CliArgs): Promise { }; } - // Phase 2: stage to a per-run dir + invoke gbrain import. - const stagingDir = makeStagingDir(); + // Phase 2: stage + (optionally) invoke gbrain import. + // + // Split-engine branch per plan D11: in remote-http MCP mode, we stage to a + // PERSISTENT dir under ~/.gstack/transcripts/ and SKIP `gbrain import` + // entirely. gstack-brain-sync push will pick the dir up via its allowlist + // and the brain admin's pull job will index transcripts into the remote + // brain. Local PGLite (if any) stays code-only. + const remoteHttpMode = isRemoteHttpMcpMode(); + const stagingDir = remoteHttpMode + ? makePersistentTranscriptDir() + : makeStagingDir(); // Register staging dir with the signal forwarder so SIGTERM/SIGINT can // synchronously clean it up before process.exit (the async finally block - // below does NOT run after a signal-handler exit). - _activeStagingDir = stagingDir; + // below does NOT run after a signal-handler exit). In remote-http mode we + // skip registration — the dir is meant to persist. + if (!remoteHttpMode) { + _activeStagingDir = stagingDir; + } try { const staging = writeStaged(prep.prepared, stagingDir); failed += staging.errors.length; @@ -1415,11 +1478,62 @@ async function ingestPass(args: CliArgs): Promise { } if (!args.quiet) { + const action = remoteHttpMode + ? "persisting to artifacts pipeline (skipping local gbrain import — remote-http mode)" + : "running gbrain import"; console.error( - `[memory-ingest] staged ${staging.written} pages → ${stagingDir}; running gbrain import...`, + `[memory-ingest] staged ${staging.written} pages → ${stagingDir}; ${action}...`, ); } + // Remote-http branch (split-engine D11): no local gbrain import. The + // staged markdown lives under ~/.gstack/transcripts// and the + // next gstack-brain-sync push will move it to the artifacts repo. From + // there the brain admin's pull job indexes into the remote brain. + // + // We treat ALL prepared pages as "written" since the import didn't run + // and we have no per-page failures from gbrain to filter on. The + // brain admin's pull pipeline is the authoritative gate; from this + // machine's perspective, the act of staging IS the write. + if (remoteHttpMode) { + const nowIso = new Date().toISOString(); + for (const p of prep.prepared) { + try { + state.sessions[p.source_path] = { + mtime_ns: Math.floor(statSync(p.source_path).mtimeMs * 1e6), + sha256: fileSha256(p.source_path), + ingested_at: nowIso, + page_slug: p.page_slug, + partial: p.partial, + }; + written++; + } catch (err) { + console.error( + `[state-record] ${p.source_path}: ${(err as Error).message}`, + ); + } + } + state.last_full_walk = nowIso; + state.last_writer = "gstack-memory-ingest (remote-http mode)"; + saveState(state); + if (!args.quiet) { + console.error( + `[memory-ingest] persisted ${written} pages to ${stagingDir} (brain admin will index on next pull)`, + ); + } + // Skip the gbrain-import error handling + cleanupStagingDir paths + // below by short-circuiting the function. + return { + written, + skipped_secret: prep.skippedSecret, + skipped_dedup: prep.skippedDedup, + skipped_unattributed: prep.skippedUnattributed, + failed, + duration_ms: Date.now() - t0, + partial_pages: prep.partialPages, + }; + } + // D6: single batch import. `--no-embed` matches the prior per-file // behavior (we never enabled embedding); embeddings happen on-demand // via gbrain's own pipelines. `--json` gives us structured counts. diff --git a/test/gbrain-sync-skip.test.ts b/test/gbrain-sync-skip.test.ts new file mode 100644 index 000000000..c7fbabbe0 --- /dev/null +++ b/test/gbrain-sync-skip.test.ts @@ -0,0 +1,191 @@ +/** + * Tests the split-engine SKIP semantics in bin/gstack-gbrain-sync.ts (plan D12). + * + * When localEngineStatus() returns anything except 'ok', the orchestrator's + * code + memory stages return ran=false summaries; the brain-sync stage runs + * unchanged. This is the behavior that matters most for Garry's broken-db + * machine — instead of crashing two stages with ERR output, the orchestrator + * surfaces a clear skip reason and still pushes artifacts. + * + * We test via the script (spawn) rather than importing runCodeImport/runMemoryIngest + * directly because they're internal to the orchestrator. The fake gbrain + * binary controls localEngineStatus()'s output. + */ + +import { describe, it, expect } from "bun:test"; +import { + mkdtempSync, + mkdirSync, + writeFileSync, + chmodSync, + rmSync, +} from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; +import { execFileSync, spawnSync } from "child_process"; + +const SCRIPT = join(import.meta.dir, "..", "bin", "gstack-gbrain-sync.ts"); +const BUN_BIN = execFileSync("sh", ["-c", "command -v bun"], { encoding: "utf-8" }).trim(); + +interface FakeEnv { + tmp: string; + bindir: string; + home: string; + gstackHome: string; + cleanup: () => void; +} + +/** + * Build a sandboxed HOME with optional fake gbrain on PATH. + * `gbrainBehavior` controls how `gbrain sources list` reacts; this drives + * localEngineStatus()'s output. + */ +function makeEnv(opts: { + withGbrain: boolean; + gbrainBehavior?: "ok" | "broken-db" | "broken-config"; + withConfig: boolean; +}): FakeEnv { + const tmp = mkdtempSync(join(tmpdir(), "gbrain-sync-skip-")); + const bindir = join(tmp, "bin"); + const home = join(tmp, "home"); + const gstackHome = join(home, ".gstack"); + const gbrainDir = join(home, ".gbrain"); + + mkdirSync(bindir, { recursive: true }); + mkdirSync(home, { recursive: true }); + mkdirSync(gstackHome, { recursive: true }); + mkdirSync(gbrainDir, { recursive: true }); + + if (opts.withConfig) { + writeFileSync( + join(gbrainDir, "config.json"), + JSON.stringify({ engine: "pglite", database_url: "pglite:///fake" }), + ); + } + + if (opts.withGbrain) { + const behavior = opts.gbrainBehavior || "ok"; + const stderrLine = + behavior === "broken-db" + ? 'echo "Cannot connect to database: . Fix: Check your connection URL in ~/.gbrain/config.json" >&2' + : behavior === "broken-config" + ? 'echo "Error: malformed config.json" >&2' + : ""; + const exitCode = behavior === "ok" ? 0 : 1; + const fake = `#!/bin/sh +if [ "$1" = "--version" ]; then echo "gbrain 0.33.1.0"; exit 0; fi +if [ "$1 $2" = "sources list" ]; then + if [ ${exitCode} -eq 0 ]; then echo '{"sources":[]}'; exit 0; fi + ${stderrLine} + exit ${exitCode} +fi +if [ "$1" = "--help" ]; then echo " import"; exit 0; fi +exit 0 +`; + writeFileSync(join(bindir, "gbrain"), fake); + chmodSync(join(bindir, "gbrain"), 0o755); + } + + return { + tmp, + bindir, + home, + gstackHome, + cleanup: () => rmSync(tmp, { recursive: true, force: true }), + }; +} + +function runOrchestrator(env: FakeEnv, args: string[]): { stdout: string; stderr: string; exitCode: number } { + // Initialize a git repo in the sandbox so repoRoot() finds it (otherwise + // code stage skips with "not in git repo" before our check ever fires). + spawnSync("git", ["init", "-q", env.home], { encoding: "utf-8" }); + spawnSync("git", ["-C", env.home, "commit", "--allow-empty", "-m", "init", "-q"], { + encoding: "utf-8", + env: { ...process.env, GIT_AUTHOR_NAME: "T", GIT_AUTHOR_EMAIL: "t@t", GIT_COMMITTER_NAME: "T", GIT_COMMITTER_EMAIL: "t@t" }, + }); + + const result = spawnSync(BUN_BIN, [SCRIPT, ...args], { + encoding: "utf-8", + timeout: 30_000, + cwd: env.home, + env: { + ...process.env, + HOME: env.home, + GSTACK_HOME: env.gstackHome, + PATH: `${env.bindir}:/usr/bin:/bin`, + }, + }); + return { + stdout: result.stdout || "", + stderr: result.stderr || "", + exitCode: result.status ?? 1, + }; +} + +describe("gstack-gbrain-sync — split-engine SKIP (plan D12)", () => { + it("SKIPs code stage when local engine is broken-db; brain-sync still attempted", () => { + const env = makeEnv({ withGbrain: true, gbrainBehavior: "broken-db", withConfig: true }); + try { + const r = runOrchestrator(env, ["--code-only"]); + // Code stage should be SKIPped with a clear local-engine status reason. + // Match on the summary substring our skipStageForLocalStatus helper emits. + expect(r.stdout + r.stderr).toContain("local engine broken-db"); + // Crucial: NOT the legacy "source registration failed" error path that + // existed before this fix (codex #2 STOP-vs-SKIP consistency). + expect(r.stdout + r.stderr).not.toContain("source registration failed"); + } finally { + env.cleanup(); + } + }); + + it("SKIPs memory stage when local engine is broken-config", () => { + const env = makeEnv({ withGbrain: true, gbrainBehavior: "broken-config", withConfig: true }); + try { + const r = runOrchestrator(env, ["--no-code", "--no-brain-sync"]); + expect(r.stdout + r.stderr).toContain("local engine broken-config"); + } finally { + env.cleanup(); + } + }); + + it("SKIPs code stage when gbrain CLI is missing (no-cli)", () => { + const env = makeEnv({ withGbrain: false, withConfig: false }); + try { + const r = runOrchestrator(env, ["--code-only"]); + // Either "no-cli" (from skipStageForLocalStatus) OR the earlier + // gbrainAvailable() check (which fires first when the CLI is absent — + // returns "skipped (gbrain CLI not in PATH)"). Both are acceptable for + // this case; the user-visible outcome is the same. + const out = r.stdout + r.stderr; + const hasSkipReason = + out.includes("no-cli") || out.includes("gbrain CLI not in PATH"); + expect(hasSkipReason).toBe(true); + } finally { + env.cleanup(); + } + }); + + it("SKIPs code stage when config is missing (missing-config)", () => { + const env = makeEnv({ withGbrain: true, gbrainBehavior: "ok", withConfig: false }); + try { + const r = runOrchestrator(env, ["--code-only"]); + expect(r.stdout + r.stderr).toContain("local engine missing-config"); + } finally { + env.cleanup(); + } + }); + + it("runs code stage normally when local engine is ok", () => { + const env = makeEnv({ withGbrain: true, gbrainBehavior: "ok", withConfig: true }); + try { + const r = runOrchestrator(env, ["--code-only"]); + // When ok, the SKIP-for-local-status branch must NOT fire. + expect(r.stdout + r.stderr).not.toContain("local engine ok"); + expect(r.stdout + r.stderr).not.toContain("local engine no-cli"); + expect(r.stdout + r.stderr).not.toContain("local engine broken-db"); + expect(r.stdout + r.stderr).not.toContain("local engine missing-config"); + } finally { + env.cleanup(); + } + }); +});