mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-17 23:30:09 +02:00
64a7bee176
19 tests across three surfaces:
- resolveStageTimeoutMs (10 tests): undefined/empty → default; non-numeric,
zero, negative, below-floor, above-ceiling → warn + default; at-floor,
at-ceiling, valid mid-range → accepted as-is.
- decideResume (6 tests): no checkpoint, corrupt JSON, checkpoint + staging
ok, checkpoint + staging missing, checkpoint with no dir, checkpoint with
empty dir.
- SIGTERM staging preservation (3 static invariants): memory-ingest signal
handler must check stagingDirIsCheckpointed BEFORE cleanup; preserve
branch must come before cleanup branch (ordering); orchestrator must
pass GSTACK_INGEST_RESUME_DIR to the grandchild on resume.
Also threads process.env.HOME through readGbrainCheckpoint and
stagingDirIsCheckpointed so tests can redirect home. os.homedir() caches
at process start and ignores later mutation, so the env override is the
only reliable test injection point.
The build fails if the timeout bounds are removed, if the resume detection
short-circuits incorrectly, or if the SIGTERM handler regresses to
unconditional cleanup.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1104 lines
44 KiB
TypeScript
1104 lines
44 KiB
TypeScript
#!/usr/bin/env bun
|
|
/**
|
|
* gstack-gbrain-sync — V1 unified sync verb.
|
|
*
|
|
* Orchestrates three storage tiers per plan §"Storage tiering":
|
|
*
|
|
* 1. Code (current repo) → `gbrain sources add` (idempotent via
|
|
* lib/gbrain-sources.ts) + `gbrain sync
|
|
* --strategy code` (incremental) or
|
|
* `gbrain reindex-code --yes` (--full).
|
|
* NEVER `gbrain import` (markdown only).
|
|
* 2. Transcripts + curated memory → gstack-memory-ingest (typed put_page)
|
|
* 3. Curated artifacts to git → gstack-brain-sync (existing pipeline)
|
|
*
|
|
* Modes:
|
|
* --incremental (default) — mtime fast-path; runs all 3 stages with cache hits
|
|
* --full — first-run; full walk + reindex; honest budget per ED2
|
|
* --dry-run — preview what would sync; no writes anywhere (incl. state file)
|
|
*
|
|
* Concurrency safety per /plan-eng-review D1:
|
|
* - Lock file at ~/.gstack/.sync-gbrain.lock (PID + start ts).
|
|
* - Stale-lock takeover after 5 min (process death).
|
|
* - State file written via tmp+rename for atomicity.
|
|
* - Lock released in finally; SIGINT/SIGTERM trapped for cleanup.
|
|
*
|
|
* --watch (V1.5 P0 TODO): file-watcher daemon. NOTE: gbrain v0.25.1 already
|
|
* ships `gbrain sync --watch [--interval N]` and `gbrain sync --install-cron`;
|
|
* when revisited, /sync-gbrain --watch wires through to the gbrain CLI rather
|
|
* than building a gstack-side daemon.
|
|
*/
|
|
|
|
import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs";
|
|
import { join, dirname } from "path";
|
|
import { execSync, spawnSync } from "child_process";
|
|
import { homedir, hostname } from "os";
|
|
import { createHash } from "crypto";
|
|
|
|
import "../lib/conductor-env-shim";
|
|
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
|
|
import { ensureSourceRegistered, sourcePageCount } from "../lib/gbrain-sources";
|
|
import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status";
|
|
import { buildGbrainEnv, spawnGbrain, execGbrainJson } from "../lib/gbrain-exec";
|
|
|
|
// ── Types ──────────────────────────────────────────────────────────────────
|
|
|
|
/** Sync mode chosen on the command line. */
type Mode = "incremental" | "full" | "dry-run";

/** Flags produced by `parseArgs` for one invocation. */
interface CliArgs {
  /** Selected sync mode; `parseArgs` starts from "incremental". */
  mode: Mode;
  /** `--quiet`: suppress per-stage output. */
  quiet: boolean;
  /** `--no-code`: skip the cwd code-import stage. */
  noCode: boolean;
  /** `--no-memory`: skip the gstack-memory-ingest stage. */
  noMemory: boolean;
  /** `--no-brain-sync`: skip the gstack-brain-sync git pipeline stage. */
  noBrainSync: boolean;
  /** `--code-only`: `parseArgs` also turns on `noMemory` and `noBrainSync`. */
  codeOnly: boolean;
}

/** Structured detail the code stage attaches to its result. */
interface CodeStageDetail {
  /** gbrain source id the stage operated on. */
  source_id?: string;
  /** Repo root that was registered for that source. */
  source_path?: string;
  page_count?: number | null;
  last_imported?: string;
  status?: "ok" | "skipped" | "failed";
}

/** Outcome of a single pipeline stage. */
interface StageResult {
  /** Stage name, e.g. "code". */
  name: string;
  /** False when the stage was skipped or only previewed. */
  ran: boolean;
  /** Skipped stages report `true`; only real failures report `false`. */
  ok: boolean;
  duration_ms: number;
  /** One-line human-readable description of what happened. */
  summary: string;
  /** Stage-specific structured detail. Code stage carries source_id + page_count. */
  detail?: CodeStageDetail;
}
|
|
|
|
// ── Constants ──────────────────────────────────────────────────────────────
|
|
|
|
/** Home directory as reported by `os.homedir()` when the module loads. */
const HOME = homedir();

/** gstack state root; the `GSTACK_HOME` env var overrides `<home>/.gstack`. */
const GSTACK_HOME = process.env.GSTACK_HOME || join(HOME, ".gstack");

/** Sync state file inside the gstack state root. */
const STATE_PATH = join(GSTACK_HOME, ".gbrain-sync-state.json");

/** Lock file guarding against concurrent sync runs. */
const LOCK_PATH = join(GSTACK_HOME, ".sync-gbrain.lock");

/** Age (by mtime) after which an existing lock file counts as stale: 5 minutes. */
const STALE_LOCK_MS = 5 * 60_000;
|
|
|
|
// Default 35-minute timeout for code-walk + memory-ingest stages. Override via
// GSTACK_SYNC_CODE_TIMEOUT_MS / GSTACK_SYNC_MEMORY_TIMEOUT_MS. Bounds-checked
// in resolveStageTimeoutMs below so wildly-low values don't make resume
// useless and wildly-high values don't mask config typos. See #1611.
const DEFAULT_STAGE_TIMEOUT_MS = 35 * 60 * 1000; // 2_100_000ms = 35min
const MIN_STAGE_TIMEOUT_MS = 60_000; // 1 minute floor
const MAX_STAGE_TIMEOUT_MS = 86_400_000; // 24 hour ceiling

/**
 * Parse a stage-timeout env value with bounds validation. Exported for the
 * regression test.
 *
 * The value must be a whole decimal integer (surrounding whitespace is
 * tolerated). Strings that merely *start* with digits — "120000ms", "90000.9",
 * "1e7" — are rejected instead of being silently truncated to their numeric
 * prefix, so a typo can never become an accepted timeout.
 *
 * @param envValue Raw env var value; `undefined` or `""` means "not set".
 * @param envName  Env var name, used only in the warning text.
 * @returns The parsed value when it lies in
 *   [MIN_STAGE_TIMEOUT_MS, MAX_STAGE_TIMEOUT_MS]; otherwise
 *   DEFAULT_STAGE_TIMEOUT_MS. Every rejected (set but invalid) value emits one
 *   `console.warn`; an unset value falls back silently.
 */
export function resolveStageTimeoutMs(
  envValue: string | undefined,
  envName: string,
): number {
  if (envValue === undefined || envValue === "") return DEFAULT_STAGE_TIMEOUT_MS;

  // Validate the whole string rather than trusting parseInt's prefix parsing.
  const trimmed = envValue.trim();
  const n = /^[+-]?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;

  if (!Number.isFinite(n) || n <= 0) {
    console.warn(
      `[sync] ${envName}="${envValue}" is not a positive integer; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  if (n < MIN_STAGE_TIMEOUT_MS) {
    console.warn(
      `[sync] ${envName}=${n} is below the ${MIN_STAGE_TIMEOUT_MS}ms (1min) floor; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  if (n > MAX_STAGE_TIMEOUT_MS) {
    console.warn(
      `[sync] ${envName}=${n} is above the ${MAX_STAGE_TIMEOUT_MS}ms (24h) ceiling; falling back to ${DEFAULT_STAGE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_STAGE_TIMEOUT_MS;
  }
  return n;
}
|
|
|
|
/**
 * gbrain writes ~/.gbrain/import-checkpoint.json on every import run. If a
 * previous /sync-gbrain hit the timeout (SIGTERM = exit 143), the checkpoint
 * + its staging dir survive on disk. Detect both and let gbrain resume from
 * processedIndex+1 on the next run. If the staging dir is unusable, fall
 * through to a fresh restage with a one-line warning so the user sees we
 * noticed. See #1611 + plan D1/C1.
 *
 * Every field is optional: the file belongs to gbrain, so nothing about its
 * contents is taken for granted here.
 */
interface GbrainCheckpoint {
  dir?: string;
  totalFiles?: number;
  processedIndex?: number;
  completedFiles?: number;
  timestamp?: string;
}

/**
 * Read and validate gbrain's import checkpoint.
 *
 * HOME is read from `process.env.HOME` first so tests can redirect it;
 * `os.homedir()` is only the fallback (per the original note, it caches at
 * process start and ignores later mutation).
 *
 * @returns The checkpoint object, or `null` when the file is missing,
 *   unreadable, not valid JSON, or not a JSON object (arrays and primitives
 *   are rejected). Known fields whose runtime type does not match the
 *   interface are dropped, so callers never see e.g. a numeric `dir`;
 *   unknown fields are passed through untouched.
 */
export function readGbrainCheckpoint(): GbrainCheckpoint | null {
  const home = process.env.HOME || homedir();
  const cpPath = join(home, ".gbrain", "import-checkpoint.json");
  if (!existsSync(cpPath)) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(cpPath, "utf-8"));
  } catch {
    // Unreadable file or corrupt JSON — treat as no checkpoint and fall
    // through to fresh restage.
    return null;
  }
  // `typeof [] === "object"`, so arrays need an explicit rejection.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;

  const expectedTypes: Record<keyof GbrainCheckpoint, "string" | "number"> = {
    dir: "string",
    totalFiles: "number",
    processedIndex: "number",
    completedFiles: "number",
    timestamp: "string",
  };
  const checkpoint: Record<string, unknown> = { ...(parsed as Record<string, unknown>) };
  for (const [field, expected] of Object.entries(expectedTypes)) {
    if (field in checkpoint && typeof checkpoint[field] !== expected) {
      delete checkpoint[field];
    }
  }
  return checkpoint as GbrainCheckpoint;
}
|
|
|
|
/** Outcome of `decideResume`. */
export type ResumeVerdict =
  | { kind: "no-checkpoint" }
  | { kind: "resume"; stagingDir: string; processedIndex: number; totalFiles: number }
  | { kind: "stale-staging-missing"; stagingDir: string };

/**
 * Decide whether the next memory-ingest run should resume from gbrain's
 * checkpoint or restage from scratch.
 *
 *   - no checkpoint, or checkpoint without a `dir` → "no-checkpoint"
 *   - `dir` exists and is a directory             → "resume"
 *   - `dir` missing, not a directory, or unstat-able → "stale-staging-missing"
 *
 * Only existence and directory-ness are checked. The directory's contents are
 * not inspected, so an existing but empty staging directory still yields
 * "resume". `processedIndex` and `totalFiles` default to 0 when the checkpoint
 * omits them.
 */
export function decideResume(): ResumeVerdict {
  const checkpoint = readGbrainCheckpoint();
  if (checkpoint === null || !checkpoint.dir) {
    return { kind: "no-checkpoint" };
  }
  const stagingDir = checkpoint.dir;

  // statSync throws for a missing or unreadable path; both mean "not usable".
  let usable: boolean;
  try {
    usable = existsSync(stagingDir) && statSync(stagingDir).isDirectory();
  } catch {
    usable = false;
  }
  if (!usable) {
    return { kind: "stale-staging-missing", stagingDir };
  }

  const processedIndex = checkpoint.processedIndex ?? 0;
  const totalFiles = checkpoint.totalFiles ?? 0;
  return { kind: "resume", stagingDir, processedIndex, totalFiles };
}
|
|
|
|
// ── CLI ────────────────────────────────────────────────────────────────────
|
|
|
|
/** Write the CLI usage text to stderr in a single `console.error` call. */
function printUsage(): void {
  // One entry per output line; the trailing "" yields the final newline.
  const usageLines = [
    "Usage: gstack-gbrain-sync [--incremental|--full|--dry-run] [options]",
    "",
    "Modes:",
    "--incremental Default. mtime fast-path; ~50ms steady-state.",
    "--full First-run; full walk + reindex. Honest ~25-35 min for big Macs (ED2).",
    "--dry-run Preview what would sync; no writes anywhere.",
    "",
    "Options:",
    "--quiet Suppress per-stage output.",
    "--no-code Skip the cwd code-import stage.",
    "--no-memory Skip the gstack-memory-ingest stage (transcripts + artifacts).",
    "--no-brain-sync Skip the gstack-brain-sync git pipeline stage.",
    "--code-only Only run the code-import stage (alias for --no-memory --no-brain-sync).",
    "--help This text.",
    "",
    "Stages run in order: code → memory ingest → curated git push.",
    "Each stage failure is non-fatal; subsequent stages still run.",
    "",
  ];
  console.error(usageLines.join("\n"));
}
|
|
|
|
/**
 * Parse `process.argv` (everything after the script path) into `CliArgs`.
 *
 * Flags are applied left to right, so when several mode flags are given the
 * last one wins. `--help` / `-h` print usage and exit 0; an unrecognized
 * argument prints an error plus usage and exits 1. Both exits happen as soon
 * as the offending argument is reached.
 */
function parseArgs(): CliArgs {
  const parsed: CliArgs = {
    mode: "incremental",
    quiet: false,
    noCode: false,
    noMemory: false,
    noBrainSync: false,
    codeOnly: false,
  };

  const showHelpAndExit = (): never => {
    printUsage();
    process.exit(0);
  };

  // A Map (not a plain object) so inherited keys such as "constructor" are
  // still reported as unknown arguments.
  const flagActions = new Map<string, () => void>([
    ["--incremental", () => { parsed.mode = "incremental"; }],
    ["--full", () => { parsed.mode = "full"; }],
    ["--dry-run", () => { parsed.mode = "dry-run"; }],
    ["--quiet", () => { parsed.quiet = true; }],
    ["--no-code", () => { parsed.noCode = true; }],
    ["--no-memory", () => { parsed.noMemory = true; }],
    ["--no-brain-sync", () => { parsed.noBrainSync = true; }],
    ["--code-only", () => {
      parsed.codeOnly = true;
      parsed.noMemory = true;
      parsed.noBrainSync = true;
    }],
    ["--help", showHelpAndExit],
    ["-h", showHelpAndExit],
  ]);

  for (const arg of process.argv.slice(2)) {
    const action = flagActions.get(arg);
    if (action === undefined) {
      console.error(`Unknown argument: ${arg}`);
      printUsage();
      process.exit(1);
    }
    action();
  }

  return parsed;
}
|
|
|
|
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Top-level directory of the git work tree containing the process cwd.
 *
 * @returns The trimmed output of `git rev-parse --show-toplevel`, or `null`
 *   when the command throws (non-zero exit, git unavailable, or the 2s
 *   timeout elapsing).
 */
function repoRoot(): string | null {
  let toplevel: string | null;
  try {
    const stdout = execSync("git rev-parse --show-toplevel", { encoding: "utf-8", timeout: 2000 });
    toplevel = stdout.trim();
  } catch {
    toplevel = null;
  }
  return toplevel;
}
|
|
|
|
/**
 * URL of the `origin` remote for the repo containing the process cwd.
 *
 * @returns The trimmed output of `git remote get-url origin`, or `null` when
 *   the command throws (no such remote, not a repo, git unavailable, or the
 *   2s timeout elapsing).
 */
function originUrl(): string | null {
  let url: string | null;
  try {
    const stdout = execSync("git remote get-url origin", { encoding: "utf-8", timeout: 2000 });
    url = stdout.trim();
  } catch {
    url = null;
  }
  return url;
}
|
|
|
|
/**
 * Derive a host- and worktree-aware source id for the cwd code corpus.
 *
 * Pattern: `gstack-code-<slug>-<hostpathhash8>`, where slug is the last two
 * segments of the canonicalized origin remote (org/repo) and hostpathhash8 is
 * the first 8 hex chars of sha1(`${hostname}::${repoPath}`). Folding hostname
 * into the hash keeps Conductor worktrees of the same repo distinct on one
 * host AND keeps two machines that share an absolute layout (e.g.
 * chezmoi-managed home dirs against a federated brain) from colliding.
 *
 * When the org+repo form does not survive `constrainSourceId` with the hash
 * suffix intact, the org is dropped and only repo+hash is tried. With no
 * origin at all (local repo) the last `/`-separated segment of `repoPath` is
 * used, or "repo" if that is empty.
 *
 * `GSTACK_HOSTNAME` overrides `os.hostname()` for deterministic tests.
 *
 * gbrain enforces source ids of 1-32 lowercase alnum chars with optional
 * interior hyphens; `constrainSourceId` applies that cap.
 */
function deriveCodeSourceId(repoPath: string): string {
  const machine = process.env.GSTACK_HOSTNAME || hostname();
  const suffix = createHash("sha1").update(`${machine}::${repoPath}`).digest("hex").slice(0, 8);
  const idFor = (name: string): string => constrainSourceId("gstack-code", `${name}-${suffix}`);

  const remote = canonicalizeRemote(originUrl());
  if (!remote) {
    // No origin: name the source after the repo directory.
    return idFor(repoPath.split("/").pop() || "repo");
  }

  const parts = remote.split("/").filter(Boolean);
  const orgAndRepo = idFor(parts.slice(-2).join("-"));
  // Suffix still present → nothing was truncated, so the readable form fits.
  if (orgAndRepo.endsWith(`-${suffix}`)) return orgAndRepo;

  // Otherwise retry without the org so the repo name stays readable; if that
  // is still too long, constrainSourceId produces its hashed fallback.
  return idFor(parts[parts.length - 1] || "repo");
}
|
|
|
|
/**
 * Pre-pathhash source id, kept for orphan detection only.
 *
 * Earlier /sync-gbrain versions registered `gstack-code-<slug>` (no pathhash
 * suffix). On a multi-worktree repo, those collapsed onto a single source id
 * with last-sync-wins semantics. The new path-keyed id leaves the legacy
 * source orphaned in the brain — federated cross-source search would return
 * stale duplicate hits. The legacy id is removed once, on the first
 * new-format sync from any worktree of this repo, so users don't accumulate
 * orphans.
 *
 * The slug is the last two segments of the canonicalized origin remote, or
 * the last `/`-separated segment of `repoPath` ("repo" if empty) when there
 * is no origin.
 */
function deriveLegacyCodeSourceId(repoPath: string): string {
  const remote = canonicalizeRemote(originUrl());
  const slug = remote
    ? remote.split("/").filter(Boolean).slice(-2).join("-")
    : (repoPath.split("/").pop() || "repo");
  return constrainSourceId("gstack-code", slug);
}
|
|
|
|
/**
 * Pre-#1468 path-only-hash source id, kept for hostname-fold migration only.
 *
 * Before the hostname fold, `deriveCodeSourceId` hashed only the absolute
 * repo path: `gstack-code-<slug>-<sha1(path).slice(0,8)>`. After #1468 the
 * hash key is `${hostname}::${path}`, so every existing user's brain has a
 * legacy id that no longer matches what `deriveCodeSourceId` produces. This
 * form is detected once; migration renames in place if the gbrain CLI
 * supports `sources rename`, and otherwise cleans up after the new source
 * successfully syncs. Distinct from `deriveLegacyCodeSourceId` (pre-pathhash
 * v1.x form); both probes run.
 */
export function derivePathOnlyHashLegacyId(repoPath: string): string {
  const suffix = createHash("sha1").update(repoPath).digest("hex").slice(0, 8);
  const remote = canonicalizeRemote(originUrl());
  const slug = remote
    ? remote.split("/").filter(Boolean).slice(-2).join("-")
    : (repoPath.split("/").pop() || "repo");
  return constrainSourceId("gstack-code", `${slug}-${suffix}`);
}
|
|
|
|
/**
 * Feature-check whether the installed gbrain CLI ships `sources rename <old> <new>`.
 *
 * Per the v1.40.0.0 design review: probing `gbrain sources rename --help` and
 * matching for the exact argument shape catches the case where gbrain's
 * `sources` parent help mentions a `rename` subcommand but the CLI doesn't
 * accept the `<old> <new>` form (or vice versa). As of gbrain 0.35.0.0 this
 * command does not exist, so the function returns false and the migration
 * path falls back to register-new + sync-OK + remove-old.
 *
 * The answer is cached for the lifetime of the process. The cache is not
 * keyed by `env`, so the first call's result is reused for later calls.
 */
let _gbrainSupportsRenameCache: boolean | null = null;

/** Clear the cached probe result so the next call probes again. */
export function _resetGbrainSupportsRenameCache(): void {
  _gbrainSupportsRenameCache = null;
}

function gbrainSupportsSourcesRename(env?: NodeJS.ProcessEnv): boolean {
  if (_gbrainSupportsRenameCache !== null) return _gbrainSupportsRenameCache;

  let supported = false;
  try {
    const probe = spawnGbrain(["sources", "rename", "--help"], {
      timeout: 5_000,
      baseEnv: env,
    });
    const helpText = `${probe.stdout || ""}\n${probe.stderr || ""}`;
    // Accepted usage spellings: `rename <old> <new>` (literal angle
    // brackets), `rename OLD NEW`, or `rename <oldId> <newId>`.
    const usageShapes = [
      /sources\s+rename\s+<old>\s+<new>/i,
      /sources\s+rename\s+OLD\s+NEW/,
      /sources\s+rename\s+<oldId>\s+<newId>/i,
    ];
    const shapeFound = usageShapes.some((shape) => shape.test(helpText));
    // Both the usage shape and a clean exit are required.
    supported = shapeFound && probe.status === 0;
  } catch {
    supported = false;
  }

  _gbrainSupportsRenameCache = supported;
  return supported;
}
|
|
|
|
/**
 * Look up a source's `local_path` from `gbrain sources list --json`.
 *
 * `env` is forwarded to the gbrain helper as `baseEnv`. Tests inject a PATH
 * that points at a gbrain shim so the helper can be exercised without a real
 * gbrain CLI.
 *
 * Shape note: `gbrain sources list --json` returns `{sources: [...]}` (v0.20+);
 * older versions returned a flat array. Both are accepted for forward/backward
 * compat (mirrors `probeSource`/`sourcePageCount` in lib/gbrain-sources.ts).
 *
 * @returns The first matching entry's `local_path`, or `null` when the listing
 *   is falsy, no entry has this id, or the entry has no `local_path`.
 */
export function sourceLocalPath(sourceId: string, env?: NodeJS.ProcessEnv): string | null {
  type SourceRow = { id?: string; local_path?: string };

  const listing = execGbrainJson<unknown>(["sources", "list", "--json"], { baseEnv: env });
  if (!listing) return null;

  let rows: SourceRow[];
  if (Array.isArray(listing)) {
    rows = listing as SourceRow[];
  } else {
    rows = (listing as { sources?: SourceRow[] }).sources ?? [];
  }

  const match = rows.find((row) => row.id === sourceId);
  return match?.local_path ?? null;
}
|
|
|
|
/** Result of `planHostnameFoldMigration` — informs `runCodeImport` of next steps. */
export type HostnameFoldMigration =
  | { kind: "none"; reason: "ids-match" | "no-legacy-source" }
  | { kind: "skipped-path-drift"; oldId: string; oldPath: string; currentPath: string }
  | { kind: "renamed"; oldId: string; newId: string }
  | { kind: "pending-cleanup"; oldId: string };

/**
 * Decide how to migrate from the pre-#1468 path-only-hash source id to the
 * new hostname-fold id.
 *
 * Order:
 *   1. Legacy id equals the new id → nothing to do ("ids-match").
 *   2. The legacy source has no `local_path` in gbrain's listing → nothing to
 *      migrate ("no-legacy-source").
 *   3. Its `local_path` differs from `currentRoot` (plain string comparison) →
 *      the user moved the repo or two machines share a hash slot. Nothing is
 *      renamed or removed; the new source is registered alongside and the
 *      user cleans up manually ("skipped-path-drift").
 *   4. `gbrain sources rename` is supported and the rename exits 0 →
 *      "renamed", pages preserved.
 *   5. Otherwise "pending-cleanup": the caller registers + syncs the new
 *      source first and removes the old one only after the sync succeeds with
 *      a non-zero page count. This avoids a data-loss window where the old
 *      source is gone before the new one is verifiably populated. A rename
 *      that fails at runtime also lands here.
 */
export function planHostnameFoldMigration(
  currentRoot: string,
  newSourceId: string,
  legacyPathHashId: string,
  env?: NodeJS.ProcessEnv,
): HostnameFoldMigration {
  const oldId = legacyPathHashId;
  if (oldId === newSourceId) return { kind: "none", reason: "ids-match" };

  const registeredPath = sourceLocalPath(oldId, env);
  if (registeredPath === null) return { kind: "none", reason: "no-legacy-source" };

  if (registeredPath !== currentRoot) {
    return { kind: "skipped-path-drift", oldId, oldPath: registeredPath, currentPath: currentRoot };
  }

  // The rename is attempted only when the CLI advertises the subcommand.
  const renamed =
    gbrainSupportsSourcesRename(env)
    && spawnGbrain(["sources", "rename", oldId, newSourceId], { baseEnv: env }).status === 0;

  return renamed
    ? { kind: "renamed", oldId, newId: newSourceId }
    : { kind: "pending-cleanup", oldId };
}
|
|
|
|
/**
 * Remove an orphaned source. Called only after new-source sync verifies pages
 * exist, so the old source is provably redundant before deletion.
 *
 * Flag note: existing call sites used `--confirm-destructive` here and
 * `--yes` in `lib/gbrain-sources.ts` — gbrain 0.35.0.0 accepts neither
 * deterministically (the subcommand surface help is generic). This passes
 * `--confirm-destructive` to match the existing call site convention; the
 * flag-helper centralization in commit 4 (lib/gbrain-exec.ts) will resolve
 * the inconsistency across the codebase.
 *
 * @returns `true` only when `gbrain sources remove` exits with status 0.
 */
export function removeOrphanedSource(oldId: string, env?: NodeJS.ProcessEnv): boolean {
  const removeArgs = ["sources", "remove", oldId, "--confirm-destructive"];
  const { status } = spawnGbrain(removeArgs, { baseEnv: env });
  return status === 0;
}
|
|
|
|
/**
 * Build a gbrain-valid source id (1-32 lowercase alnum + interior hyphens).
 *
 * `raw` is lowercased, every run of non-alphanumerics becomes one hyphen, and
 * edge hyphens are stripped; the result is prefixed with `${prefix}-`.
 * `prefix` itself is used as given.
 *
 *   - Empty slug (e.g. raw was "___"): `${prefix}-<sha1(raw)[0..6]>`, so the
 *     id has no trailing hyphen and is stable across runs.
 *   - Fits in 32 chars: `${prefix}-${slug}` unchanged.
 *   - Too long: `${prefix}-<tail>-<sha1(slug)[0..6]>`, where <tail> is as many
 *     whole hyphen-separated tokens from the RIGHT of the slug as fit. Tokens
 *     are never cut mid-word: "drummerms-av-sow-wiz-skill-270c0001" yields
 *     "${prefix}-270c0001-<hash>", not "${prefix}-kill-270c0001-<hash>".
 *   - Too long and no token fits (or the prefix leaves no room for a tail):
 *     `${prefix}-<hash>`.
 */
function constrainSourceId(prefix: string, raw: string): string {
  const MAX_ID_LENGTH = 32;
  const HASH_LENGTH = 6;
  const shortHash = (input: string): string =>
    createHash("sha1").update(input).digest("hex").slice(0, HASH_LENGTH);

  const slug = raw.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
  if (slug === "") {
    return `${prefix}-${shortHash(raw || "_empty")}`;
  }

  const candidate = `${prefix}-${slug}`;
  if (candidate.length <= MAX_ID_LENGTH) return candidate;

  const digest = shortHash(slug);
  const hashOnly = `${prefix}-${digest}`;
  // Room left for the tail once prefix, two hyphens and the hash are paid for.
  const tailBudget = MAX_ID_LENGTH - prefix.length - 2 - HASH_LENGTH;
  if (tailBudget < 1) return hashOnly;

  // Grow the tail leftwards one whole token at a time until the next token
  // would overflow the budget.
  const remaining = slug.split("-").filter(Boolean);
  let tail = "";
  while (remaining.length > 0) {
    const word = remaining[remaining.length - 1];
    const grown = tail === "" ? word : `${word}-${tail}`;
    if (grown.length > tailBudget) break;
    tail = grown;
    remaining.pop();
  }

  return tail === "" ? hashOnly : `${prefix}-${tail}-${digest}`;
}
|
|
|
|
// ── Lock file (D1) ─────────────────────────────────────────────────────────
|
|
|
|
/** Contents of the lock file: who holds it and since when. */
interface LockInfo {
  pid: number;
  started_at: string;
}

/**
 * Report whether the process recorded in the lock file is still running.
 *
 * Uses signal 0, which performs the existence/permission check without
 * delivering a signal. EPERM means the process exists but belongs to another
 * user, which still counts as alive. An unreadable, unparsable, or pid-less
 * lock counts as "not alive" so the mtime-based takeover still applies to it.
 *
 * NOTE(review): a recycled PID makes a dead holder look alive; in that case
 * the lock file has to be deleted by hand.
 */
function lockHolderIsAlive(): boolean {
  let pid: unknown;
  try {
    const parsed: unknown = JSON.parse(readFileSync(LOCK_PATH, "utf-8"));
    pid = (parsed as Partial<LockInfo> | null)?.pid;
  } catch {
    return false;
  }
  // pid <= 0 would address a process group rather than one process.
  if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    return (err as NodeJS.ErrnoException).code === "EPERM";
  }
}

/**
 * Try to take the sync lock at LOCK_PATH.
 *
 * An existing lock blocks acquisition while it is younger than STALE_LOCK_MS
 * (by mtime). An older lock is taken over only when its recorded holder is no
 * longer running: stages may legitimately run far longer than STALE_LOCK_MS,
 * so age alone must not let a second invocation steal the lock of a sync
 * that is still in progress.
 *
 * The lock is created with the exclusive `wx` flag, so when two processes
 * race for it only one write succeeds.
 *
 * @returns `true` when this process now holds the lock, `false` otherwise
 *   (held by someone else, un-stat-able, or the exclusive write failed).
 */
function acquireLock(): boolean {
  mkdirSync(GSTACK_HOME, { recursive: true });
  if (existsSync(LOCK_PATH)) {
    try {
      const ageMs = Date.now() - statSync(LOCK_PATH).mtimeMs;
      if (ageMs <= STALE_LOCK_MS) return false;
      // Old lock, but the holder is still working — do not take over.
      if (lockHolderIsAlive()) return false;
      // Stale and orphaned; take over.
      unlinkSync(LOCK_PATH);
    } catch {
      // Cannot stat or unlink; bail conservatively.
      return false;
    }
  }
  const info: LockInfo = { pid: process.pid, started_at: new Date().toISOString() };
  try {
    writeFileSync(LOCK_PATH, JSON.stringify(info), { encoding: "utf-8", flag: "wx" });
    return true;
  } catch {
    return false;
  }
}
|
|
|
|
/**
 * Delete the lock file, but only when it records this process's pid.
 *
 * Best-effort: a missing lock is a no-op, and any error (unreadable file,
 * malformed JSON, failed unlink) is swallowed so cleanup never throws.
 */
function releaseLock(): void {
  try {
    if (existsSync(LOCK_PATH)) {
      const holder = JSON.parse(readFileSync(LOCK_PATH, "utf-8")) as LockInfo;
      // Never remove a lock that another process owns.
      if (holder.pid === process.pid) {
        unlinkSync(LOCK_PATH);
      }
    }
  } catch {
    // Best-effort cleanup.
  }
}
|
|
|
|
// ── Stage runners ──────────────────────────────────────────────────────────
|
|
|
|
/**
 * Build a SKIP result for the code/memory stage when the local engine is
 * not in 'ok' state (per plan D12). The status is surfaced verbatim so the
 * verdict block tells the user exactly what's wrong without re-probing.
 *
 * The result has `ran: false` and `ok: true`: a skip is an unsatisfied
 * prerequisite, not a stage failure. `duration_ms` is measured from `t0`.
 *
 * Status → user-actionable reason:
 *   no-cli         → install the gbrain CLI via /setup-gbrain
 *   missing-config → run /setup-gbrain to add local PGLite
 *   broken-config  → ~/.gbrain/config.json is malformed (/setup-gbrain Step 1.5)
 *   broken-db      → config points at an unreachable DB (/setup-gbrain Step 1.5)
 *
 * Callers are expected to pass a non-"ok" status; "ok" has no reason entry.
 */
function skipStageForLocalStatus(
  stage: "code" | "memory",
  status: LocalEngineStatus,
  t0: number,
): StageResult {
  type NotOk = Exclude<LocalEngineStatus, "ok">;
  const reasonByStatus: Record<NotOk, string> = {
    "no-cli": "gbrain CLI not on PATH; install via /setup-gbrain",
    "missing-config": "no local engine; run /setup-gbrain to add local PGLite for code search",
    "broken-config": "config at ~/.gbrain/config.json is malformed; see /setup-gbrain Step 1.5",
    "broken-db": "config points at unreachable DB; see /setup-gbrain Step 1.5",
  };
  const why = reasonByStatus[status as NotOk];
  const elapsedMs = Date.now() - t0;

  const skipped: StageResult = {
    name: stage,
    ran: false,
    // SKIP (per D12) — not a stage failure, just an unsatisfied prerequisite.
    ok: true,
    duration_ms: elapsedMs,
    summary: `skipped — local engine ${status} — ${why}`,
  };
  return skipped;
}
|
|
|
|
|
|
async function runCodeImport(args: CliArgs): Promise<StageResult> {
|
|
const t0 = Date.now();
|
|
const root = repoRoot();
|
|
if (!root) {
|
|
return { name: "code", ran: false, ok: true, duration_ms: 0, summary: "skipped (not in git repo)" };
|
|
}
|
|
|
|
const sourceId = deriveCodeSourceId(root);
|
|
|
|
// dry-run preview always shows the would-do steps, regardless of local
|
|
// engine state. Useful for "what would /sync-gbrain do" without probing
|
|
// the engine.
|
|
if (args.mode === "dry-run") {
|
|
return {
|
|
name: "code",
|
|
ran: false,
|
|
ok: true,
|
|
duration_ms: 0,
|
|
summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}; gbrain sources attach ${sourceId}`,
|
|
detail: { source_id: sourceId, source_path: root, status: "skipped" },
|
|
};
|
|
}
|
|
|
|
// Split-engine pre-flight (per plan D12): when local engine is not ok, SKIP
|
|
// code stage cleanly. Brain-sync stage still runs because it doesn't depend
|
|
// on local engine. The /sync-gbrain Step 1.5 pre-flight surfaces the user
|
|
// remediation message; this skip just keeps the orchestrator from crashing
|
|
// when the local DB is dead. Skipped on --dry-run (above) since dry-run
|
|
// never actually probes anything.
|
|
const localStatus = localEngineStatus({ noCache: false });
|
|
if (localStatus !== "ok") {
|
|
return skipStageForLocalStatus("code", localStatus, t0);
|
|
}
|
|
|
|
// Step 0a: Best-effort cleanup of pre-pathhash legacy source (v1.x form).
|
|
// Earlier /sync-gbrain versions registered `gstack-code-<slug>` (no path
|
|
// suffix). On a multi-worktree repo, those collapsed onto a single id
|
|
// with last-sync-wins. Federated search would return stale duplicate
|
|
// hits forever if we left the orphan in place. Remove the legacy id once
|
|
// here so users don't accumulate orphans.
|
|
// Failure is non-fatal — we still register the new id below.
|
|
// gbrainEnv seeds DATABASE_URL from gbrain's config so this stage works
|
|
// inside Next.js / Prisma / Rails projects with their own .env.local
|
|
// (codex review #7 — bug fix is wider than #1508 as filed).
|
|
const gbrainEnv = buildGbrainEnv({ announce: !args.quiet });
|
|
const legacyId = deriveLegacyCodeSourceId(root);
|
|
let legacyRemoved = false;
|
|
if (legacyId !== sourceId) {
|
|
const rm = spawnGbrain(["sources", "remove", legacyId, "--confirm-destructive"], {
|
|
timeout: 30_000,
|
|
baseEnv: gbrainEnv,
|
|
});
|
|
// Treat absent-source as success (clean state). gbrain emits "not found" on
|
|
// missing id; treat any non-zero exit without "not found" as a soft fail.
|
|
if (rm.status === 0) legacyRemoved = true;
|
|
}
|
|
|
|
// Step 0b: Hostname-fold migration (#1414).
|
|
// Before #1468 the source id hashed only the absolute repo path. After the
|
|
// hostname fold, every existing user has a legacy id that no longer matches
|
|
// what deriveCodeSourceId produces. Try rename-in-place first (preserves
|
|
// pages); fall back to register-new → sync-OK → remove-old. Path-drift
|
|
// (user moved the repo, etc.) skips migration with a warning.
|
|
const pathOnlyHashLegacyId = derivePathOnlyHashLegacyId(root);
|
|
const migration = planHostnameFoldMigration(root, sourceId, pathOnlyHashLegacyId, gbrainEnv);
|
|
if (migration.kind === "skipped-path-drift" && !args.quiet) {
|
|
console.error(
|
|
`[sync:code] hostname-fold migration skipped: legacy source ${migration.oldId} `
|
|
+ `points at ${migration.oldPath}, current repo is ${migration.currentPath}. `
|
|
+ `Clean up manually with: gbrain sources remove ${migration.oldId} --confirm-destructive`,
|
|
);
|
|
} else if (migration.kind === "renamed" && !args.quiet) {
|
|
console.error(`[sync:code] hostname-fold migration: renamed ${migration.oldId} → ${migration.newId} (pages preserved)`);
|
|
}
|
|
|
|
// Step 1: Ensure source registered (idempotent). Single source of truth in lib —
|
|
// no synchronous duplicate here (per /codex review #12).
|
|
let registered = false;
|
|
try {
|
|
const result = await ensureSourceRegistered(sourceId, root, { federated: true, env: gbrainEnv });
|
|
registered = result.changed;
|
|
} catch (err) {
|
|
return {
|
|
name: "code",
|
|
ran: true,
|
|
ok: false,
|
|
duration_ms: Date.now() - t0,
|
|
summary: `source registration failed: ${(err as Error).message}`,
|
|
detail: { source_id: sourceId, source_path: root, status: "failed" },
|
|
};
|
|
}
|
|
|
|
// Step 2: Always run the page-creating file walk first, then (for --full)
|
|
// a full re-embed.
|
|
//
|
|
// `gbrain reindex-code` only RE-EMBEDS pages that already exist; it never
|
|
// walks the filesystem. On a freshly-registered source (0 pages) a --full
|
|
// run that called reindex-code alone found nothing ("No code pages to
|
|
// reindex"), finished in ~1s, and left the code index permanently empty
|
|
// while still reporting OK. The page-creating walk is `sync --strategy
|
|
// code`, so --full must run it FIRST, then reindex-code, to honor the
|
|
// documented "full walk + reindex" contract for both fresh and populated
|
|
// sources.
|
|
const codeTimeoutMs = resolveStageTimeoutMs(
|
|
process.env.GSTACK_SYNC_CODE_TIMEOUT_MS,
|
|
"GSTACK_SYNC_CODE_TIMEOUT_MS",
|
|
);
|
|
const walkResult = spawnGbrain(["sync", "--strategy", "code", "--source", sourceId], {
|
|
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
|
|
timeout: codeTimeoutMs,
|
|
baseEnv: gbrainEnv,
|
|
});
|
|
|
|
if (walkResult.status !== 0) {
|
|
return {
|
|
name: "code",
|
|
ran: true,
|
|
ok: false,
|
|
duration_ms: Date.now() - t0,
|
|
summary: `gbrain sync --strategy code --source ${sourceId} exited ${walkResult.status}`,
|
|
detail: { source_id: sourceId, source_path: root, status: "failed" },
|
|
};
|
|
}
|
|
|
|
if (args.mode === "full") {
|
|
const reindexResult = spawnGbrain(["reindex-code", "--source", sourceId, "--yes"], {
|
|
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
|
|
timeout: codeTimeoutMs,
|
|
baseEnv: gbrainEnv,
|
|
});
|
|
|
|
if (reindexResult.status !== 0) {
|
|
return {
|
|
name: "code",
|
|
ran: true,
|
|
ok: false,
|
|
duration_ms: Date.now() - t0,
|
|
summary: `gbrain reindex-code --source ${sourceId} exited ${reindexResult.status}`,
|
|
detail: { source_id: sourceId, source_path: root, status: "failed" },
|
|
};
|
|
}
|
|
}
|
|
|
|
// Step 3: Pin this worktree's CWD to the source via .gbrain-source. Subsequent
|
|
// gbrain code-def / code-refs / code-callers calls from anywhere under <root>
|
|
// route to this source by default — no --source flag needed.
|
|
//
|
|
// If attach fails the whole flow has a silent correctness problem: sync
|
|
// succeeded but unqualified `gbrain code-def` from this worktree will hit
|
|
// the wrong/default source. Treat it as a stage failure (ok=false) so the
|
|
// verdict block surfaces ERR and the user knows to retry rather than
|
|
// trusting stale results.
|
|
const attach = spawnGbrain(["sources", "attach", sourceId], {
|
|
timeout: 10_000,
|
|
cwd: root,
|
|
baseEnv: gbrainEnv,
|
|
});
|
|
const pageCount = sourcePageCount(sourceId, gbrainEnv);
|
|
|
|
// Step 4: Deferred hostname-fold cleanup.
|
|
// Only remove the pre-#1468 path-only-hash source NOW that the new source
|
|
// has registered + synced + has pages. Removing before sync would create a
|
|
// data-loss window if sync failed; removing without a page-count check would
|
|
// wipe pages when sync silently no-op'd. This is the codex-review-flagged
|
|
// safety: register → sync → verify → THEN delete.
|
|
let hostnameLegacyRemoved = false;
|
|
if (migration.kind === "pending-cleanup" && pageCount !== null && pageCount > 0) {
|
|
hostnameLegacyRemoved = removeOrphanedSource(migration.oldId, gbrainEnv);
|
|
if (hostnameLegacyRemoved && !args.quiet) {
|
|
console.error(`[sync:code] hostname-fold migration: removed legacy ${migration.oldId} after new source sync verified (page_count=${pageCount})`);
|
|
}
|
|
}
|
|
|
|
const legacyParts: string[] = [];
|
|
if (legacyRemoved) legacyParts.push(`removed legacy ${legacyId}`);
|
|
if (migration.kind === "renamed") legacyParts.push(`renamed ${migration.oldId}→${migration.newId}`);
|
|
if (hostnameLegacyRemoved) legacyParts.push(`removed pre-hostname-fold ${migration.kind === "pending-cleanup" ? migration.oldId : ""}`);
|
|
const legacyNote = legacyParts.length > 0 ? `, ${legacyParts.join(", ")}` : "";
|
|
const baseSummary = `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"}${legacyNote})`;
|
|
|
|
if (attach.status !== 0) {
|
|
const reason = (attach.stderr || attach.stdout || "").trim().split("\n").pop() || `exit ${attach.status}`;
|
|
return {
|
|
name: "code",
|
|
ran: true,
|
|
ok: false,
|
|
duration_ms: Date.now() - t0,
|
|
summary: `${baseSummary}; attach FAILED (${reason}) — code-def queries from this worktree will hit the default source until /sync-gbrain succeeds`,
|
|
detail: {
|
|
source_id: sourceId,
|
|
source_path: root,
|
|
page_count: pageCount,
|
|
last_imported: new Date().toISOString(),
|
|
status: "failed",
|
|
},
|
|
};
|
|
}
|
|
|
|
// v1.29.0.0 changelog promised the per-worktree pin would be ignored in the
|
|
// consuming repo, but the change actually only added .gbrain-source to
|
|
// gstack's own .gitignore. Without the consumer-side entry, the pin gets
|
|
// committed and breaks the per-worktree promise: Conductor sibling worktrees
|
|
// step on each other's pin every time anyone commits (#1384).
|
|
ensureGbrainSourceGitignored(root);
|
|
|
|
return {
|
|
name: "code",
|
|
ran: true,
|
|
ok: true,
|
|
duration_ms: Date.now() - t0,
|
|
summary: baseSummary,
|
|
detail: {
|
|
source_id: sourceId,
|
|
source_path: root,
|
|
page_count: pageCount,
|
|
last_imported: new Date().toISOString(),
|
|
status: "ok",
|
|
},
|
|
};
|
|
}
|
|
|
|
/**
 * Ensure `.gbrain-source` is listed in the consumer repo's `.gitignore`.
 *
 * Idempotent: the entry is appended only when no existing line, once trimmed,
 * equals `.gbrain-source`, so leading/trailing whitespace or CRLF line endings
 * never produce a second copy. A missing `.gitignore` is created.
 *
 * Best-effort: any failure (read-only checkout, unexpected permissions, the
 * path being a directory) is reported through `console.warn` and never
 * thrown, so the rest of the sync continues.
 *
 * @param root - Directory whose `.gitignore` should contain the entry.
 */
export function ensureGbrainSourceGitignored(root: string): void {
  const gitignorePath = join(root, ".gitignore");
  try {
    let existing = "";
    try {
      existing = readFileSync(gitignorePath, "utf-8");
    } catch (err) {
      // Only "file does not exist" means we should create it. Any other read
      // failure (EACCES, EISDIR, EIO, ...) must not be mistaken for an empty
      // file: the write below would then replace the real .gitignore with a
      // single line. Rethrow so the outer handler warns and leaves it alone.
      if ((err as { code?: string } | null)?.code !== "ENOENT") {
        throw err;
      }
    }

    const alreadyIgnored = existing
      .split("\n")
      .some((line) => line.trim() === ".gbrain-source");
    if (alreadyIgnored) {
      return;
    }

    // Keep the new entry on its own line when the file lacks a final newline.
    const sep = existing.length > 0 && !existing.endsWith("\n") ? "\n" : "";
    writeFileSync(gitignorePath, existing + sep + ".gbrain-source\n");
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.warn(
      `[sync:code] could not add .gbrain-source to ${gitignorePath}: ${msg}`,
    );
  }
}
|
|
|
|
/**
 * Memory stage: run `gstack-memory-ingest.ts` as a `bun` child process and
 * fold its exit status and `[memory-ingest]` stderr lines into a StageResult.
 *
 * - dry-run: reports what would run and spawns nothing.
 * - local engine not "ok": the stage is skipped via skipStageForLocalStatus.
 * - resumable checkpoint: the staging dir is handed to the child through
 *   GSTACK_INGEST_RESUME_DIR.
 *
 * @param args - Parsed CLI arguments; `mode` and `quiet` are read here.
 * @returns The stage verdict. `ok` is true only when the child exits 0.
 */
function runMemoryIngest(args: CliArgs): StageResult {
  const t0 = Date.now();

  if (args.mode === "dry-run") {
    return { name: "memory", ran: false, ok: true, duration_ms: 0, summary: "would: gstack-memory-ingest --probe" };
  }

  // Split-engine pre-flight (per plan D12). gstack-memory-ingest shells out
  // to `gbrain import` which targets the LOCAL engine. When that engine is
  // not ok, SKIP cleanly so brain-sync (the only stage that doesn't depend
  // on local engine) still runs.
  const localStatus = localEngineStatus({ noCache: false });
  if (localStatus !== "ok") {
    return skipStageForLocalStatus("memory", localStatus, t0);
  }

  // Resume detection (#1611 / plan D1 + C1). If a previous run hit the
  // timeout and gbrain left ~/.gbrain/import-checkpoint.json plus its staging
  // dir on disk, signal the grandchild via env so it skips the prepare phase
  // and lets `gbrain import` resume from processedIndex+1 against the same
  // staging dir. If the staging dir is gone (disk pressure cleanup, OS
  // reboot, user manual cleanup), warn and fall through to a fresh restage.
  const resume = decideResume();
  const childEnv = buildGbrainEnv({ announce: false });
  if (resume.kind === "resume") {
    console.error(
      `[sync:memory] resuming from gbrain checkpoint (${resume.processedIndex}/${resume.totalFiles} files staged at ${resume.stagingDir})`,
    );
    childEnv.GSTACK_INGEST_RESUME_DIR = resume.stagingDir;
  } else if (resume.kind === "stale-staging-missing") {
    console.error(
      `[sync:memory] previous checkpoint stale (staging dir ${resume.stagingDir} gone), restaging from scratch`,
    );
  }

  const ingestPath = join(import.meta.dir, "gstack-memory-ingest.ts");
  const ingestArgs = ["run", ingestPath];
  ingestArgs.push(args.mode === "full" ? "--bulk" : "--incremental");
  if (args.quiet) ingestArgs.push("--quiet");

  // Thread the seeded env into the bun grandchild (codex review #7 — the
  // .env.local footgun affects gstack-memory-ingest.ts too, not just the
  // direct gbrain spawns in this file). The grandchild calls gbrain import
  // internally and must see the DATABASE_URL from gbrain's own config.
  const memoryTimeoutMs = resolveStageTimeoutMs(
    process.env.GSTACK_SYNC_MEMORY_TIMEOUT_MS,
    "GSTACK_SYNC_MEMORY_TIMEOUT_MS",
  );
  const result = spawnSync("bun", ingestArgs, {
    encoding: "utf-8",
    timeout: memoryTimeoutMs,
    env: childEnv,
  });
  const ok = result.status === 0;

  // D6: parse [memory-ingest] lines from the child's stderr. ERR-prefixed
  // lines indicate a system-level failure (gbrain crashed or CLI missing)
  // and the child exits non-zero. Per-file failures are summarized in the
  // last non-ERR [memory-ingest] line but do NOT make the verdict ERR.
  const memLines = (result.stderr || "")
    .split("\n")
    .filter((l) => l.includes("[memory-ingest]"));
  const errLine = memLines.find((l) => l.includes("[memory-ingest] ERR"));
  const lastMemLine = memLines.slice(-1)[0];
  // When the child produced no [memory-ingest] line at all, do not claim the
  // pass completed if it actually failed.
  const rawSummary = errLine || lastMemLine || (ok ? "ingest pass complete" : "ingest failed");
  // Strip the "[memory-ingest] " prefix and any leading "ERR: " for cleaner
  // verdict output. The orchestrator's own formatStage will prefix with OK/ERR.
  const summary = rawSummary
    .replace(/^.*\[memory-ingest\]\s*/, "")
    .replace(/^ERR:\s*/, "");

  // Explain WHY the stage failed. spawnSync reports a child that could not be
  // started (e.g. `bun` missing from PATH) or was torn down by the runtime
  // through `result.error` with a null status; that is neither a signal nor a
  // timeout and should not be labelled as one. A timeout is also surfaced via
  // `result.error` (ETIMEDOUT), so it keeps the signal/timeout wording.
  let failureNote = "";
  if (!ok) {
    const spawnErrorCode = (result.error as { code?: string } | undefined)?.code;
    if (result.error && spawnErrorCode !== "ETIMEDOUT") {
      failureNote = ` (spawn error: ${result.error.message})`;
    } else if (result.status === null) {
      failureNote = " (killed by signal / timeout)";
    } else {
      failureNote = ` (exit ${result.status})`;
    }
  }

  return {
    name: "memory",
    ran: true,
    ok,
    duration_ms: Date.now() - t0,
    summary: `${summary}${failureNote}`,
  };
}
|
|
|
|
/**
 * Brain-sync stage: run the `gstack-brain-sync` executable that sits next to
 * this script, first with `--discover-new`, then with `--once`.
 *
 * - dry-run: reports what would run and spawns nothing.
 * - executable not present: reported as skipped (ran=false, ok=true).
 *
 * @param args - Parsed CLI arguments; `mode` and `quiet` are read here.
 * @returns The stage verdict, decided solely by the `--once` invocation.
 */
function runBrainSyncPush(args: CliArgs): StageResult {
  const t0 = Date.now();

  if (args.mode === "dry-run") {
    return { name: "brain-sync", ran: false, ok: true, duration_ms: 0, summary: "would: gstack-brain-sync --discover-new --once" };
  }

  const brainSyncPath = join(import.meta.dir, "gstack-brain-sync");
  if (!existsSync(brainSyncPath)) {
    return { name: "brain-sync", ran: false, ok: true, duration_ms: 0, summary: "skipped (gstack-brain-sync not installed)" };
  }

  // Both invocations share the same stdio policy and 60s budget.
  const runBrainSync = (flag: string) =>
    spawnSync(brainSyncPath, [flag], {
      stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
      timeout: 60 * 1000,
    });

  // The discovery pass result is not inspected; the verdict below comes from
  // the --once pass only.
  runBrainSync("--discover-new");
  const result = runBrainSync("--once");

  const ok = result.status === 0;
  let summary: string;
  if (ok) {
    summary = "curated artifacts pushed";
  } else {
    // A null status means the child never produced an exit code. Distinguish
    // "could not be started" from "killed by signal / timeout" instead of
    // printing "exited null". A timeout is reported via result.error with
    // ETIMEDOUT, so it keeps the signal/timeout wording.
    const spawnErrorCode = (result.error as { code?: string } | undefined)?.code;
    if (result.error && spawnErrorCode !== "ETIMEDOUT") {
      summary = `gstack-brain-sync failed to run: ${result.error.message}`;
    } else if (result.status === null) {
      summary = "gstack-brain-sync killed by signal / timeout";
    } else {
      summary = `gstack-brain-sync exited ${result.status}`;
    }
  }

  return {
    name: "brain-sync",
    ran: true,
    ok,
    duration_ms: Date.now() - t0,
    summary,
  };
}
|
|
|
|
// ── State file ─────────────────────────────────────────────────────────────
|
|
|
|
/** Shape of the JSON document persisted at STATE_PATH between sync runs. */
interface SyncState {
  /** Schema marker; loadSyncState discards any file whose value is not 1. */
  schema_version: 1;
  /** Name of the tool that wrote the file ("gstack-gbrain-sync" for defaults created here). */
  last_writer: string;
  /** ISO-8601 timestamp of the most recent non-dry-run sync. */
  last_sync?: string;
  /** ISO-8601 timestamp of the most recent sync run with mode "full". */
  last_full_sync?: string;
  /** Per-stage results recorded by the most recent non-dry-run sync. */
  last_stages?: StageResult[];
}
|
|
|
|
/**
 * Read the persisted sync state from STATE_PATH.
 *
 * Falls back to a fresh default state when the file is absent, cannot be
 * read, is not valid JSON, or carries a schema_version other than 1.
 * Never throws.
 *
 * @returns The stored state, or a new default state.
 */
function loadSyncState(): SyncState {
  const freshState = (): SyncState => ({ schema_version: 1, last_writer: "gstack-gbrain-sync" });

  if (!existsSync(STATE_PATH)) {
    return freshState();
  }

  let stored: SyncState;
  try {
    stored = JSON.parse(readFileSync(STATE_PATH, "utf-8")) as SyncState;
    // Anything that is not a version-1 document is treated as unusable.
    if (stored.schema_version !== 1) {
      return freshState();
    }
  } catch {
    // Unreadable file, malformed JSON, or a non-object payload.
    return freshState();
  }
  return stored;
}
|
|
|
|
/**
 * Persist the sync state atomically (per /plan-eng-review D1): the JSON is
 * written to a pid-suffixed temp file beside STATE_PATH and then renamed over
 * it. rename(2) is atomic on POSIX filesystems.
 *
 * Every failure is swallowed: losing the state file is non-fatal to a sync.
 * Note that a failed rename leaves the temp file behind.
 *
 * @param state - State to serialize as pretty-printed JSON.
 */
function saveSyncState(state: SyncState): void {
  const tmpPath = `${STATE_PATH}.tmp.${process.pid}`;
  try {
    mkdirSync(dirname(STATE_PATH), { recursive: true });
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(tmpPath, serialized, "utf-8");
    renameSync(tmpPath, STATE_PATH);
  } catch {
    // non-fatal
  }
}
|
|
|
|
// ── Output ─────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Render one stage result as a single verdict line.
 *
 * The status column is SKIP when the stage did not run, otherwise OK or ERR
 * according to `ok`. The duration suffix is shown only for durations above
 * zero, in seconds with one decimal.
 *
 * @param s - Stage result to render.
 * @returns The formatted line (no trailing newline).
 */
function formatStage(s: StageResult): string {
  let verdict: string;
  if (!s.ran) {
    verdict = "SKIP";
  } else if (s.ok) {
    verdict = "OK";
  } else {
    verdict = "ERR";
  }

  const seconds = (s.duration_ms / 1000).toFixed(1);
  const elapsed = s.duration_ms > 0 ? ` (${seconds}s)` : "";

  // Leading empty element yields the single leading space of the line.
  return ["", verdict.padEnd(5), s.name.padEnd(12), s.summary + elapsed].join(" ");
}
|
|
|
|
// ── Main ───────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * CLI entry point: parse arguments, take the sync lock, run the enabled
 * stages in order (code, memory, brain-sync), persist the state file, print
 * the verdict block, and exit.
 *
 * Exit codes: 0 when no stage that ran failed, 1 when at least one did,
 * 2 when another sync holds the lock, 130 on SIGINT, 143 on SIGTERM.
 * Dry-run takes no lock and writes no state.
 */
async function main(): Promise<void> {
  const args = parseArgs();

  if (!args.quiet) {
    const engine = detectEngineTier();
    console.error(`[gbrain-sync] mode=${args.mode} engine=${engine.engine}`);
  }

  // Acquire lock (skip on dry-run since dry-run never writes).
  const needsLock = args.mode !== "dry-run";
  let haveLock = false;
  if (needsLock) {
    haveLock = acquireLock();
    if (!haveLock) {
      console.error(
        `[gbrain-sync] another /sync-gbrain is running (lock at ${LOCK_PATH}). ` +
        `If that process died, the lock auto-clears after 5 min, or remove it manually.`
      );
      process.exit(2);
    }
  }

  // Release the lock only if this process actually took it.
  const cleanup = () => {
    if (haveLock) releaseLock();
  };
  process.on("SIGINT", () => { cleanup(); process.exit(130); });
  process.on("SIGTERM", () => { cleanup(); process.exit(143); });

  let exitCode = 0;
  try {
    const state = loadSyncState();
    const stages: StageResult[] = [];

    if (!args.noCode) {
      stages.push(await withErrorContext("sync:code", () => runCodeImport(args), "gstack-gbrain-sync"));
    }
    if (!args.noMemory) {
      stages.push(await withErrorContext("sync:memory", () => runMemoryIngest(args), "gstack-gbrain-sync"));
    }
    if (!args.noBrainSync) {
      stages.push(await withErrorContext("sync:brain-sync", () => runBrainSyncPush(args), "gstack-gbrain-sync"));
    }

    if (args.mode !== "dry-run") {
      state.last_sync = new Date().toISOString();
      if (args.mode === "full") state.last_full_sync = state.last_sync;
      state.last_stages = stages;
      saveSyncState(state);
    }

    // Dry-run always prints: its whole purpose is the "would:" report.
    if (!args.quiet || args.mode === "dry-run") {
      console.log(`\ngstack-gbrain-sync (${args.mode}):`);
      for (const s of stages) console.log(formatStage(s));
      // Tally with the same rule formatStage uses for its status column: a
      // stage that did not run is SKIP even though it carries ok=true, so it
      // must not be counted as "ok".
      const okCount = stages.filter((s) => s.ran && s.ok).length;
      const errCount = stages.filter((s) => s.ran && !s.ok).length;
      const skippedCount = stages.length - okCount - errCount;
      console.log(`\n ${okCount} ok, ${errCount} error, ${skippedCount} skipped`);
    }

    const anyError = stages.some((s) => s.ran && !s.ok);
    exitCode = anyError ? 1 : 0;
  } finally {
    cleanup();
  }

  process.exit(exitCode);
}
|
|
|
|
// Run the CLI only when this file is the program entry point, not when it is
// imported (e.g. by tests that use the exported helpers).
if (import.meta.main) {
  // Last-resort handler for a rejection escaping main(): report, drop the
  // lock, exit 1.
  // NOTE(review): releaseLock() runs here even if main() rejected before it
  // acquired the lock — confirm releaseLock only removes a lock owned by this
  // process.
  const onFatal = (err: unknown): never => {
    const detail = err instanceof Error ? err.message : String(err);
    console.error(`gstack-gbrain-sync fatal: ${detail}`);
    releaseLock();
    process.exit(1);
  };

  main().catch(onFatal);
}
|