mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-18 15:50:11 +02:00
fix(gbrain-sync): fold hostname into code-source id hash + migration (#1414)
Cherry-picked from #1468 by 0xDevNinja and extended with the hostname-fold migration that codex review surfaced.

Pre-fix, `deriveCodeSourceId` hashed the absolute repo path alone, so two machines with identical home-dir layouts (chezmoi-managed dotfiles, ansible-provisioned VMs) derived the same id and clobbered each other's `local_path` in a federated brain. The result was last-writer-wins, with cryptic "Not a git repository" errors on the loser.

The hash key is now `${hostname}::${path}`. Conductor worktrees on a single host stay distinct (path entropy is unchanged within a host); cross-machine federations stop colliding.

Migration (D1=B + codex refinements): every existing user has a pre-#1468 path-only-hash source id in their brain that no longer matches what `deriveCodeSourceId` produces. Without migration, the next sync registers a fresh source and orphans the old one. This commit adds:
- `derivePathOnlyHashLegacyId` — separate helper for the pre-#1468 form. Distinct from `deriveLegacyCodeSourceId` (pre-pathhash v1.x form); both probes run.
- `planHostnameFoldMigration` — feature-checks `gbrain sources rename <old> <new>` (exact argument shape, not just `--help`), gates on path-drift (skips migration if the old source's `local_path` differs from the current repo root), and falls back to register-new + sync-OK + remove-old when rename is unsupported. As of gbrain 0.35.0.0 the rename subcommand does not exist, so users go through the cleanup path; the rename path stays dormant until gbrain ships it.
- `removeOrphanedSource` — called only AFTER the new-source sync verifies page_count > 0. Closes the data-loss window codex flagged, where "register new, remove old before sync" can wipe pages if sync fails.
- `sourceLocalPath` — looks up a source's `local_path` from `gbrain sources list --json` for the drift gate.
- Helpers accept an optional `env` parameter so tests can inject a gbrain shim via PATH without process-wide PATH mutation (Bun's spawnSync doesn't pick up runtime PATH changes). Pre-positions for commit 4's centralized gbrain-exec helper.
- `if (import.meta.main)` guard around `main()` so the helpers can be imported for in-process unit tests.

Tests cover: pure derivation, ids-match degenerate case, no-legacy short-circuit, path-drift skip path, rename path with shim, cleanup fallback when rename is unsupported, cleanup fallback when the rename call itself fails, and source-lookup happy/missing/error paths.

`GSTACK_HOSTNAME` env var is a test-only knob; production uses `os.hostname()`.

Fixes #1414

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -7,12 +7,19 @@
|
||||
* preview + state file lifecycle + flag composition.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync } from "fs";
|
||||
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
|
||||
import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync, chmodSync } from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { spawnSync } from "child_process";
|
||||
|
||||
import {
|
||||
derivePathOnlyHashLegacyId,
|
||||
planHostnameFoldMigration,
|
||||
sourceLocalPath,
|
||||
_resetGbrainSupportsRenameCache,
|
||||
} from "../bin/gstack-gbrain-sync";
|
||||
|
||||
const SCRIPT = join(import.meta.dir, "..", "bin", "gstack-gbrain-sync.ts");
|
||||
|
||||
function makeTestHome(): string {
|
||||
@@ -215,6 +222,62 @@ describe("gstack-gbrain-sync CLI", () => {
|
||||
rmSync(home, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("derives distinct source ids for the same absolute path on different hosts", () => {
  // Issue #1414: two machines with identical home-dir layouts (chezmoi-managed
  // dotfiles, ansible-provisioned VMs) collide on the same source id when
  // federated against a shared gbrain DB, because the pre-fix `pathHash` was
  // sha1(absolute path) only — host-agnostic. Folding hostname into the hash
  // key keeps them distinct. `GSTACK_HOSTNAME` env var is the test-only knob;
  // production uses `os.hostname()`.

  // Every temp directory created below is recorded here so the `finally`
  // block removes it even when an assertion fails part-way through.
  const scratch: string[] = [];
  try {
    const home = makeTestHome();
    scratch.push(home);
    const gstackHome = join(home, ".gstack");
    mkdirSync(gstackHome, { recursive: true });

    const repo = mkdtempSync(join(tmpdir(), "gstack-host-collide-"));
    scratch.push(repo);
    spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo });
    spawnSync("git", ["remote", "add", "origin", "https://github.com/example/multihost.git"], { cwd: repo });

    // Dry-run still gates the code stage on `command -v gbrain`. Drop a no-op
    // shim on PATH so the stage runs (we only assert the preview line, never
    // invoke gbrain itself).
    const bindir = mkdtempSync(join(tmpdir(), "gstack-host-collide-bin-"));
    scratch.push(bindir);
    const shim = join(bindir, "gbrain");
    writeFileSync(shim, "#!/bin/sh\nexit 0\n");
    chmodSync(shim, 0o755);
    const PATH = `${bindir}:${process.env.PATH || ""}`;

    // Run the CLI in dry-run mode while pretending to be `host`.
    const runAs = (host: string) =>
      spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], {
        encoding: "utf-8",
        timeout: 60000,
        cwd: repo,
        env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome, GSTACK_HOSTNAME: host, PATH },
      });

    const a = runAs("machine-a");
    const b = runAs("machine-b");
    expect(a.status).toBe(0);
    expect(b.status).toBe(0);
    const idA = (a.stdout || "").match(/gbrain sources add (\S+)/)?.[1];
    const idB = (b.stdout || "").match(/gbrain sources add (\S+)/)?.[1];
    expect(idA).toBeTruthy();
    expect(idB).toBeTruthy();
    expect(idA).not.toBe(idB);
    // Both still gbrain-valid.
    const VALID_ID = /^[a-z0-9](?:[a-z0-9-]{0,30}[a-z0-9])?$/;
    expect(idA!).toMatch(VALID_ID);
    expect(idB!).toMatch(VALID_ID);

    // Same host + same path stays stable across invocations.
    const a2 = runAs("machine-a");
    expect(a2.status).toBe(0);
    const idA2 = (a2.stdout || "").match(/gbrain sources add (\S+)/)?.[1];
    expect(idA2).toBe(idA);
  } finally {
    for (const dir of scratch) {
      rmSync(dir, { recursive: true, force: true });
    }
  }
});
|
||||
|
||||
it("dry-run does NOT acquire the lock file (lock is for write paths only)", () => {
|
||||
const home = makeTestHome();
|
||||
const gstackHome = join(home, ".gstack");
|
||||
@@ -476,3 +539,227 @@ describe("gstack-gbrain-sync CLI", () => {
|
||||
rmSync(home, { recursive: true, force: true });
|
||||
});
|
||||
});
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Hostname-fold migration (v1.40.0.0)
|
||||
//
|
||||
// Tests for `derivePathOnlyHashLegacyId` and `planHostnameFoldMigration`,
|
||||
// which together let an existing user's pre-#1468 path-only-hash source
|
||||
// transition to the new hostname-folded id without orphaning pages or
|
||||
// creating a data-loss window. See bin/gstack-gbrain-sync.ts and the
|
||||
// gbrain-sync-hardening plan.
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Write an executable `gbrain` shim into `bindir` that answers specific
 * argument strings with canned output. Lets us run helpers in-process
 * (which spawn `gbrain` from PATH) without a real gbrain CLI.
 *
 * The shim joins its arguments with spaces and compares the result, as a
 * literal string, against each key of `responses`. An unmatched invocation
 * prints `shim: no match for [...]` on stderr and exits 1.
 *
 * @param bindir - Existing directory to write the shim into; the caller is
 *   responsible for putting it on PATH.
 * @param responses - Map from the exact space-joined argument string to the
 *   stdout, stderr and exit code (default 0) the shim should produce.
 * @returns Path of the shim file (`join(bindir, "gbrain")`) — not a PATH value.
 */
function makeShim(bindir: string, responses: Record<string, { stdout?: string; stderr?: string; exit?: number }>): string {
  const shim = join(bindir, "gbrain");

  // Wrap arbitrary text as one single-quoted sh word: close the quote, emit an
  // escaped quote, reopen. Nothing inside single quotes is expanded by sh.
  const shQuote = (text: string): string => `'${text.replace(/'/g, "'\\''")}'`;

  const cases = Object.entries(responses)
    .map(([key, r]) => {
      const exit = r.exit ?? 0;
      // The pattern is quoted so a key containing spaces stays one pattern, and
      // SINGLE-quoted so `$`, backticks, `"` and `\` in a key match literally
      // instead of being expanded by the shell.
      return `  ${shQuote(key)}) printf '%s' ${shQuote(r.stdout ?? "")}; printf '%s' ${shQuote(r.stderr ?? "")} >&2; exit ${exit} ;;`;
    })
    .join("\n");

  // Match on the full argument string, joined with literal spaces.
  const script = `#!/bin/sh\nARGS="$*"\ncase "$ARGS" in\n${cases}\n  *) echo "shim: no match for [$ARGS]" >&2; exit 1 ;;\nesac\n`;
  writeFileSync(shim, script);
  chmodSync(shim, 0o755);
  return shim;
}
|
||||
|
||||
describe("derivePathOnlyHashLegacyId", () => {
  // Put GSTACK_HOSTNAME back exactly as it was before a test mutated it, so a
  // value set by the surrounding environment is restored rather than deleted.
  const restoreHostname = (previous: string | undefined): void => {
    if (previous === undefined) {
      delete process.env.GSTACK_HOSTNAME;
    } else {
      process.env.GSTACK_HOSTNAME = previous;
    }
  };

  it("returns the pre-#1468 form (path-only sha1, no hostname)", () => {
    // Pure function — no subprocess. The same repoPath must yield the same
    // legacy id regardless of $GSTACK_HOSTNAME, because the pre-#1468 hash
    // didn't include hostname.
    const repo = mkdtempSync(join(tmpdir(), "gstack-legacy-id-"));
    spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo });
    spawnSync("git", ["remote", "add", "origin", "https://github.com/example/legacy-test.git"], { cwd: repo });

    const cwd = process.cwd();
    const savedHostname = process.env.GSTACK_HOSTNAME;
    try {
      process.chdir(repo);
      const a = derivePathOnlyHashLegacyId(repo);
      process.env.GSTACK_HOSTNAME = "machine-a";
      const b = derivePathOnlyHashLegacyId(repo);
      process.env.GSTACK_HOSTNAME = "machine-b";
      const c = derivePathOnlyHashLegacyId(repo);
      expect(a).toBe(b);
      expect(b).toBe(c);
      expect(a.startsWith("gstack-code-")).toBe(true);
      expect(a.length).toBeLessThanOrEqual(32);
    } finally {
      restoreHostname(savedHostname);
      process.chdir(cwd);
      rmSync(repo, { recursive: true, force: true });
    }
  });

  it("produces a different id than the new hostname-folded form", () => {
    // The whole point of the migration: the path-only-hash legacy id and the
    // host-fold id must differ for any non-empty hostname, so the migration
    // can detect + clean up the orphan.
    const repo = mkdtempSync(join(tmpdir(), "gstack-legacy-id-distinct-"));
    spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo });
    spawnSync("git", ["remote", "add", "origin", "https://github.com/example/distinct.git"], { cwd: repo });

    const cwd = process.cwd();
    const savedHostname = process.env.GSTACK_HOSTNAME;
    // Temp dirs created inside the try; removed in `finally` so a failing
    // assertion does not leave them behind.
    const scratch: string[] = [];
    try {
      process.chdir(repo);
      process.env.GSTACK_HOSTNAME = "machine-x";
      const legacy = derivePathOnlyHashLegacyId(repo);
      // Drive the new id through the CLI so we use the same code path users hit.
      const home = makeTestHome();
      scratch.push(home);
      const gstackHome = join(home, ".gstack");
      mkdirSync(gstackHome, { recursive: true });
      const bindir = mkdtempSync(join(tmpdir(), "gstack-legacy-id-distinct-bin-"));
      scratch.push(bindir);
      makeShim(bindir, { "--help": { stdout: "gbrain\n" } });
      const r = spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], {
        encoding: "utf-8",
        timeout: 60000,
        cwd: repo,
        env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome, GSTACK_HOSTNAME: "machine-x", PATH: `${bindir}:${process.env.PATH || ""}` },
      });
      const newId = (r.stdout || "").match(/gbrain sources add (\S+)/)?.[1];
      expect(newId).toBeTruthy();
      expect(newId).not.toBe(legacy);
    } finally {
      restoreHostname(savedHostname);
      process.chdir(cwd);
      for (const dir of scratch) {
        rmSync(dir, { recursive: true, force: true });
      }
      rmSync(repo, { recursive: true, force: true });
    }
  });
});
|
||||
|
||||
/**
 * Produce a copy of the current process environment whose PATH starts with
 * `bindir`. Per the note carried with this helper, Bun's spawnSync does NOT
 * observe runtime mutations of `process.env.PATH`, so the override has to be
 * handed explicitly to every spawn that should see it.
 *
 * @param bindir - Directory to place first on PATH.
 * @returns A new env object; `process.env` itself is left unmodified.
 */
function envWithBindir(bindir: string): NodeJS.ProcessEnv {
  const inheritedPath = process.env.PATH || "";
  const env: NodeJS.ProcessEnv = Object.assign({}, process.env);
  env.PATH = `${bindir}:${inheritedPath}`;
  return env;
}
|
||||
|
||||
describe("planHostnameFoldMigration", () => {
  // Fresh shim directory per test; created in beforeEach, removed in afterEach.
  let bindir: string;

  // Canned `sources rename --help` reply for a gbrain that supports rename.
  const RENAME_USAGE = { stdout: "Usage: gbrain sources rename <old> <new>\n" };

  // Canned `sources list --json` reply holding one legacy source at `localPath`.
  const legacyListedAt = (localPath: string) => ({
    stdout: JSON.stringify([{ id: "legacy-id", local_path: localPath }]),
  });

  // Plan a legacy-id → new-id migration for `repoRoot` against the current shim.
  const planFor = (repoRoot: string) =>
    planHostnameFoldMigration(repoRoot, "new-id", "legacy-id", envWithBindir(bindir));

  beforeEach(() => {
    bindir = mkdtempSync(join(tmpdir(), "gstack-mig-plan-bin-"));
    _resetGbrainSupportsRenameCache();
  });

  afterEach(() => {
    rmSync(bindir, { recursive: true, force: true });
    _resetGbrainSupportsRenameCache();
  });

  it("returns ids-match when legacy == new (degenerate case)", () => {
    const sameId = "gstack-code-same-abc12345";
    const result = planHostnameFoldMigration("/repo/path", sameId, sameId);
    expect(result).toEqual({ kind: "none", reason: "ids-match" });
  });

  it("returns no-legacy-source when sources list does not include the legacy id", () => {
    makeShim(bindir, { "sources list --json": { stdout: "[]" } });
    expect(planFor("/repo/path")).toEqual({ kind: "none", reason: "no-legacy-source" });
  });

  it("returns skipped-path-drift when old source local_path differs from current repo root", () => {
    makeShim(bindir, { "sources list --json": legacyListedAt("/some/other/repo") });
    const result = planFor("/repo/here");
    expect(result.kind).toBe("skipped-path-drift");
    // The detail fields exist only on the drift variant; narrow before reading.
    if (result.kind !== "skipped-path-drift") {
      return;
    }
    expect(result.oldId).toBe("legacy-id");
    expect(result.oldPath).toBe("/some/other/repo");
    expect(result.currentPath).toBe("/repo/here");
  });

  it("returns renamed when rename is supported and exits 0", () => {
    makeShim(bindir, {
      "sources list --json": legacyListedAt("/repo/here"),
      "sources rename --help": RENAME_USAGE,
      "sources rename legacy-id new-id": { exit: 0 },
    });
    expect(planFor("/repo/here")).toEqual({ kind: "renamed", oldId: "legacy-id", newId: "new-id" });
  });

  it("returns pending-cleanup when rename is unsupported (current gbrain 0.35.0.0)", () => {
    // No `sources rename --help` match → shim falls into the catch-all and exits 1.
    makeShim(bindir, { "sources list --json": legacyListedAt("/repo/here") });
    expect(planFor("/repo/here")).toEqual({ kind: "pending-cleanup", oldId: "legacy-id" });
  });

  it("returns pending-cleanup when rename is supported but the rename call itself fails", () => {
    makeShim(bindir, {
      "sources list --json": legacyListedAt("/repo/here"),
      "sources rename --help": RENAME_USAGE,
      "sources rename legacy-id new-id": { exit: 1, stderr: "rename failed: db locked" },
    });
    expect(planFor("/repo/here")).toEqual({ kind: "pending-cleanup", oldId: "legacy-id" });
  });
});
|
||||
|
||||
describe("sourceLocalPath", () => {
  // Fresh shim directory per test; created in beforeEach, removed in afterEach.
  let bindir: string;

  // Install a shim whose only recognised call is `sources list --json`, then
  // look up `id` through it.
  const lookup = (id: string, listing: { stdout?: string; stderr?: string; exit?: number }) => {
    makeShim(bindir, { "sources list --json": listing });
    return sourceLocalPath(id, envWithBindir(bindir));
  };

  beforeEach(() => {
    bindir = mkdtempSync(join(tmpdir(), "gstack-source-lp-bin-"));
  });

  afterEach(() => {
    rmSync(bindir, { recursive: true, force: true });
  });

  it("returns local_path when the source exists", () => {
    const sources = [
      { id: "other-source", local_path: "/x" },
      { id: "target-id", local_path: "/repo/match" },
    ];
    expect(lookup("target-id", { stdout: JSON.stringify(sources) })).toBe("/repo/match");
  });

  it("returns null when the source is missing", () => {
    expect(lookup("missing-id", { stdout: "[]" })).toBeNull();
  });

  it("returns null when gbrain exits non-zero or returns malformed JSON", () => {
    // Only the non-zero-exit half of the title is exercised here.
    expect(lookup("any-id", { exit: 2, stderr: "db unreachable" })).toBeNull();
  });
});
|
||||
|
||||
Reference in New Issue
Block a user