mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-08 19:13:56 +02:00
* fix(sync): fail-closed staging-dir ownership guard — prevent rm -rf of repo (#1802) Adopts community fix #1827 by @diazMelgarejo (cyre). New lib/staging-guard.ts exports checkOwnedStagingDir(), the single fail-closed predicate for 'safe to recurse-delete or resume into', wired at cleanupStagingDir() (the deletion chokepoint), decideResume(), the ingest entry point, and makeStagingDir() (mints the .gstack-staging marker). Fixes #1802. Co-Authored-By: cyre <diazMelgarejo@users.noreply.github.com> Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(sync): don't route the remote-http persistent transcript dir through cleanup (#1802) The ingest finally ran cleanupStagingDir() unconditionally, but in remote-http mode stagingDir is the PERSISTENT transcript dir (~/.gstack/transcripts/) that gstack-brain-sync push must consume. The remote-http branch documents the intent to skip cleanup, but a finally runs on its return. Gate the call on !remoteHttpMode so the ownership guard only ever sees .staging-ingest-* dirs. Pre-gate this dir was deleted outright (broken artifacts handoff); post-#1827 it produced a false 'prevent data loss' warning every sync. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(sync): preserve staging dir on internal import timeout (#1802 C3) The import-timeout branch printed 'checkpoint preserved' but the finally then deleted the staging dir: the SIGTERM forwarder's preserve branch only runs when the PARENT is signalled, and an internal runGbrainImport timeout kills just the child and returns normally. So #1611 resume-after-timeout never actually worked. Mirror the forwarder in the timeout branch: set preserveStaging only when gbrain checkpointed against this dir (finally then skips cleanup); otherwise clean up and tell the user it restages instead of falsely promising a resume. 
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(sync): resume must not mark failed files as ingested (#1802 C4) On resume, stagedPathToSource was rebuilt as an empty Map, so readNewFailures() could not map gbrain's per-file failures back to source paths. Every failure fell through to state recording — failed files were silently marked ingested and never retried. Reconstruct the map from the prepared pages via a shared stagedRelPath() helper (single source of truth with writeStaged, so the keys can never drift). Exports stagedRelPath + readNewFailures for a behavioral test proving the reconstructed map recovers the failure the empty map dropped. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * harden(sync): close staging-guard TOCTOU + fail hard on marker write (#1802 C5) checkOwnedStagingDir() now returns the realpath-resolved canonicalPath on a pass, and cleanupStagingDir() rmSync's that instead of the raw input — closing the gap where the input is a symlink swapped between the ownership check and the delete. makeStagingDir() tears down the partial dir and rethrows if the marker write fails, so a marker-less dir (which the guard would refuse forever) can never leak. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * chore: v1.56.1.0 — staging-dir ownership guard + resume-correctness fixes (#1802) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * ci: grant the eval report job issues:write so PR comment upsert stops 401ing Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: cyre <diazMelgarejo@users.noreply.github.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -35,6 +35,8 @@ import {
|
||||
readGbrainCheckpoint,
|
||||
decideResume,
|
||||
} from "../bin/gstack-gbrain-sync";
|
||||
import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard";
|
||||
import { stagedRelPath, readNewFailures } from "../bin/gstack-memory-ingest";
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, "..");
|
||||
const DEFAULT_MS = 35 * 60 * 1000;
|
||||
@@ -132,9 +134,11 @@ describe("#1611 decideResume — checkpoint + staging detection", () => {
|
||||
expect(decideResume().kind).toBe("no-checkpoint");
|
||||
});
|
||||
|
||||
test("checkpoint + staging dir exists → resume verdict", () => {
|
||||
test("checkpoint + minted staging dir exists → resume verdict", () => {
|
||||
fs.mkdirSync(stagingDir, { recursive: true });
|
||||
fs.writeFileSync(stagingDir + "/page1.md", "content", "utf-8");
|
||||
// #1802: a real staging dir carries the ownership marker minted by makeStagingDir.
|
||||
fs.writeFileSync(path.join(stagingDir, STAGING_MARKER), "99\n99\n", "utf-8");
|
||||
fs.writeFileSync(cpPath, JSON.stringify({
|
||||
dir: stagingDir,
|
||||
totalFiles: 1989,
|
||||
@@ -143,7 +147,8 @@ describe("#1611 decideResume — checkpoint + staging detection", () => {
|
||||
timestamp: "2026-05-19T19:30:05.008Z",
|
||||
}), "utf-8");
|
||||
|
||||
const v = decideResume();
|
||||
// gstackHome is injected so the ownership check anchors on the test home.
|
||||
const v = decideResume(tmpHome);
|
||||
expect(v.kind).toBe("resume");
|
||||
if (v.kind === "resume") {
|
||||
expect(v.stagingDir).toBe(stagingDir);
|
||||
@@ -160,13 +165,41 @@ describe("#1611 decideResume — checkpoint + staging detection", () => {
|
||||
processedIndex: 1000,
|
||||
}), "utf-8");
|
||||
|
||||
const v = decideResume();
|
||||
const v = decideResume(tmpHome);
|
||||
expect(v.kind).toBe("stale-staging-missing");
|
||||
if (v.kind === "stale-staging-missing") {
|
||||
expect(v.stagingDir).toBe(stagingDir);
|
||||
}
|
||||
});
|
||||
|
||||
// ── #1802 regression: poisoned checkpoint must never be adopted/deleted ────
|
||||
|
||||
test("#1802 checkpoint.dir = repo root with .git → stale-staging-missing (not resumed)", () => {
|
||||
// Reproduces the exact poison: an interrupted import wrote checkpoint.dir =
|
||||
// the repo working tree. It exists and is a directory, so the pre-#1802
|
||||
// code resumed (and cleanup later rm -rf'd it). It must now be refused.
|
||||
const repoRoot = path.join(tmpHome, "my-repo");
|
||||
fs.mkdirSync(path.join(repoRoot, ".git"), { recursive: true });
|
||||
fs.writeFileSync(path.join(repoRoot, "important.py"), "# real work\n", "utf-8");
|
||||
fs.writeFileSync(cpPath, JSON.stringify({ dir: repoRoot, totalFiles: 10, processedIndex: 3 }), "utf-8");
|
||||
|
||||
const v = decideResume(tmpHome);
|
||||
expect(v.kind).toBe("stale-staging-missing");
|
||||
// decideResume never deletes, but prove the repo is untouched by the verdict.
|
||||
expect(fs.existsSync(path.join(repoRoot, "important.py"))).toBe(true);
|
||||
});
|
||||
|
||||
test("#1802 staging-named dir WITHOUT marker → stale-staging-missing (not minted by us)", () => {
|
||||
fs.mkdirSync(stagingDir, { recursive: true }); // .staging-ingest-99-99, but no marker
|
||||
fs.writeFileSync(cpPath, JSON.stringify({ dir: stagingDir, totalFiles: 1, processedIndex: 0 }), "utf-8");
|
||||
expect(decideResume(tmpHome).kind).toBe("stale-staging-missing");
|
||||
});
|
||||
|
||||
test("#1802 checkpoint.dir = '/' → stale-staging-missing", () => {
|
||||
fs.writeFileSync(cpPath, JSON.stringify({ dir: "/", totalFiles: 1, processedIndex: 0 }), "utf-8");
|
||||
expect(decideResume(tmpHome).kind).toBe("stale-staging-missing");
|
||||
});
|
||||
|
||||
test("checkpoint with no dir field → no-checkpoint verdict", () => {
|
||||
fs.writeFileSync(cpPath, JSON.stringify({
|
||||
totalFiles: 1989,
|
||||
@@ -222,6 +255,233 @@ describe("#1611 SIGTERM staging preservation — static invariants", () => {
|
||||
);
|
||||
expect(body).toMatch(/GSTACK_INGEST_RESUME_DIR/);
|
||||
expect(body).toMatch(/resuming from gbrain checkpoint/);
|
||||
expect(body).toMatch(/previous checkpoint stale.*staging dir.*gone.*restaging from scratch/);
|
||||
expect(body).toMatch(/previous checkpoint stale/);
|
||||
expect(body).toMatch(/restaging from scratch/);
|
||||
// #1802: the caller distinguishes "refused as unowned" from "actually gone".
|
||||
expect(body).toMatch(/staging dir not usable/);
|
||||
});
|
||||
});
|
||||
|
||||
// ── #1802 checkOwnedStagingDir — fail-closed ownership matrix ───────────────
// The single predicate guarding both the resume gate (decideResume) and the
// deletion chokepoint (cleanupStagingDir). Every branch is fail-closed: any
// case it cannot prove is owned must return ok:false.
//
// Each negative case below breaks exactly ONE ownership property (marker,
// name, parent, existence, or a .git tripwire) so a regression in a single
// check cannot hide behind another check that happens to refuse the same input.
describe("#1802 checkOwnedStagingDir — ownership matrix", () => {
  let home: string;

  beforeEach(() => {
    home = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802-"));
  });
  afterEach(() => {
    try { fs.rmSync(home, { recursive: true, force: true }); } catch { /* best-effort */ }
  });

  /**
   * Creates a directory carrying the ownership marker, the way a real staging
   * dir would look after being minted.
   *
   * @param name - Directory basename; defaults to a staging-style name.
   * @param parent - Directory to create it under; defaults to the test home.
   * @returns The path of the created directory.
   */
  function mintStaging(name = ".staging-ingest-1-1", parent: string = home): string {
    const d = path.join(parent, name);
    fs.mkdirSync(d, { recursive: true });
    fs.writeFileSync(path.join(d, STAGING_MARKER), "1\n1\n", "utf-8");
    return d;
  }

  test("minted staging dir → ok", () => {
    expect(checkOwnedStagingDir(mintStaging(), home).ok).toBe(true);
  });

  test("#1802 C5: ok verdict carries the realpath-resolved canonicalPath", () => {
    const d = mintStaging();
    const v = checkOwnedStagingDir(d, home);
    expect(v.ok).toBe(true);
    // Callers must delete this (not the raw input) to close the symlink TOCTOU.
    expect(v.canonicalPath).toBe(fs.realpathSync(d));
  });

  test("repo root (direct child, has .git, no marker) → refused", () => {
    const repo = path.join(home, "my-repo");
    fs.mkdirSync(path.join(repo, ".git"), { recursive: true });
    expect(checkOwnedStagingDir(repo, home).ok).toBe(false);
  });

  test("staging-named dir containing .git → refused by tripwire even with marker", () => {
    const d = mintStaging(".staging-ingest-9-9");
    fs.mkdirSync(path.join(d, ".git"), { recursive: true });
    const v = checkOwnedStagingDir(d, home);
    expect(v.ok).toBe(false);
    expect(v.reason).toMatch(/\.git/);
  });

  test("staging-named dir without marker → refused (not minted)", () => {
    const d = path.join(home, ".staging-ingest-2-2");
    fs.mkdirSync(d, { recursive: true });
    expect(checkOwnedStagingDir(d, home).ok).toBe(false);
  });

  test("right name but NOT a direct child of home → refused", () => {
    const nested = path.join(home, "sub", ".staging-ingest-3-3");
    fs.mkdirSync(nested, { recursive: true });
    fs.writeFileSync(path.join(nested, STAGING_MARKER), "x", "utf-8");
    expect(checkOwnedStagingDir(nested, home).ok).toBe(false);
  });

  test("direct child of home but wrong name → refused", () => {
    const d = path.join(home, "notstaging");
    fs.mkdirSync(d, { recursive: true });
    fs.writeFileSync(path.join(d, STAGING_MARKER), "x", "utf-8");
    expect(checkOwnedStagingDir(d, home).ok).toBe(false);
  });

  test("missing path → refused (unresolvable)", () => {
    expect(checkOwnedStagingDir(path.join(home, ".staging-ingest-gone"), home).ok).toBe(false);
  });

  test("'/' and '' → refused", () => {
    expect(checkOwnedStagingDir("/", home).ok).toBe(false);
    expect(checkOwnedStagingDir("", home).ok).toBe(false);
  });

  test("symlink whose target escapes home → refused (realpath resolves first)", () => {
    // The link target is a fully minted, correctly named staging dir that
    // merely lives OUTSIDE home. The link itself is a direct child of home with
    // a staging name, so the marker and name checks are satisfied whichever
    // path they inspect — the only property that can justify a refusal is that
    // the resolved target is not under home.
    const outsideParent = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802-outside-"));
    try {
      const target = mintStaging(".staging-ingest-1-1", outsideParent);
      const link = path.join(home, ".staging-ingest-1-1");
      fs.symlinkSync(target, link);
      expect(checkOwnedStagingDir(link, home).ok).toBe(false);
    } finally {
      // `home` (and the link inside it) is removed by afterEach; the outside
      // tree is this test's own responsibility.
      try { fs.rmSync(outsideParent, { recursive: true, force: true }); } catch { /* best-effort */ }
    }
  });

  test("cleanupStagingDir + decideResume both call the guard (static invariant)", () => {
    // Source-text assertions: they pin the exact call spelling at both call
    // sites, so renaming an argument there requires updating these patterns.
    const ingest = fs.readFileSync(path.join(ROOT, "bin", "gstack-memory-ingest.ts"), "utf-8");
    const sync = fs.readFileSync(path.join(ROOT, "bin", "gstack-gbrain-sync.ts"), "utf-8");
    expect(ingest).toMatch(/checkOwnedStagingDir\(dir, GSTACK_HOME\)/);
    expect(ingest).toMatch(/staging cleanup REFUSED/);
    expect(sync).toMatch(/checkOwnedStagingDir\(stagingDir, gstackHome\)/);
  });
});
|
||||
|
||||
// ── #1802 D1: remote-http persistent dir must never hit cleanupStagingDir ───
// When running in remote-http mode, `stagingDir` is not a throwaway staging
// dir: it is the PERSISTENT transcript dir (makePersistentTranscriptDir, under
// ~/.gstack/transcripts/) that gstack-brain-sync push consumes. Because the
// finally also runs on the remote-http `return`, its cleanup call has to be
// gated on `!remoteHttpMode`. Without the gate the guard refuses the dir on
// every sync (false "prevent data loss" warning); before the guard existed the
// dir was deleted outright (broken artifacts handoff).
describe("#1802 D1 — remote-http finally gate (static invariant)", () => {
  const ingestPath = path.join(ROOT, "bin", "gstack-memory-ingest.ts");
  const ingest = fs.readFileSync(ingestPath, "utf-8");

  test("finally gates cleanupStagingDir on !remoteHttpMode", () => {
    // `[^)]*` leaves room for extra conditions (e.g. C3's !preserveStaging)
    // inside the same `if` — what matters is that remote-http never deletes.
    const gatedCleanup = /if \(!remoteHttpMode[^)]*\) cleanupStagingDir\(stagingDir\)/;
    expect(ingest).toMatch(gatedCleanup);
  });

  test("the only finally-scoped cleanup call is the gated one", () => {
    // Inspect a window starting at the last `} finally {` in the file and make
    // sure no line in it begins with an unconditional
    // `cleanupStagingDir(stagingDir);`, which would run regardless of mode.
    const finallyAt = ingest.lastIndexOf("} finally {");
    expect(finallyAt).toBeGreaterThan(-1);

    const bareCleanup = /^\s*cleanupStagingDir\(stagingDir\);/m;
    expect(ingest.slice(finallyAt, finallyAt + 800)).not.toMatch(bareCleanup);
  });
});
|
||||
|
||||
// ── #1802 C3: internal import-timeout must preserve a checkpointed staging dir ─
// An internal timeout in runGbrainImport kills only the child process and the
// parent returns normally, which means the SIGTERM forwarder's preserve branch
// never gets a chance to run. The timeout branch therefore has to mirror it
// (preserve when checkpointed), and the finally has to respect that decision —
// otherwise "checkpoint preserved" is a lie and resume breaks.
describe("#1802 C3 — import-timeout preserve (static invariant)", () => {
  const ingestPath = path.join(ROOT, "bin", "gstack-memory-ingest.ts");
  const ingest = fs.readFileSync(ingestPath, "utf-8");

  test("timeout branch checks stagingDirIsCheckpointed and sets preserveStaging", () => {
    const branchStart = ingest.indexOf("if (importResult.timedOut)");
    expect(branchStart).toBeGreaterThan(-1);

    // Only a fixed-size window after the branch opener is inspected.
    const branch = ingest.slice(branchStart, branchStart + 1200);
    const required = [
      /stagingDirIsCheckpointed\(stagingDir\)/,
      /preserveStaging = true/,
      // The not-checkpointed path must say so honestly rather than promising resume.
      /before writing a checkpoint/,
    ];
    for (const pattern of required) {
      expect(branch).toMatch(pattern);
    }
  });

  test("finally honors preserveStaging", () => {
    const gatedCleanup =
      /if \(!remoteHttpMode && !preserveStaging\) cleanupStagingDir\(stagingDir\)/;
    expect(ingest).toMatch(gatedCleanup);
  });
});
|
||||
|
||||
// ── #1802 C5: hardening (static invariant) ─────────────────────────────────
// Source-text checks on bin/gstack-memory-ingest.ts: cleanup must delete the
// guard's canonical path, and makeStagingDir must not leave a partial dir
// behind when it fails.
describe("#1802 C5 — hardening (static invariant)", () => {
  const ingestPath = path.join(ROOT, "bin", "gstack-memory-ingest.ts");
  const ingest = fs.readFileSync(ingestPath, "utf-8");

  test("cleanupStagingDir deletes the canonical path, not the raw input", () => {
    const canonicalDelete = /rmSync\(verdict\.canonicalPath \?\? dir/;
    expect(ingest).toMatch(canonicalDelete);
  });

  test("makeStagingDir tears down + rethrows if the marker write fails", () => {
    const declAt = ingest.indexOf("function makeStagingDir");
    expect(declAt).toBeGreaterThan(-1);

    // Only a fixed-size window after the declaration is inspected.
    const fnWindow = ingest.slice(declAt, declAt + 800);
    const required = [
      /catch \(err\)/,
      /rmSync\(dir, \{ recursive: true, force: true \}\)/,
      /throw err/,
    ];
    for (const pattern of required) {
      expect(fnWindow).toMatch(pattern);
    }
  });
});
|
||||
|
||||
// ── #1802 C4: resume must not mark failed files as ingested ─────────────────
// readNewFailures() maps gbrain's per-file failures (keyed by staging-relative
// path) back to source paths so the caller can EXCLUDE them from state
// recording. On resume the map was rebuilt empty, so every failure was lost and
// the failed file was silently marked ingested. This proves the reconstructed
// map (built with stagedRelPath, the same key writeStaged uses) recovers it.
describe("#1802 C4 — resume failure mapping (behavioral)", () => {
  // Scratch directory holding the failures log each test writes.
  let cpHome: string;

  beforeEach(() => {
    cpHome = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802c4-fail-"));
  });
  afterEach(() => {
    try { fs.rmSync(cpHome, { recursive: true, force: true }); } catch { /* best-effort */ }
  });

  test("stagedRelPath matches the writeStaged key format", () => {
    expect(stagedRelPath("my-slug")).toBe("my-slug.md");
    expect(stagedRelPath("nested/slug")).toBe("nested/slug.md");
  });

  test("reconstructed map maps the failure back to its source; empty map loses it", () => {
    const failuresPath = path.join(cpHome, "sync-failures.jsonl");
    // gbrain records the failure keyed by the staging-relative path.
    fs.writeFileSync(
      failuresPath,
      JSON.stringify({ path: stagedRelPath("doc-a"), error: "boom" }) + "\n",
      "utf-8",
    );

    // The resume-path reconstruction: built from prepared pages via stagedRelPath.
    const reconstructed = new Map<string, string>([
      [stagedRelPath("doc-a"), "/src/doc-a.json"],
    ]);
    // NOTE(review): the second argument (0) is presumably the position in the
    // failures log to start reading from — confirm against readNewFailures.
    const recovered = readNewFailures(failuresPath, 0, reconstructed);
    expect(recovered.has("/src/doc-a.json")).toBe(true);

    // The pre-fix bug: an empty map (what resume used) drops the failure, so the
    // caller would state-record /src/doc-a.json as ingested.
    const lost = readNewFailures(failuresPath, 0, new Map());
    expect(lost.size).toBe(0);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user