fix(sync): fail-closed staging-dir ownership guard — prevent rm -rf of repo (#1802)

Adopts community fix #1827 by @diazMelgarejo (cyre). New lib/staging-guard.ts
exports checkOwnedStagingDir(), the single fail-closed predicate for 'safe to
recurse-delete or resume into', wired at cleanupStagingDir() (the deletion
chokepoint), decideResume(), the ingest entry point, and makeStagingDir()
(mints the .gstack-staging marker).

Fixes #1802.

Co-Authored-By: cyre <diazMelgarejo@users.noreply.github.com>
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-03 07:18:19 -07:00
parent c43c850cae
commit ef606117a9
4 changed files with 284 additions and 18 deletions
+21 -13
View File
@@ -41,6 +41,7 @@ import { ensureSourceRegistered, sourcePageCount, parseSourcesList } from "../li
import { detectAutopilot, decideSourceRemove, decideCodeSync } from "../lib/gbrain-guards";
import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status";
import { buildGbrainEnv, spawnGbrain, execGbrainJson, NEEDS_SHELL_ON_WINDOWS } from "../lib/gbrain-exec";
import { checkOwnedStagingDir } from "../lib/staging-guard";
// ── Types ──────────────────────────────────────────────────────────────────
@@ -160,7 +161,7 @@ export function readGbrainCheckpoint(): GbrainCheckpoint | null {
export type ResumeVerdict =
| { kind: "no-checkpoint" }
| { kind: "resume"; stagingDir: string; processedIndex: number; totalFiles: number }
| { kind: "stale-staging-missing"; stagingDir: string };
| { kind: "stale-staging-missing"; stagingDir: string; reason?: string };
/**
* Decide whether the next memory-ingest run should resume from gbrain's
@@ -169,20 +170,20 @@ export type ResumeVerdict =
* - checkpoint + staging ok → resume (gbrain picks up at processedIndex+1)
* - checkpoint + staging gone → warn, fall through to fresh restage
*/
export function decideResume(): ResumeVerdict {
export function decideResume(gstackHome: string = GSTACK_HOME): ResumeVerdict {
const cp = readGbrainCheckpoint();
if (!cp || !cp.dir) return { kind: "no-checkpoint" };
const stagingDir = cp.dir;
if (!existsSync(stagingDir)) {
return { kind: "stale-staging-missing", stagingDir };
}
// Treat "non-empty" as the safe-to-resume signal. statSync on a missing
// file throws; we already handled missing above so this is dir-level shape.
try {
const st = statSync(stagingDir);
if (!st.isDirectory()) return { kind: "stale-staging-missing", stagingDir };
} catch {
return { kind: "stale-staging-missing", stagingDir };
// #1802: only resume into a path we can PROVE is a gstack-minted staging dir.
// A poisoned checkpoint (dir = repo root, written when an autopilot import was
// SIGTERM'd while CWD was the repo) would otherwise be adopted as the staging
// dir and later recursively deleted by cleanupStagingDir(). Fail-closed: any
// unprovable path restages from scratch (cost: one re-stage; never data loss).
// Pure decision: return the verdict (with reason) and let the caller log,
// so we don't double-log the same event from here and the call site.
const verdict = checkOwnedStagingDir(stagingDir, gstackHome);
if (!verdict.ok) {
return { kind: "stale-staging-missing", stagingDir, reason: verdict.reason };
}
return {
kind: "resume",
@@ -953,8 +954,15 @@ function runMemoryIngest(args: CliArgs): StageResult {
);
childEnv.GSTACK_INGEST_RESUME_DIR = resume.stagingDir;
} else if (resume.kind === "stale-staging-missing") {
// The reason distinguishes "actually gone" (disk cleanup / reboot) from
// "refused as unowned" (#1802 poison: the path may still exist on disk).
// Logging "gone" for a refused poison path misdirects incident diagnosis.
const why = resume.reason
? `staging dir not usable: ${resume.reason}`
: `staging dir ${resume.stagingDir} gone`;
console.error(
`[sync:memory] previous checkpoint stale (staging dir ${resume.stagingDir} gone), restaging from scratch`,
`[sync:memory] previous checkpoint stale (${why}), restaging from scratch. ` +
`Remove ~/.gbrain/import-checkpoint.json to silence.`,
);
}
+25 -1
View File
@@ -65,6 +65,7 @@ import {
withErrorContext,
} from "../lib/gstack-memory-helpers";
import { execGbrainText, spawnGbrainAsync } from "../lib/gbrain-exec";
import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard";
// ── Types ──────────────────────────────────────────────────────────────────
@@ -1198,6 +1199,9 @@ function preparePages(
function makeStagingDir(): string {
const dir = join(GSTACK_HOME, `.staging-ingest-${process.pid}-${Date.now()}`);
mkdirSync(dir, { recursive: true });
// Mint the ownership marker (#1802) so cleanupStagingDir() and decideResume()
// can prove this dir was created by us before any recursive delete or resume.
writeFileSync(join(dir, STAGING_MARKER), `${process.pid}\n${Date.now()}\n`, "utf-8");
return dir;
}
@@ -1259,6 +1263,16 @@ function isRemoteHttpMcpMode(): boolean {
* cleanup failure.
*/
function cleanupStagingDir(dir: string): void {
// #1802 deletion chokepoint: never recurse-delete a path we cannot PROVE we
// own. A poisoned resume could otherwise route the repo root here.
const verdict = checkOwnedStagingDir(dir, GSTACK_HOME);
if (!verdict.ok) {
console.error(
`[gbrain] staging cleanup REFUSED: "${dir}" is not an owned staging dir ` +
`(${verdict.reason}). Skipping rm -rf to prevent data loss (#1802).`,
);
return;
}
try {
rmSync(dir, { recursive: true, force: true });
} catch {
@@ -1515,10 +1529,20 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
// tells it where to resume.
const remoteHttpMode = isRemoteHttpMcpMode();
const resumeDir = process.env.GSTACK_INGEST_RESUME_DIR;
// #1802 second entry point: this binary is runnable directly, so it must not
// trust GSTACK_INGEST_RESUME_DIR just because it exists — a stale/poisoned env
// could make us `gbrain import` (and later clean up) an arbitrary directory.
// Prove ownership here too, independently of the orchestrator's decideResume.
const resuming = !remoteHttpMode
&& typeof resumeDir === "string"
&& resumeDir.length > 0
&& existsSync(resumeDir);
&& existsSync(resumeDir)
&& checkOwnedStagingDir(resumeDir, GSTACK_HOME).ok;
if (!remoteHttpMode && resumeDir && resumeDir.length > 0 && !resuming) {
console.error(
`[memory-ingest] ignoring GSTACK_INGEST_RESUME_DIR="${resumeDir}" — not a proven staging dir (#1802); staging fresh.`,
);
}
const stagingDir = resuming
? resumeDir!
: remoteHttpMode