mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-23 10:10:03 +02:00
fix(sync): fail-closed staging-dir ownership guard — prevent rm -rf of repo (#1802)
Adopts community fix #1827 by @diazMelgarejo (cyre). New lib/staging-guard.ts exports checkOwnedStagingDir(), the single fail-closed predicate for 'safe to recurse-delete or resume into', wired at cleanupStagingDir() (the deletion chokepoint), decideResume(), the ingest entry point, and makeStagingDir() (mints the .gstack-staging marker). Fixes #1802. Co-Authored-By: cyre <diazMelgarejo@users.noreply.github.com> Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+21
-13
@@ -41,6 +41,7 @@ import { ensureSourceRegistered, sourcePageCount, parseSourcesList } from "../li
|
||||
import { detectAutopilot, decideSourceRemove, decideCodeSync } from "../lib/gbrain-guards";
|
||||
import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status";
|
||||
import { buildGbrainEnv, spawnGbrain, execGbrainJson, NEEDS_SHELL_ON_WINDOWS } from "../lib/gbrain-exec";
|
||||
import { checkOwnedStagingDir } from "../lib/staging-guard";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -160,7 +161,7 @@ export function readGbrainCheckpoint(): GbrainCheckpoint | null {
|
||||
export type ResumeVerdict =
|
||||
| { kind: "no-checkpoint" }
|
||||
| { kind: "resume"; stagingDir: string; processedIndex: number; totalFiles: number }
|
||||
| { kind: "stale-staging-missing"; stagingDir: string };
|
||||
| { kind: "stale-staging-missing"; stagingDir: string; reason?: string };
|
||||
|
||||
/**
|
||||
* Decide whether the next memory-ingest run should resume from gbrain's
|
||||
@@ -169,20 +170,20 @@ export type ResumeVerdict =
|
||||
* - checkpoint + staging ok → resume (gbrain picks up at processedIndex+1)
|
||||
* - checkpoint + staging gone → warn, fall through to fresh restage
|
||||
*/
|
||||
export function decideResume(): ResumeVerdict {
|
||||
export function decideResume(gstackHome: string = GSTACK_HOME): ResumeVerdict {
|
||||
const cp = readGbrainCheckpoint();
|
||||
if (!cp || !cp.dir) return { kind: "no-checkpoint" };
|
||||
const stagingDir = cp.dir;
|
||||
if (!existsSync(stagingDir)) {
|
||||
return { kind: "stale-staging-missing", stagingDir };
|
||||
}
|
||||
// Treat "non-empty" as the safe-to-resume signal. statSync on a missing
|
||||
// file throws; we already handled missing above so this is dir-level shape.
|
||||
try {
|
||||
const st = statSync(stagingDir);
|
||||
if (!st.isDirectory()) return { kind: "stale-staging-missing", stagingDir };
|
||||
} catch {
|
||||
return { kind: "stale-staging-missing", stagingDir };
|
||||
// #1802: only resume into a path we can PROVE is a gstack-minted staging dir.
|
||||
// A poisoned checkpoint (dir = repo root, written when an autopilot import was
|
||||
// SIGTERM'd while CWD was the repo) would otherwise be adopted as the staging
|
||||
// dir and later recursively deleted by cleanupStagingDir(). Fail-closed: any
|
||||
// unprovable path restages from scratch (cost: one re-stage; never data loss).
|
||||
// Pure decision: return the verdict (with reason) and let the caller log,
|
||||
// so we don't double-log the same event from here and the call site.
|
||||
const verdict = checkOwnedStagingDir(stagingDir, gstackHome);
|
||||
if (!verdict.ok) {
|
||||
return { kind: "stale-staging-missing", stagingDir, reason: verdict.reason };
|
||||
}
|
||||
return {
|
||||
kind: "resume",
|
||||
@@ -953,8 +954,15 @@ function runMemoryIngest(args: CliArgs): StageResult {
|
||||
);
|
||||
childEnv.GSTACK_INGEST_RESUME_DIR = resume.stagingDir;
|
||||
} else if (resume.kind === "stale-staging-missing") {
|
||||
// The reason distinguishes "actually gone" (disk cleanup / reboot) from
|
||||
// "refused as unowned" (#1802 poison: the path may still exist on disk).
|
||||
// Logging "gone" for a refused poison path misdirects incident diagnosis.
|
||||
const why = resume.reason
|
||||
? `staging dir not usable: ${resume.reason}`
|
||||
: `staging dir ${resume.stagingDir} gone`;
|
||||
console.error(
|
||||
`[sync:memory] previous checkpoint stale (staging dir ${resume.stagingDir} gone), restaging from scratch`,
|
||||
`[sync:memory] previous checkpoint stale (${why}), restaging from scratch. ` +
|
||||
`Remove ~/.gbrain/import-checkpoint.json to silence.`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ import {
|
||||
withErrorContext,
|
||||
} from "../lib/gstack-memory-helpers";
|
||||
import { execGbrainText, spawnGbrainAsync } from "../lib/gbrain-exec";
|
||||
import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -1198,6 +1199,9 @@ function preparePages(
|
||||
/**
 * Create a fresh staging directory under GSTACK_HOME and stamp it with the
 * ownership marker file.
 *
 * The directory name combines the current pid with a `Date.now()` timestamp.
 * The marker (#1802) is written immediately after creation so that
 * cleanupStagingDir() and decideResume() can prove this dir was created by us
 * before any recursive delete or resume. The marker body is the pid and a
 * timestamp, one per line.
 *
 * @returns the path of the newly created staging directory
 */
function makeStagingDir(): string {
  const dirName = `.staging-ingest-${process.pid}-${Date.now()}`;
  const stagingPath = join(GSTACK_HOME, dirName);
  mkdirSync(stagingPath, { recursive: true });

  // Mint the ownership marker inside the new directory.
  const markerPath = join(stagingPath, STAGING_MARKER);
  const markerBody = `${process.pid}\n${Date.now()}\n`;
  writeFileSync(markerPath, markerBody, "utf-8");

  return stagingPath;
}
|
||||
|
||||
@@ -1259,6 +1263,16 @@ function isRemoteHttpMcpMode(): boolean {
|
||||
* cleanup failure.
|
||||
*/
|
||||
function cleanupStagingDir(dir: string): void {
|
||||
// #1802 deletion chokepoint: never recurse-delete a path we cannot PROVE we
|
||||
// own. A poisoned resume could otherwise route the repo root here.
|
||||
const verdict = checkOwnedStagingDir(dir, GSTACK_HOME);
|
||||
if (!verdict.ok) {
|
||||
console.error(
|
||||
`[gbrain] staging cleanup REFUSED: "${dir}" is not an owned staging dir ` +
|
||||
`(${verdict.reason}). Skipping rm -rf to prevent data loss (#1802).`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
} catch {
|
||||
@@ -1515,10 +1529,20 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
||||
// tells it where to resume.
|
||||
const remoteHttpMode = isRemoteHttpMcpMode();
|
||||
const resumeDir = process.env.GSTACK_INGEST_RESUME_DIR;
|
||||
// #1802 second entry point: this binary is runnable directly, so it must not
|
||||
// trust GSTACK_INGEST_RESUME_DIR just because it exists — a stale/poisoned env
|
||||
// could make us `gbrain import` (and later clean up) an arbitrary directory.
|
||||
// Prove ownership here too, independently of the orchestrator's decideResume.
|
||||
const resuming = !remoteHttpMode
|
||||
&& typeof resumeDir === "string"
|
||||
&& resumeDir.length > 0
|
||||
&& existsSync(resumeDir);
|
||||
&& existsSync(resumeDir)
|
||||
&& checkOwnedStagingDir(resumeDir, GSTACK_HOME).ok;
|
||||
if (!remoteHttpMode && resumeDir && resumeDir.length > 0 && !resuming) {
|
||||
console.error(
|
||||
`[memory-ingest] ignoring GSTACK_INGEST_RESUME_DIR="${resumeDir}" — not a proven staging dir (#1802); staging fresh.`,
|
||||
);
|
||||
}
|
||||
const stagingDir = resuming
|
||||
? resumeDir!
|
||||
: remoteHttpMode
|
||||
|
||||
Reference in New Issue
Block a user