mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-09 03:23:55 +02:00
* fix(sync): fail-closed staging-dir ownership guard — prevent rm -rf of repo (#1802) Adopts community fix #1827 by @diazMelgarejo (cyre). New lib/staging-guard.ts exports checkOwnedStagingDir(), the single fail-closed predicate for 'safe to recurse-delete or resume into', wired at cleanupStagingDir() (the deletion chokepoint), decideResume(), the ingest entry point, and makeStagingDir() (mints the .gstack-staging marker). Fixes #1802. Co-Authored-By: cyre <diazMelgarejo@users.noreply.github.com> Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(sync): don't route the remote-http persistent transcript dir through cleanup (#1802) The ingest finally ran cleanupStagingDir() unconditionally, but in remote-http mode stagingDir is the PERSISTENT transcript dir (~/.gstack/transcripts/) that gstack-brain-sync push must consume. The remote-http branch documents the intent to skip cleanup, but a finally runs on its return. Gate the call on !remoteHttpMode so the ownership guard only ever sees .staging-ingest-* dirs. Pre-gate this dir was deleted outright (broken artifacts handoff); post-#1827 it produced a false 'prevent data loss' warning every sync. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(sync): preserve staging dir on internal import timeout (#1802 C3) The import-timeout branch printed 'checkpoint preserved' but the finally then deleted the staging dir: the SIGTERM forwarder's preserve branch only runs when the PARENT is signalled, and an internal runGbrainImport timeout kills just the child and returns normally. So #1611 resume-after-timeout never actually worked. Mirror the forwarder in the timeout branch: set preserveStaging only when gbrain checkpointed against this dir (finally then skips cleanup); otherwise clean up and tell the user it restages instead of falsely promising a resume. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(sync): resume must not mark failed files as ingested (#1802 C4) On resume, stagedPathToSource was rebuilt as an empty Map, so readNewFailures() could not map gbrain's per-file failures back to source paths. Every failure fell through to state recording — failed files were silently marked ingested and never retried. Reconstruct the map from the prepared pages via a shared stagedRelPath() helper (single source of truth with writeStaged, so the keys can never drift). Exports stagedRelPath + readNewFailures for a behavioral test proving the reconstructed map recovers the failure the empty map dropped. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * harden(sync): close staging-guard TOCTOU + fail hard on marker write (#1802 C5) checkOwnedStagingDir() now returns the realpath-resolved canonicalPath on a pass, and cleanupStagingDir() rmSync's that instead of the raw input — closing the gap where the input is a symlink swapped between the ownership check and the delete. makeStagingDir() tears down the partial dir and rethrows if the marker write fails, so a marker-less dir (which the guard would refuse forever) can never leak. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * chore: v1.56.1.0 — staging-dir ownership guard + resume-correctness fixes (#1802) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * ci: grant the eval report job issues:write so PR comment upsert stops 401ing Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: cyre <diazMelgarejo@users.noreply.github.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+21
-13
@@ -41,6 +41,7 @@ import { ensureSourceRegistered, sourcePageCount, parseSourcesList } from "../li
|
||||
import { detectAutopilot, decideSourceRemove, decideCodeSync } from "../lib/gbrain-guards";
|
||||
import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status";
|
||||
import { buildGbrainEnv, spawnGbrain, execGbrainJson, NEEDS_SHELL_ON_WINDOWS } from "../lib/gbrain-exec";
|
||||
import { checkOwnedStagingDir } from "../lib/staging-guard";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -160,7 +161,7 @@ export function readGbrainCheckpoint(): GbrainCheckpoint | null {
|
||||
export type ResumeVerdict =
|
||||
| { kind: "no-checkpoint" }
|
||||
| { kind: "resume"; stagingDir: string; processedIndex: number; totalFiles: number }
|
||||
| { kind: "stale-staging-missing"; stagingDir: string };
|
||||
| { kind: "stale-staging-missing"; stagingDir: string; reason?: string };
|
||||
|
||||
/**
|
||||
* Decide whether the next memory-ingest run should resume from gbrain's
|
||||
@@ -169,20 +170,20 @@ export type ResumeVerdict =
|
||||
* - checkpoint + staging ok → resume (gbrain picks up at processedIndex+1)
|
||||
* - checkpoint + staging gone → warn, fall through to fresh restage
|
||||
*/
|
||||
export function decideResume(): ResumeVerdict {
|
||||
export function decideResume(gstackHome: string = GSTACK_HOME): ResumeVerdict {
|
||||
const cp = readGbrainCheckpoint();
|
||||
if (!cp || !cp.dir) return { kind: "no-checkpoint" };
|
||||
const stagingDir = cp.dir;
|
||||
if (!existsSync(stagingDir)) {
|
||||
return { kind: "stale-staging-missing", stagingDir };
|
||||
}
|
||||
// Treat "non-empty" as the safe-to-resume signal. statSync on a missing
|
||||
// file throws; we already handled missing above so this is dir-level shape.
|
||||
try {
|
||||
const st = statSync(stagingDir);
|
||||
if (!st.isDirectory()) return { kind: "stale-staging-missing", stagingDir };
|
||||
} catch {
|
||||
return { kind: "stale-staging-missing", stagingDir };
|
||||
// #1802: only resume into a path we can PROVE is a gstack-minted staging dir.
|
||||
// A poisoned checkpoint (dir = repo root, written when an autopilot import was
|
||||
// SIGTERM'd while CWD was the repo) would otherwise be adopted as the staging
|
||||
// dir and later recursively deleted by cleanupStagingDir(). Fail-closed: any
|
||||
// unprovable path restages from scratch (cost: one re-stage; never data loss).
|
||||
// Pure decision: return the verdict (with reason) and let the caller log,
|
||||
// so we don't double-log the same event from here and the call site.
|
||||
const verdict = checkOwnedStagingDir(stagingDir, gstackHome);
|
||||
if (!verdict.ok) {
|
||||
return { kind: "stale-staging-missing", stagingDir, reason: verdict.reason };
|
||||
}
|
||||
return {
|
||||
kind: "resume",
|
||||
@@ -953,8 +954,15 @@ function runMemoryIngest(args: CliArgs): StageResult {
|
||||
);
|
||||
childEnv.GSTACK_INGEST_RESUME_DIR = resume.stagingDir;
|
||||
} else if (resume.kind === "stale-staging-missing") {
|
||||
// The reason distinguishes "actually gone" (disk cleanup / reboot) from
|
||||
// "refused as unowned" (#1802 poison: the path may still exist on disk).
|
||||
// Logging "gone" for a refused poison path misdirects incident diagnosis.
|
||||
const why = resume.reason
|
||||
? `staging dir not usable: ${resume.reason}`
|
||||
: `staging dir ${resume.stagingDir} gone`;
|
||||
console.error(
|
||||
`[sync:memory] previous checkpoint stale (staging dir ${resume.stagingDir} gone), restaging from scratch`,
|
||||
`[sync:memory] previous checkpoint stale (${why}), restaging from scratch. ` +
|
||||
`Remove ~/.gbrain/import-checkpoint.json to silence.`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user