fix(sync): resume must not mark failed files as ingested (#1802 C4)

On resume, stagedPathToSource was rebuilt as an empty Map, so readNewFailures()
could not map gbrain's per-file failures back to source paths. Every failure
fell through to state recording — failed files were silently marked ingested and
never retried. Reconstruct the map from the prepared pages via a shared
stagedRelPath() helper (single source of truth with writeStaged, so the keys
can never drift). Exports stagedRelPath + readNewFailures for a behavioral test
proving the reconstructed map recovers the failure the empty map dropped.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-03 07:21:13 -07:00
parent 68e452c0ed
commit 661ba50169
2 changed files with 70 additions and 3 deletions
@@ -36,6 +36,7 @@ import {
decideResume,
} from "../bin/gstack-gbrain-sync";
import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard";
import { stagedRelPath, readNewFailures } from "../bin/gstack-memory-ingest";
const ROOT = path.resolve(import.meta.dir, "..");
const DEFAULT_MS = 35 * 60 * 1000;
@@ -407,3 +408,51 @@ describe("#1802 C3 — import-timeout preserve (static invariant)", () => {
);
});
});
// ── #1802 C4: resume must not mark failed files as ingested ─────────────────
// readNewFailures() maps gbrain's per-file failures (keyed by staging-relative
// path) back to source paths so the caller can EXCLUDE them from state
// recording. On resume the map was rebuilt empty, so every failure was lost and
// the failed file was silently marked ingested. This proves the reconstructed
// map (built with stagedRelPath, the same key writeStaged uses) recovers it.
describe("#1802 C4 — resume failure mapping (behavioral)", () => {
let dir: string;
let cpHome: string;
beforeEach(() => {
dir = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802c4-"));
cpHome = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802c4-fail-"));
});
afterEach(() => {
for (const d of [dir, cpHome]) {
try { fs.rmSync(d, { recursive: true, force: true }); } catch { /* best-effort */ }
}
});
test("stagedRelPath matches the writeStaged key format", () => {
expect(stagedRelPath("my-slug")).toBe("my-slug.md");
expect(stagedRelPath("nested/slug")).toBe("nested/slug.md");
});
test("reconstructed map maps the failure back to its source; empty map loses it", () => {
const failuresPath = path.join(cpHome, "sync-failures.jsonl");
// gbrain records the failure keyed by the staging-relative path.
fs.writeFileSync(
failuresPath,
JSON.stringify({ path: stagedRelPath("doc-a"), error: "boom" }) + "\n",
"utf-8",
);
// The resume-path reconstruction: built from prepared pages via stagedRelPath.
const reconstructed = new Map<string, string>([
[stagedRelPath("doc-a"), "/src/doc-a.json"],
]);
const recovered = readNewFailures(failuresPath, 0, reconstructed);
expect(recovered.has("/src/doc-a.json")).toBe(true);
// The pre-fix bug: an empty map (what resume used) drops the failure, so the
// caller would state-record /src/doc-a.json as ingested.
const lost = readNewFailures(failuresPath, 0, new Map());
expect(lost.size).toBe(0);
});
});