From 661ba50169eed40ee52de73587e657d138702c57 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 3 Jun 2026 07:21:13 -0700 Subject: [PATCH] fix(sync): resume must not mark failed files as ingested (#1802 C4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On resume, stagedPathToSource was rebuilt as an empty Map, so readNewFailures() could not map gbrain's per-file failures back to source paths. Every failure fell through to state recording — failed files were silently marked ingested and never retried. Reconstruct the map from the prepared pages via a shared stagedRelPath() helper (single source of truth with writeStaged, so the keys can never drift). Exports stagedRelPath + readNewFailures for a behavioral test proving the reconstructed map recovers the failure the empty map dropped. Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/gstack-memory-ingest.ts | 24 +++++++-- ...regression-1611-gbrain-sync-resume.test.ts | 49 +++++++++++++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/bin/gstack-memory-ingest.ts b/bin/gstack-memory-ingest.ts index eb3c2f2a0..32b3b038e 100644 --- a/bin/gstack-memory-ingest.ts +++ b/bin/gstack-memory-ingest.ts @@ -908,13 +908,23 @@ interface StagingResult { * Filename = `${slug}.md`. mkdir is recursive. Existing files overwrite. * Errors per-file are collected; the whole batch is best-effort. */ +/** + * Staging-relative path for a prepared page's slug. Single source of truth so + * writeStaged() (which mints the map) and the resume-path reconstruction (#1802 + * C4) compute identical keys — if they diverge, readNewFailures() silently stops + * mapping gbrain's failures back to sources and failed files get marked ingested. + */ +export function stagedRelPath(slug: string): string { + return `${slug}.md`; +} + function writeStaged(prepared: PreparedPage[], stagingDir: string): StagingResult { mkdirSync(stagingDir, { recursive: true }); const stagedPathToSource = new Map(); const errors: Array<{ slug: string; error: string }> = []; let written = 0; for (const p of prepared) { - const relPath = `${p.slug}.md`; + const relPath = stagedRelPath(p.slug); const absPath = join(stagingDir, relPath); try { mkdirSync(dirname(absPath), { recursive: true }); @@ -979,7 +989,7 @@ function parseImportJson(stdout: string): ImportJsonResult | null { * staging-dir-relative filename gbrain saw (e.g. "transcripts/foo.md"). * stagedPathToSource maps that back to the original source file. */ -function readNewFailures( +export function readNewFailures( syncFailuresPath: string, preImportOffset: number, stagedPathToSource: Map, @@ -1572,7 +1582,15 @@ async function ingestPass(args: CliArgs): Promise { `[memory-ingest] resuming previous staging dir ${stagingDir} (skipping prepare phase)`, ); } - staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource: new Map() }; + // #1802 C4: reconstruct stagedPathToSource from the prepared pages so + // readNewFailures() can still map gbrain's per-file failures back to + // sources on resume. An empty map made every failed file fall through to + // state-recording — i.e. silently marked ingested despite failing. + const stagedPathToSource = new Map(); + for (const p of prep.prepared) { + stagedPathToSource.set(stagedRelPath(p.slug), p.source_path); + } + staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource }; } else { staging = writeStaged(prep.prepared, stagingDir); } diff --git a/test/regression-1611-gbrain-sync-resume.test.ts b/test/regression-1611-gbrain-sync-resume.test.ts index c53825f70..fc1c11344 100644 --- a/test/regression-1611-gbrain-sync-resume.test.ts +++ b/test/regression-1611-gbrain-sync-resume.test.ts @@ -36,6 +36,7 @@ import { decideResume, } from "../bin/gstack-gbrain-sync"; import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard"; +import { stagedRelPath, readNewFailures } from "../bin/gstack-memory-ingest"; const ROOT = path.resolve(import.meta.dir, ".."); const DEFAULT_MS = 35 * 60 * 1000; @@ -407,3 +408,51 @@ describe("#1802 C3 — import-timeout preserve (static invariant)", () => { ); }); }); + +// ── #1802 C4: resume must not mark failed files as ingested ───────────────── +// readNewFailures() maps gbrain's per-file failures (keyed by staging-relative +// path) back to source paths so the caller can EXCLUDE them from state +// recording. On resume the map was rebuilt empty, so every failure was lost and +// the failed file was silently marked ingested. This proves the reconstructed +// map (built with stagedRelPath, the same key writeStaged uses) recovers it. +describe("#1802 C4 — resume failure mapping (behavioral)", () => { + let dir: string; + let cpHome: string; + + beforeEach(() => { + dir = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802c4-")); + cpHome = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802c4-fail-")); + }); + afterEach(() => { + for (const d of [dir, cpHome]) { + try { fs.rmSync(d, { recursive: true, force: true }); } catch { /* best-effort */ } + } + }); + + test("stagedRelPath matches the writeStaged key format", () => { + expect(stagedRelPath("my-slug")).toBe("my-slug.md"); + expect(stagedRelPath("nested/slug")).toBe("nested/slug.md"); + }); + + test("reconstructed map maps the failure back to its source; empty map loses it", () => { + const failuresPath = path.join(cpHome, "sync-failures.jsonl"); + // gbrain records the failure keyed by the staging-relative path. + fs.writeFileSync( + failuresPath, + JSON.stringify({ path: stagedRelPath("doc-a"), error: "boom" }) + "\n", + "utf-8", + ); + + // The resume-path reconstruction: built from prepared pages via stagedRelPath. + const reconstructed = new Map([ + [stagedRelPath("doc-a"), "/src/doc-a.json"], + ]); + const recovered = readNewFailures(failuresPath, 0, reconstructed); + expect(recovered.has("/src/doc-a.json")).toBe(true); + + // The pre-fix bug: an empty map (what resume used) drops the failure, so the + // caller would state-record /src/doc-a.json as ingested. + const lost = readNewFailures(failuresPath, 0, new Map()); + expect(lost.size).toBe(0); + }); +});