fix(sync): resume must not mark failed files as ingested (#1802 C4)

On resume, stagedPathToSource was rebuilt as an empty Map, so readNewFailures()
could not map gbrain's per-file failures back to source paths. Every failure
fell through to state recording — failed files were silently marked ingested and
never retried. Reconstruct the map from the prepared pages via a shared
stagedRelPath() helper (single source of truth with writeStaged, so the keys
can never drift). Exports stagedRelPath + readNewFailures for a behavioral test
proving the reconstructed map recovers the failure the empty map dropped.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-03 07:21:13 -07:00
parent 68e452c0ed
commit 661ba50169
2 changed files with 70 additions and 3 deletions
+21 -3
View File
@@ -908,13 +908,23 @@ interface StagingResult {
* Filename = `${slug}.md`. mkdir is recursive. Existing files overwrite.
* Errors per-file are collected; the whole batch is best-effort.
*/
/**
 * Returns the staging-dir-relative filename for a prepared page: the slug
 * with a `.md` extension appended.
 *
 * Both writeStaged() and the resume-path reconstruction in ingestPass()
 * (#1802 C4) build the keys of stagedPathToSource through this helper, so the
 * two call sites cannot compute different keys. A mismatch would leave
 * readNewFailures() unable to map gbrain's per-file failures back to source
 * paths, and failed files would be recorded as ingested.
 *
 * @param slug - Slug of the prepared page; may contain `/` separators.
 * @returns The slug followed by `.md`.
 */
export function stagedRelPath(slug: string): string {
  const markdownExtension = '.md';
  return slug + markdownExtension;
}
function writeStaged(prepared: PreparedPage[], stagingDir: string): StagingResult {
mkdirSync(stagingDir, { recursive: true });
const stagedPathToSource = new Map<string, string>();
const errors: Array<{ slug: string; error: string }> = [];
let written = 0;
for (const p of prepared) {
const relPath = `${p.slug}.md`;
const relPath = stagedRelPath(p.slug);
const absPath = join(stagingDir, relPath);
try {
mkdirSync(dirname(absPath), { recursive: true });
@@ -979,7 +989,7 @@ function parseImportJson(stdout: string): ImportJsonResult | null {
* staging-dir-relative filename gbrain saw (e.g. "transcripts/foo.md").
* stagedPathToSource maps that back to the original source file.
*/
function readNewFailures(
export function readNewFailures(
syncFailuresPath: string,
preImportOffset: number,
stagedPathToSource: Map<string, string>,
@@ -1572,7 +1582,15 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
`[memory-ingest] resuming previous staging dir ${stagingDir} (skipping prepare phase)`,
);
}
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource: new Map() };
// #1802 C4: reconstruct stagedPathToSource from the prepared pages so
// readNewFailures() can still map gbrain's per-file failures back to
// sources on resume. An empty map made every failed file fall through to
// state-recording — i.e. silently marked ingested despite failing.
const stagedPathToSource = new Map<string, string>();
for (const p of prep.prepared) {
stagedPathToSource.set(stagedRelPath(p.slug), p.source_path);
}
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource };
} else {
staging = writeStaged(prep.prepared, stagingDir);
}