mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-17 07:10:12 +02:00
fix(sync): resume must not mark failed files as ingested (#1802 C4)
On resume, stagedPathToSource was rebuilt as an empty Map, so readNewFailures() could not map gbrain's per-file failures back to source paths. Every failure fell through to state recording — failed files were silently marked ingested and never retried. Reconstruct the map from the prepared pages via a shared stagedRelPath() helper (single source of truth with writeStaged, so the keys can never drift). Exports stagedRelPath + readNewFailures for a behavioral test proving the reconstructed map recovers the failure the empty map dropped. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -908,13 +908,23 @@ interface StagingResult {
|
||||
* Filename = `${slug}.md`. mkdir is recursive. Existing files overwrite.
|
||||
* Errors per-file are collected; the whole batch is best-effort.
|
||||
*/
|
||||
/**
|
||||
* Staging-relative path for a prepared page's slug. Single source of truth so
|
||||
* writeStaged() (which mints the map) and the resume-path reconstruction (#1802
|
||||
* C4) compute identical keys — if they diverge, readNewFailures() silently stops
|
||||
* mapping gbrain's failures back to sources and failed files get marked ingested.
|
||||
*/
|
||||
export function stagedRelPath(slug: string): string {
|
||||
return `${slug}.md`;
|
||||
}
|
||||
|
||||
function writeStaged(prepared: PreparedPage[], stagingDir: string): StagingResult {
|
||||
mkdirSync(stagingDir, { recursive: true });
|
||||
const stagedPathToSource = new Map<string, string>();
|
||||
const errors: Array<{ slug: string; error: string }> = [];
|
||||
let written = 0;
|
||||
for (const p of prepared) {
|
||||
const relPath = `${p.slug}.md`;
|
||||
const relPath = stagedRelPath(p.slug);
|
||||
const absPath = join(stagingDir, relPath);
|
||||
try {
|
||||
mkdirSync(dirname(absPath), { recursive: true });
|
||||
@@ -979,7 +989,7 @@ function parseImportJson(stdout: string): ImportJsonResult | null {
|
||||
* staging-dir-relative filename gbrain saw (e.g. "transcripts/foo.md").
|
||||
* stagedPathToSource maps that back to the original source file.
|
||||
*/
|
||||
function readNewFailures(
|
||||
export function readNewFailures(
|
||||
syncFailuresPath: string,
|
||||
preImportOffset: number,
|
||||
stagedPathToSource: Map<string, string>,
|
||||
@@ -1572,7 +1582,15 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
||||
`[memory-ingest] resuming previous staging dir ${stagingDir} (skipping prepare phase)`,
|
||||
);
|
||||
}
|
||||
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource: new Map() };
|
||||
// #1802 C4: reconstruct stagedPathToSource from the prepared pages so
|
||||
// readNewFailures() can still map gbrain's per-file failures back to
|
||||
// sources on resume. An empty map made every failed file fall through to
|
||||
// state-recording — i.e. silently marked ingested despite failing.
|
||||
const stagedPathToSource = new Map<string, string>();
|
||||
for (const p of prep.prepared) {
|
||||
stagedPathToSource.set(stagedRelPath(p.slug), p.source_path);
|
||||
}
|
||||
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource };
|
||||
} else {
|
||||
staging = writeStaged(prep.prepared, stagingDir);
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ import {
|
||||
decideResume,
|
||||
} from "../bin/gstack-gbrain-sync";
|
||||
import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard";
|
||||
import { stagedRelPath, readNewFailures } from "../bin/gstack-memory-ingest";
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, "..");
|
||||
const DEFAULT_MS = 35 * 60 * 1000;
|
||||
@@ -407,3 +408,51 @@ describe("#1802 C3 — import-timeout preserve (static invariant)", () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ── #1802 C4: resume must not mark failed files as ingested ─────────────────
|
||||
// readNewFailures() maps gbrain's per-file failures (keyed by staging-relative
|
||||
// path) back to source paths so the caller can EXCLUDE them from state
|
||||
// recording. On resume the map was rebuilt empty, so every failure was lost and
|
||||
// the failed file was silently marked ingested. This proves the reconstructed
|
||||
// map (built with stagedRelPath, the same key writeStaged uses) recovers it.
|
||||
describe("#1802 C4 — resume failure mapping (behavioral)", () => {
|
||||
let dir: string;
|
||||
let cpHome: string;
|
||||
|
||||
beforeEach(() => {
|
||||
dir = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802c4-"));
|
||||
cpHome = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-1802c4-fail-"));
|
||||
});
|
||||
afterEach(() => {
|
||||
for (const d of [dir, cpHome]) {
|
||||
try { fs.rmSync(d, { recursive: true, force: true }); } catch { /* best-effort */ }
|
||||
}
|
||||
});
|
||||
|
||||
test("stagedRelPath matches the writeStaged key format", () => {
|
||||
expect(stagedRelPath("my-slug")).toBe("my-slug.md");
|
||||
expect(stagedRelPath("nested/slug")).toBe("nested/slug.md");
|
||||
});
|
||||
|
||||
test("reconstructed map maps the failure back to its source; empty map loses it", () => {
|
||||
const failuresPath = path.join(cpHome, "sync-failures.jsonl");
|
||||
// gbrain records the failure keyed by the staging-relative path.
|
||||
fs.writeFileSync(
|
||||
failuresPath,
|
||||
JSON.stringify({ path: stagedRelPath("doc-a"), error: "boom" }) + "\n",
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
// The resume-path reconstruction: built from prepared pages via stagedRelPath.
|
||||
const reconstructed = new Map<string, string>([
|
||||
[stagedRelPath("doc-a"), "/src/doc-a.json"],
|
||||
]);
|
||||
const recovered = readNewFailures(failuresPath, 0, reconstructed);
|
||||
expect(recovered.has("/src/doc-a.json")).toBe(true);
|
||||
|
||||
// The pre-fix bug: an empty map (what resume used) drops the failure, so the
|
||||
// caller would state-record /src/doc-a.json as ingested.
|
||||
const lost = readNewFailures(failuresPath, 0, new Map());
|
||||
expect(lost.size).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user