mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-18 15:50:11 +02:00
feat(gbrain-sync): self-heal stale autopilot lock (dead-pid)
detectAutopilot treated a lock FILE as proof of life, so a crashed gbrain daemon left a stale lock that wedged every sync forever (observed: a dead pid refused --full indefinitely). Now read the holder pid (bare or JSON body) and check liveness via signal-0: ESRCH=dead → ignore the stale signal and keep checking; EPERM=alive (other user) → active. A stale lock never masks a live autopilot process. Pure decision function — does not delete the file; the caller may clean it. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -38,6 +38,45 @@ describe("detectAutopilot", () => {
|
||||
expect(r.active).toBe(false);
|
||||
expect(r.signal).toBeNull();
|
||||
});
|
||||
|
||||
// Stale-lock self-heal: a crashed daemon's lock (dead holder pid) must NOT
|
||||
// wedge syncs forever (observed: dead pid refused --full indefinitely).
|
||||
const DEAD_PID = 2999999; // above macOS pid_max; vanishingly unlikely elsewhere
|
||||
|
||||
test("ignores a STALE lock whose holder pid is dead", () => {
|
||||
const tmp = fs.mkdtempSync(join(os.tmpdir(), "ap-"));
|
||||
const lock = join(tmp, "autopilot.lock");
|
||||
fs.writeFileSync(lock, `${DEAD_PID}\n`);
|
||||
const r = detectAutopilot(process.env, { lockPaths: [lock], processRunning: () => false });
|
||||
expect(r.active).toBe(false);
|
||||
expect(r.signal).toBeNull();
|
||||
});
|
||||
|
||||
test("treats a FRESH lock (live holder pid) as active", () => {
|
||||
const tmp = fs.mkdtempSync(join(os.tmpdir(), "ap-"));
|
||||
const lock = join(tmp, "autopilot.lock");
|
||||
fs.writeFileSync(lock, String(process.pid)); // the test runner itself is alive
|
||||
const r = detectAutopilot(process.env, { lockPaths: [lock], processRunning: () => false });
|
||||
expect(r.active).toBe(true);
|
||||
expect(r.signal).toContain(`pid ${process.pid}`);
|
||||
});
|
||||
|
||||
test("parses a JSON lock body and ignores it when the pid is dead", () => {
|
||||
const tmp = fs.mkdtempSync(join(os.tmpdir(), "ap-"));
|
||||
const lock = join(tmp, "autopilot.lock");
|
||||
fs.writeFileSync(lock, JSON.stringify({ pid: DEAD_PID, started_at: "x" }));
|
||||
const r = detectAutopilot(process.env, { lockPaths: [lock], processRunning: () => false });
|
||||
expect(r.active).toBe(false);
|
||||
});
|
||||
|
||||
test("a stale lock does not mask a live autopilot process", () => {
|
||||
const tmp = fs.mkdtempSync(join(os.tmpdir(), "ap-"));
|
||||
const lock = join(tmp, "autopilot.lock");
|
||||
fs.writeFileSync(lock, `${DEAD_PID}`);
|
||||
const r = detectAutopilot(process.env, { lockPaths: [lock], processRunning: () => true });
|
||||
expect(r.active).toBe(true);
|
||||
expect(r.signal).toBe("process:gbrain autopilot");
|
||||
});
|
||||
});
|
||||
|
||||
// ── #1734 remove safety (E7: fail closed on user-managed without keep-storage) ─
|
||||
|
||||
Reference in New Issue
Block a user