diff --git a/scripts/eval-watch.ts b/scripts/eval-watch.ts
index 117d2bdb..899ec906 100644
--- a/scripts/eval-watch.ts
+++ b/scripts/eval-watch.ts
@@ -19,6 +19,7 @@ const STALE_THRESHOLD_SEC = 600; // 10 minutes
 
 export interface HeartbeatData {
   runId: string;
+  pid?: number;
   startedAt: string;
   currentTest: string;
   status: string;
@@ -51,6 +52,24 @@ function readJSON(filePath: string): T | null {
   }
 }
 
+/**
+ * Check if a process is alive (signal 0 = existence check, doesn't kill).
+ *
+ * @param pid - Process ID to probe.
+ * @returns true if the process exists, including when it exists but we lack
+ *   permission to signal it (EPERM); false otherwise.
+ */
+function isProcessAlive(pid: number): boolean {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (err) {
+    // EPERM means the PID exists but belongs to someone we can't signal, so it
+    // is still running; any other failure (e.g. ESRCH) is treated as dead.
+    return typeof err === 'object' && err !== null && (err as { code?: unknown }).code === 'EPERM';
+  }
+}
+
 /** Format seconds as Xm Ys */
 function formatDuration(sec: number): string {
   if (sec < 60) return `${sec}s`;
@@ -127,9 +146,19 @@ if (import.meta.main) {
   const showTail = process.argv.includes('--tail');
 
   const render = () => {
-    const heartbeat = readJSON(HEARTBEAT_PATH);
+    let heartbeat = readJSON(HEARTBEAT_PATH);
     const partial = readJSON(PARTIAL_PATH);
 
+    // Auto-clear heartbeat if the process is dead. The notice is held back
+    // until after the screen clear below; written earlier it would be erased.
+    let staleNotice = '';
+    if (heartbeat?.pid && !isProcessAlive(heartbeat.pid)) {
+      try { fs.unlinkSync(HEARTBEAT_PATH); } catch { /* already gone */ }
+      staleNotice = `Cleared stale heartbeat — PID ${heartbeat.pid} is no longer running.\n\n`;
+      heartbeat = null;
+    }
+
     // Clear screen
     process.stdout.write('\x1B[2J\x1B[H');
+    if (staleNotice) process.stdout.write(staleNotice);
     process.stdout.write(renderDashboard(heartbeat, partial) + '\n');
diff --git a/test/helpers/session-runner.ts b/test/helpers/session-runner.ts
index 17ed772c..6654df5f 100644
--- a/test/helpers/session-runner.ts
+++ b/test/helpers/session-runner.ts
@@ -216,6 +216,7 @@ export async function runSkillTest(options: {
             const toolDesc = `${item.name}(${truncate(JSON.stringify(item.input || {}), 60)})`;
             atomicWriteSync(HEARTBEAT_PATH, JSON.stringify({
               runId,
+              pid: proc.pid,
               startedAt,
               currentTest: testName,
               status: 'running',