fix: Windows browse — health-check-first ensureServer, detached startServer, Windows process mgmt

Three compounding bugs made browse completely broken on Windows:

1. Bun.spawn().unref() doesn't truly detach on Windows — server dies when
   CLI exits. Fix: use Node's child_process.spawn with { detached: true }
   via a launcher script. Credit: PR #191 by @fqueiro for the approach.

2. process.kill(pid, 0) is broken in Bun's compiled binary on Windows —
   ensureServer() never reaches the health check. Fix: restructure to
   health-check-first (HTTP is definitive proof on all platforms). Extract
   isServerHealthy() helper for DRY (4 call sites).

3. Windows process management: isProcessAlive() falls back to tasklist,
   killServer() uses taskkill /T /F (kills process tree including Chromium),
   cleanupLegacyState() skips on Windows (no /tmp, no ps).

Also: hard-fail on Windows if server-node.mjs is missing instead of
silently falling back to the known-broken Bun path.

Fixes #342.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-23 11:09:18 -07:00
parent f4bbfaa5bd
commit ee1c0c109e
+105 -39
View File
@@ -76,6 +76,13 @@ export function resolveNodeServerScript(
const NODE_SERVER_SCRIPT = IS_WINDOWS ? resolveNodeServerScript() : null;
// On Windows, hard-fail if server-node.mjs is missing — the Bun path is known broken.
if (IS_WINDOWS && !NODE_SERVER_SCRIPT) {
throw new Error(
'server-node.mjs not found. Run `bun run build` to generate the Windows server bundle.'
);
}
interface ServerState {
pid: number;
port: number;
@@ -96,6 +103,19 @@ function readState(): ServerState | null {
}
function isProcessAlive(pid: number): boolean {
if (IS_WINDOWS) {
// Bun's compiled binary can't signal Windows PIDs (always throws ESRCH).
// Use tasklist as a fallback. Only for one-shot calls — too slow for polling loops.
try {
const result = Bun.spawnSync(
['tasklist', '/FI', `PID eq ${pid}`, '/NH', '/FO', 'CSV'],
{ stdout: 'pipe', stderr: 'pipe', timeout: 3000 }
);
return result.stdout.toString().includes(`"${pid}"`);
} catch {
return false;
}
}
try {
process.kill(pid, 0);
return true;
@@ -104,10 +124,42 @@ function isProcessAlive(pid: number): boolean {
}
}
/**
* HTTP health check — definitive proof the server is alive and responsive.
* Used in all polling loops instead of isProcessAlive() (which is slow on Windows).
*/
export async function isServerHealthy(port: number): Promise<boolean> {
try {
const resp = await fetch(`http://127.0.0.1:${port}/health`, {
signal: AbortSignal.timeout(2000),
});
if (!resp.ok) return false;
const health = await resp.json() as any;
return health.status === 'healthy';
} catch {
return false;
}
}
// ─── Process Management ─────────────────────────────────────────
async function killServer(pid: number): Promise<void> {
if (!isProcessAlive(pid)) return;
if (IS_WINDOWS) {
// taskkill /T /F kills the process tree (Node + Chromium)
try {
Bun.spawnSync(
['taskkill', '/PID', String(pid), '/T', '/F'],
{ stdout: 'pipe', stderr: 'pipe', timeout: 5000 }
);
} catch {}
const deadline = Date.now() + 2000;
while (Date.now() < deadline && isProcessAlive(pid)) {
await Bun.sleep(100);
}
return;
}
try { process.kill(pid, 'SIGTERM'); } catch { return; }
// Wait up to 2s for graceful shutdown
@@ -127,6 +179,10 @@ async function killServer(pid: number): Promise<void> {
* Verifies PID ownership before sending signals.
*/
function cleanupLegacyState(): void {
// No legacy state on Windows — /tmp and `ps` don't exist, and gstack
// never ran on Windows before the Node.js fallback was added.
if (IS_WINDOWS) return;
try {
const files = fs.readdirSync('/tmp').filter(f => f.startsWith('browse-server') && f.endsWith('.json'));
for (const file of files) {
@@ -164,44 +220,65 @@ function cleanupLegacyState(): void {
async function startServer(): Promise<ServerState> {
ensureStateDir(config);
// Clean up stale state file
// Clean up stale state file and error log
try { fs.unlinkSync(config.stateFile); } catch {}
try { fs.unlinkSync(path.join(config.stateDir, 'browse-startup-error.log')); } catch {}
// Start server as detached background process.
// On Windows, Bun can't launch/connect to Playwright's Chromium (oven-sh/bun#4253, #9911).
// Fall back to running the server under Node.js with Bun API polyfills.
const useNode = IS_WINDOWS && NODE_SERVER_SCRIPT;
const serverCmd = useNode
? ['node', NODE_SERVER_SCRIPT]
: ['bun', 'run', SERVER_SCRIPT];
const proc = Bun.spawn(serverCmd, {
stdio: ['ignore', 'pipe', 'pipe'],
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile },
});
let proc: any = null;
// Don't hold the CLI open
proc.unref();
if (IS_WINDOWS && NODE_SERVER_SCRIPT) {
// Windows: Bun.spawn() + proc.unref() doesn't truly detach on Windows —
// when the CLI exits, the server dies with it. Use Node's child_process.spawn
// with { detached: true } instead, which is the gold standard for Windows
// process independence. Credit: PR #191 by @fqueiro.
const launcherCode =
`const{spawn}=require('child_process');` +
`spawn(process.execPath,[${JSON.stringify(NODE_SERVER_SCRIPT)}],` +
`{detached:true,stdio:'ignore',env:Object.assign({},process.env,` +
`{BROWSE_STATE_FILE:${JSON.stringify(config.stateFile)}})}).unref()`;
Bun.spawnSync(['node', '-e', launcherCode], { stdio: 'ignore' });
} else {
// macOS/Linux: Bun.spawn + unref works correctly
proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], {
stdio: ['ignore', 'pipe', 'pipe'],
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile },
});
proc.unref();
}
// Wait for state file to appear
// Wait for server to become healthy.
// Use HTTP health check (not isProcessAlive) — it's fast (~instant ECONNREFUSED)
// and works reliably on all platforms including Windows.
const start = Date.now();
while (Date.now() - start < MAX_START_WAIT) {
const state = readState();
if (state && isProcessAlive(state.pid)) {
if (state && await isServerHealthy(state.port)) {
return state;
}
await Bun.sleep(100);
}
// If we get here, server didn't start in time
// Try to read stderr for error message
const stderr = proc.stderr;
if (stderr) {
const reader = stderr.getReader();
// Server didn't start in time — try to get error details
if (proc?.stderr) {
// macOS/Linux: read stderr from the spawned process
const reader = proc.stderr.getReader();
const { value } = await reader.read();
if (value) {
const errText = new TextDecoder().decode(value);
throw new Error(`Server failed to start:\n${errText}`);
}
} else {
// Windows: check startup error log (server writes errors to disk since
// stderr is unavailable due to stdio: 'ignore' for detachment)
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
try {
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
if (errorLog) {
throw new Error(`Server failed to start:\n${errorLog}`);
}
} catch (e: any) {
if (e.code !== 'ENOENT') throw e;
}
}
throw new Error(`Server failed to start within ${MAX_START_WAIT / 1000}s`);
}
@@ -237,7 +314,10 @@ function acquireServerLock(): (() => void) | null {
async function ensureServer(): Promise<ServerState> {
const state = readState();
if (state && isProcessAlive(state.pid)) {
// Health-check-first: HTTP is definitive proof the server is alive and responsive.
// This replaces the PID-gated approach which breaks on Windows (Bun's process.kill
// always throws ESRCH for Windows PIDs in compiled binaries).
if (state && await isServerHealthy(state.port)) {
// Check for binary version mismatch (auto-restart on update)
const currentVersion = readVersionHash();
if (currentVersion && state.binaryVersion && currentVersion !== state.binaryVersion) {
@@ -245,21 +325,7 @@ async function ensureServer(): Promise<ServerState> {
await killServer(state.pid);
return startServer();
}
// Server appears alive — do a health check
try {
const resp = await fetch(`http://127.0.0.1:${state.port}/health`, {
signal: AbortSignal.timeout(2000),
});
if (resp.ok) {
const health = await resp.json() as any;
if (health.status === 'healthy') {
return state;
}
}
} catch {
// Health check failed — server is dead or unhealthy
}
return state;
}
// Acquire lock to prevent concurrent restart races (TOCTOU)
@@ -270,7 +336,7 @@ async function ensureServer(): Promise<ServerState> {
const start = Date.now();
while (Date.now() - start < MAX_START_WAIT) {
const freshState = readState();
if (freshState && isProcessAlive(freshState.pid)) return freshState;
if (freshState && await isServerHealthy(freshState.port)) return freshState;
await Bun.sleep(200);
}
throw new Error('Timed out waiting for another instance to start the server');
@@ -279,7 +345,7 @@ async function ensureServer(): Promise<ServerState> {
try {
// Re-read state under lock in case another process just started the server
const freshState = readState();
if (freshState && isProcessAlive(freshState.pid)) {
if (freshState && await isServerHealthy(freshState.port)) {
return freshState;
}