feat(design): introduce design daemon — multi-board persistent server

Adds design/src/daemon.ts: a Bun.serve daemon that hosts many boards
under /boards/<id>/ instead of one server per `$D compare --serve` call.
Spawned by daemon-client (next commit); for now wired only via tests.

Endpoint table:
  GET  /health                       liveness + version + counts (unauth)
  GET  /                             index of recent boards
  POST /api/boards                   publish; daemon derives sourceDir
                                     from realpath(html). body sourceDir
                                     IGNORED (Codex trust-boundary fix).
  POST /shutdown                     graceful; refuses if active boards
                                     exist (Codex data-loss fix)
  GET  /boards/<id>                  301 → /boards/<id>/ (trailing slash
                                     is load-bearing — relative URLs in
                                     board JS resolve against pathname)
  GET  /boards/<id>/                 render board HTML
  GET  /boards/<id>/api/progress     state machine status (no idle reset)
  POST /boards/<id>/api/feedback     submit/regen; writes feedback.json
                                     or feedback-pending.json with
                                     boardId + publishedAt augmented in
  POST /boards/<id>/api/reload       swap HTML; per-board allowedDir
                                     guard rejects traversal, directories,
                                     out-of-allowed-dir symlinks

Lifecycle:
- 24h idle timeout (DESIGN_DAEMON_IDLE_MS for tests).
- Idle with active boards extends 1h up to 4x, then force-shuts (Codex).
- LRU cap 50 boards; evicts done before non-done; 503 when 50 non-done.
- Per-board async mutex serializes feedback POST vs reload POST.
- SIGTERM/SIGINT/uncaughtException → graceful shutdown, state file unlink.
- Stdout: DAEMON_STARTED port=<N> (the line the client parses).

Shared utilities live in design/src/daemon-state.ts: atomic state-file
write/read (mode 0o600), fs.openSync('wx') lock, isProcessAlive, cmdline
identity verification (/proc on Linux, ps on macOS), CMDLINE_MARKER
constant. Modeled on browse/src/cli.ts lock + spawn patterns.

design/test/daemon.test.ts: 30 tests, all green. Covers every endpoint,
both error paths and happy paths, cross-board feedback isolation, the
trailing-slash redirect, the directory-not-file reload rejection, LRU
preferring done over non-done, /shutdown refusal with active boards,
all path-traversal guards. Uses the exported fetchHandler in-process
(no spawn) so the suite runs in ~70ms.

design/test/daemon-tests-fixtures.ts: shared helpers — req() builder,
tmp-dir helpers, daemon reset, and a spawnDaemonForTest() helper used
by the next commit's discovery tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-05-25 15:24:05 -07:00
parent 51a8d26be2
commit 14f3ab570c
4 changed files with 1398 additions and 0 deletions
+185
View File
@@ -0,0 +1,185 @@
/**
* Pure utilities for design-daemon discovery.
*
* Shared between daemon.ts (writes/removes the state file) and
* daemon-client.ts (reads state, decides spawn-vs-attach). Mirrors
* browse/src/cli.ts:109-315 — same atomic-write + fs.openSync 'wx' lock
* pattern, with an added cmdline-based identity check to guard against
* SIGTERM hitting a reused PID (Codex finding on the daemon plan).
*/
import { execFileSync } from "child_process";
import fs from "fs";
import os from "os";
import path from "path";
export interface DaemonState {
pid: number;
port: number;
startedAt: string; // ISO 8601
version: string;
serverPath: string;
cmdlineMarker: string;
}
// String we grep for in the spawned daemon's cmdline to confirm a pid is
// ours before sending any signal. Must appear in argv at spawn time.
export const CMDLINE_MARKER = "gstack-design-daemon";
export function resolveStateFilePath(): string {
try {
const root = execFileSync("git", ["rev-parse", "--show-toplevel"], {
encoding: "utf8",
stdio: ["ignore", "pipe", "ignore"],
}).trim();
if (root) return path.join(root, ".gstack", "design.json");
} catch {
// not in a git repo — fall through
}
return path.join(process.cwd(), ".gstack", "design.json");
}
export function resolveLockFilePath(stateFile: string = resolveStateFilePath()): string {
return `${stateFile}.lock`;
}
export function resolveDaemonLogPath(): string {
return path.join(os.homedir(), ".gstack", "design-daemon.log");
}
export function resolveStartupLogPath(): string {
return path.join(os.homedir(), ".gstack", "design-daemon-startup.log");
}
export function readStateFile(stateFile: string = resolveStateFilePath()): DaemonState | null {
try {
return JSON.parse(fs.readFileSync(stateFile, "utf-8")) as DaemonState;
} catch {
return null;
}
}
export function writeStateFile(
state: DaemonState,
stateFile: string = resolveStateFilePath(),
): void {
fs.mkdirSync(path.dirname(stateFile), { recursive: true });
const tmp = `${stateFile}.tmp.${process.pid}.${Math.random().toString(36).slice(2)}`;
fs.writeFileSync(tmp, JSON.stringify(state, null, 2), { mode: 0o600 });
fs.renameSync(tmp, stateFile);
}
export function removeStateFile(stateFile: string = resolveStateFilePath()): void {
try {
fs.unlinkSync(stateFile);
} catch {
// already gone
}
}
export interface HealthOk {
ok: true;
version: string;
uptime: number;
boards: number;
activeBoards: number;
}
export async function healthCheck(
port: number,
timeoutMs: number = 2000,
): Promise<HealthOk | null> {
try {
const resp = await fetch(`http://127.0.0.1:${port}/health`, {
signal: AbortSignal.timeout(timeoutMs),
});
if (!resp.ok) return null;
const body = (await resp.json()) as Partial<HealthOk> | null;
if (body && body.ok === true && typeof body.version === "string") {
return body as HealthOk;
}
return null;
} catch {
return null;
}
}
export function isProcessAlive(pid: number): boolean {
if (!pid || pid <= 0) return false;
try {
process.kill(pid, 0);
return true;
} catch (e: unknown) {
// EPERM means it exists, we just can't signal it. ESRCH means it's gone.
const code = (e as NodeJS.ErrnoException | undefined)?.code;
return code === "EPERM";
}
}
/**
* Read the cmdline of a running process. Returns "" on any error.
* Linux: /proc/<pid>/cmdline (NUL-separated argv). macOS: `ps -p PID -o command=`.
*/
export function readCmdline(pid: number): string {
if (!isProcessAlive(pid)) return "";
try {
if (process.platform === "linux") {
const raw = fs.readFileSync(`/proc/${pid}/cmdline`, "utf-8");
return raw.replace(/\0/g, " ").trim();
}
if (process.platform === "darwin") {
return execFileSync("ps", ["-p", String(pid), "-o", "command="], {
encoding: "utf8",
stdio: ["ignore", "pipe", "ignore"],
}).trim();
}
return "";
} catch {
return "";
}
}
/**
* True only when the process at `pid` has `marker` in its cmdline. Used to
* avoid SIGTERMing an unrelated process that happens to have inherited a
* PID from a stale state file (the Codex PID-reuse concern). On systems
* where readCmdline is unsupported (or fails), this returns false — safer
* to skip the signal than to risk killing the wrong process.
*/
export function verifyIdentity(pid: number, marker: string): boolean {
if (!marker) return false;
return readCmdline(pid).includes(marker);
}
/**
* Acquire an exclusive lock on `lockPath`. Returns a release function, or
* null if held by another live process. Stale locks (PID dead) are reclaimed
* once; if reclaim also fails the caller waits and retries via state re-read.
*/
export function acquireLock(lockPath: string): (() => void) | null {
try {
fs.mkdirSync(path.dirname(lockPath), { recursive: true });
// 'wx' = create exclusive, fail if exists. Atomic check-and-create.
const fd = fs.openSync(lockPath, "wx");
fs.writeSync(fd, `${process.pid}\n`);
fs.closeSync(fd);
return () => {
try {
fs.unlinkSync(lockPath);
} catch {
// already gone
}
};
} catch {
// Held — check if holder is alive
try {
const holderPid = parseInt(fs.readFileSync(lockPath, "utf-8").trim(), 10);
if (holderPid && isProcessAlive(holderPid)) return null;
// Stale, reclaim
fs.unlinkSync(lockPath);
return acquireLock(lockPath);
} catch {
return null;
}
}
}