mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-04 09:08:09 +02:00
14f3ab570c
Adds design/src/daemon.ts: a Bun.serve daemon that hosts many boards
under /boards/<id>/ instead of one server per `$D compare --serve` call.
Spawned by daemon-client (next commit); for now wired only via tests.
Endpoint table:
  GET  /health                    liveness + version + counts (unauth)
  GET  /                          index of recent boards
  POST /api/boards                publish; daemon derives sourceDir from
                                  realpath(html). Body sourceDir is
                                  IGNORED (Codex trust-boundary fix).
  POST /shutdown                  graceful; refuses if active boards
                                  exist (Codex data-loss fix)
  GET  /boards/<id>               301 → /boards/<id>/ (trailing slash is
                                  load-bearing — relative URLs in board
                                  JS resolve against pathname)
  GET  /boards/<id>/              render board HTML
  GET  /boards/<id>/api/progress  state machine status (no idle reset)
  POST /boards/<id>/api/feedback  submit/regen; writes feedback.json or
                                  feedback-pending.json, augmented with
                                  boardId + publishedAt
  POST /boards/<id>/api/reload    swap HTML; per-board allowedDir guard
                                  rejects traversal, directories, and
                                  out-of-allowed-dir symlinks
Lifecycle:
- 24h idle timeout (DESIGN_DAEMON_IDLE_MS for tests).
- Idle with active boards extends 1h up to 4x, then force-shuts (Codex).
- LRU cap 50 boards; evicts done before non-done; 503 when 50 non-done.
- Per-board async mutex serializes feedback POST vs reload POST.
- SIGTERM/SIGINT/uncaughtException → graceful shutdown, state file unlink.
- Stdout: DAEMON_STARTED port=<N> (the line the client parses).
Shared utilities live in design/src/daemon-state.ts: atomic state-file
write/read (mode 0o600), fs.openSync('wx') lock, isProcessAlive, cmdline
identity verification (/proc on Linux, ps on macOS), CMDLINE_MARKER
constant. Modeled on browse/src/cli.ts lock + spawn patterns.
design/test/daemon.test.ts: 30 tests, all green. Covers every endpoint,
both error paths and happy paths, cross-board feedback isolation, the
trailing-slash redirect, the directory-not-file reload rejection, LRU
preferring done over non-done, /shutdown refusal with active boards,
all path-traversal guards. Uses the exported fetchHandler in-process
(no spawn) so the suite runs in ~70ms.
design/test/daemon-tests-fixtures.ts: shared helpers — req() builder,
tmp-dir helpers, daemon reset, and a spawnDaemonForTest() helper used
by the next commit's discovery tests.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
186 lines
5.4 KiB
TypeScript
186 lines
5.4 KiB
TypeScript
/**
 * Pure utilities for design-daemon discovery.
 *
 * Shared between daemon.ts (writes/removes the state file) and
 * daemon-client.ts (reads state, decides spawn-vs-attach). Mirrors
 * browse/src/cli.ts:109-315 — same atomic-write + fs.openSync 'wx' lock
 * pattern, with an added cmdline-based identity check to guard against
 * SIGTERM hitting a reused PID (Codex finding on the daemon plan).
 */
|
|
|
|
import { execFileSync } from "child_process";
|
|
import fs from "fs";
|
|
import os from "os";
|
|
import path from "path";
|
|
|
|
/**
 * Shape of the JSON document stored in the daemon state file
 * (see `writeStateFile` / `readStateFile`).
 */
export interface DaemonState {
  /** PID of the daemon process that the state file describes. */
  pid: number;
  /** Port recorded for the daemon. NOTE(review): presumably the port passed to `healthCheck` — confirm in the client. */
  port: number;
  /** Start timestamp as an ISO 8601 string. */
  startedAt: string;
  /** Version string recorded by the daemon. */
  version: string;
  /** NOTE(review): presumably the path of the daemon entry script — confirm against the writer. */
  serverPath: string;
  /** Marker expected in the daemon's command line (see `CMDLINE_MARKER` and `verifyIdentity`). */
  cmdlineMarker: string;
}
|
|
|
|
/**
 * Substring searched for in a process's command line to confirm that a pid
 * is our daemon before any signal is sent to it. It must be present in the
 * daemon's argv at spawn time, otherwise the identity check cannot succeed.
 */
export const CMDLINE_MARKER = "gstack-design-daemon";
|
|
|
|
/**
 * Locate the daemon state file for the current project.
 *
 * Resolves to `<git toplevel>/.gstack/design.json` when
 * `git rev-parse --show-toplevel` succeeds and prints a non-empty path,
 * and to `<cwd>/.gstack/design.json` otherwise.
 *
 * @returns Path of the state file. Nothing is created on disk.
 */
export function resolveStateFilePath(): string {
  let projectRoot = "";
  try {
    projectRoot = execFileSync("git", ["rev-parse", "--show-toplevel"], {
      encoding: "utf8",
      // stdin closed, stdout captured, stderr discarded.
      stdio: ["ignore", "pipe", "ignore"],
    }).trim();
  } catch {
    // git failed (e.g. not in a git repo) — fall back to cwd below
  }
  return path.join(projectRoot || process.cwd(), ".gstack", "design.json");
}
|
|
|
|
/**
 * Path of the lock file that accompanies a state file: the state file path
 * with `.lock` appended.
 *
 * @param stateFile State file path; defaults to `resolveStateFilePath()`.
 * @returns `stateFile` with the `.lock` suffix.
 */
export function resolveLockFilePath(stateFile: string = resolveStateFilePath()): string {
  return stateFile + ".lock";
}
|
|
|
|
/**
 * Path of the daemon log file: `~/.gstack/design-daemon.log`, resolved
 * against the current user's home directory.
 *
 * @returns The log file path. Nothing is created on disk.
 */
export function resolveDaemonLogPath(): string {
  const gstackHome = path.join(os.homedir(), ".gstack");
  return path.join(gstackHome, "design-daemon.log");
}
|
|
|
|
/**
 * Path of the daemon startup log file: `~/.gstack/design-daemon-startup.log`,
 * resolved against the current user's home directory.
 *
 * @returns The startup log file path. Nothing is created on disk.
 */
export function resolveStartupLogPath(): string {
  const gstackHome = path.join(os.homedir(), ".gstack");
  return path.join(gstackHome, "design-daemon-startup.log");
}
|
|
|
|
/**
 * Read and parse the daemon state file.
 *
 * @param stateFile Path to read; defaults to `resolveStateFilePath()`.
 * @returns The parsed state, or `null` when the file is missing, unreadable,
 *   not valid JSON, or does not look like a state record (it must be a JSON
 *   object with numeric `pid` and `port`). Never throws.
 */
export function readStateFile(stateFile: string = resolveStateFilePath()): DaemonState | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
  } catch {
    // missing, unreadable, or truncated/corrupt JSON
    return null;
  }

  // JSON.parse happily returns strings, numbers, arrays, or `{}`; none of
  // those can identify a daemon, so treat them like an absent state file
  // rather than handing callers an object whose pid/port are undefined.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  const candidate = parsed as Partial<DaemonState>;
  if (typeof candidate.pid !== "number" || typeof candidate.port !== "number") {
    return null;
  }
  return candidate as DaemonState;
}
|
|
|
|
/**
 * Write the daemon state file.
 *
 * The JSON is written to a uniquely named sibling temp file (created with
 * mode 0o600) and then renamed over `stateFile`. The parent directory is
 * created if needed.
 *
 * @param state State record to serialize (pretty-printed, 2-space indent).
 * @param stateFile Destination path; defaults to `resolveStateFilePath()`.
 * @throws Whatever the underlying `fs` call threw. The temp file is removed
 *   before the error propagates, so failed writes leave no `.tmp.*` files.
 */
export function writeStateFile(
  state: DaemonState,
  stateFile: string = resolveStateFilePath(),
): void {
  fs.mkdirSync(path.dirname(stateFile), { recursive: true });

  // pid + random suffix keeps concurrent writers from sharing a temp file.
  const tmp = `${stateFile}.tmp.${process.pid}.${Math.random().toString(36).slice(2)}`;
  try {
    fs.writeFileSync(tmp, JSON.stringify(state, null, 2), { mode: 0o600 });
    fs.renameSync(tmp, stateFile);
  } catch (err: unknown) {
    try {
      fs.unlinkSync(tmp);
    } catch {
      // temp file was never created, or is already gone
    }
    throw err;
  }
}
|
|
|
|
/**
 * Delete the daemon state file, best-effort.
 *
 * Every error from the unlink is swallowed — most commonly the file is
 * already gone — so this function never throws.
 *
 * @param stateFile Path to delete; defaults to `resolveStateFilePath()`.
 */
export function removeStateFile(stateFile: string = resolveStateFilePath()): void {
  try {
    fs.unlinkSync(stateFile);
  } catch {
    // already gone (or otherwise not removable) — deliberately ignored
  }
}
|
|
|
|
/**
 * Body shape that `healthCheck` returns for a healthy daemon.
 * Note that `healthCheck` only verifies `ok` and `version` at runtime.
 */
export interface HealthOk {
  /** Always `true` on a healthy response. */
  ok: true;
  /** Version string reported by the daemon. */
  version: string;
  /** Daemon uptime. NOTE(review): unit is not visible in this file — confirm against the /health handler. */
  uptime: number;
  /** NOTE(review): presumably the number of boards currently hosted — confirm. */
  boards: number;
  /** NOTE(review): presumably the number of boards that are still active — confirm. */
  activeBoards: number;
}
|
|
|
|
/**
 * Probe `http://127.0.0.1:<port>/health`.
 *
 * @param port Port to probe on the loopback interface.
 * @param timeoutMs Abort the request after this many milliseconds (default 2000).
 * @returns The parsed body when the response status is OK and the JSON body
 *   has `ok === true` and a string `version`; `null` in every other case
 *   (connection failure, timeout, non-OK status, invalid JSON, unexpected
 *   shape). Never rejects.
 */
export async function healthCheck(
  port: number,
  timeoutMs: number = 2000,
): Promise<HealthOk | null> {
  try {
    const resp = await fetch(`http://127.0.0.1:${port}/health`, {
      signal: AbortSignal.timeout(timeoutMs),
    });
    if (!resp.ok) return null;

    const body = (await resp.json()) as Partial<HealthOk> | null;
    // Only `ok` and `version` are checked; the remaining HealthOk fields are
    // taken on trust from the daemon.
    const looksHealthy = Boolean(body) && body?.ok === true && typeof body.version === "string";
    return looksHealthy ? (body as HealthOk) : null;
  } catch {
    // network error, abort/timeout, or unparsable body
    return null;
  }
}
|
|
|
|
/**
 * Report whether a process with the given pid currently exists.
 *
 * Sends signal 0 via `process.kill`, which performs the existence and
 * permission check without delivering a signal.
 *
 * @param pid Process id to probe. Falsy or non-positive values yield `false`.
 * @returns `true` if the probe succeeds or fails with EPERM (the process
 *   exists but we may not signal it); `false` for any other failure.
 */
export function isProcessAlive(pid: number): boolean {
  if (!pid || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (e: unknown) {
    // EPERM means it exists, we just can't signal it. ESRCH means it's gone.
    const errno = (e as NodeJS.ErrnoException | undefined)?.code;
    return errno === "EPERM";
  }
}
|
|
|
|
/**
 * Read the command line of a running process.
 *
 * Linux: reads `/proc/<pid>/cmdline` (NUL-separated argv) and joins the
 * arguments with spaces. macOS: runs `ps -p PID -o command=`.
 *
 * @param pid Process id to inspect.
 * @returns The trimmed command line, or `""` when the process is not alive,
 *   the platform is neither Linux nor macOS, or the lookup fails. Never throws.
 */
export function readCmdline(pid: number): string {
  if (!isProcessAlive(pid)) return "";
  try {
    switch (process.platform) {
      case "linux": {
        const raw = fs.readFileSync(`/proc/${pid}/cmdline`, "utf-8");
        return raw.replace(/\0/g, " ").trim();
      }
      case "darwin":
        return execFileSync("ps", ["-p", String(pid), "-o", "command="], {
          encoding: "utf8",
          stdio: ["ignore", "pipe", "ignore"],
        }).trim();
      default:
        // unsupported platform
        return "";
    }
  } catch {
    // process exited in between, /proc unreadable, or ps failed
    return "";
  }
}
|
|
|
|
/**
 * True only when the process at `pid` has `marker` in its command line.
 *
 * Used to avoid SIGTERMing an unrelated process that happens to have
 * inherited a PID from a stale state file (the Codex PID-reuse concern).
 * Where `readCmdline` is unsupported or fails it yields `""`, so this
 * returns false — safer to skip the signal than to risk killing the wrong
 * process.
 *
 * @param pid Process id to check.
 * @param marker Substring to look for; an empty marker never matches.
 * @returns Whether the process's command line contains `marker`.
 */
export function verifyIdentity(pid: number, marker: string): boolean {
  // Guard first: "".includes("") is true, which would "verify" any pid.
  if (!marker) return false;
  const cmdline = readCmdline(pid);
  return cmdline.includes(marker);
}
|
|
|
|
/**
 * Acquire an exclusive lock on `lockPath`.
 *
 * The lock is a file created with the `wx` flag (create exclusive, fail if
 * it exists) that contains the holder's pid followed by a newline. If the
 * file already exists and its pid belongs to a live process, the lock is
 * held. Otherwise — holder dead, or contents not a pid — the lock is stale:
 * it is removed and acquisition is retried exactly once.
 *
 * @param lockPath Path of the lock file; its parent directory is created if
 *   missing.
 * @returns A release function that deletes the lock file (safe to call more
 *   than once), or `null` if the lock is held by another live process or
 *   could not be acquired. Never throws.
 */
export function acquireLock(lockPath: string): (() => void) | null {
  const release = (): void => {
    try {
      fs.unlinkSync(lockPath);
    } catch {
      // already gone
    }
  };

  // Attempt 0 is the normal acquire; attempt 1 is the single retry allowed
  // after reclaiming a stale lock. Bounding this keeps a persistently failing
  // write (e.g. disk full) from retrying without end.
  for (let attempt = 0; attempt < 2; attempt++) {
    try {
      fs.mkdirSync(path.dirname(lockPath), { recursive: true });
      // 'wx' = create exclusive, fail if exists. Atomic check-and-create.
      const fd = fs.openSync(lockPath, "wx");
      try {
        fs.writeSync(fd, `${process.pid}\n`);
      } finally {
        // Always close, even when the write throws, so the fd is not leaked.
        fs.closeSync(fd);
      }
      return release;
    } catch {
      // Held (or creation failed) — check if the recorded holder is alive.
      try {
        const holderPid = parseInt(fs.readFileSync(lockPath, "utf-8").trim(), 10);
        if (holderPid && isProcessAlive(holderPid)) return null;
        // Stale: remove it and let the loop retry once.
        fs.unlinkSync(lockPath);
      } catch {
        return null;
      }
    }
  }
  return null;
}
|