diff --git a/bin/gstack-detach b/bin/gstack-detach
new file mode 100755
index 000000000..e2976f55e
--- /dev/null
+++ b/bin/gstack-detach
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# gstack-detach — run a long-running command in its OWN session (a fresh process
+# group with no controlling terminal) so a SIGTERM aimed at the launching shell's
+# process group can't reach it.
+#
+# Why this exists: when an AGENT/harness launches a 30-60 min eval as a background
+# task, the harness sends SIGTERM ("polite quit") to that task's process group on
+# turn boundaries, monitor stops, or interruptions — killing the run mid-flight
+# (observed: `script "test:gate" was terminated by signal SIGTERM`). Detaching into
+# a new session escapes that group signal. Humans running evals foreground in their
+# own terminal don't need this (Ctrl-C is intended); this is for agent-run jobs.
+#
+# Usage:   gstack-detach <logfile> -- <command> [args...]
+#          (the `--` is optional but recommended for clarity)
+# Output:  prints `PID <pid>  LOG <logfile>` and returns immediately. Poll the logfile;
+#          the command keeps running independently of this shell.
+# Secrets: inherited from the environment ONLY. NEVER pass an API key in argv
+#          (it would show in `ps`). Export it before calling gstack-detach.
+set -euo pipefail
+
+LOG="${1:?usage: gstack-detach <logfile> -- <command> [args...]}"; shift
+[ "${1:-}" = "--" ] && shift
+[ "$#" -ge 1 ] || { echo "gstack-detach: no command given" >&2; exit 2; }
+mkdir -p "$(dirname "$LOG")" 2>/dev/null || true
+
+# Preferred path: python3 starts the command in a new session (portable; macOS has
+# no setsid binary) and, on macOS, wraps it in `caffeinate -i` so idle-sleep can't
+# kill a long run — a second silent killer for 30-60 min jobs.
+#
+# The session is created in the forked child (start_new_session=True), NOT by
+# calling os.setsid() in the launcher: this script `exec`s python3, so python3
+# keeps our PID, and setsid(2) fails with EPERM when the caller is already a
+# process-group leader — which is exactly what we are when a harness or a
+# job-control shell gave this task its own process group.
+if command -v python3 >/dev/null 2>&1; then
+  GSTACK_DETACH_LOG="$LOG" exec python3 - "$@" <<'PY'
+import os, sys, shutil, subprocess
+log = os.environ["GSTACK_DETACH_LOG"]
+cmd = sys.argv[1:]
+if shutil.which("caffeinate"):  # macOS: block idle-sleep for the run
+    cmd = ["caffeinate", "-i", *cmd]
+f = open(log, "ab", buffering=0)
+p = subprocess.Popen(
+    cmd,
+    stdout=f,
+    stderr=subprocess.STDOUT,
+    stdin=subprocess.DEVNULL,
+    start_new_session=True,  # setsid() in the child: new session + process group
+)
+print(f"PID {p.pid}  LOG {log}")
+PY
+fi
+
+# Linux without python3: real setsid.
+if command -v setsid >/dev/null 2>&1; then
+  setsid sh -c 'exec "$@" >>"$0" 2>&1' "$LOG" "$@" &
+  echo "PID $!  LOG $LOG"; disown 2>/dev/null || true; exit 0
+fi
+
+# Last resort: nohup detaches from SIGHUP (not a group SIGTERM, but better than
+# nothing on a minimal box).
+nohup sh -c 'exec "$@" >>"$0" 2>&1' "$LOG" "$@" >/dev/null 2>&1 &
+echo "PID $!  LOG $LOG"; disown 2>/dev/null || true
diff --git a/test/gstack-detach.test.ts b/test/gstack-detach.test.ts
new file mode 100644
index 000000000..7b1ac137b
--- /dev/null
+++ b/test/gstack-detach.test.ts
@@ -0,0 +1,70 @@
+/**
+ * gstack-detach — the SIGTERM-survival guard.
+ *
+ * Proves the wrapper runs its command in a DIFFERENT process group than the
+ * caller (so a group SIGTERM from the harness can't reach it) and that the
+ * command outlives the launching shell (returns immediately, completes later).
+ * This is the regression guard that keeps the eval-killer dead.
+ */
+import { describe, test, expect } from 'bun:test';
+import { spawnSync } from 'child_process';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+const DETACH = path.join(ROOT, 'bin', 'gstack-detach');
+
+function ownPgid(): string {
+  const r = spawnSync('ps', ['-o', 'pgid=', '-p', String(process.pid)], { encoding: 'utf-8' });
+  return (r.stdout || '').trim();
+}
+
+describe('gstack-detach', () => {
+  test('returns immediately and the command keeps running detached', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-detach-'));
+    const log = path.join(dir, 'run.log');
+    const marker = path.join(dir, 'marker');
+    const pgidFile = path.join(dir, 'child.pgid');
+    try {
+      const started = Date.now();
+      // Child records its own pgid, sleeps past the launcher's return, then writes
+      // a marker — proving it ran to completion independently of this shell.
+      const cmd = `ps -o pgid= -p $$ | tr -d ' ' > '${pgidFile}'; sleep 2; echo ok > '${marker}'`;
+      const r = spawnSync(DETACH, [log, '--', 'bash', '-c', cmd], { encoding: 'utf-8', timeout: 10000 });
+      const elapsed = Date.now() - started;
+
+      expect(r.status).toBe(0);
+      expect(r.stdout).toMatch(/PID \d+ {2}LOG /);
+      // Non-blocking: the launcher returns well before the child's 2s sleep ends.
+      expect(elapsed).toBeLessThan(1500);
+
+      // Poll for the marker — the detached child finishes after the launcher exited.
+      let survived = false;
+      const deadline = Date.now() + 6000;
+      while (Date.now() < deadline) {
+        if (fs.existsSync(marker)) { survived = true; break; }
+        spawnSync('sleep', ['0.2']);
+      }
+      expect(survived).toBe(true);
+
+      // Detached: the child's process group differs from ours, so a group SIGTERM
+      // aimed at this process can't reach it.
+      const childPgid = fs.readFileSync(pgidFile, 'utf-8').trim();
+      expect(childPgid).not.toBe('');
+      expect(childPgid).not.toBe(ownPgid());
+    } finally {
+      fs.rmSync(dir, { recursive: true, force: true });
+    }
+  }, 15000);
+
+  test('rejects missing command (exit 2)', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-detach-'));
+    try {
+      const r = spawnSync(DETACH, [path.join(dir, 'x.log')], { encoding: 'utf-8' });
+      expect(r.status).toBe(2);
+    } finally {
+      fs.rmSync(dir, { recursive: true, force: true });
+    }
+  });
+});