From f8a0dc08884a411ca4a7ef0f311fff5b01c22a47 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Thu, 11 Jun 2026 23:06:33 -0700
Subject: [PATCH] =?UTF-8?q?feat:=20eval:bg*=20scripts=20=E2=80=94=20detach?=
 =?UTF-8?q?ed=20eval=20runs=20for=20agents?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Agent-facing convenience scripts that launch the eval suites through
gstack-detach so a harness SIGTERM can't kill a long run. eval:bg
(diff-based), eval:bg:all, eval:bg:gate, eval:bg:periodic — each returns
immediately and streams to /tmp/gstack-evals.log for polling. The plain
test:evals / test:e2e scripts stay foreground for humans.

Co-Authored-By: Claude Fable 5
---
 package.json | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/package.json b/package.json
index 068a9272c..e4bd636ed 100644
--- a/package.json
+++ b/package.json
@@ -33,6 +33,10 @@
     "skill:check": "bun run scripts/skill-check.ts",
     "dev:skill": "bun run scripts/dev-skill.ts",
     "start": "bun run browse/src/server.ts",
+    "eval:bg": "bin/gstack-detach /tmp/gstack-evals.log -- bun run test:evals",
+    "eval:bg:all": "bin/gstack-detach /tmp/gstack-evals.log -- bun run test:evals:all",
+    "eval:bg:gate": "bin/gstack-detach /tmp/gstack-evals.log -- bun run test:gate",
+    "eval:bg:periodic": "bin/gstack-detach /tmp/gstack-evals.log -- bun run test:periodic",
     "eval:list": "bun run scripts/eval-list.ts",
     "eval:compare": "bun run scripts/eval-compare.ts",
     "eval:summary": "bun run scripts/eval-summary.ts",