Merge remote-tracking branch 'origin/main' into garrytan/codex-review-default-on

2026-06-17 07:10:12 +02:00 · 2026-06-09 22:29:49 -07:00
parent dd5cf5b25c 8241949357
commit 06cbb480ea
27 changed files with 1141 additions and 47 deletions
@@ -7,6 +7,7 @@ make-pdf/dist/
 bin/gstack-global-discover*
 .gstack/
 .claude/skills/
+.claude/gstack-rendered/
 .claude/scheduled_tasks.lock
 .claude/*.lock
 .agents/
@@ -212,8 +212,8 @@ from `snapshot`, or `@c` refs from `snapshot -C`. Full table:

 | Command | Description |
 |---------|-------------|
-| `js <expr>` | Run inline JavaScript expression in page context, return as string |
-| `eval <file>` | Run JS from a file (path under /tmp or cwd; same sandbox as `js`) |
+| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in page context, return as string. With `--out <file>` the result is written to disk instead of returned (a `data:*;base64,...` result is decoded to raw bytes unless `--raw`). `--out` makes the invocation a WRITE (needs `write` scope, never allowed over the tunnel). |
+| `eval <file> [--out <file>] [--raw]` | Run JS from a file (path under /tmp or cwd; same sandbox as `js`). `--out`/`--raw` behave as for `js`. |
 | `css <sel> <prop>` | Computed CSS value |
 | `attrs <sel\|@ref>` | Element attributes as JSON |
 | `is <prop> <sel\|@ref>` | State check: visible, hidden, enabled, disabled, checked, editable, focused |
@@ -1,5 +1,143 @@
 # Changelog

+## [1.57.9.0] - 2026-06-09
+
+## **Your gstack checkout stays clean when gbrain is installed.**
+## **Brain-aware skill blocks render to an untracked spot, never into tracked source.**
+
+Before this, finishing a Conductor or dev-workspace setup with gbrain installed
+rewrote 16 planning and review SKILL.md files in place, adding 326 lines of
+brain-aware blocks straight into tracked source. Your working tree came back dirty,
+one stray `git add` away from committing a token regression for everyone who does
+not run gbrain. Now `gen-skill-docs --out-dir` renders the brain-aware variant into
+an untracked per-workspace directory, and `bin/dev-setup` repoints the workspace's
+skill symlinks at it. The dev workspace gets the full gbrain experience (context-load
+and save-to-brain blocks live at runtime), while the tracked SKILL.md files stay
+byte-for-byte canonical. To turn the blocks on across all your projects' Claude
+sessions, `gstack-config gbrain-refresh` now renders them into your global install,
+guarded so it never mutates a symlinked or non-gstack directory.
+
+### The numbers that matter
+
+Structural facts of the change, verifiable from the diff plus `bun run gen:skill-docs`
+(zero drift) and the new behavioral test (`test/gen-skill-docs-out-dir.test.ts`).
+
+| When gbrain is installed | Before | After |
+|---|---|---|
+| Tracked SKILL.md files dirtied by dev-setup | 16 (+326 lines) | 0 |
+| Where brain-aware blocks render in a dev workspace | in-place, tracked source | `.claude/gstack-rendered/`, untracked |
+| Brain-aware blocks across other projects | re-run `./setup` or hand-edit | `gstack-config gbrain-refresh` (idempotent) |
+| "Is gbrain usable" check | per-caller JSON grep, can read stale state | `gstack-gbrain-detect --is-ok` (one live gate) |
+
+The section-path rewrite is surgical: only `~/.claude/skills/gstack/<skill>/sections/`
+references move to the render dir, so `bin/` and `docs/` references still resolve to
+the install.
+
+### What this means for you
+
+If you develop gstack with gbrain on, `git status` is clean again after setup, and
+you can stop fishing brain-block drift out of your commits. After a
+`git reset --hard` deploy of your install, re-run `gstack-config gbrain-refresh` to
+restore the machine-wide blocks (it is idempotent, and the deploy note in CLAUDE.md
+spells this out).
+
+### Itemized changes
+
+#### Added
+- `gen-skill-docs --out-dir <dir>`: render the Claude SKILL.md + sections into a
+  separate directory instead of in place, rewriting only the section-base path so
+  section reads resolve to the render. Default (no flag) output is unchanged.
+- `gstack-gbrain-detect --is-ok`: live-detection exit-code gate (0 iff gbrain is
+  usable), so setup, dev-setup, and gstack-config share one check.
+- `gstack-config gbrain-refresh` now renders brain-aware blocks into the global
+  install (`~/.claude/skills/gstack`), guarded against symlinked or non-gstack
+  targets and self-documenting about the `reset --hard` re-run cycle.
+
+#### Changed
+- `bin/dev-setup` renders the brain-aware variant into `.claude/gstack-rendered`
+  (gitignored) and repoints workspace skill symlinks at it; the worktree stays
+  canonical. `GSTACK_SKIP_GBRAIN_REGEN` is passed inline to the nested setup, never
+  exported.
+- `setup` honors `GSTACK_SKIP_GBRAIN_REGEN` (skips the in-place brain regen on dev
+  trees) and writes detection state to a PID-unique tmp so concurrent workspaces
+  cannot clobber it.
+- `scripts/dev-skill.ts` refreshes the workspace render on template change, only
+  when the render dir already exists.
+- `bin/dev-teardown` removes the untracked render.
+
+#### For contributors
+- New tests: `test/gen-skill-docs-out-dir.test.ts` (behavioral: worktree unchanged,
+  blocks rendered, section paths rewritten), `test/dev-setup-render-isolation.test.ts`
+  and `test/gbrain-refresh-install-render.test.ts` (static tripwires), plus
+  `--is-ok` coverage in `test/gbrain-detect-shape.test.ts`.
+
+## [1.57.8.0] - 2026-06-09
+
+## **`browse` is now the one Chromium on the box, for offline rendering too.**
+## **`js`/`eval --out <file>` writes a render straight to disk, so skills stop bundling their own puppeteer.**
+
+You can now turn your own local HTML or JSON into a PNG (or any bytes) on disk
+through the same headless `browse` Chromium you already run, with no second
+browser install. `js "<expr>" --out out.png` and `eval script.js --out out.png`
+write the evaluate result to a file instead of returning it. When the result is a
+base64 data URL (the shape Excalidraw exports, og-image generators, and card
+renderers hand back), `--out` decodes it to raw bytes for you; pass `--raw` to
+write the literal string. Malformed base64 errors loudly instead of writing a
+corrupt file, and missing parent directories are created. This closes the gap that
+made local-render skills each `npm i puppeteer` and download a drifting second
+Chromium.
+
+### The numbers that matter
+
+No synthetic benchmark — these are structural facts of the change, verifiable from
+the diff and a one-line smoke (`browse load-html` → `screenshot --selector` /
+`js --out`).
+
+| For a skill that rasterizes local HTML/JSON | Before | After |
+|---|---|---|
+| Chromium installs per box | 2+ (browse + each skill's own puppeteer) | 1 (shared `browse`) |
+| Getting a PNG from a render function | `evaluate` → multi-MB data URL over the CLI channel → hand-decode base64 → write | `js --out` decodes and writes server-side; only a short status crosses the channel |
+| Render-to-file primitive | none | `js`/`eval --out [--raw]` |
+
+The blessed offline path is documented in the browse skill: visual output goes
+through `screenshot --selector` (the picture never crosses the CDP wire), and bytes
+a function returns go through `js --out`.
+
+### What this means for you
+
+If you write skills that draw diagrams, cards, or og-images, point them at `browse`
+and delete the bundled Chromium. One version to pin, one daemon to manage. `--out`
+is treated as a write everywhere it matters: it needs the `write` scope, is blocked
+over the pair-agent tunnel, and is gated in watch mode, so a remote agent can never
+use it to write to your disk.
+
+### Itemized changes
+
+#### Added
+- **`js` / `eval --out <file>` render-to-file** (`browse/src/read-commands.ts`).
+  Writes the evaluate result to disk and returns a short `... result written: <path>
+  (<N> bytes)` status. A `data:<type>;base64,...` result is decoded to raw bytes
+  (case-insensitive header parse, split on the first comma, base64-charset validated
+  before decode); `--raw` forces a literal write. Parent directories are created.
+- **`--raw` flag** to bypass data-URL decoding and write the literal result string.
+- **Offline render mode docs** in the browse skill: an explicit headless, no-proxy,
+  no-Xvfb path with a worked example showing visual (`screenshot --selector`) vs
+  bytes (`js --out`), a puppeteer→browse cheatsheet row, and a "don't bundle your
+  own Chromium" note (also in CONTRIBUTING.md).
+
+#### Changed
+- **`--out` is a per-invocation WRITE capability** (`browse/src/server.ts`).
+  `js`/`eval` stay read commands, but an `--out` invocation requires the `write`
+  scope, is never dispatchable over the tunnel surface (`canDispatchOverTunnel` now
+  consults args), and counts as a mutation for watch-mode and tab-ownership gates.
+
+#### For contributors
+- New tests: `parseOutArgs`/`hasOutArg` unit coverage (`--out`/`--out=`, `--raw`,
+  repeats, missing value, ordering), `--out` render-to-file integration (large
+  string, data-URL→PNG, `--raw`, malformed-base64, outside-safe-dir, mkdir, eval
+  parity, byte-for-byte null/undefined), and tunnel-gate guards proving `--out`
+  is never tunnel-dispatchable.
+
 ## [1.57.7.0] - 2026-06-08

 ## **Every plan review now ends by telling you, in one line, whether anything is still unresolved.**
@@ -883,6 +883,12 @@ The active skill lives at `~/.claude/skills/gstack/`. After making changes:
 2. Fetch and reset in the skill directory: `cd ~/.claude/skills/gstack && git fetch origin && git reset --hard origin/main`
 3. Rebuild: `cd ~/.claude/skills/gstack && bun run build`

+**If you use gbrain:** the `git reset --hard` in step 2 reverts the brain-aware
+(`GBRAIN_CONTEXT_LOAD` / `GBRAIN_SAVE_RESULTS`) blocks that `gstack-config
+gbrain-refresh` renders into the install (those generated blocks differ from
+`main` by design). After deploying, re-run `gstack-config gbrain-refresh` to
+restore them across all your projects' Claude sessions. It's idempotent.
+
 Or copy the binaries directly:
 - `cp browse/dist/browse ~/.claude/skills/gstack/browse/dist/browse`
 - `cp design/dist/design ~/.claude/skills/gstack/design/dist/design`
@@ -106,6 +106,22 @@ bun run build
 bin/dev-teardown
 ```

+### Brain-aware blocks in a dev workspace (gbrain installed)
+
+If gbrain is installed and usable (`bin/gstack-gbrain-detect --is-ok` exits 0),
+`bin/dev-setup` keeps your tracked `SKILL.md` files canonical and renders the
+brain-aware variant (the `GBRAIN_CONTEXT_LOAD` / `GBRAIN_SAVE_RESULTS` blocks)
+into `.claude/gstack-rendered/` (gitignored, per-workspace). It then repoints the
+workspace's `SKILL.md` symlinks at that render, so your Claude sessions get the
+full gbrain experience while `git status` stays clean. Under the hood, dev-setup
+passes `GSTACK_SKIP_GBRAIN_REGEN=1` inline to the nested `./setup` (so it never
+dirties tracked source) and runs `gen:skill-docs:user --out-dir .claude/gstack-rendered`,
+which rewrites only the section-base paths to point at the render. `bin/dev-teardown`
+removes the render. To make the blocks live across your *other* projects' Claude
+sessions, run `gstack-config gbrain-refresh`, which renders them into the global
+install (`~/.claude/skills/gstack`), guarded so it never touches a symlinked or
+non-gstack directory.
+
 ## Testing & evals

 ### Setup
@@ -232,6 +248,14 @@ For template authoring best practices (natural language over bash-isms, dynamic

 To add a browse command, add it to `browse/src/commands.ts`. To add a snapshot flag, add it to `SNAPSHOT_FLAGS` in `browse/src/snapshot.ts`. Then rebuild.

+**Don't bundle puppeteer/Chromium in a skill.** `browse` is the one shared
+Chromium per box, including offline local-render workloads. A skill that needs to
+rasterize its own HTML/JSON (diagrams, cards, og-images) should route through
+`browse` — `screenshot --selector` for visual output, `load-html` + `js --out` for
+bytes a render function returns — instead of `npm i puppeteer` and downloading a
+second Chromium that drifts out of version sync. One install to pin, one daemon to
+manage.
+
 ## Jargon list (V1 writing style)

 gstack's Writing Style section (injected into every tier-≥2 skill's preamble)
@@ -326,8 +350,8 @@ If you're using [Conductor](https://conductor.build) to run multiple Claude Code

 | Hook | Script | What it does |
 |------|--------|-------------|
-| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills, runs `./setup` non-interactively |
-| `archive` | `bin/dev-teardown` | Removes skill symlinks, cleans up `.claude/` directory |
+| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills, runs `./setup` non-interactively, and (if gbrain is installed) renders brain-aware blocks into `.claude/gstack-rendered/` without dirtying tracked source |
+| `archive` | `bin/dev-teardown` | Removes skill symlinks and the `.claude/gstack-rendered/` render, then cleans up the `.claude/` directory |

 When Conductor creates a new workspace, `bin/dev-setup` runs automatically. It detects the main worktree (via `git worktree list`), copies your `.env` so API keys carry over, and sets up dev mode — no manual steps needed.

@@ -917,10 +917,10 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
 | `cookies` | All cookies as JSON |
 | `css <sel> <prop>` | Computed CSS value |
 | `dialog [--clear]` | Dialog messages |
-| `eval <file>` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. |
+| `eval <file> [--out <file>] [--raw]` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
 | `inspect [selector] [--all] [--history]` | Deep CSS inspection via CDP — full rule cascade, box model, computed styles |
 | `is <prop> <sel|@ref>` | State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected. |
-| `js <expr>` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. |
+| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
 | `network [--clear]` | Network requests |
 | `perf` | Page load timings |
 | `storage  |  storage set <key> <value>` | Read both localStorage and sessionStorage as JSON. With "set <key> <value>", write to localStorage only (sessionStorage is read-only via this command — set it with `js sessionStorage.setItem(...)`). |
@@ -1 +1 @@
-1.57.7.0
+1.57.9.0
@@ -72,7 +72,48 @@ fi
 # no-op skip (no install, no decline marker). A dev workspace must never mutate
 # global settings.json. To install the hooks, run `./setup --plan-tune-hooks`
 # directly (outside dev-setup). Saved prefix/other config preferences still apply.
-"$GSTACK_LINK/setup" --plan-tune-hooks=prompt </dev/null
+#
+# GSTACK_SKIP_GBRAIN_REGEN=1 is passed INLINE (not exported) so it scopes to
+# exactly this nested setup call and can't leak into any other setup path. It
+# tells setup NOT to regenerate the gbrain :user variant into the tracked
+# worktree (that would dirty checked-in source). We render it into an untracked
+# per-workspace dir below instead.
+GSTACK_SKIP_GBRAIN_REGEN=1 "$GSTACK_LINK/setup" --plan-tune-hooks=prompt </dev/null
+
+# 7. Brain-aware (gbrain) blocks — render into an untracked workspace dir.
+#
+# The worktree's SKILL.md files stay canonical (the guard above). If gbrain is
+# installed, render the :user variant (with GBRAIN_CONTEXT_LOAD +
+# GBRAIN_SAVE_RESULTS) into .claude/gstack-rendered (gitignored, per-workspace)
+# and repoint the workspace's SKILL.md symlinks at it. gen-skill-docs --out-dir
+# also rewrites the section-base path so section reads resolve to the render, not
+# the global install. Result: this workspace gets the full gbrain experience
+# while git stays clean. Other projects pick up blocks via `gstack-config
+# gbrain-refresh` (printed below).
+GBRAIN_DETECT="$REPO_ROOT/bin/gstack-gbrain-detect"
+RENDER_DIR="$REPO_ROOT/.claude/gstack-rendered"
+if [ -x "$GBRAIN_DETECT" ] && "$GBRAIN_DETECT" --is-ok 2>/dev/null; then
+  echo ""
+  echo "gbrain detected — rendering brain-aware skills into .claude/gstack-rendered (workspace-only, untracked)..."
+  rm -rf "$RENDER_DIR"
+  if ( cd "$REPO_ROOT" && bun run gen:skill-docs:user --host claude --out-dir "$RENDER_DIR" >/dev/null 2>&1 ); then
+    # Repoint each project-local SKILL.md symlink whose worktree target has a
+    # rendered counterpart. The skill DIRECTORY name (basename of the symlink
+    # target's dir) maps to RENDER_DIR/<dir>/SKILL.md, which is robust to
+    # frontmatter renames and the gstack- prefix on the link name.
+    repointed=0
+    for skill_link in "$REPO_ROOT"/.claude/skills/*/SKILL.md; do
+      [ -L "$skill_link" ] || continue
+      target="$(readlink "$skill_link")"
+      skilldir="$(basename "$(dirname "$target")")"
+      rendered="$RENDER_DIR/$skilldir/SKILL.md"
+      if [ -f "$rendered" ]; then ln -snf "$rendered" "$skill_link"; repointed=$((repointed + 1)); fi
+    done
+    echo "  $repointed workspace skills now serve brain-aware blocks (worktree stays canonical)."
+  else
+    echo "  warning: brain-aware render failed — workspace uses canonical skills."
+  fi
+fi

 echo ""
 echo "Dev mode active. Skills resolve from this working tree."
@@ -80,4 +121,7 @@ echo "  .claude/skills/gstack → $REPO_ROOT"
 echo "  .agents/skills/gstack → $REPO_ROOT"
 echo "Edit any SKILL.md and test immediately — no copy/deploy needed."
 echo ""
+echo "To make brain-aware blocks live across your OTHER projects too, run:"
+echo "  gstack-config gbrain-refresh"
+echo ""
 echo "To tear down: bin/dev-teardown"
@@ -24,9 +24,16 @@ if [ -d "$CLAUDE_SKILLS" ]; then
  fi

  rmdir "$CLAUDE_SKILLS" 2>/dev/null || true
-  rmdir "$REPO_ROOT/.claude" 2>/dev/null || true
 fi

+# ─── Clean up the untracked brain-aware render (bin/dev-setup step 7) ──
+RENDER_DIR="$REPO_ROOT/.claude/gstack-rendered"
+if [ -d "$RENDER_DIR" ]; then
+  rm -rf "$RENDER_DIR"
+  removed+=("claude/gstack-rendered")
+fi
+rmdir "$REPO_ROOT/.claude" 2>/dev/null || true
+
 # ─── Clean up .agents/skills/ ────────────────────────────────
 AGENTS_SKILLS="$REPO_ROOT/.agents/skills"
 if [ -d "$AGENTS_SKILLS" ]; then
@@ -412,8 +412,29 @@ case "${1:-}" in

    case "$STATUS" in
      ok)
-        echo "Detected gbrain v$VERSION → brain-aware blocks will render in planning-skill SKILL.md files."
-        echo "Run 'bun run gen:skill-docs' in the gstack repo (or re-run ./setup) to regenerate now."
+        echo "Detected gbrain v$VERSION."
+        # Render brain-aware blocks INTO the global install so EVERY project's
+        # Claude sessions get them (other projects read SKILL.md + sections from
+        # ~/.claude/skills/gstack via absolute paths baked at gen time). Guards
+        # (never mutate an arbitrary directory): the target must exist, not be a
+        # symlink (a symlinked install points at a dev worktree — rendering there
+        # would dirty tracked source), and look like a real gstack clone.
+        INSTALL_DIR="$HOME/.claude/skills/gstack"
+        if [ ! -d "$INSTALL_DIR" ]; then
+          echo "No global install at $INSTALL_DIR — nothing to render. (Dev workspaces get blocks via bin/dev-setup.)"
+        elif [ -L "$INSTALL_DIR" ]; then
+          echo "Skip: $INSTALL_DIR is a symlink (likely a dev worktree). Rendering there would dirty tracked source — run bin/dev-setup in that worktree instead."
+        elif [ ! -f "$INSTALL_DIR/VERSION" ] || [ ! -f "$INSTALL_DIR/package.json" ]; then
+          echo "Skip: $INSTALL_DIR doesn't look like a gstack clone (missing VERSION/package.json) — refusing to modify it."
+        elif ! command -v bun >/dev/null 2>&1; then
+          echo "Skip: bun not on PATH — can't render. Install bun, then re-run 'gstack-config gbrain-refresh'."
+        elif ( cd "$INSTALL_DIR" && bun run gen:skill-docs:user --host claude >/dev/null 2>&1 ); then
+          echo "Rendered brain-aware blocks into $INSTALL_DIR — now live across all your projects' Claude sessions."
+          echo "Note: this dirties the install's git tree (generated blocks differ from main, by design)."
+          echo "      A 'git reset --hard origin/main' there reverts them; re-run 'gstack-config gbrain-refresh' to restore."
+        else
+          echo "Warning: render failed. Run 'cd $INSTALL_DIR && bun run gen:skill-docs:user --host claude' manually to see the error."
+        fi
        ;;
      *)
        echo "gbrain not detected (local-status: $STATUS) → brain-aware blocks will be suppressed in planning-skill SKILL.md files."
@@ -234,4 +234,14 @@ function main(): void {
  process.stdout.write(JSON.stringify(out, null, 2) + "\n");
 }

+// --is-ok: live engine-status gate. Exits 0 iff gbrain is usable ("ok"), 1 otherwise.
+// Runs detection live (never reads the possibly-stale gbrain-detection.json;
+// GSTACK_DETECT_NO_CACHE=1 is passed through as `noCache`), so callers — setup,
+// bin/dev-setup, and `gstack-config gbrain-refresh` — can decide whether to render the
+// gbrain :user variant without duplicating the JSON grep. Prints nothing on stdout.
+if (process.argv.includes("--is-ok")) {
+  const noCache = process.env.GSTACK_DETECT_NO_CACHE === "1";
+  process.exit(localEngineStatus({ noCache }) === "ok" ? 0 : 1);
+}
+
 main();
@@ -644,6 +644,51 @@ $B screenshot /tmp/out.png --selector .tweet-card
 ```
 Scale must be 1-3 (gstack policy cap). Changing `--scale` recreates the browser context; refs from `snapshot` are invalidated (rerun `snapshot`), but `load-html` content is replayed automatically. Not supported in headed mode.

+### 14. Offline render mode (rasterize your own HTML/JSON, zero network)
+
+This is the blessed path for "I just want to turn my own local HTML or JSON into a
+PNG/PDF/bytes on disk" — Excalidraw diagrams, tweet/quote cards, og-images,
+report rasterization. It is **plain headless, shared Chromium, no proxy, no Xvfb,
+no anti-bot stealth**. Default `$B` is already exactly this; you do not pass
+`--headed` or `--proxy`. One Chromium per box, shared by every skill — **do not
+`npm i puppeteer` and ship a second browser** (see the note under the cheatsheet).
+
+Two output shapes, pick by what you have:
+
+**A) Visual output → `screenshot --selector` (preferred).** If the thing you want
+is a picture of something on the page, screenshot it. The PNG is written from the
+browser process straight to disk — the image bytes never cross the CDP wire.
+
+```bash
+echo '<div id="card" style="width:400px;height:200px;background:#1da1f2;color:#fff;padding:20px">hi</div>' > /tmp/card.html
+$B viewport 480x600 --scale 2
+$B load-html /tmp/card.html
+$B screenshot /tmp/card.png --selector '#card'   # disk path — no megabytes over CDP
+```
+(Use the disk path, NOT `screenshot --base64` — base64 serializes the bytes back
+through the command channel, which is the cost you're trying to avoid.)
+
+**B) Bytes a function returns → `js --out` / `eval --out`.** When a library hands
+you the result as a return value (a base64 data URL, a blob, computed JSON) rather
+than painting a stable element — e.g. Excalidraw's export function returns a PNG
+data URL — write the evaluate result straight to disk. `--out` decodes a
+`data:*;base64,...` result to raw bytes automatically (pass `--raw` to write the
+literal string). The payload is written by the daemon and never serialized back
+out to the CLI/stdout.
+
+```bash
+# Load the render bundle, signal readiness, then render-to-file.
+$B load-html /tmp/excalidraw-export.html        # bundle sets window.__render + a #done flag
+$B wait '#done'                                  # deterministic ready handshake
+$B js "window.__render(SCENE_JSON)" --out /tmp/diagram.png   # data URL → decoded PNG on disk
+```
+
+`--out` is a WRITE: it needs the `write` scope and is never allowed over the
+pair-agent tunnel (a remote agent can't write to your disk). Parent directories
+are created; malformed base64 errors instead of writing corrupt bytes. Pick A when
+you can (no CDP transfer at all); reach for B only when the bytes come back as a
+return value.
+
 ## Puppeteer → browse cheatsheet

 Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
@@ -657,6 +702,8 @@ Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
 | `await (await page.$('.x')).screenshot({path})` | `$B screenshot <path> --selector .x` |
 | `await page.screenshot({fullPage: true, path})` | `$B screenshot <path>` (full page default) |
 | `await page.screenshot({clip: {x, y, w, h}, path})` | `$B screenshot <path> --clip x,y,w,h` |
+| `const r = await page.evaluate(fn)` | `$B js "<expr>"` (result to stdout) |
+| `fs.writeFileSync(out, Buffer.from(dataUrl.split(',')[1],'base64'))` | `$B js "<expr>" --out <file>` (data URL auto-decoded) |

 Worked example (the tweet-renderer flow — Puppeteer → browse):

@@ -671,6 +718,13 @@ $B screenshot /tmp/out.png --selector .tweet-card

 Aliases: typing `setcontent` or `set-content` routes to `load-html` automatically. Typing a typo (`load-htm`) returns `Did you mean 'load-html'?`.

+**Don't bundle your own puppeteer/Chromium.** `browse` is the one shared Chromium
+per box. Skills that need to rasterize local HTML/JSON (diagrams, cards, og-images)
+should route through `browse` — `screenshot --selector` for visual output,
+`load-html` + `js --out` for bytes a function returns — instead of
+`npm i puppeteer` and downloading a second Chromium that drifts out of version sync.
+One install to pin, one daemon's lifecycle to manage.
+
 ## User Handoff

 When you hit something you can't handle in headless mode (CAPTCHA, complex auth, multi-factor
@@ -875,10 +929,10 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero
 | `cookies` | All cookies as JSON |
 | `css <sel> <prop>` | Computed CSS value |
 | `dialog [--clear]` | Dialog messages |
-| `eval <file>` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. |
+| `eval <file> [--out <file>] [--raw]` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
 | `inspect [selector] [--all] [--history]` | Deep CSS inspection via CDP — full rule cascade, box model, computed styles |
 | `is <prop> <sel|@ref>` | State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected. |
-| `js <expr>` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. |
+| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
 | `network [--clear]` | Network requests |
 | `perf` | Page load timings |
 | `storage  |  storage set <key> <value>` | Read both localStorage and sessionStorage as JSON. With "set <key> <value>", write to localStorage only (sessionStorage is read-only via this command — set it with `js sessionStorage.setItem(...)`). |
@@ -135,6 +135,51 @@ $B screenshot /tmp/out.png --selector .tweet-card
 ```
 Scale must be 1-3 (gstack policy cap). Changing `--scale` recreates the browser context; refs from `snapshot` are invalidated (rerun `snapshot`), but `load-html` content is replayed automatically. Not supported in headed mode.

+### 14. Offline render mode (rasterize your own HTML/JSON, zero network)
+
+This is the blessed path for "I just want to turn my own local HTML or JSON into a
+PNG/PDF/bytes on disk" — Excalidraw diagrams, tweet/quote cards, og-images,
+report rasterization. It is **plain headless, shared Chromium, no proxy, no Xvfb,
+no anti-bot stealth**. Default `$B` is already exactly this; you do not pass
+`--headed` or `--proxy`. One Chromium per box, shared by every skill — **do not
+`npm i puppeteer` and ship a second browser** (see the note under the cheatsheet).
+
+Two output shapes, pick by what you have:
+
+**A) Visual output → `screenshot --selector` (preferred).** If the thing you want
+is a picture of something on the page, screenshot it. The PNG is written from the
+browser process straight to disk — the image bytes never cross the CDP wire.
+
+```bash
+echo '<div id="card" style="width:400px;height:200px;background:#1da1f2;color:#fff;padding:20px">hi</div>' > /tmp/card.html
+$B viewport 480x600 --scale 2
+$B load-html /tmp/card.html
+$B screenshot /tmp/card.png --selector '#card'   # disk path — no megabytes over CDP
+```
+(Use the disk path, NOT `screenshot --base64` — base64 serializes the bytes back
+through the command channel, which is the cost you're trying to avoid.)
+
+**B) Bytes a function returns → `js --out` / `eval --out`.** When a library hands
+you the result as a return value (a base64 data URL, a blob, computed JSON) rather
+than painting a stable element — e.g. Excalidraw's export function returns a PNG
+data URL — write the evaluate result straight to disk. `--out` decodes a
+`data:*;base64,...` result to raw bytes automatically (pass `--raw` to write the
+literal string). The payload is written by the daemon and never serialized back
+out to the CLI/stdout.
+
+```bash
+# Load the render bundle, signal readiness, then render-to-file.
+$B load-html /tmp/excalidraw-export.html        # bundle sets window.__render + a #done flag
+$B wait '#done'                                  # deterministic ready handshake
+$B js "window.__render(SCENE_JSON)" --out /tmp/diagram.png   # data URL → decoded PNG on disk
+```
+
+`--out` is a WRITE: it needs the `write` scope and is never allowed over the
+pair-agent tunnel (a remote agent can't write to your disk). Parent directories
+are created; malformed base64 errors instead of writing corrupt bytes. Pick A when
+you can (no CDP transfer at all); reach for B only when the bytes come back as a
+return value.
+
 ## Puppeteer → browse cheatsheet

 Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
@@ -148,6 +193,8 @@ Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
 | `await (await page.$('.x')).screenshot({path})` | `$B screenshot <path> --selector .x` |
 | `await page.screenshot({fullPage: true, path})` | `$B screenshot <path>` (full page default) |
 | `await page.screenshot({clip: {x, y, w, h}, path})` | `$B screenshot <path> --clip x,y,w,h` |
+| `const r = await page.evaluate(fn)` | `$B js "<expr>"` (result to stdout) |
+| `fs.writeFileSync(out, Buffer.from(dataUrl.split(',')[1],'base64'))` | `$B js "<expr>" --out <file>` (data URL auto-decoded) |

 Worked example (the tweet-renderer flow — Puppeteer → browse):

@@ -162,6 +209,13 @@ $B screenshot /tmp/out.png --selector .tweet-card

 Aliases: typing `setcontent` or `set-content` routes to `load-html` automatically. Typing a typo (`load-htm`) returns `Did you mean 'load-html'?`.

+**Don't bundle your own puppeteer/Chromium.** `browse` is the one shared Chromium
+per box. Skills that need to rasterize local HTML/JSON (diagrams, cards, og-images)
+should route through `browse` — `screenshot --selector` for visual output,
+`load-html` + `js --out` for bytes a function returns — instead of
+`npm i puppeteer` and downloading a second Chromium that drifts out of version sync.
+One install to pin, one daemon's lifecycle to manage.
+
 ## User Handoff

 When you hit something you can't handle in headless mode (CAPTCHA, complex auth, multi-factor
@@ -106,8 +106,8 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
  'media':   { category: 'Reading', description: 'All media elements (images, videos, audio) with URLs, dimensions, types', usage: 'media [--images|--videos|--audio] [selector]' },
  'data':    { category: 'Reading', description: 'Structured data: JSON-LD, Open Graph, Twitter Cards, meta tags', usage: 'data [--jsonld|--og|--meta|--twitter]' },
  // Inspection
-  'js':      { category: 'Inspection', description: 'Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file.', usage: 'js <expr>' },
-  'eval':    { category: 'Inspection', description: 'Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners.', usage: 'eval <file>' },
+  'js':      { category: 'Inspection', description: 'Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel).', usage: 'js <expr> [--out <file>] [--raw]' },
+  'eval':    { category: 'Inspection', description: 'Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel).', usage: 'eval <file> [--out <file>] [--raw]' },
  'css':     { category: 'Inspection', description: 'Computed CSS value', usage: 'css <sel> <prop>' },
  'attrs':   { category: 'Inspection', description: 'Element attributes as JSON', usage: 'attrs <sel|@ref>' },
  'is':      { category: 'Inspection', description: 'State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected.', usage: 'is <prop> <sel|@ref>' },
@@ -13,7 +13,7 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { TEMP_DIR } from './platform';
 import { inspectElement, formatInspectorResult, getModificationHistory } from './cdp-inspector';
-import { validateReadPath } from './path-security';
+import { validateReadPath, validateOutputPath } from './path-security';
 import { stripLoneSurrogates } from './sanitize';
 // Re-export for backward compatibility (tests import from read-commands)
 export { validateReadPath } from './path-security';
@@ -46,6 +46,117 @@ function wrapForEvaluate(code: string): string {
    : `(async()=>(${trimmed}))()`;
 }

+/** Flags split out of `js`/`eval` args by parseOutArgs. */
+export interface OutArgs {
+  outPath?: string; // value given via `--out <path>` / `--out=<path>`; undefined when the flag is absent
+  raw: boolean; // true for `--raw` / `--raw=true`; false otherwise
+  rest: string[]; // remaining args in their original order, with the flags removed
+}
+
+/**
+ * Separate the `--out` / `--raw` flags from the positional arguments of a
+ * `js`/`eval` invocation. Accepted spellings: `--out <path>`, `--out=<path>`,
+ * `--raw`, and `--raw=true|false` (value matched case-insensitively).
+ *
+ * Everything else is returned, in order, as `rest`. Malformed usage throws rather
+ * than misparsing silently: a repeated `--out`, an `--out` with no value (or one
+ * followed by another `--flag`), an empty `--out=`, or a bad `--raw=` value.
+ */
+export function parseOutArgs(args: string[]): OutArgs {
+  const positional: string[] = [];
+  let destination: string | undefined;
+  let literal = false;
+  // Stores the --out target; `inline` marks the `--out=<path>` spelling.
+  const setDestination = (candidate: string | undefined, inline: boolean): void => {
+    if (destination !== undefined) throw new Error('--out specified more than once');
+    const missing = candidate === undefined || (inline ? candidate === '' : candidate.startsWith('--'));
+    if (missing) throw new Error('--out requires a file path');
+    destination = candidate;
+  };
+  for (let idx = 0; idx < args.length;) {
+    const token = args[idx++];
+    if (token === '--out') {
+      setDestination(args[idx++], false);
+    } else if (token.startsWith('--out=')) {
+      setDestination(token.slice('--out='.length), true);
+    } else if (token === '--raw') {
+      literal = true;
+    } else if (token.startsWith('--raw=')) {
+      const flag = token.slice('--raw='.length).toLowerCase();
+      if (flag !== 'true' && flag !== 'false') throw new Error('--raw must be true or false');
+      literal = flag === 'true';
+    } else {
+      positional.push(token);
+    }
+  }
+  return { outPath: destination, raw: literal, rest: positional };
+}
+
+/**
+ * Report whether any argument is `--out` or begins with `--out=`. The
+ * write-capability gates use this to spot `js`/`eval` invocations that write to
+ * disk. It performs no validation: a value-less `--out=` still counts, so a
+ * malformed attempt is gated rather than waved through.
+ */
+export function hasOutArg(args: string[]): boolean {
+  const isOutFlag = (arg: string): boolean => arg === '--out' || arg.startsWith('--out=');
+  return args.some(isOutFlag);
+}
+
+/**
+ * Stringify an evaluate() result exactly as `js`/`eval` did inline before `--out`
+ * was added. Anything whose `typeof` is `'object'` — `null` included — is
+ * pretty-printed with JSON.stringify (so `null` → `"null"`). Every other value
+ * goes through `String(result ?? '')` (so `undefined` → `''`). JSON.stringify
+ * is not guarded, so circular or BigInt-bearing objects still throw as they
+ * did before.
+ */
+export function resultToString(result: unknown): string {
+  if (typeof result !== 'object') return String(result ?? '');
+  return JSON.stringify(result, null, 2);
+}
+
+/**
+ * Write an evaluate result string to disk for `--out`, returning bytes written.
+ *
+ * When the result is a base64 data URL (`data:<type>;...;base64,<payload>`) and
+ * `raw` is false, decode the payload to raw bytes — this is the Excalidraw / og-image
+ * path where a render function returns a PNG data URL. The header is parsed
+ * case-insensitively and split on the FIRST comma. The payload is validated
+ * (charset and length) before decoding, because `Buffer.from(_, 'base64')`
+ * silently drops invalid input and would otherwise write corrupted bytes.
+ * `--raw` forces a literal write even for data URLs.
+ *
+ * Non-base64 strings are surrogate-sanitized (matching what the stdout egress path
+ * did before) and written as UTF-8. Parent directories are created — validateOutputPath
+ * gates the location but does not mkdir. The bytes are fully computed before any
+ * filesystem mutation, so a malformed payload leaves no stray directory behind.
+ */
+export function writeEvalResult(outPath: string, str: string, opts: { raw: boolean }): number {
+  validateOutputPath(outPath);
+  let decoded: Buffer | undefined;
+  const comma = !opts.raw && str.startsWith('data:') ? str.indexOf(',') : -1;
+  if (comma !== -1) {
+    const header = str.slice('data:'.length, comma);
+    const tokens = header.split(';').map(t => t.trim().toLowerCase());
+    if (tokens.includes('base64')) {
+      const payload = str.slice(comma + 1).replace(/\s+/g, '');
+      // Unpadded length ≡ 1 (mod 4) can never decode losslessly; Buffer.from would truncate.
+      const wellFormed = /^[A-Za-z0-9+/]*={0,2}$/.test(payload)
+        && payload.replace(/=+$/, '').length % 4 !== 1;
+      if (!wellFormed) {
+        throw new Error('--out: malformed base64 in data URL (decode would corrupt output)');
+      }
+      decoded = Buffer.from(payload, 'base64');
+    }
+  }
+  const buf = decoded ?? Buffer.from(stripLoneSurrogates(str), 'utf-8');
+  // Touch the filesystem only once the bytes are known-good.
+  fs.mkdirSync(path.dirname(path.resolve(outPath)), { recursive: true });
+  fs.writeFileSync(outPath, buf);
+  return buf.length;
+}
+
 /**
 * Extract clean text from a page (strips script/style/noscript/svg).
 * Exported for DRY reuse in meta-commands (diff).
@@ -179,24 +290,36 @@ export async function handleReadCommand(
    }

    case 'js': {
-      const expr = args[0];
-      if (!expr) throw new Error('Usage: browse js <expression>');
+      const { outPath, raw, rest } = parseOutArgs(args);
+      const expr = rest[0];
+      if (!expr) throw new Error('Usage: browse js <expression> [--out <file>] [--raw]');
      if (bm) assertJsOriginAllowed(bm, page.url());
      const wrapped = wrapForEvaluate(expr);
      const result = await target.evaluate(wrapped);
-      return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
+      const str = resultToString(result);
+      if (outPath) {
+        const n = writeEvalResult(outPath, str, { raw });
+        return `JS result written: ${outPath} (${n} bytes)`;
+      }
+      return str;
    }

    case 'eval': {
-      const filePath = args[0];
-      if (!filePath) throw new Error('Usage: browse eval <js-file>');
+      const { outPath, raw, rest } = parseOutArgs(args);
+      const filePath = rest[0];
+      if (!filePath) throw new Error('Usage: browse eval <js-file> [--out <file>] [--raw]');
      if (bm) assertJsOriginAllowed(bm, page.url());
      validateReadPath(filePath);
      if (!fs.existsSync(filePath)) throw new Error(`File not found: ${filePath}`);
      const code = fs.readFileSync(filePath, 'utf-8');
      const wrapped = wrapForEvaluate(code);
      const result = await target.evaluate(wrapped);
-      return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
+      const str = resultToString(result);
+      if (outPath) {
+        const n = writeEvalResult(outPath, str, { raw });
+        return `Eval result written: ${outPath} (${n} bytes)`;
+      }
+      return str;
    }

    case 'css': {
@@ -14,7 +14,7 @@
 */

 import { BrowserManager } from './browser-manager';
-import { handleReadCommand } from './read-commands';
+import { handleReadCommand, hasOutArg } from './read-commands';
 import { handleWriteCommand } from './write-commands';
 import { handleMetaCommand } from './meta-commands';
 import { handleCookiePickerRoute, hasActivePicker } from './cookie-picker-routes';
@@ -330,9 +330,15 @@ export const TUNNEL_COMMANDS = new Set<string>([
 * without standing up an HTTP listener. Behavior is identical to the inline
 * check; the function canonicalizes the command (so aliases hit the same set)
 * and returns false for null/undefined input.
+ *
+ * `args` is consulted so an `--out` invocation (e.g. `eval --out <file>`) is
+ * NEVER tunnel-dispatchable: `--out` turns an otherwise-readable command into a
+ * local-disk WRITE, and the tunnel surface never grants disk-write capability to
+ * remote paired agents. Omitting `args` preserves the old command-only behavior.
 */
-export function canDispatchOverTunnel(command: string | undefined | null): boolean {
+export function canDispatchOverTunnel(command: string | undefined | null, args?: string[]): boolean {
  if (typeof command !== 'string' || command.length === 0) return false;
+  if (Array.isArray(args) && hasOutArg(args)) return false;
  const cmd = canonicalizeCommand(command);
  return TUNNEL_COMMANDS.has(cmd);
 }
@@ -716,6 +722,19 @@ if (BROWSE_PARENT_PID > 0 && !IS_HEADED_WATCHDOG) {
 import { READ_COMMANDS, WRITE_COMMANDS, META_COMMANDS } from './commands';
 export { READ_COMMANDS, WRITE_COMMANDS, META_COMMANDS };

+/**
+ * Decide whether an invocation counts as a WRITE for capability gating (scope,
+ * watch-mode block, tab ownership, tunnel). True when the command is in
+ * `WRITE_COMMANDS`, or when its args carry `--out` — `js`/`eval --out` writes
+ * the evaluate result to local disk, so write-ness is per-invocation rather than
+ * per-command-name. Dispatch routing is untouched: `js`/`eval` still route to
+ * `handleReadCommand`; only the security gates consult this.
+ */
+function isWriteInvocation(command: string, args: string[]): boolean {
+  if (WRITE_COMMANDS.has(command)) return true;
+  return hasOutArg(args);
+}
+
 // ─── Inspector State (in-memory) ──────────────────────────────
 let inspectorData: InspectorResult | null = null;
 let inspectorTimestamp: number = 0;
@@ -957,6 +976,19 @@ async function handleCommandInternalImpl(
      };
    }

+    // `--out` writes the evaluate result to local disk, which is a WRITE
+    // capability distinct from the JS-exec (admin) capability js/eval need.
+    // Require write scope so an admin-but-not-write token can't write files.
+    if (hasOutArg(args) && !tokenInfo.scopes.includes('write')) {
+      return {
+        status: 403, json: true,
+        result: JSON.stringify({
+          error: `"--out" writes to disk and requires the "write" scope`,
+          hint: `Your scopes: ${tokenInfo.scopes.join(', ')}. Re-pair with write access to use --out.`,
+        }),
+      };
+    }
+
    // Domain check for navigation commands
    if ((command === 'goto' || command === 'newtab') && args[0]) {
      if (!checkDomain(tokenInfo, args[0])) {
@@ -1011,7 +1043,7 @@ async function handleCommandInternalImpl(
  // Skip for `newtab` — it creates a tab rather than accessing one.
  if (command !== 'newtab' && tokenInfo && tokenInfo.clientId !== 'root' && tokenInfo.tabPolicy === 'own-only') {
    const targetTab = tabId ?? browserManager.getActiveTabId();
-    if (!browserManager.checkTabAccess(targetTab, tokenInfo.clientId, { isWrite: WRITE_COMMANDS.has(command), ownOnly: true })) {
+    if (!browserManager.checkTabAccess(targetTab, tokenInfo.clientId, { isWrite: isWriteInvocation(command, args), ownOnly: true })) {
      return {
        status: 403, json: true,
        result: JSON.stringify({
@@ -1035,8 +1067,9 @@ async function handleCommandInternalImpl(
    };
  }

-  // Block mutation commands while watching (read-only observation mode)
-  if (browserManager.isWatching() && WRITE_COMMANDS.has(command)) {
+  // Block mutation commands while watching (read-only observation mode).
+  // `--out` invocations count as mutations (they write the result to disk).
+  if (browserManager.isWatching() && isWriteInvocation(command, args)) {
    return {
      status: 400, json: true,
      result: JSON.stringify({ error: 'Cannot run mutation commands while watching. Run `$B watch stop` first.' }),
@@ -2650,11 +2683,11 @@ export function buildFetchHandler(cfg: ServerConfig): ServerHandle {
        // Paired remote agents drive the browser but cannot configure the
        // daemon, launch new browsers, import cookies, or rotate tokens.
        if (surface === 'tunnel') {
-          if (!canDispatchOverTunnel(body?.command)) {
+          if (!canDispatchOverTunnel(body?.command, body?.args)) {
            logTunnelDenial(req, url, `disallowed_command:${body?.command}`);
            return new Response(JSON.stringify({
              error: `Command '${body?.command}' is not allowed over the tunnel surface`,
-              hint: `Tunnel commands: ${[...TUNNEL_COMMANDS].sort().join(', ')}`,
+              hint: `Tunnel commands: ${[...TUNNEL_COMMANDS].sort().join(', ')}. Note: --out (disk write) is never allowed over the tunnel.`,
            }), { status: 403, headers: { 'Content-Type': 'application/json' } });
          }
        }
@@ -9,7 +9,7 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
 import { startTestServer } from './test-server';
 import { BrowserManager } from '../src/browser-manager';
 import { resolveServerScript } from '../src/cli';
-import { handleReadCommand as _handleReadCommand } from '../src/read-commands';
+import { handleReadCommand as _handleReadCommand, parseOutArgs, hasOutArg, resultToString } from '../src/read-commands';
 import { handleWriteCommand as _handleWriteCommand } from '../src/write-commands';
 import { handleMetaCommand } from '../src/meta-commands';
 import { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetworkEntry, addDialogEntry, CircularBuffer } from '../src/buffers';
@@ -23,6 +23,65 @@ const handleReadCommand = (cmd: string, args: string[], b: BrowserManager) =>
 const handleWriteCommand = (cmd: string, args: string[], b: BrowserManager) =>
  _handleWriteCommand(cmd, args, b.getActiveSession(), b);

+// ─── Pure arg-parser + result-conversion unit tests (no browser) ───
+describe('parseOutArgs / hasOutArg', () => {
+  test('--out <path> splits the flag from the positional', () => {
+    expect(parseOutArgs(['expr', '--out', '/tmp/x'])).toEqual({ outPath: '/tmp/x', raw: false, rest: ['expr'] });
+  });
+
+  test('--out=<path> form is equivalent', () => {
+    expect(parseOutArgs(['expr', '--out=/tmp/x'])).toEqual({ outPath: '/tmp/x', raw: false, rest: ['expr'] });
+  });
+
+  test('flag ordering does not matter', () => {
+    expect(parseOutArgs(['--out', '/tmp/x', 'expr'])).toEqual({ outPath: '/tmp/x', raw: false, rest: ['expr'] });
+  });
+
+  test('--raw and --raw=true|false', () => {
+    expect(parseOutArgs(['e', '--out', '/tmp/x', '--raw']).raw).toBe(true);
+    expect(parseOutArgs(['e', '--out', '/tmp/x', '--raw=true']).raw).toBe(true);
+    expect(parseOutArgs(['e', '--out', '/tmp/x', '--raw=false']).raw).toBe(false);
+  });
+
+  test('repeated --out throws', () => {
+    expect(() => parseOutArgs(['e', '--out', '/a', '--out', '/b'])).toThrow(/more than once/);
+  });
+
+  test('--out with a missing value throws', () => {
+    expect(() => parseOutArgs(['e', '--out'])).toThrow(/requires a file path/);
+    expect(() => parseOutArgs(['e', '--out', '--raw'])).toThrow(/requires a file path/);
+    expect(() => parseOutArgs(['e', '--out='])).toThrow(/requires a file path/);
+  });
+
+  test('bad --raw value throws', () => {
+    expect(() => parseOutArgs(['e', '--out', '/a', '--raw=maybe'])).toThrow(/--raw must be true or false/);
+  });
+
+  test('hasOutArg matches --out and --out= exactly, not lookalikes', () => {
+    expect(hasOutArg(['a', '--out', 'b'])).toBe(true);
+    expect(hasOutArg(['a', '--out=b'])).toBe(true);
+    expect(hasOutArg(['a'])).toBe(false);
+    expect(hasOutArg(['a', '--output', 'b'])).toBe(false);
+    expect(hasOutArg(['a', '--outx'])).toBe(false);
+  });
+});
+
+describe('resultToString — byte-for-byte with pre-refactor behavior', () => {
+  test('null becomes "null" (typeof null === object → JSON.stringify)', () => {
+    expect(resultToString(null)).toBe('null');
+  });
+  test('undefined becomes empty string', () => {
+    expect(resultToString(undefined)).toBe('');
+  });
+  test('objects are pretty-printed JSON', () => {
+    expect(resultToString({ a: 1 })).toBe(JSON.stringify({ a: 1 }, null, 2));
+  });
+  test('primitives use String()', () => {
+    expect(resultToString(42)).toBe('42');
+    expect(resultToString(true)).toBe('true');
+  });
+});
+
 let testServer: ReturnType<typeof startTestServer>;
 let bm: BrowserManager;
 let baseUrl: string;
@@ -225,6 +284,102 @@ describe('Inspection', () => {
    expect(result).toBe('3');
  });

+  // ─── js/eval --out (render-to-file) ───────────────────────────
+
+  test('js (no --out) returns a multi-MB string without truncation', async () => {
+    // Handler-level guarantee: the result is not sliced/capped before return.
+    // (Full HTTP egress path is exercised elsewhere; this pins the handler.)
+    const result = await handleReadCommand('js', ["'x'.repeat(3 * 1024 * 1024)"], bm);
+    expect(result.length).toBe(3 * 1024 * 1024);
+  });
+
+  test('js --out writes the result to disk and returns a short status, not the payload', async () => {
+    const out = `/tmp/browse-out-large-${Date.now()}.txt`;
+    try {
+      const result = await handleReadCommand('js', ["'y'.repeat(2 * 1024 * 1024)", '--out', out], bm);
+      expect(result).toContain('JS result written:');
+      expect(result).toContain(out);
+      expect(result).toContain(`(${2 * 1024 * 1024} bytes)`);
+      expect(result.length).toBeLessThan(200); // status, not the 2MB payload
+      expect(fs.statSync(out).size).toBe(2 * 1024 * 1024);
+    } finally {
+      fs.rmSync(out, { force: true });
+    }
+  });
+
+  test('js --out decodes a base64 PNG data URL to real bytes', async () => {
+    // 1x1 transparent PNG.
+    const b64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
+    const out = `/tmp/browse-out-png-${Date.now()}.png`;
+    try {
+      const result = await handleReadCommand('js', [`'data:image/png;base64,' + '${b64}'`, '--out', out], bm);
+      const buf = fs.readFileSync(out);
+      // PNG magic bytes: 89 50 4E 47
+      expect([buf[0], buf[1], buf[2], buf[3]]).toEqual([0x89, 0x50, 0x4e, 0x47]);
+      const expectedLen = Buffer.from(b64, 'base64').length;
+      expect(buf.length).toBe(expectedLen);
+      expect(result).toContain(`(${expectedLen} bytes)`);
+    } finally {
+      fs.rmSync(out, { force: true });
+    }
+  });
+
+  test('js --out --raw writes the literal data-URL string (no decode)', async () => {
+    const dataUrl = 'data:text/plain;base64,aGVsbG8=';
+    const out = `/tmp/browse-out-raw-${Date.now()}.txt`;
+    try {
+      await handleReadCommand('js', [`'${dataUrl}'`, '--out', out, '--raw'], bm);
+      expect(fs.readFileSync(out, 'utf-8')).toBe(dataUrl);
+    } finally {
+      fs.rmSync(out, { force: true });
+    }
+  });
+
+  test('js --out throws on a malformed base64 data URL instead of writing corrupt bytes', async () => {
+    const out = `/tmp/browse-out-bad-${Date.now()}.png`;
+    try {
+      await expect(
+        handleReadCommand('js', ["'data:image/png;base64,!!!not-base64!!!'", '--out', out], bm)
+      ).rejects.toThrow(/malformed base64/);
+      expect(fs.existsSync(out)).toBe(false);
+    } finally {
+      fs.rmSync(out, { force: true });
+    }
+  });
+
+  test('js --out rejects a path outside the safe directories', async () => {
+    await expect(
+      handleReadCommand('js', ['1 + 1', '--out', '/etc/browse-should-not-write.txt'], bm)
+    ).rejects.toThrow();
+  });
+
+  test('js --out creates a missing parent directory', async () => {
+    // validateOutputPath resolves the parent's realpath, so it permits one level
+    // of missing dir under a safe root (/tmp). mkdir then materializes it.
+    const root = `/tmp/browse-out-nested-${Date.now()}`;
+    const out = `${root}/result.txt`;
+    try {
+      await handleReadCommand('js', ["'nested'", '--out', out], bm);
+      expect(fs.readFileSync(out, 'utf-8')).toBe('nested');
+    } finally {
+      fs.rmSync(root, { recursive: true, force: true });
+    }
+  });
+
+  test('eval --out writes the file result to disk (parity with js)', async () => {
+    const script = `/tmp/browse-eval-out-src-${Date.now()}.js`;
+    const out = `/tmp/browse-eval-out-${Date.now()}.txt`;
+    fs.writeFileSync(script, "'from eval'");
+    try {
+      const result = await handleReadCommand('eval', [script, '--out', out], bm);
+      expect(result).toContain('Eval result written:');
+      expect(fs.readFileSync(out, 'utf-8')).toBe('from eval');
+    } finally {
+      fs.rmSync(script, { force: true });
+      fs.rmSync(out, { force: true });
+    }
+  });
+
  test('css returns computed property', async () => {
    const result = await handleReadCommand('css', ['h1', 'color'], bm);
    // Navy color
@@ -95,3 +95,35 @@ describe('canDispatchOverTunnel — alias canonicalization', () => {
    expect(canDispatchOverTunnel('closetab')).toBe(true);
  });
 });
+
+describe('canDispatchOverTunnel — --out writes are never tunnel-dispatchable', () => {
+  // `--out` turns an otherwise-readable command into a local-disk WRITE. The
+  // tunnel surface never grants disk-write to remote paired agents, so any
+  // --out invocation must be 403'd even when the bare command is allowlisted.
+  test('bare eval dispatches, but eval --out does not', () => {
+    expect(canDispatchOverTunnel('eval', ['/tmp/x.js'])).toBe(true);
+    expect(canDispatchOverTunnel('eval', ['/tmp/x.js', '--out', '/tmp/o.png'])).toBe(false);
+  });
+
+  test('--out= form is rejected too (no parser-shape bypass)', () => {
+    expect(canDispatchOverTunnel('eval', ['/tmp/x.js', '--out=/tmp/o.png'])).toBe(false);
+  });
+
+  test('--out anywhere in args is caught regardless of ordering', () => {
+    expect(canDispatchOverTunnel('eval', ['--out', '/tmp/o.png', '/tmp/x.js'])).toBe(false);
+  });
+
+  test('args without --out still dispatch', () => {
+    expect(canDispatchOverTunnel('goto', ['https://example.com'])).toBe(true);
+    expect(canDispatchOverTunnel('eval', ['/tmp/x.js'])).toBe(true);
+  });
+
+  test('omitting args preserves the old command-only behavior', () => {
+    expect(canDispatchOverTunnel('eval')).toBe(true);
+  });
+
+  test('a lookalike flag (--output) is NOT treated as --out', () => {
+    // hasOutArg matches '--out' exactly or '--out='; '--output' must not trip it.
+    expect(canDispatchOverTunnel('eval', ['/tmp/x.js', '--output', '/tmp/o'])).toBe(true);
+  });
+});
@@ -81,10 +81,10 @@ Run with `browse <command> [args]`. Full reference: `browse/SKILL.md`.
 - `cookies`: All cookies as JSON
 - `css <sel> <prop>`: Computed CSS value
 - `dialog [--clear]`: Dialog messages
- `eval <file>`: Run JavaScript from a file in the page context and return result as string.
+- `eval <file> [--out <file>] [--raw]`: Run JavaScript from a file in the page context and return result as string.
 - `inspect [selector] [--all] [--history]`: Deep CSS inspection via CDP — full rule cascade, box model, computed styles
 - `is <prop> <sel|@ref>`: State check on element.
- `js <expr>`: Run inline JavaScript expression in the page context and return result as string.
+- `js <expr> [--out <file>] [--raw]`: Run inline JavaScript expression in the page context and return result as string.
 - `network [--clear]`: Network requests
 - `perf`: Page load timings
 - `storage  |  storage set <key> <value>`: Read both localStorage and sessionStorage as JSON.
@@ -1,6 +1,6 @@
 {
  "name": "gstack",
-  "version": "1.57.7.0",
+  "version": "1.57.9.0",
  "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
  "license": "MIT",
  "type": "module",
@@ -50,6 +50,24 @@ function regenerateAndValidate() {
      console.log(`  [check] \u2705 ${output} — ${totalValid} commands, all valid`);
    }
  }
+
+  // Dev workspace render isolation: the default in-place regen above keeps the
+  // worktree canonical. If bin/dev-setup set up an untracked brain-aware render
+  // (.claude/gstack-rendered), refresh it too so live template edits reflect at
+  // this workspace's runtime. Only runs when the render dir already exists — we
+  // never create it during plain template dev.
+  const RENDER_DIR = path.join(ROOT, '.claude', 'gstack-rendered');
+  if (fs.existsSync(RENDER_DIR)) {
+    try {
+      execSync(
+        `bun run scripts/gen-skill-docs.ts --respect-detection --host claude --out-dir ${JSON.stringify(RENDER_DIR)}`,
+        { cwd: ROOT, stdio: 'pipe' },
+      );
+      console.log('  [render] refreshed .claude/gstack-rendered (brain-aware workspace copy)');
+    } catch (err: any) {
+      console.log(`  [render] ERROR: ${err.stderr?.toString().trim() || err.message}`);
+    }
+  }
 }

 // Initial run
@@ -137,6 +137,39 @@ const EXPLAIN_LEVEL: 'default' | 'terse' = (() => {
  return val;
 })();

+// ─── Out-dir (dev workspace render isolation) ───────────────
+// --out-dir <abs-dir> (or --out-dir=<abs-dir>) redirects Claude SKILL.md + section
+// output to a separate (untracked) directory instead of writing in place, AND
+// rewrites the literal section-base path (`~/.claude/skills/gstack/<skill>/sections/`)
+// inside the generated content to point at the out-dir, so section Reads resolve to
+// the rendered copy rather than the global install. Used by bin/dev-setup to render
+// the gbrain `:user` variant for a Conductor workspace without dirtying tracked
+// source. Default (unset) = in-place, behavior unchanged. Claude host only.
+const OUT_DIR_ARG = process.argv.find(a => a === '--out-dir' || a.startsWith('--out-dir='));
+const OUT_DIR: string | null = (() => {
+  if (!OUT_DIR_ARG) return null;
+  const val = OUT_DIR_ARG.includes('=')
+    ? OUT_DIR_ARG.slice(OUT_DIR_ARG.indexOf('=') + 1) // keep any later '=' in the path
+    : process.argv[process.argv.indexOf(OUT_DIR_ARG) + 1];
+  if (!val || val.startsWith('--')) throw new Error('--out-dir requires a directory path');
+  return path.resolve(val);
+})();
+
+/**
+ * When rendering to an out-dir, repoint the literal section-base path at the
+ * out-dir so section Reads resolve to the rendered copy, not the global install.
+ * Surgical: ONLY paths containing `/sections/` are rewritten — bin/, browse/,
+ * docs/ references keep pointing at `~/.claude/skills/gstack`. A function replacer
+ * keeps any `$` in the out-dir literal. No-op when --out-dir is unset.
+ */
+function rewriteSectionBase(content: string): string {
+  if (!OUT_DIR) return content;
+  return content.replace(
+    /~\/\.claude\/skills\/gstack\/([^\s)`"'*]+\/sections\/)/g,
+    (_match: string, sectionPath: string) => `${OUT_DIR}/${sectionPath}`,
+  );
+}
+
 // HostPaths, HOST_PATHS, and TemplateContext imported from ./resolvers/types (line 7-8)
 // Design constants (AI_SLOP_BLACKLIST, OPENAI_HARD_REJECTIONS, OPENAI_LITMUS_CHECKS)
 // live in ./resolvers/constants and are consumed by resolvers directly.
@@ -768,6 +801,12 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
  // Determine skill directory relative to ROOT
  const skillDir = path.relative(ROOT, path.dirname(tmplPath));

+  // --out-dir (Claude only): mirror the skill tree into the out-dir instead of
+  // writing in place. External hosts compute their own paths below.
+  if (OUT_DIR && host === 'claude') {
+    outputPath = path.join(OUT_DIR, skillDir, path.basename(tmplPath).replace(/\.tmpl$/, ''));
+  }
+
  // Extract name/description: name drives external skill naming + setup symlinks
  // (and TemplateContext.skillName via buildContext); description feeds external
  // host metadata. When frontmatter name: differs from directory name (e.g.
@@ -822,6 +861,9 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath:
    }
  }

+  // --out-dir: repoint section-base paths to the out-dir (no-op otherwise).
+  if (host === 'claude') content = rewriteSectionBase(content);
+
  return { outputPath, content, symlinkLoop, catalogParts };
 }

@@ -860,6 +902,10 @@ function processSectionTemplate(
  // External hosts: rewrite cross-reference paths/tools (no frontmatter to transform).
  if (host !== 'claude') {
    content = applyHostRewrites(content, hostConfig);
+  } else {
+    // --out-dir: a section may cross-reference another section by absolute path;
+    // repoint those to the out-dir too (no-op when --out-dir is unset).
+    content = rewriteSectionBase(content);
  }

  // Plain generated header (no frontmatter to insert after).
@@ -868,7 +914,7 @@ function processSectionTemplate(
  const fileName = path.basename(sectionTmplPath).replace(/\.tmpl$/, '');
  let outputPath: string;
  if (host === 'claude') {
-    outputPath = path.join(ROOT, skillDir, 'sections', fileName);
+    outputPath = path.join(OUT_DIR || ROOT, skillDir, 'sections', fileName);
  } else {
    const externalName = externalSkillName(skillDir, parentName);
    outputPath = path.join(ROOT, hostConfig.hostSubdir, 'skills', externalName, 'sections', fileName);
@@ -933,7 +979,7 @@ for (const currentHost of hostsToRun) {
          voice_line: catalogParts.voiceLine,
        };
      }
-      const relOutput = path.relative(ROOT, outputPath);
+      const relOutput = path.relative(OUT_DIR || ROOT, outputPath);

      if (symlinkLoop) {
        console.log(`SKIPPED (symlink loop): ${relOutput}`);
@@ -946,6 +992,9 @@ for (const currentHost of hostsToRun) {
          console.log(`FRESH: ${relOutput}`);
        }
      } else {
+        // In-place writes land in existing dirs; --out-dir needs the mirrored
+        // skill dir created first.
+        if (OUT_DIR) fs.mkdirSync(path.dirname(outputPath), { recursive: true });
        fs.writeFileSync(outputPath, content);
        console.log(`GENERATED: ${relOutput}`);
      }
@@ -982,7 +1031,7 @@ for (const currentHost of hostsToRun) {
          currentHostConfig.generation.skipSkills.includes(sec.skillDir)) continue;

      const { outputPath, content } = processSectionTemplate(path.join(ROOT, sec.tmpl), sec.skillDir, currentHost);
-      const relOutput = path.relative(ROOT, outputPath);
+      const relOutput = path.relative(OUT_DIR || ROOT, outputPath);

      if (DRY_RUN) {
        const existing = fs.existsSync(outputPath) ? fs.readFileSync(outputPath, 'utf-8') : '';
@@ -1079,7 +1128,9 @@ The orchestrator will persist the plan link to its own memory/knowledge store.
    // No timestamp field — keeps the file content-deterministic across runs so
    // CI dry-run freshness checks don't flap on regen. If a per-run timestamp
    // is ever needed for debugging, write it to a separate `.gen-stamp` file.
-    if (currentHost === 'claude' && CATALOG_MODE === 'trim' && Object.keys(proactiveAggregate).length > 0 && !DRY_RUN) {
+    // Skip the global proactive-suggestions.json in --out-dir mode: it lives at
+    // a repo path (scripts/) and the dev workspace render doesn't need it.
+    if (currentHost === 'claude' && CATALOG_MODE === 'trim' && Object.keys(proactiveAggregate).length > 0 && !DRY_RUN && !OUT_DIR) {
      const proactivePath = path.join(ROOT, 'scripts', 'proactive-suggestions.json');
      // Sort keys alphabetically so the serialized JSON is identical across
      // machines regardless of filesystem-iteration order. Without this, CI
@@ -1286,22 +1286,37 @@ fi
 DETECT_BIN="$SOURCE_GSTACK_DIR/bin/gstack-gbrain-detect"
 GBRAIN_STATE_DIR="${GSTACK_HOME:-$HOME/.gstack}"
 DETECTION_FILE="$GBRAIN_STATE_DIR/gbrain-detection.json"
+# PID-unique tmp so concurrent setups (parallel Conductor workspaces) can't
+# clobber each other's in-flight detection write.
+DETECTION_TMP="$DETECTION_FILE.$$.tmp"
 mkdir -p "$GBRAIN_STATE_DIR"
 if [ -x "$DETECT_BIN" ]; then
-  if "$DETECT_BIN" > "$DETECTION_FILE.tmp" 2>/dev/null; then
-    mv "$DETECTION_FILE.tmp" "$DETECTION_FILE"
-    if grep -q '"gbrain_local_status": "ok"' "$DETECTION_FILE" 2>/dev/null; then
-      log "gbrain detected — regenerating Claude SKILL.md with brain-aware blocks (~250 token overhead per planning skill)..."
-      (
-        cd "$SOURCE_GSTACK_DIR"
-        bun_cmd run gen:skill-docs:user --host claude 2>&1 | tail -3
-      ) || log "  warning: gen:skill-docs:user failed — run 'bun run gen:skill-docs:user' manually if you want brain-aware blocks"
+  if "$DETECT_BIN" > "$DETECTION_TMP" 2>/dev/null; then
+    mv "$DETECTION_TMP" "$DETECTION_FILE"
+    # Single source of truth for "is gbrain usable" — `--is-ok` runs live
+    # detection (exit 0 iff ok), so setup, bin/dev-setup, and gstack-config
+    # all gate on the same check instead of re-grepping the JSON.
+    if "$DETECT_BIN" --is-ok 2>/dev/null; then
+      if [ -n "${GSTACK_SKIP_GBRAIN_REGEN:-}" ]; then
+        # Dev/source tree (set by bin/dev-setup): never regenerate tracked
+        # SKILL.md in place — that dirties checked-in source. Detection is
+        # still persisted above; the dev workspace renders the :user variant
+        # into an untracked dir, and other projects get blocks via
+        # `gstack-config gbrain-refresh`.
+        log "gbrain detected — GSTACK_SKIP_GBRAIN_REGEN set: leaving tracked SKILL.md canonical (dev/source tree)."
+      else
+        log "gbrain detected — regenerating Claude SKILL.md with brain-aware blocks (~250 token overhead per planning skill)..."
+        (
+          cd "$SOURCE_GSTACK_DIR"
+          bun_cmd run gen:skill-docs:user --host claude 2>&1 | tail -3
+        ) || log "  warning: gen:skill-docs:user failed — run 'bun run gen:skill-docs:user' manually if you want brain-aware blocks"
+      fi
    else
      log "gbrain not detected — brain-aware blocks suppressed in planning-skill SKILL.md files (zero token overhead)."
      log "  To enable: install gbrain via /setup-gbrain, then re-run ./setup or 'gstack-config gbrain-refresh'."
    fi
  else
-    rm -f "$DETECTION_FILE.tmp"
+    rm -f "$DETECTION_TMP"
    log "  warning: gstack-gbrain-detect failed — brain-aware blocks will stay suppressed"
  fi
 fi
@@ -0,0 +1,91 @@
+import { describe, test, expect } from 'bun:test';
+import * as path from 'path';
+import * as fs from 'fs';
+
+// Static tripwires for the B2 render-isolation wiring. These fail CI if a
+// refactor drops a load-bearing line, re-introducing the "dev-setup dirties
+// tracked SKILL.md" drift (or worse, leaks the skip-guard into real installs).
+const ROOT = path.resolve(import.meta.dir, '..');
+const read = (rel: string) => fs.readFileSync(path.join(ROOT, rel), 'utf-8');
+
+// Text-level checks against bin/dev-setup: the script is read once as a string
+// and never executed.
+describe('dev-setup: worktree stays canonical', () => {
+  const devSetup = read('bin/dev-setup');
+
+  test('passes GSTACK_SKIP_GBRAIN_REGEN inline on the nested setup call', () => {
+    // Matches the exact `VAR=1 cmd` prefix form on the "$GSTACK_LINK/setup" call.
+    expect(devSetup).toContain('GSTACK_SKIP_GBRAIN_REGEN=1 "$GSTACK_LINK/setup"');
+  });
+
+  test('never exports GSTACK_SKIP_GBRAIN_REGEN (would leak into other setup paths)', () => {
+    // `\s+` tolerates any amount of whitespace between `export` and the name.
+    expect(devSetup).not.toMatch(/export\s+GSTACK_SKIP_GBRAIN_REGEN/);
+  });
+
+  test('renders the :user variant into an out-dir, not in place', () => {
+    // Substring checks only: they prove the flag and the render path are both
+    // mentioned, not that they appear on the same command line.
+    expect(devSetup).toContain('--out-dir');
+    expect(devSetup).toContain('.claude/gstack-rendered');
+  });
+
+  test('gates the render on gstack-gbrain-detect --is-ok', () => {
+    expect(devSetup).toContain('--is-ok');
+  });
+});
+
+// Text-level checks against the `setup` script: it is read once as a string
+// and never executed.
+describe('setup: honors GSTACK_SKIP_GBRAIN_REGEN', () => {
+  const setup = read('setup');
+
+  test('skips the in-place :user regen when the guard is set', () => {
+    // Anchor on the guard expression itself, not the bare variable name, so an
+    // earlier incidental mention (comment, usage text) cannot shift the window.
+    const guardIdx = setup.indexOf('${GSTACK_SKIP_GBRAIN_REGEN:-}');
+    expect(guardIdx).toBeGreaterThan(-1);
+    // The guard must wrap the in-place render, not the detection persist:
+    // the persist (mv of the tmp file into place) has to come before the guard...
+    const persistIdx = setup.indexOf('mv "$DETECTION_TMP" "$DETECTION_FILE"');
+    expect(persistIdx).toBeGreaterThan(-1);
+    expect(persistIdx).toBeLessThan(guardIdx);
+    // ...and the skip branch's log line has to follow the guard closely.
+    const after = setup.slice(guardIdx, guardIdx + 600);
+    expect(after).toContain('leaving tracked SKILL.md canonical');
+  });
+
+  test('uses a PID-unique detection tmp (no concurrent clobber)', () => {
+    // `$$` is the shell's own PID, which makes the tmp name per-process.
+    expect(setup).toContain('$DETECTION_FILE.$$.tmp');
+  });
+
+  test('gates detection on the shared --is-ok check', () => {
+    expect(setup).toContain('"$DETECT_BIN" --is-ok');
+  });
+});
+
+// `gen` is the generator's source text; nothing in this suite runs the generator.
+describe('gen-skill-docs: section rewrite is gated on --out-dir', () => {
+  const gen = read('scripts/gen-skill-docs.ts');
+
+  test('rewriteSectionBase is a no-op without --out-dir', () => {
+    expect(gen).toContain('function rewriteSectionBase');
+    const idx = gen.indexOf('function rewriteSectionBase');
+    // Inspect only the first 400 characters from the `function` keyword onward,
+    // so both strings below must appear near the top of the function.
+    const body = gen.slice(idx, idx + 400);
+    expect(body).toContain('if (!OUT_DIR) return content');
+    expect(body).toContain('sections'); // surgical: regex targets only /sections/ paths
+  });
+});
+
+// Text-level checks against bin/dev-teardown; the script is not executed.
+describe('dev-teardown: removes the untracked render', () => {
+  const teardown = read('bin/dev-teardown');
+
+  test('rm -rf the gstack-rendered dir', () => {
+    expect(teardown).toContain('gstack-rendered');
+    // `.` does not cross newlines, so `rm -rf` and `RENDER_DIR` must share a line.
+    expect(teardown).toMatch(/rm -rf .*RENDER_DIR/);
+  });
+});
+
+describe('.gitignore: render dir is declared untracked', () => {
+  test('.claude/gstack-rendered/ is ignored', () => {
+    // Literal substring check; gitignore pattern semantics are not evaluated.
+    expect(read('.gitignore')).toContain('.claude/gstack-rendered/');
+  });
+});
+
+// Text-level checks against scripts/dev-skill.ts; the watcher is not started.
+describe('dev-skill: refreshes the render on template change', () => {
+  const devSkill = read('scripts/dev-skill.ts');
+
+  test('re-renders the :user variant into the workspace render dir', () => {
+    // Three independent substring checks: render dir name plus both generator flags.
+    expect(devSkill).toContain('gstack-rendered');
+    expect(devSkill).toContain('--out-dir');
+    expect(devSkill).toContain('--respect-detection');
+  });
+
+  test('only refreshes when the render dir already exists (never creates it during plain dev)', () => {
+    // Pins the exact existence-check expression on RENDER_DIR.
+    expect(devSkill).toContain('fs.existsSync(RENDER_DIR)');
+  });
+});
@@ -16,7 +16,7 @@
 */

 import { describe, it, expect } from "bun:test";
-import { execFileSync } from "child_process";
+import { execFileSync, spawnSync } from "child_process";
 import {
  mkdtempSync,
  mkdirSync,
@@ -47,6 +47,16 @@ function runDetect(env: Partial<NodeJS.ProcessEnv>): string {
  });
 }

+/**
+ * Invoke the detect script with `--is-ok` and report how it exited.
+ * The caller's overrides are layered on top of the current process env.
+ * Never throws: a missing exit status is reported as 1.
+ */
+function runIsOk(env: Partial<NodeJS.ProcessEnv>): number {
+  const mergedEnv = { ...process.env, ...env };
+  const { status } = spawnSync(BUN_BIN, ["run", DETECT_BIN, "--is-ok"], {
+    env: mergedEnv,
+    stdio: ["ignore", "pipe", "pipe"],
+    timeout: 15_000,
+  });
+  return status ?? 1;
+}
+
 interface DetectShape {
  gbrain_on_path: boolean;
  gbrain_version: string | null;
@@ -244,3 +254,66 @@ exit 0
    }
  });
 });
+
+// Each case runs the detect script against a throwaway HOME / GSTACK_HOME with
+// GSTACK_DETECT_NO_CACHE=1, and removes the temp tree in `finally`.
+describe("bin/gstack-gbrain-detect --is-ok — live gate", () => {
+  it("exits non-zero when gbrain is not on PATH (no-cli)", () => {
+    const tmp = mkdtempSync(join(tmpdir(), "detect-isok-"));
+    try {
+      const code = runIsOk({
+        HOME: tmp,
+        PATH: "/usr/bin:/bin", // no gbrain
+        GSTACK_HOME: tmp,
+        GSTACK_DETECT_NO_CACHE: "1",
+      });
+      expect(code).not.toBe(0);
+    } finally {
+      rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it("exits 0 when a fake gbrain reports a healthy engine (ok)", () => {
+    const tmp = mkdtempSync(join(tmpdir(), "detect-isok-"));
+    const bindir = join(tmp, "bin");
+    const home = join(tmp, "home");
+    const configDir = join(home, ".gbrain");
+    try {
+      mkdirSync(bindir, { recursive: true });
+      mkdirSync(configDir, { recursive: true });
+      writeFileSync(join(configDir, "config.json"), JSON.stringify({ engine: "pglite" }));
+      // Stub `gbrain` executable: answers `--version`, `sources list`, and
+      // `doctor …` with canned output; any other invocation just exits 0.
+      const fake = `#!/bin/sh
+case "$1 $2" in
+  "--version ")        echo "gbrain 0.33.1.0"; exit 0 ;;
+  "sources list")      echo '{"sources":[]}'; exit 0 ;;
+  "doctor "*)          echo '{"status":"ok","checks":[]}'; exit 0 ;;
+esac
+exit 0
+`;
+      const gbrainPath = join(bindir, "gbrain");
+      writeFileSync(gbrainPath, fake);
+      chmodSync(gbrainPath, 0o755);
+
+      // The stub's directory is listed first on PATH, ahead of the system dirs.
+      const code = runIsOk({
+        HOME: home,
+        PATH: `${bindir}:/usr/bin:/bin`,
+        GSTACK_HOME: tmp,
+        GSTACK_DETECT_NO_CACHE: "1",
+      });
+      expect(code).toBe(0);
+    } finally {
+      rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it("exit code agrees with the JSON gbrain_local_status (no skew)", () => {
+    // Run both surfaces against the same env and assert they never disagree.
+    // Only the no-gbrain environment is exercised here.
+    const tmp = mkdtempSync(join(tmpdir(), "detect-isok-"));
+    try {
+      const env = { HOME: tmp, PATH: "/usr/bin:/bin", GSTACK_HOME: tmp, GSTACK_DETECT_NO_CACHE: "1" };
+      const status = (JSON.parse(runDetect(env)) as DetectShape).gbrain_local_status;
+      const code = runIsOk(env);
+      // Exit 0 must hold exactly when the JSON status is "ok".
+      expect(code === 0).toBe(status === "ok");
+    } finally {
+      rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+});
@@ -0,0 +1,60 @@
+import { describe, test, expect } from 'bun:test';
+import * as path from 'path';
+import * as fs from 'fs';
+
+// Static tripwires for the C (machine-wide) render in `gstack-config
+// gbrain-refresh`. The render mutates the shared global install, so the guards
+// that stop it from touching the wrong directory are load-bearing — these fail
+// CI if any guard is dropped.
+const ROOT = path.resolve(import.meta.dir, '..');
+const SRC = fs.readFileSync(path.join(ROOT, 'bin', 'gstack-config'), 'utf-8');
+
+// Isolate the `ok)` arm that follows `gbrain-refresh)` so an assertion only
+// passes on text inside that arm, not on a look-alike elsewhere in the file.
+// Returns the text from `ok)` up to (not including) the next `;;`.
+function okBranch(): string {
+  const fail = (): never => {
+    throw new Error('Could not locate gbrain-refresh ok) branch');
+  };
+  const caseAt = SRC.indexOf('gbrain-refresh)');
+  if (caseAt < 0) fail();
+  const armAt = SRC.indexOf('ok)', caseAt);
+  if (armAt < 0) fail();
+  const armEnd = SRC.indexOf(';;', armAt);
+  if (armEnd < 0) fail();
+  return SRC.slice(armAt, armEnd);
+}
+
+describe('gstack-config gbrain-refresh: machine-wide render guards', () => {
+  // Computed once while the suite is being defined; okBranch() throws if the
+  // branch cannot be located, which fails the whole suite up front.
+  const branch = okBranch();
+
+  test('targets the global install', () => {
+    expect(branch).toContain('$HOME/.claude/skills/gstack');
+  });
+
+  test('refuses a symlinked install (would dirty a dev worktree)', () => {
+    // `[ -L … ]` is the shell test for "is a symbolic link".
+    expect(branch).toMatch(/\[ -L "\$INSTALL_DIR" \]/);
+  });
+
+  test('verifies it is a real gstack clone before mutating it', () => {
+    // Both marker files must be referenced inside the branch.
+    expect(branch).toContain('$INSTALL_DIR/VERSION');
+    expect(branch).toContain('$INSTALL_DIR/package.json');
+  });
+
+  test('requires bun on PATH', () => {
+    expect(branch).toContain('command -v bun');
+  });
+
+  test('renders the :user variant in place into the install', () => {
+    expect(branch).toContain('gen:skill-docs:user --host claude');
+  });
+
+  test('is self-documenting about the reset --hard / re-run cycle', () => {
+    // Checks the branch's own text (messages/comments) mentions both terms.
+    expect(branch).toContain('reset --hard');
+    expect(branch).toContain('gbrain-refresh');
+  });
+});
+
+describe('CLAUDE.md: deploy section documents the re-run', () => {
+  test('notes re-running gbrain-refresh after reset --hard', () => {
+    const docPath = path.join(ROOT, 'CLAUDE.md');
+    const doc = fs.readFileSync(docPath, 'utf-8');
+    // Locate the deploy heading; the section must exist at all.
+    const headingAt = doc.indexOf('## Deploying to the active skill');
+    expect(headingAt).toBeGreaterThan(-1);
+    // Only the 1200 characters starting at the heading count as "the section".
+    const sectionEnd = headingAt + 1200;
+    expect(doc.slice(headingAt, sectionEnd)).toContain('gbrain-refresh');
+  });
+});
@@ -0,0 +1,84 @@
+import { describe, test, expect } from 'bun:test';
+import { spawnSync } from 'child_process';
+import { createHash } from 'crypto';
+import * as path from 'path';
+import * as fs from 'fs';
+import * as os from 'os';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+
+// Render the gbrain `:user` variant into a temp out-dir, forcing detection ON
+// via a crafted GSTACK_HOME so the test is deterministic regardless of whether
+// the dev machine actually has gbrain installed. Asserts the B2 contract:
+//   (a) the worktree SKILL.md is byte-unchanged (source stays canonical),
+//   (b) the out-dir SKILL.md gained the inline Brain Context Load block,
+//   (c) its section refs point at the out-dir, not ~/.claude/skills/gstack,
+//   (d) bin/ refs are left pointing at the global install,
+//   (e) the out-dir section file gained the Save Results to Brain block.
+describe('gen-skill-docs --out-dir (B2 render isolation)', () => {
+  // SHA-256 hex digest of a file's raw bytes; used to prove a file is unchanged.
+  function hashFile(p: string): string {
+    const bytes = fs.readFileSync(p);
+    const hasher = createHash('sha256');
+    hasher.update(bytes);
+    return hasher.digest('hex');
+  }
+
+  test('renders :user to out-dir, rewrites section paths, leaves worktree canonical', () => {
+    const tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-home-'));
+    const outDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-out-'));
+    const worktreeSkill = path.join(ROOT, 'ship', 'SKILL.md');
+    // Hash before the run so (a) below can detect any in-place rewrite.
+    const beforeHash = hashFile(worktreeSkill);
+    try {
+      // Force gbrain detection ON for --respect-detection.
+      fs.writeFileSync(
+        path.join(tmpHome, 'gbrain-detection.json'),
+        JSON.stringify({ gbrain_local_status: 'ok', gbrain_version: '9.9.9' }),
+      );
+
+      // GSTACK_HOME points at tmpHome, where the crafted detection file lives.
+      const res = spawnSync(
+        'bun',
+        ['run', 'scripts/gen-skill-docs.ts', '--respect-detection', '--host', 'claude', '--out-dir', outDir],
+        { cwd: ROOT, encoding: 'utf-8', timeout: 120_000, env: { ...process.env, GSTACK_HOME: tmpHome } },
+      );
+      expect(res.status).toBe(0);
+
+      const outSkill = path.join(outDir, 'ship', 'SKILL.md');
+      const outSection = path.join(outDir, 'ship', 'sections', 'adversarial.md');
+      expect(fs.existsSync(outSkill)).toBe(true);
+      const skillContent = fs.readFileSync(outSkill, 'utf-8');
+
+      // (a) worktree byte-unchanged
+      expect(hashFile(worktreeSkill)).toBe(beforeHash);
+
+      // (b) inline block present in the rendered SKILL.md
+      expect(skillContent).toContain('Brain Context Load');
+
+      // (c) section refs repointed to the out-dir; none left pointing at the install
+      expect(skillContent).toContain(`${outDir}/ship/sections/`);
+      expect(skillContent).not.toContain('~/.claude/skills/gstack/ship/sections/');
+
+      // (d) bin refs are NOT rewritten — they still resolve to the global install
+      expect(skillContent).toContain('~/.claude/skills/gstack/bin/');
+
+      // (e) the SAVE block landed in the rendered section file
+      expect(fs.existsSync(outSection)).toBe(true);
+      expect(fs.readFileSync(outSection, 'utf-8')).toContain('Save Results to Brain');
+    } finally {
+      // Both temp trees are removed whether or not an assertion failed.
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+      fs.rmSync(outDir, { recursive: true, force: true });
+    }
+  });
+
+  test('global extras (proactive-suggestions.json) are NOT written in out-dir mode', () => {
+    const outDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-out-'));
+    // The generator's proactive write targets ROOT/scripts, never the out-dir, so
+    // an existence check under outDir alone cannot fail. Watch the repo copy too:
+    // null means "absent", otherwise its last-modified time.
+    const repoCopy = path.join(ROOT, 'scripts', 'proactive-suggestions.json');
+    const mtimeOf = (p: string): number | null => (fs.existsSync(p) ? fs.statSync(p).mtimeMs : null);
+    const before = mtimeOf(repoCopy);
+    try {
+      const res = spawnSync(
+        'bun',
+        ['run', 'scripts/gen-skill-docs.ts', '--host', 'claude', '--out-dir', outDir],
+        { cwd: ROOT, encoding: 'utf-8', timeout: 120_000 },
+      );
+      expect(res.status).toBe(0);
+      // proactive-suggestions.json lives at a repo path; out-dir mode must skip it,
+      // so the repo copy is neither created nor rewritten by this run.
+      expect(mtimeOf(repoCopy)).toBe(before);
+      // And nothing by that name should be mirrored into the out-dir either.
+      expect(fs.existsSync(path.join(outDir, 'scripts', 'proactive-suggestions.json'))).toBe(false);
+    } finally {
+      fs.rmSync(outDir, { recursive: true, force: true });
+    }
+  });
+});
@@ -1 +1 @@
-1.57.7.0
+1.57.9.0