Merge origin/main into gbrowser-anti-detection

Brings the branch up to date with main (v1.40.0.2 -> v1.58.1.0).

Conflict resolutions:
- VERSION: take main's 1.58.1.0 (branch re-bumps at /ship time).
- CHANGELOG.md: keep main's full history; slot the branch's unique
  v1.40.0.2 entry into descending-order position (no content lost).
- browse/src/browser-manager.ts: keep main's GSTACK_CHROMIUM_NO_SANDBOX
  override and onDisconnect(exitCode) signature; branch's
  buildGStackLaunchArgs / STEALTH_IGNORE_DEFAULT_ARGS wiring preserved.
- browse/test/browser-manager-unit.test.ts: keep main's override +
  exit-code propagation tests alongside the branch's Cmd+Q
  cause-resolver tests.
- browse/src/stealth.ts: blend the two stealth designs. Layer C
  (buildStealthScript) is the always-on consistency-first default;
  main's GSTACK_STEALTH=extended (EXTENDED_STEALTH_SCRIPT) remains an
  opt-in layer applied on top. Both public APIs and both test suites
  (stealth-layer-c + stealth-extended) preserved; the two applyStealth
  wiring assertions updated to reflect the Layer C default.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-06-17 07:54:24 -07:00
637 changed files with 100152 additions and 18013 deletions
+6
View File
@@ -37,3 +37,9 @@ bin/* text eol=lf
*.gif binary
*.ico binary
*.pdf binary
# The committed diagram-render bundle is hash-pinned (BUILD_INFO sha256);
# a CRLF rewrite on Windows checkout would break the drift test and change
# the content-addressed staged filename.
lib/diagram-render/dist/*.html text eol=lf
lib/diagram-render/dist/*.json text eol=lf
+6
View File
@@ -162,6 +162,12 @@ jobs:
permissions:
contents: read
pull-requests: write
# The comment upsert below calls the REST `/issues/{n}/comments` endpoints
# (gh api ... issues/comments). With GITHUB_TOKEN those are gated by the
# `issues` permission, not `pull-requests` — without it the GET returns 401
# on every PR that produces eval artifacts (PRs with no artifacts exit
# early and never hit it, which is why this stayed hidden). See #1802 CI fix.
issues: write
steps:
- uses: actions/checkout@v4
with:
+14 -3
View File
@@ -4,6 +4,8 @@ on:
branches: [main]
paths:
- 'make-pdf/**'
- 'lib/diagram-render/**'
- 'test/diagram-render-drift.test.ts'
- 'browse/src/meta-commands.ts'
- 'browse/src/write-commands.ts'
- 'browse/src/commands.ts'
@@ -51,6 +53,15 @@ jobs:
if: matrix.os == 'ubicloud-standard-8'
run: sudo apt-get update && sudo apt-get install -y poppler-utils
# Install a color-emoji font BEFORE Chromium launches so the emoji render
# gate has a fallback font. macOS ships Apple Color Emoji already.
- name: Install color-emoji font (Ubuntu)
if: matrix.os == 'ubicloud-standard-8'
run: |
sudo apt-get install -y fonts-noto-color-emoji
fc-cache -f || true
fc-match -f '%{family[0]}\t%{color}\n' ':lang=und-zsye:charset=1F600' || true
- name: Install Playwright Chromium
run: bunx playwright install chromium
@@ -72,9 +83,9 @@ jobs:
which pdftotext && pdftotext -v 2>&1 | head -1 || true
- name: Run make-pdf unit tests
run: bun test make-pdf/test/*.test.ts
run: bun test make-pdf/test/*.test.ts test/diagram-render-drift.test.ts
- name: Run combined-features copy-paste gate (P0)
- name: Run E2E gates (combined-features copy-paste + emoji render)
env:
BROWSE_BIN: ${{ github.workspace }}/browse/dist/browse
run: bun test make-pdf/test/e2e/combined-gate.test.ts
run: bun test make-pdf/test/e2e/
+63 -8
View File
@@ -1,7 +1,25 @@
name: PR Title Sync
# WHY pull_request_target (not pull_request): the default GITHUB_TOKEN is
# READ-ONLY on fork PRs under `pull_request`, so the title-sync backstop could
# never `gh pr edit` a fork/agent PR. `pull_request_target` runs in the base-repo
# context with a write token, which fixes fork coverage.
#
# WHY this is SAFE (pull_request_target is the most dangerous trigger):
# - We check out the BASE repo (no `ref:`), so the only code we execute is
# trusted base-repo infra (bin/gstack-pr-title-rewrite.sh). We NEVER check
# out or run PR-head/fork code.
# - Every attacker-controlled PR field (title, head repo, head sha) arrives via
# `env:` and is referenced as a shell-quoted "$VAR". We NEVER inline a
# `${{ github.event.pull_request.* }}` expression inside the run: script
# (that would execute a crafted title as shell).
# - The PR-head VERSION is read as DATA via the API (raw media type), from the
# head repo at the head sha — never by checking out the head.
# test/pr-title-sync-workflow-safety.test.ts is the static tripwire for all of
# the above and fails CI if any of it regresses.
on:
pull_request:
pull_request_target:
types: [opened, synchronize, edited]
paths:
- 'VERSION'
@@ -19,25 +37,62 @@ jobs:
pull-requests: write
if: github.actor != 'github-actions[bot]'
steps:
- name: Checkout PR head
# Base repo only — trusted infra (the rewrite helper). No PR-head checkout.
- name: Checkout base repo (trusted)
uses: actions/checkout@v4
with:
fetch-depth: 1
ref: ${{ github.event.pull_request.head.sha }}
- name: Rewrite PR title to match VERSION
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUM: ${{ github.event.pull_request.number }}
# Attacker-controlled on fork PRs — env-only, never inlined into run:.
OLD_TITLE: ${{ github.event.pull_request.title }}
BASE_REPO: ${{ github.repository }}
HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
run: |
set -euo pipefail
chmod +x ./bin/gstack-pr-title-rewrite.sh
VERSION=$(cat VERSION | tr -d '[:space:]')
NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE")
if [ "$NEW_TITLE" = "$OLD_TITLE" ]; then
echo "Title already correct; no change."
if [ "$HEAD_REPO" = "$BASE_REPO" ]; then IS_FORK=0; else IS_FORK=1; fi
# Read the PR-head VERSION as data (raw bytes), from the head repo at
# the head sha. Guard the assignment itself: under `set -e` a bare
# `VERSION=$(...)` would abort the step before any later [ -z ] check.
if ! VERSION=$(gh api -H "Accept: application/vnd.github.raw" \
"repos/$HEAD_REPO/contents/VERSION?ref=$HEAD_SHA" 2>/dev/null | tr -d '[:space:]'); then
VERSION=""
fi
if [ -z "$VERSION" ]; then
# Same-repo read failure should never happen — fail loudly so we
# notice. A fork miss (public-contents quirk, private fork) is a
# convenience gap, not a gate — warn and skip so the check stays green.
if [ "$IS_FORK" = "0" ]; then
echo "::error::Could not read VERSION from same-repo PR head ($HEAD_SHA)."
exit 1
fi
echo "::warning::Could not read VERSION from fork $HEAD_REPO ($HEAD_SHA); skipping title sync."
exit 0
fi
# The helper rejects a malformed VERSION (exit 2). Same policy: loud for
# same-repo, soft for forks. Never echo the raw (attacker-controlled)
# title — Actions still parses ::workflow-command:: from stdout.
if ! NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE"); then
if [ "$IS_FORK" = "0" ]; then
echo "::error::Could not compute title for VERSION '$VERSION' on PR #$PR_NUM."
exit 1
fi
echo "::warning::Could not compute title for fork PR #$PR_NUM; skipping."
exit 0
fi
if [ "$NEW_TITLE" = "$OLD_TITLE" ]; then
echo "PR #$PR_NUM title already correct; no change."
exit 0
fi
echo "Rewriting: $OLD_TITLE -> $NEW_TITLE"
gh pr edit "$PR_NUM" --title "$NEW_TITLE"
echo "PR #$PR_NUM title synced to VERSION."
+1
View File
@@ -116,6 +116,7 @@ jobs:
test/setup-windows-fallback.test.ts \
test/build-script-shell-compat.test.ts \
test/docs-config-keys.test.ts \
test/brain-sync-windows-paths.test.ts \
make-pdf/test/browseClient.test.ts \
make-pdf/test/pdftotext.test.ts
shell: bash
+96
View File
@@ -0,0 +1,96 @@
name: Windows Setup E2E
# End-to-end fresh-install gate for Windows. Runs `./setup` on a clean
# windows-latest checkout and asserts the build completes, binaries
# resolve via find-browse, and the gstack-paths state root resolves
# cleanly. Catches Bun shell-parser regressions in package.json's build
# chain (#1538, #1537, #1530, #1457, #1561) before they reach users.
#
# Separate from windows-free-tests.yml because that one runs a curated
# unit-test subset; this one exercises the install path itself.
#
# Runner: GitHub-hosted free windows-latest. ~3-5 min total.
on:
pull_request:
branches: [main]
paths:
- 'package.json'
- 'scripts/build.sh'
- 'scripts/write-version-files.sh'
- 'setup'
- 'browse/src/cli.ts'
- 'browse/src/find-browse.ts'
- 'bin/gstack-paths'
- '.github/workflows/windows-setup-e2e.yml'
workflow_dispatch:
concurrency:
group: windows-setup-e2e-${{ github.head_ref }}
cancel-in-progress: true
jobs:
windows-setup:
runs-on: windows-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- uses: oven-sh/setup-bun@v1
with:
bun-version: latest
- name: Configure git identity
run: |
git config --global user.email "windows-setup-e2e@gstack.test"
git config --global user.name "Windows Setup E2E"
git config --global init.defaultBranch main
shell: bash
- name: Install dependencies
run: bun install --frozen-lockfile
shell: bash
- name: Run bun run build (the previously-broken path)
# This is the regression gate. Bun's Windows shell parser rejected
# multiple constructs the old inline build chain used; the wave
# moved the build to scripts/build.sh. If this step fails on
# Windows, the build chain regressed.
run: bun run build
shell: bash
env:
GSTACK_SKIP_PLAYWRIGHT: '1'
- name: Verify binaries exist (with .exe extension on Windows)
run: |
set -e
test -f browse/dist/browse.exe || test -f browse/dist/browse || (echo "MISSING: browse" && exit 1)
test -f browse/dist/find-browse.exe || test -f browse/dist/find-browse || (echo "MISSING: find-browse" && exit 1)
test -f design/dist/design.exe || test -f design/dist/design || (echo "MISSING: design" && exit 1)
test -f bin/gstack-global-discover.exe || test -f bin/gstack-global-discover || (echo "MISSING: gstack-global-discover" && exit 1)
echo "All binaries present"
shell: bash
- name: Verify find-browse resolves to the .exe variant
run: |
set -e
OUT=$(bun browse/src/find-browse.ts 2>&1) || true
echo "find-browse output: $OUT"
# On Windows, find-browse should successfully resolve to a binary,
# whether or not it has the .exe extension on disk. Empty output
# or "not found" means the .exe extension resolver regressed.
echo "$OUT" | grep -qE '(browse\.exe|browse)$' || (echo "find-browse failed to resolve binary on Windows" && exit 1)
shell: bash
- name: Verify gstack-paths state root resolves
run: |
set -e
eval "$(bash bin/gstack-paths)"
test -n "$GSTACK_STATE_ROOT" || (echo "GSTACK_STATE_ROOT empty" && exit 1)
test -n "$PLAN_ROOT" || (echo "PLAN_ROOT empty" && exit 1)
test -n "$TMP_ROOT" || (echo "TMP_ROOT empty" && exit 1)
echo "GSTACK_STATE_ROOT=$GSTACK_STATE_ROOT"
echo "PLAN_ROOT=$PLAN_ROOT"
echo "TMP_ROOT=$TMP_ROOT"
shell: bash
+8 -1
View File
@@ -4,9 +4,13 @@ dist/
browse/dist/
design/dist/
make-pdf/dist/
bin/gstack-global-discover
# diagram-render ships its built bundle (offline-at-install premise, eng-review D2)
!lib/diagram-render/dist/
!lib/diagram-render/dist/**
bin/gstack-global-discover*
.gstack/
.claude/skills/
.claude/gstack-rendered/
.claude/scheduled_tasks.lock
.claude/*.lock
.agents/
@@ -37,3 +41,6 @@ supabase/.temp/
# Throughput analysis — local-only, regenerate via scripts/garry-output-comparison.ts
docs/throughput-*.json
# gbrain local source-staging dir (capability checks, source clones) — runtime artifact
.sources/
+21
View File
@@ -21,6 +21,7 @@ Invoke them by name (e.g., `/office-hours`).
| `/plan-tune` | Self-tune AskUserQuestion sensitivity per question. |
| `/autoplan` | One command runs CEO → design → eng → DX review. |
| `/design-consultation` | Build a complete design system from scratch. |
| `/spec` | Turn vague intent into a precise, executable spec in five phases. Files a GitHub issue, optionally spawns a Claude Code agent in a fresh worktree, and lets `/ship` close the source issue on merge. |
### Implementation + review
@@ -75,6 +76,25 @@ Invoke them by name (e.g., `/office-hours`).
| `/setup-browser-cookies` | Import cookies from your real browser for authenticated testing. |
| `/pair-agent` | Pair a remote AI agent (OpenClaw, Codex, etc.) with your browser. |
### iOS QA — drive real iPhones over USB or Tailscale (v1.43.0.0+)
| Skill | What it does |
|-------|-------------|
| `/ios-qa` | Live-device iOS QA via USB CoreDevice tunnel + embedded StateServer. Optionally exposes the device over Tailscale so remote agents can drive it. |
| `/ios-fix` | Autonomous iOS bug fixer with regression snapshot capture. |
| `/ios-design-review` | Designer's-eye QA on a real iPhone — 10-dimension Apple HIG rubric. |
| `/ios-clean` | Convenience: strip DebugBridge + #if DEBUG wiring before a Release build. |
| `/ios-sync` | Regenerate the iOS debug bridge against the latest upstream templates. |
Companion CLIs (run on the Mac that's plugged into the device):
| Command | What it does |
|---------|-------------|
| `gstack-ios-qa-daemon` | Mac-side broker. Loopback by default; `--tailnet` adds a Tailscale-facing listener with capability tiers and audit logging. |
| `gstack-ios-qa-mint` | Owner-grant CLI for the tailnet allowlist (`grant`/`revoke`/`list`). |
End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md).
### Safety + scoping
| Skill | What it does |
@@ -84,6 +104,7 @@ Invoke them by name (e.g., `/office-hours`).
| `/guard` | Activate both careful + freeze at once. |
| `/unfreeze` | Remove directory edit restrictions. |
| `/make-pdf` | Turn any markdown file into a publication-quality PDF. |
| `/diagram` | English in, diagram out: mermaid source + editable .excalidraw + SVG/PNG, offline. |
## Build commands
+3 -2
View File
@@ -212,8 +212,8 @@ from `snapshot`, or `@c` refs from `snapshot -C`. Full table:
| Command | Description |
|---------|-------------|
| `js <expr>` | Run inline JavaScript expression in page context, return as string |
| `eval <file>` | Run JS from a file (path under /tmp or cwd; same sandbox as `js`) |
| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in page context, return as string. With `--out <file>` the result is written to disk instead of returned (a `data:*;base64,...` result is decoded to raw bytes unless `--raw`). `--out` makes the invocation a WRITE (needs `write` scope, never allowed over the tunnel). |
| `eval <file> [--out <file>] [--raw]` | Run JS from a file (path under /tmp or cwd; same sandbox as `js`). `--out`/`--raw` behave as for `js`. |
| `css <sel> <prop>` | Computed CSS value |
| `attrs <sel\|@ref>` | Element attributes as JSON |
| `is <prop> <sel\|@ref>` | State check: visible, hidden, enabled, disabled, checked, editable, focused |
@@ -317,6 +317,7 @@ from `snapshot`, or `@c` refs from `snapshot -C`. Full table:
| `disconnect` | Close headed Chrome, return to headless |
| `focus [@ref]` | Bring headed Chrome to foreground (macOS); `@ref` also scrolls into view |
| `state save\|load <name>` | Save or load browser state (cookies + URLs) |
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. Use `--json` for programmatic consumers; text mode renders sorted top-10 tabs with "and N more" tail. |
### Handoff
+2255
View File
File diff suppressed because it is too large Load Diff
+170 -20
View File
@@ -27,25 +27,31 @@ bun run slop:diff # slop findings in files changed on this branch only
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
use Codex's own auth from `~/.codex/` config — no `OPENAI_API_KEY` env var needed.
**Where the keys live on this machine.** Conductor workspaces don't inherit the
user's interactive shell env, so `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` aren't
in the default process env. Before running any paid eval / E2E, source them from
`~/.zshrc` (that's where Garry keeps them):
**Env keys in Conductor workspaces.** The `GSTACK_*` env-shim (v1.39.2.0+,
`lib/conductor-env-shim.ts`) promotes `GSTACK_ANTHROPIC_API_KEY` /
`GSTACK_OPENAI_API_KEY` to their canonical names inside gstack's TS binaries.
Tests run through gstack entrypoints inherit this promotion automatically.
Don't echo the key value to stdout, logs, or shell history. The historical
"never pass `env:` to `runAgentSdkTest`" rule is retired: the failure was
partial-env replacement (the SDK's `Options.env` REPLACES the child's entire
environment, so an object without the key broke auth). The runner now always
passes a COMPLETE hermetic env with per-test `env:` merged last, so per-test
overrides are safe; ambient `process.env.ANTHROPIC_API_KEY` mutation also
still works (the env builder reads process.env at call time).
```bash
bash -c '
eval "$(grep -E "^export (ANTHROPIC_API_KEY|OPENAI_API_KEY)=" ~/.zshrc)"
export ANTHROPIC_API_KEY OPENAI_API_KEY
EVALS=1 EVALS_TIER=periodic bun test test/skill-e2e-<whatever>.test.ts
'
```
**Hermetic local E2E (default).** Every E2E runner (claude -p, PTY, Agent
SDK, codex, gemini) spawns children through `test/helpers/hermetic-env.ts`:
allowlist-scrubbed env (operator `CONDUCTOR_*`, `CLAUDE_*`, `GSTACK_*`,
`MCP_*`, `GBRAIN_*`, and credentials like `GH_TOKEN` never reach children),
a fresh seeded `CLAUDE_CONFIG_DIR` (no operator `~/.claude` CLAUDE.md /
MCP servers / skills), a temp `GSTACK_HOME`, and `--strict-mcp-config`.
Local eval signal matches CI. Debug against real operator state with
`EVALS_HERMETIC=0` (restores the legacy env AND drops the strict-MCP flag).
Per-test `env:` overrides merge last, so deliberate contamination
(`CONDUCTOR_WORKSPACE_PATH`, per-test `GSTACK_HOME`) keeps working. Wiring
is pinned by `test/hermetic-wiring.test.ts` (static tripwire) and two
gate-tier canaries in `test/skill-e2e-hermetic-canary.test.ts`.
Do not echo the key value anywhere (stdout, logs, shell history). The grep+eval
pattern keeps it in process env only. When passing to a test's Agent SDK, do NOT
pass `env: {...}` to `runAgentSdkTest` — the SDK's auth pipeline doesn't pick up
the key the same way when env is supplied as an object (confirmed failure mode).
Instead, mutate `process.env.ANTHROPIC_API_KEY` ambiently before the call and
restore in `finally`.
E2E tests stream progress in real-time (tool-by-tool via `--output-format stream-json
--verbose`). Results are persisted to `~/.gstack-dev/evals/` with auto-comparison
against the previous run.
@@ -120,6 +126,7 @@ gstack/
├── land-and-deploy/ # /land-and-deploy skill (merge → deploy → canary verify)
├── office-hours/ # /office-hours skill (YC Office Hours — startup diagnostic + builder brainstorm)
├── investigate/ # /investigate skill (systematic root-cause debugging)
├── spec/ # /spec skill (five-phase spec → GitHub issue, optional agent spawn, /ship auto-closes)
├── retro/ # Retrospective skill (includes /retro global cross-project mode)
├── bin/ # CLI utilities (gstack-repo-mode, gstack-slug, gstack-config, etc.)
├── document-release/ # /document-release skill (post-ship doc updates + Diataxis coverage map)
@@ -145,7 +152,7 @@ gstack/
├── setup # One-time setup: build binary + symlink skills
├── SKILL.md # Generated from SKILL.md.tmpl (don't edit directly)
├── SKILL.md.tmpl # Template: edit this, run gen:skill-docs
├── ETHOS.md # Builder philosophy (Boil the Lake, Search Before Building)
├── ETHOS.md # Builder philosophy (Boil the Ocean, Search Before Building)
└── package.json # Build scripts for browse
```
@@ -236,6 +243,24 @@ Activity / Refs / Inspector as debug overlays behind the footer's
flow, dual-token model, and threat-model boundary — silent failures
here usually trace to not understanding the cross-component flow.
**Embedder terminal-agent ownership** (v1.42.1.0+, identity-based kill v1.44.0.0+).
`buildFetchHandler` in `browse/src/server.ts` accepts `ServerConfig.ownsTerminalAgent?:
boolean` (default `true`). When `true`, factory shutdown runs the full teardown:
identity-based kill via `killAgentByRecord(readAgentRecord(stateDir))` from
`browse/src/terminal-agent-control.ts` plus `safeUnlinkQuiet` on
`<stateDir>/terminal-port`, `<stateDir>/terminal-internal-token`, and
`<stateDir>/terminal-agent-pid` (the per-boot agent record introduced in v1.44).
Embedders (e.g. the gbrowser phoenix overlay) that pre-launch their own PTY
server must pass `false` so their discovery files survive gstack teardown cycles.
The flag is the third caller-owned teardown gate in `ServerConfig` (alongside
`xvfb?` and `proxyBridge?`); polarity is inverted (explicit bool vs presence) and
documented in the field's JSDoc. CLI `start()` always passes `true` explicitly —
the static-grep test in `browse/test/server-embedder-terminal-port.test.ts` fails
CI if a refactor drops it. Pre-v1.44 used `pkill -f terminal-agent\.ts` (regex
match) which would kill sibling gstack sessions on the same host; the new
`browse/test/terminal-agent-pid-identity.test.ts` static-grep tripwire fails CI
if any source file re-introduces `pkill ... terminal-agent` or `spawnSync('pkill', ...)`.
**WebSocket auth uses Sec-WebSocket-Protocol, not cookies.** Browsers
can't set `Authorization` on a WebSocket upgrade, but they CAN set
`Sec-WebSocket-Protocol` via `new WebSocket(url, [token])`. The agent
@@ -284,6 +309,26 @@ response in `server.ts`, read
`browse/test/server-sanitize-surrogates.test.ts` pins the wiring with invariant
tests, so bypasses fail CI.
**SSE endpoint helper** (v1.51.0.0+). New SSE endpoints in `server.ts` MUST route
through `createSseEndpoint(req, config)` from `browse/src/sse-helpers.ts`. The
helper owns the cleanup contract (abort + enqueue-throw + heartbeat-throw, all
idempotent) and bakes in `sanitizeLoneSurrogates` on every JSON.stringify, so
new subscribers can't accidentally regress either invariant. Inline
`ReadableStream` wiring leaked subscribers when the TCP connection died without
firing `req.signal.abort` (Chromium MV3 service-worker suspend, intermediate
proxy half-close). `/activity/stream`, `/inspector/events`, and `/memory`
(SSE-eligible) all route through it. `browse/test/sse-helpers.test.ts` pins the
cleanup contract.
**CDP session lifecycle** (v1.51.0.0+). Direct `page.context().newCDPSession(page)`
calls outside `browse/src/cdp-bridge.ts` fail CI via the static-grep tripwire in
`browse/test/cdp-session-cleanup.test.ts`. Use `withCdpSession(page, async (s) => {...})`
for one-shot CDP work (try/finally detach) or `getOrCreateCdpSession(page, cache)`
for cached sessions tied to a page's lifetime (close-detach via `Map<page, session>`).
Three sites migrated: cdp-bridge frame events, write-commands archive capture,
cdp-inspector. The helpers prevent the per-session leak class where successful-path
detach happened but error-path detach was missed.
**Setup symlink hardening** (v1.38.0.0+). Every link site in `setup` MUST route
through the `_link_or_copy SRC DST` helper near the `IS_WINDOWS` detection. On
Windows without Developer Mode, plain `ln -snf` produces frozen file copies that
@@ -388,6 +433,44 @@ because they're tracked despite `.gitignore` — ignore them. When staging files
always use specific filenames (`git add file1 file2`) — never `git add .` or
`git add -A`, which will accidentally include the binaries.
## Redaction guard (PII / secrets / legal content)
Shared redaction engine catches credentials, PII, and legal/damaging content
before it reaches an external sink (codex dispatch, GitHub issue/PR body, pushed
commit). It is a **guardrail, not airtight enforcement**`git push --no-verify`,
direct `gh issue create`, and `GSTACK_REDACT_PREPUSH=skip` all bypass it. It
catches accidents and carelessness, the 99% case. Do not claim it stops a
determined leaker (a CHANGELOG line that does would fail a hostile screenshotter).
- **Engine + taxonomy:** `lib/redact-patterns.ts` (the single source of truth —
3 tiers; HIGH = genuinely-secret credentials that block, MEDIUM = PII/legal/
internal + high-FP credential shapes that confirm via AskUserQuestion, LOW =
FYI) and `lib/redact-engine.ts` (pure `scan()` + `applyRedactions()`).
Calibration matters: a gate that cries wolf gets ignored, so context-variable
shapes (Stripe `pk_live_`, Google `AIza`, JWT, env `*_KEY=`) sit at MEDIUM.
- **CLI:** `bin/gstack-redact` (exit 0 clean / 2 MEDIUM / 3 HIGH; `--json`,
`--auto-redact`, `--repo-visibility`, `--from-file`). `bin/gstack-redact-prepush`
is the opt-in git hook.
- **Skill docs are generated** from `scripts/resolvers/redact-doc.ts`
(`{{REDACT_TAXONOMY_TABLE}}`, `{{REDACT_INVOCATION_BLOCK:<sink>}}`) so /spec,
/cso, /ship, /document-release, /document-generate never drift from the engine.
- **Scan-at-sink:** always scan the EXACT bytes that will be sent — write to a
temp file, scan that file, pass the SAME file to `gh`/`git`. Never scan a string
then re-render (that reopens a scan-vs-send gap).
- **Visibility (no tier promotion):** resolve once per run, order = local config
(`gstack-config get redact_repo_visibility`, ~/.gstack so never committed) → gh
→ glab → unknown(=public-strict). Public repos get STERNER per-finding
confirmation (no batch-acknowledge, no silent-proceed); MEDIUM is never
auto-promoted to HIGH.
- **Tool-attributed fences:** wrap Codex/Greptile/eval output in ` ```codex-review `
/ ` ```greptile ` fences so example credentials those tools quote WARN-degrade
instead of blocking. A live-format credential inside the fence still blocks.
- **Config keys:** `redact_repo_visibility` (public|private|unknown, local-only
override for repos gh/glab can't read), `redact_prepush_hook` (true|false).
There is intentionally NO key to disable HIGH blocking.
- **Audit:** the /spec semantic pass appends a content-free record (categories +
body sha256, no spec text) to `~/.gstack/security/semantic-reviews.jsonl` (0600).
## Commit style
**Always bisect commits.** Every commit should be a single logical change. When
@@ -708,8 +791,10 @@ When estimating or discussing effort, always show both human-team and CC+gstack
| Research / exploration | 1 day | 3 hours | ~3x |
Completeness is cheap. Don't recommend shortcuts when the complete implementation
is a "lake" (achievable) not an "ocean" (multi-quarter migration). See the
Completeness Principle in the skill preamble for the full philosophy.
is achievable. Boil the ocean — the complete thing is the goal; only genuinely
unrelated multi-quarter migrations are separate scope, never an excuse for a
shortcut. See the Completeness Principle in the skill preamble for the full
philosophy.
## Search before building
@@ -758,6 +843,34 @@ them. Report progress at each check (which tests passed, which are running, any
failures so far). The user wants to see the run complete, not a promise that
you'll check later.
## Running evals as an agent: always detach (SIGTERM-proof)
When **you (an agent/harness)** launch a long eval/benchmark run, run it through
`bin/gstack-detach` — NEVER as a plain backgrounded Bash task. A plain background
task lives in the harness's process group, so a SIGTERM ("polite quit") on a turn
boundary, a stopped Monitor, or an interruption kills the run mid-flight (observed:
`script "test:gate" was terminated by signal SIGTERM` ~40 min into a run). On macOS
the run can also die to idle-sleep. `gstack-detach` fixes both: a fresh session
(escapes the group SIGTERM) wrapped in `caffeinate -i` (blocks idle-sleep).
- Use the `eval:bg*` scripts (`eval:bg`, `eval:bg:all`, `eval:bg:gate`,
`eval:bg:periodic`) — they wrap the eval command in `gstack-detach` with the
machine-wide `gstack-evals` lock (concurrent worktrees serialize instead of
saturating the shared model API), a per-tier watchdog, and a **run-scoped** log
under `~/.gstack-dev/eval-runs/` (no shared-`/tmp` collision). Each prints its
log path. Or call `gstack-detach [--lock NAME] [--timeout SECS] [--label LBL] --
<cmd>` directly for any long agent job. Export `ANTHROPIC_API_KEY` first (never
pass keys in argv).
- Then **poll the printed logfile** with a death-aware watcher: break on the
guaranteed `### gstack-detach EXIT=<code> ###` sentinel (success AND failure are
both marked, so silence is never mistaken for success). The detached run survives
even if your watcher gets reaped, so re-checking the log always works.
- Why the lock: a shared dev box with several Conductor worktrees will rate-limit
the model API if two eval suites run at once (15-way concurrency each), which
mass-times-out E2E tests. The lock makes the second run WAIT, not collide.
- Humans running `bun run test:evals` foreground in their own terminal don't need
this — Ctrl-C is intended there. Detachment is for agent-launched runs only.
## E2E test fixtures: extract, don't copy
**NEVER copy a full SKILL.md file into an E2E test fixture.** SKILL.md files are
@@ -813,6 +926,12 @@ The active skill lives at `~/.claude/skills/gstack/`. After making changes:
2. Fetch and reset in the skill directory: `cd ~/.claude/skills/gstack && git fetch origin && git reset --hard origin/main`
3. Rebuild: `cd ~/.claude/skills/gstack && bun run build`
**If you use gbrain:** the `git reset --hard` in step 2 reverts the brain-aware
(`GBRAIN_CONTEXT_LOAD` / `GBRAIN_SAVE_RESULTS`) blocks that `gstack-config
gbrain-refresh` renders into the install (those generated blocks differ from
`main` by design). After deploying, re-run `gstack-config gbrain-refresh` to
restore them across all your projects' Claude sessions. It's idempotent.
Or copy the binaries directly:
- `cp browse/dist/browse ~/.claude/skills/gstack/browse/dist/browse`
- `cp design/dist/design ~/.claude/skills/gstack/design/dist/design`
@@ -835,6 +954,31 @@ Key routing rules:
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
## Cross-session decision memory
Durable decisions and their rationale are captured in an append-only, event-sourced
store at `~/.gstack/projects/<slug>/decisions.jsonl` so neither you nor the user
re-litigates a settled call or loses the "why" across sessions. This is the reliable,
file-only path: it works with gbrain OFF. (gbrain semantic recall is an optional
enhancement layered on top, never a dependency.)
- **Resurface** active decisions before re-deciding: `bin/gstack-decision-search`
(`--recent N`, `--scope repo|branch|issue`, `--query KW`, `--all`, `--json`).
Add `--semantic` (with `--query`) to append related hits from gbrain memory when
it's up; it degrades silently to the reliable file results when gbrain is off.
Session start already surfaces scope-relevant active decisions via Context Recovery.
If a decision is listed, treat it as settled with its rationale; if you're about to
reverse it, say so explicitly.
- **Capture** a DURABLE decision when you or the user make one:
`bin/gstack-decision-log '{"decision":"...","rationale":"...","scope":"repo|branch|issue","source":"user|skill|agent","confidence":1-10}'`.
Reverse a prior call with `--supersede <id>`; expunge an accidental secret with
`--redact <id>`; rewrite the log to the active set with `--compact`. Non-interactive
(never prompts), injection-sanitized, and HIGH-secret-blocking on write.
- **Durable means:** architecture choice, scope cut, tool/vendor choice, or a reversal
of a prior call. NOT a turn-level edit, a phrasing tweak, or anything trivially
re-derivable. Capture is curated at the source — log durable decisions only, or the
store becomes noise.
## GBrain Search Guidance (configured by /sync-gbrain)
<!-- gstack-gbrain-search-guidance:start -->
@@ -870,4 +1014,10 @@ file globs. Run `/sync-gbrain` after meaningful code changes; for ongoing
auto-sync across all worktrees, run `gbrain autopilot --install` once per
machine — gbrain's daemon handles incremental refresh on a schedule.
Safety: don't run `/sync-gbrain` while `gbrain autopilot` is active — the
orchestrator refuses destructive source ops when it detects a running autopilot
to avoid racing it (#1734). Prefer registering user repos with `gbrain sources
add --path <dir>` (no `--url`): URL-managed sources can auto-reclone, and the
sync code walk for them requires an explicit `--allow-reclone` opt-in.
<!-- gstack-gbrain-search-guidance:end -->
+61 -2
View File
@@ -106,6 +106,22 @@ bun run build
bin/dev-teardown
```
### Brain-aware blocks in a dev workspace (gbrain installed)
If gbrain is installed and usable (`bin/gstack-gbrain-detect --is-ok` exits 0),
`bin/dev-setup` keeps your tracked `SKILL.md` files canonical and renders the
brain-aware variant (the `GBRAIN_CONTEXT_LOAD` / `GBRAIN_SAVE_RESULTS` blocks)
into `.claude/gstack-rendered/` (gitignored, per-workspace). It then repoints the
workspace's `SKILL.md` symlinks at that render, so your Claude sessions get the
full gbrain experience while `git status` stays clean. Under the hood, dev-setup
passes `GSTACK_SKIP_GBRAIN_REGEN=1` inline to the nested `./setup` (so it never
dirties tracked source) and runs `gen:skill-docs:user --out-dir .claude/gstack-rendered`,
which rewrites only the section-base paths to point at the render. `bin/dev-teardown`
removes the render. To make the blocks live across your *other* projects' Claude
sessions, run `gstack-config gbrain-refresh`, which renders them into the global
install (`~/.claude/skills/gstack`), guarded so it never touches a symlinked or
non-gstack directory.
## Testing & evals
### Setup
@@ -160,6 +176,18 @@ EVALS=1 bun test test/skill-e2e-*.test.ts
- Saves full NDJSON transcripts and failure JSON for debugging
- Tests live in `test/skill-e2e-*.test.ts` (split by category), runner logic in `test/helpers/session-runner.ts`
**Hermetic by default.** Every E2E runner (claude -p, the real-PTY plan-mode
runner, the Agent SDK runner, plus the codex and gemini runners) spawns its child
through `test/helpers/hermetic-env.ts`: an allowlist-scrubbed environment, a fresh
seeded `CLAUDE_CONFIG_DIR`, a temp `GSTACK_HOME`, and `--strict-mcp-config`. Your
operator `~/.claude` config, MCP servers (gbrain, Conductor), skills, `~/.gstack`
decision logs, and `CONDUCTOR_*` env never leak into the child, so local eval
signal matches CI instead of disagreeing for reasons unrelated to the code under
test. Set `EVALS_HERMETIC=0` to debug against your real operator state (this also
drops `--strict-mcp-config`). The wiring is pinned by `test/hermetic-wiring.test.ts`
(a free static tripwire) and two gate-tier isolation canaries in
`test/skill-e2e-hermetic-canary.test.ts`.
### E2E observability
When E2E tests run, they produce machine-readable artifacts in `~/.gstack-dev/`:
@@ -182,6 +210,25 @@ bun run eval:compare # compare two runs — shows per-test deltas + Take
bun run eval:summary # aggregate stats + per-test efficiency averages across runs
```
**Detached runs for agents and long suites.** When an agent (or you, for a run
you don't want to babysit) launches a long eval, use the `eval:bg*` scripts. They
wrap the eval command in `bin/gstack-detach`: a fresh session that escapes a
turn-boundary SIGTERM, a `caffeinate` wrapper that blocks idle-sleep, a machine-wide
`gstack-evals` lock so concurrent worktrees serialize instead of saturating the
model API, a run-scoped log under `~/.gstack-dev/eval-runs/`, a per-tier watchdog,
and a guaranteed `### gstack-detach EXIT=<code> ###` sentinel so a poller never
mistakes silence for success.
```bash
bun run eval:bg # detached test:evals (diff-based)
bun run eval:bg:all # detached test:evals:all
bun run eval:bg:gate # detached gate-tier suite
bun run eval:bg:periodic # detached periodic-tier suite
```
Each prints its log path. Humans running `bun run test:evals` foreground in their
own terminal don't need this — Ctrl-C is intended there.
**Eval comparison commentary:** `eval:compare` generates natural-language Takeaway sections interpreting what changed between runs — flagging regressions, noting improvements, calling out efficiency gains (fewer turns, faster, cheaper), and producing an overall summary. This is driven by `generateCommentary()` in `eval-store.ts`.
Artifacts are never cleaned up — they accumulate in `~/.gstack-dev/` for post-mortem debugging and trend analysis.
@@ -232,6 +279,14 @@ For template authoring best practices (natural language over bash-isms, dynamic
To add a browse command, add it to `browse/src/commands.ts`. To add a snapshot flag, add it to `SNAPSHOT_FLAGS` in `browse/src/snapshot.ts`. Then rebuild.
**Don't bundle puppeteer/Chromium in a skill.** `browse` is the one shared
Chromium per box, including offline local-render workloads. A skill that needs to
rasterize its own HTML/JSON (diagrams, cards, og-images) should route through
`browse` — `screenshot --selector` for visual output, `load-html` + `js --out` for
bytes a render function returns — instead of `npm i puppeteer` and downloading a
second Chromium that drifts out of version sync. One install to pin, one daemon to
manage.
## Jargon list (V1 writing style)
gstack's Writing Style section (injected into every tier-≥2 skill's preamble)
@@ -326,13 +381,17 @@ If you're using [Conductor](https://conductor.build) to run multiple Claude Code
| Hook | Script | What it does |
|------|--------|-------------|
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills |
| `archive` | `bin/dev-teardown` | Removes skill symlinks, cleans up `.claude/` directory |
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills, runs `./setup` non-interactively, and (if gbrain is installed) renders brain-aware blocks into `.claude/gstack-rendered/` without dirtying tracked source |
| `archive` | `bin/dev-teardown` | Removes skill symlinks, the `.claude/gstack-rendered/` render, and cleans up `.claude/` directory |
When Conductor creates a new workspace, `bin/dev-setup` runs automatically. It detects the main worktree (via `git worktree list`), copies your `.env` so API keys carry over, and sets up dev mode — no manual steps needed.
`bin/dev-setup` runs `./setup` fully non-interactively (it passes `--plan-tune-hooks=prompt` and closes stdin), so a forwarded Conductor TTY can never hang on a hidden setup prompt. It also never installs the plan-tune Claude Code hooks, which means a throwaway workspace can't rewrite your global `~/.claude/settings.json` to point at an ephemeral worktree path. To install the plan-tune hooks deliberately, run `./setup --plan-tune-hooks` outside dev-setup (or `gstack-config set plan_tune_hooks yes`).
**First-time setup:** Put your `ANTHROPIC_API_KEY` in `.env` in the main repo (see `.env.example`). Every Conductor workspace inherits it automatically.
**`GSTACK_*` env prefix (Conductor-injected keys).** Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from every workspace's process env. The `.env` copy path doesn't restore them either — the strip happens after env inheritance. Users who want paid evals, `/sync-gbrain` embeddings, or `claude-agent-sdk` calls to work in a Conductor workspace must set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config; Conductor passes those through untouched. On the gstack side, TS entry points import `lib/conductor-env-shim.ts` as a side effect, which promotes `GSTACK_FOO_API_KEY` to `FOO_API_KEY` when the canonical name is empty. If you add a new TS entry point that hits a paid API, add `import "../lib/conductor-env-shim";` to the top of the file. Today the shim is imported from `bin/gstack-gbrain-sync.ts`, `bin/gstack-model-benchmark`, `scripts/preflight-agent-sdk.ts`, and `test/helpers/e2e-helpers.ts`.
## Things to know
- **SKILL.md files are generated.** Edit the `.tmpl` template, not the `.md`. Run `bun run gen:skill-docs` to regenerate.
+13 -8
View File
@@ -31,16 +31,21 @@ The last 10% of completeness that teams used to skip? It costs seconds now.
---
## 1. Boil the Lake
## 1. Boil the Ocean
AI-assisted coding makes the marginal cost of completeness near-zero. When
the complete implementation costs minutes more than the shortcut — do the
"Don't boil the ocean" was the right advice when engineering time was the
bottleneck. That era is over. AI-assisted coding makes the marginal cost of
completeness near-zero, so the old caution has quietly turned into an excuse.
When the complete implementation costs minutes more than the shortcut — do the
complete thing. Every time.
**Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module,
full feature implementation, all edge cases, complete error paths. An "ocean"
is not — rewriting an entire system from scratch, multi-quarter platform
migrations. Boil lakes. Flag oceans as out of scope.
**Ocean, lakes first:** The ocean is the destination — 100% test coverage for a
module, full feature implementation, all edge cases, complete error paths. You
get there one lake at a time: each lake is a boilable unit, not the ceiling.
"That's boiling the ocean" is no longer a reason to ship a shortcut — boiling
the ocean is the goal. The only thing still out of scope is genuinely unrelated
work: a multi-quarter platform migration that has nothing to do with the task at
hand. Flag that as separate scope. Boil everything else.
**Completeness is cheap.** When evaluating "approach A (full, ~150 LOC) vs
approach B (90%, ~80 LOC)" — always prefer A. The 70-line delta costs
@@ -144,7 +149,7 @@ think it's better, state what context you might be missing, and ask. Never act.
## How They Work Together
Boil the Lake says: **do the complete thing.**
Boil the Ocean says: **do the complete thing.**
Search Before Building says: **know what exists before you decide what to build.**
Together: search first, then build the complete version of the right thing.
+14 -3
View File
@@ -204,7 +204,10 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
| `/browse` | **QA Engineer** | Give the agent eyes. Real Chromium browser, real clicks, real screenshots. ~100ms per command. `/open-gstack-browser` launches GStack Browser with sidebar, anti-bot stealth, and auto model routing. |
| `/setup-browser-cookies` | **Session Manager** | Import cookies from your real browser (Chrome, Arc, Brave, Edge) into the headless session. Test authenticated pages. |
| `/autoplan` | **Review Pipeline** | One command, fully reviewed plan. Runs CEO → design → eng review automatically with encoded decision principles. Surfaces only taste decisions for your approval. |
| `/spec` | **Spec Author** | Turn vague intent into a precise, executable spec in five phases (why, scope, technical with mandatory code-reading, draft, file). Codex quality gate before file (blocks below 7/10), fail-closed secret redaction, dedupe against existing issues, archive to `$GSTACK_STATE_ROOT/projects/$SLUG/specs/` for team-corpus recall. `--execute` spawns `claude -p` in a fresh worktree; `/ship` auto-closes the source issue on merge. Plan-mode aware. |
| `/learn` | **Memory** | Manage what gstack learned across sessions. Review, search, prune, and export project-specific patterns, pitfalls, and preferences. Learnings compound across sessions so gstack gets smarter on your codebase over time. |
| `/make-pdf` | **Publisher** | Markdown in, publication-quality document out. Mermaid and excalidraw fences render as vector diagrams, fully offline. Images scale to the page and never truncate; wide diagrams get their own landscape page. `--to html` emits one self-contained file, `--to docx` a Word doc. |
| `/diagram` | **Diagram Maker** | English in, editable diagram out. Emits a triplet: mermaid source, `.excalidraw` you can open and edit on excalidraw.com (hand-drawn style), and rendered SVG/PNG. Zero network. Embed the source in markdown and `/make-pdf` renders it. |
### Which review should I use?
@@ -229,6 +232,8 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
| `/setup-gbrain` | **GBrain Onboarding** — from zero to running gbrain in under 5 minutes. PGLite local, Supabase existing URL, or auto-provision a new Supabase project via Management API. MCP registration for Claude Code + per-repo trust triad (read-write/read-only/deny). [Full guide](USING_GBRAIN_WITH_GSTACK.md). |
| `/sync-gbrain` | **Keep Brain Current** — re-index this repo's code into gbrain via `gbrain sources add` + `gbrain sync --strategy code`, refresh the `## GBrain Search Guidance` block in CLAUDE.md, and auto-remove guidance when the capability check fails. `--incremental` (default), `--full`, `--dry-run`. Idempotent; safe to re-run. |
| `/gstack-upgrade` | **Self-Updater** — upgrade gstack to latest. Detects global vs vendored install, syncs both, shows what changed. |
| `/ios-qa` | **iOS Live-Device QA (v1.43.0.0+)** — drive a real iPhone over USB CoreDevice via an embedded `StateServer` in the app. Read Swift source, codegen typed `@Observable` accessors, run the agent loop. Optional `--tailnet` flag exposes the device to OpenClaw or any HTTP-capable agent on your Tailscale tailnet so remote agents can run iOS QA without ever touching the hardware. Capability-tier allowlist (observe/interact/mutate/restore), per-device session lock, audit log. |
| `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync` | iOS bug-fix loop, designer's-eye HIG audit, debug-bridge cleanup, and accessor resync. See `docs/skills.md`. End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
### New binaries (v0.19)
@@ -238,6 +243,8 @@ Beyond the slash-command skills, gstack ships standalone CLIs for workflows that
|---------|-------------|
| `gstack-model-benchmark` | **Cross-model benchmark** — run the same prompt through Claude, GPT (via Codex CLI), and Gemini; compare latency, tokens, cost, and (optionally) LLM-judge quality score. Auth detected per provider, unavailable providers skip cleanly. Output as table, JSON, or markdown. `--dry-run` validates flags + auth without spending API calls. |
| `gstack-taste-update` | **Design taste learning** — writes approvals and rejections from `/design-shotgun` into a persistent per-project taste profile. Decays 5%/week. Feeds back into future variant generation so the system learns what you actually pick. |
| `gstack-ios-qa-daemon` | **iOS QA daemon** — Mac-side broker between an agent and a connected iPhone over USB CoreDevice. Loopback by default; `--tailnet` opens a Tailscale-facing listener with identity-gated capability tiers. Single-instance via flock on `~/.gstack/ios-qa-daemon.pid`. See [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
| `gstack-ios-qa-mint` | **iOS allowlist manager** — owner-grant CLI for the tailnet allowlist. `grant`/`revoke`/`list` against `~/.gstack/ios-qa-allowlist.json` (mode 0600). Remote agents never auto-allowlist; this is the explicit-intent path. |
### Continuous checkpoint mode (opt-in, local by default)
@@ -388,13 +395,14 @@ I open sourced how I build software. You can fork it and make it your own.
/setup-gbrain
```
Three paths, pick one:
Four paths, pick one:
- **Supabase, existing URL** — your cloud agent already provisioned a brain; paste the Session Pooler URL, now this laptop uses the same data.
- **Supabase, auto-provision** — paste a Supabase Personal Access Token; the skill creates a new project, polls to healthy, fetches the pooler URL, hands it to `gbrain init`. ~90 seconds end-to-end.
- **PGLite local** — zero accounts, zero network, ~30 seconds. Isolated brain on this Mac only. Great for try-first; migrate to Supabase later with `/setup-gbrain --switch`.
- **Remote gbrain MCP** — your brain runs on another machine (Tailscale, ngrok, internal LAN) or a teammate's server; paste an MCP URL and bearer token. Optionally pair with a local PGLite for symbol-aware code search in split-engine mode. Best for cross-machine memory without standing up a local DB.
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put_page`, etc. show up as first-class typed tools — not bash shell-outs.
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put`, etc. show up as first-class typed tools — not bash shell-outs.
**Keeping the brain current.** Run `/sync-gbrain` from any repo to re-index its code into gbrain (incremental by default, `--full` for a full reindex, `--dry-run` to preview). The skill registers the cwd as a federated source via `gbrain sources add`, runs `gbrain sync --strategy code`, and writes a `## GBrain Search Guidance` block to your project's CLAUDE.md so the agent prefers `gbrain search`/`code-def`/`code-refs` over Grep. The block is removed automatically if the capability check fails — no stale guidance pointing at tools that aren't installed.
@@ -412,6 +420,8 @@ The skill asks once per repo. The decision is sticky across worktrees and branch
gstack-brain-init
```
**Running gstack in Conductor?** Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from every workspace's process env, so paid evals and gbrain embeddings won't work out of the box. Set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config instead — gstack's TS entry points promote them to canonical names at runtime. Full details and the contributor checklist for adding the import to new entry points: [Conductor + GSTACK_* env vars](USING_GBRAIN_WITH_GSTACK.md#conductor--gstack_-env-vars).
**Full monty — every scenario, every flag, every bin helper, every troubleshooting step:** [USING_GBRAIN_WITH_GSTACK.md](USING_GBRAIN_WITH_GSTACK.md)
Other references: [docs/gbrain-sync.md](docs/gbrain-sync.md) (sync-specific guide) • [docs/gbrain-sync-errors.md](docs/gbrain-sync-errors.md) (error index)
@@ -421,7 +431,8 @@ Other references: [docs/gbrain-sync.md](docs/gbrain-sync.md) (sync-specific guid
| Doc | What it covers |
|-----|---------------|
| [Skill Deep Dives](docs/skills.md) | Philosophy, examples, and workflow for every skill (includes Greptile integration) |
| [Builder Ethos](ETHOS.md) | Builder philosophy: Boil the Lake, Search Before Building, three layers of knowledge |
| [Diagrams & Document Formats](docs/howto-diagrams-and-formats.md) | Mermaid/excalidraw fences in PDFs, image sizing and safety defaults, `--to html\|docx`, `/diagram` triplets |
| [Builder Ethos](ETHOS.md) | Builder philosophy: Boil the Ocean, Search Before Building, three layers of knowledge |
| [Using GBrain with GStack](USING_GBRAIN_WITH_GSTACK.md) | Every path, flag, bin helper, and troubleshooting step for `/setup-gbrain` |
| [GBrain Sync](docs/gbrain-sync.md) | Cross-machine memory setup, privacy modes, troubleshooting |
| [Architecture](ARCHITECTURE.md) | Design decisions and system internals |
+42 -14
View File
@@ -2,11 +2,7 @@
name: gstack
preamble-tier: 1
version: 1.1.0
description: |
Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
elements, verify state, diff before/after, take annotated screenshots, test responsive
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots. (gstack)
description: Fast headless browser for QA testing and site dogfooding. (gstack)
allowed-tools:
- Bash
- Read
@@ -21,6 +17,14 @@ triggers:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Navigate pages, interact with
elements, verify state, diff before/after, take annotated screenshots, test responsive
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.
## Preamble (run first)
```bash
@@ -41,6 +45,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
# variant flaky), so skills render decisions as prose instead of calling the
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
# still BLOCKs rather than rendering prose to nobody.
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
echo "CONDUCTOR_SESSION: true"
fi
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -56,7 +70,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@@ -98,6 +112,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@@ -107,7 +134,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -142,7 +169,7 @@ touch ~/.gstack/.writing-style-prompted
Skip if `WRITING_STYLE_PENDING` is `no`.
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
```bash
open https://garryslist.org/posts/boil-the-ocean
@@ -153,7 +180,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
Options:
- A) Help gstack get better! (recommended)
@@ -229,6 +256,7 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@@ -473,9 +501,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
## Plan Status Footer
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
PLAN MODE EXCEPTION — always allowed (it's the plan file).
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
If `PROACTIVE` is `false`: do NOT proactively invoke or suggest other gstack skills during
this session. Only run skills the user explicitly invokes. This preference persists across
@@ -488,6 +514,7 @@ quality gates that produce better results than answering inline.
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`
@@ -897,10 +924,10 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
| `cookies` | All cookies as JSON |
| `css <sel> <prop>` | Computed CSS value |
| `dialog [--clear]` | Dialog messages |
| `eval <file>` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. |
| `eval <file> [--out <file>] [--raw]` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
| `inspect [selector] [--all] [--history]` | Deep CSS inspection via CDP — full rule cascade, box model, computed styles |
| `is <prop> <sel|@ref>` | State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected. |
| `js <expr>` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. |
| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
| `network [--clear]` | Network requests |
| `perf` | Page load timings |
| `storage | storage set <key> <value>` | Read both localStorage and sessionStorage as JSON. With "set <key> <value>", write to localStorage only (sessionStorage is read-only via this command — set it with `js sessionStorage.setItem(...)`). |
@@ -946,6 +973,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
| `disconnect` | Disconnect headed browser, return to headless mode |
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
| `handoff [message]` | Open visible Chrome at current page for user takeover |
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
| `restart` | Restart server |
| `resume` | Re-snapshot after user takeover, return control to AI |
| `state save|load <name>` | Save/load browser state (cookies + URLs) |
+1
View File
@@ -32,6 +32,7 @@ quality gates that produce better results than answering inline.
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`
+666 -1
View File
@@ -1,5 +1,359 @@
# TODOS
## NEXT PRIORITY
### P1: #1882 — portable skill-install prefix (non-`gstack` install dirs break silently)
**What:** Every generated SKILL.md hardcodes the literal `~/.claude/skills/gstack/...`
for its `bin/`/asset calls (the per-invocation telemetry/config preamble plus ~9
resolvers). `setup` wires the top-level skill symlinks for any directory name, so
installing at `~/.claude/skills/<other>` leaves every internal `bin` reference
pointing at a non-existent `~/.claude/skills/gstack/` path — failing **silently, at
skill-invocation time**. Make the emitted references portable: resolve the install
root at runtime (the preamble already defines `GSTACK_ROOT`/`GSTACK_BIN` in
`scripts/resolvers/preamble/generate-preamble-bash.ts` but the literals don't use
them) and emit `$GSTACK_BIN`-relative paths instead of the hardcoded prefix.
**Why:** Filed as #1882. Split out of the June 2026 fix wave (decision A) once
implementation showed it is a host-config/design change, not a fix-wave patch. The
urgent half — the guard/freeze/careful frontmatter hooks broken on CC 2.1.162 — was
already fixed in that wave (#1871) with a literal `$HOME`-anchored path, because
frontmatter hooks run before any runtime variable exists and cannot use `$GSTACK_BIN`.
So #1882 is now purely the body-preamble portability work.
**Pros:** Unblocks installs at any directory name; removes a whole class of silent
invocation-time failures.
**Cons:** Touches the most load-bearing bash in the repo (every skill's preamble);
a silent mistake breaks all 52 skills. High blast radius — needs its own focused PR.
**Context / where to start:**
- Rewire `ctx.paths.binDir` (and browse/design dir paths) + the ~9 resolvers that
emit the literal (`testing.ts`, `review.ts`, `design.ts`, `browse.ts`,
`redact-doc.ts`, `tasks-section.ts`, `preamble/generate-*.ts`) to use the
preamble-defined `$GSTACK_ROOT`/`$GSTACK_BIN`.
- Ensure `GSTACK_ROOT`/`GSTACK_BIN` are defined before first use in EVERY skill's
preamble (verify the telemetry preamble's first bin call is after the definition).
- **Test conflict (verified):** `test/gen-skill-docs.test.ts:1942` and the sibling
ship assertion currently *assert* generated Claude output `.toContain('~/.claude/skills/gstack')`
as a guardrail that Codex-host paths don't leak. These must be rewritten to match
the new portable scheme.
- Regenerate all 52 SKILL.md (`bun run scripts/gen-skill-docs.ts --host all`); never
hand-edit generated files. Bisect: resolver/host-config change commit, then the
52-file regen commit.
- Smoke-test a skill invocation from a non-`gstack` install dir to prove the fix.
- Sibling of #349 (the `$CLAUDE_CONFIG_DIR` / `~/.claude` path issue).
## Test infrastructure
### ✅ DONE (v1.53.1.0): Rebaseline parity-suite (v1.44.1 → v1.53.0.0)
**What:** `test/parity-suite.test.ts` checked every skill's SKILL.md size against
the frozen `test/fixtures/parity-baseline-v1.44.1.json`. Five planning skills had
crept past the 1.05x ceiling: `plan-ceo-review` (1.052), `plan-eng-review` (1.062),
`plan-design-review` (1.068), `investigate` (1.053), `office-hours` (1.065) — growth
from the brain-aware-planning releases (v1.49v1.52) plus the v1.53 redaction guard.
**Resolved:** Captured a fresh baseline at HEAD via
`bun run scripts/capture-baseline.ts --tag v1.53.0.0` and re-pointed the test at
`test/fixtures/parity-baseline-v1.53.0.0.json`. The per-skill 1.05 ratio is kept, so
future bloat is still caught — only the stale anchor moved. Mirrors the earlier
`skill-size-budget` rebase (v1.44.1 → v1.47.0.0). Historical v1.44.1 / v1.46.0.0 /
v1.47.0.0 baselines retained in `test/fixtures/` for the v1→v2 audit trail. The
captured skill bytes match `origin/main` exactly (the rebasing branch left every
SKILL.md untouched). `bun test` is green again.
## Token-reduction follow-ups (Phase B, filed via /plan-eng-review on the plan-ceo-review carve)
### P3: Carve the always-loaded `{{PREAMBLE}}` reference blocks into an on-demand doc
**What:** The per-skill section carves (`/ship` v1.54, `/plan-ceo-review` v1.56) yield
real but bounded wins (-42% to -59% on the carved skill) because the shared
`{{PREAMBLE}}` (~40-50KB on every tier-3/4 skill) is the dominant always-loaded cost
and stays inline. Move the rarely-needed preamble REFERENCE blocks (the AskUserQuestion
split-rules and the CJK / lone-surrogate escaping reference) into an on-demand
section-style doc the agent reads only when it hits those edge cases, leaving the hot
path (voice, completeness principle, recommendation format) inline.
**Why:** Highest-ROI remaining token target. One preamble carve helps EVERY tier-≥2
skill at once, not one skill per PR. The eng-review on the plan-ceo carve flagged that
per-skill carves stay modest precisely because the preamble dominates the always-loaded
surface.
**Pros:** A single change reduces always-loaded cost across the whole skill pack.
**Cons:** The preamble is load-bearing and shared; a botched carve regresses every skill.
Needs the same union-parity + per-push freshness guards the section carves use, applied
corpus-wide.
**Context:** Builds on the v2 section pipeline (`scripts/resolvers/sections.ts`,
`{{SECTION:id}}` / `{{SECTION_INDEX}}`). The preamble source is
`scripts/resolvers/preamble.ts`. Measure which sub-blocks are cold (escaping reference,
split-rules) vs hot (voice, recommendation format) before cutting. Validate on one skill,
then roll corpus-wide.
**Effort estimate:** L (human team) → M (CC+gstack)
**Priority:** P3
**Depends on / blocked by:** The section pipeline (shipped v1.54). No hard blocker.
## gbrowser memory follow-ups (filed via /plan-eng-review + /codex on the v1.49 leak-fix PR)
These four items came out of the memory-leak investigation that shipped
the `$B memory` diagnostic + the four leak fixes. They were
deliberately deferred from that PR (already 14 commits / ~12 files);
each stands alone and any one could ship independently.
### P2: MV3 extension service worker memory profile
**What:** The `/memory` endpoint snapshot enumerates pages but does
not enumerate the gstack baked-in extension's service-worker target.
A long-running MV3 service worker can leak through retained DOM
snapshots, message ports that never close, alarms that re-arm, and
caches that grow without bound. The diagnostic should call
`Target.getTargets` with a filter for `service_worker` and include
each one in `tabs[]` (or a sibling `serviceWorkers[]` array) with the
same `Performance.getMetrics` data.
**Why:** Codex's outside-voice review on the eng-review surfaced this
class of leak (the extension is part of the gbrowser process tree but
invisible to today's snapshot). Until we surface it, a SW leak shows
up only in the parent process RSS with no per-target attribution.
**Pros:** Closes the per-target attribution gap for the
single-most-likely future leak source (our own extension).
**Cons:** Extension SW lifecycle is asymmetric vs page lifecycle;
auto-attach + filter is one more piece of CDP plumbing.
**Context:** Codex finding #4 on the eng-review outside voice. Not
in scope of the v1.49 PR; deliberately deferred to keep the PR to
the four highest-confidence leak fixes.
**Priority:** P2. **Effort:** M.
---
### P2: Native + GPU memory breakdown in `$B memory`
**What:** `$B memory` shows Bun RSS + per-tab JS heap + Chromium
process tree (PIDs + types + CPU time) but the per-process RSS is
absent — `SystemInfo.getProcessInfo` doesn't expose RSS and the eng
review (D2 USE_CDP) explicitly chose CDP over shelling to `ps`. The
honest next step is to surface what CDP DOES give for the other
memory categories: `Memory.getDOMCounters` per target (node + listener
counts), `SystemInfo.getInfo` for GPU memory, `Memory.getAllTimeSamplingProfile`
for a sampled native estimate.
**Why:** Codex's outside-voice review flagged that
`Performance.getMetrics` misses native memory, GPU memory, video
buffers, Skia, network cache, extension process RSS, and
browser-process RSS — all the categories where a 160 GB leak would
actually live. A diagnostic that misses the categories where the
leak class lives undersells itself.
**Pros:** Per-process category breakdown closes the gap between
"Activity Monitor says 160 GB" and what the diagnostic shows.
**Cons:** Each CDP method has its own quirks; this is a real
implementation pass, not a one-line addition.
**Context:** Codex finding #5 on the eng-review outside voice. Not
in scope of the v1.49 PR; deliberately deferred.
**Priority:** P2. **Effort:** M.
---
### P3: Single-context CDP listener for Network.loadingFinished
**What:** `wirePageEvents` attaches a `page.on('requestfinished')`
listener PER PAGE. The D10 fix removed the body-materialization leak
inside that listener but kept the per-page listener architecture
(7 listeners attached per tab — close, framenavigated, dialog,
console, request, response, requestfinished). The stretch goal from
D10 was to replace the per-page `requestfinished` listener with a
single context-level CDP listener via
`Target.setAutoAttach({autoAttach: true, waitForDebuggerOnStart: false,
flatten: true})` and a browser-wide `Network.loadingFinished` event
handler.
**Why:** Going from N to 1 listener for the request-size capture is
structurally the right architecture and removes one piece of per-tab
memory pressure. The body-materialization fix already addressed the
acute leak; this is the architectural cleanup that prevents similar
leaks in the same class.
**Pros:** One listener per browser instead of one per tab.
**Cons:** `Target.setAutoAttach` plumbing is more code than the
straight per-page listener; the marginal memory win is small on top
of the body-fetch fix that already landed.
**Context:** D10 stretch goal on the eng-review. The minimal-risk
fix shipped in v1.49 (replaces `await res.body()` with
`await req.sizes()`, preserving the per-page listener); this is the
architectural follow-up.
**Priority:** P3. **Effort:** M-L.
---
### P3: Real-Chromium peak-RSS reproducer (periodic tier)
**What:** The gate-tier reproducer
(`browse/test/memory-leak-reproducer.test.ts`) pins the invariant
that `res.body()` is never called during a burst of
`requestfinished` events. It uses a fake page; it does NOT spin up a
real Chromium nor measure peak Bun RSS during a real concurrent fetch
burst. A periodic-tier follow-up should: spin up a real headless
Chromium, navigate to a fixture page that concurrently fetches 500
mixed responses (small JSON, 100 KB images, 10 MB chunked,
gzip-compressed 2 MB), sample `process.memoryUsage().heapUsed` every
100 ms during the burst, assert `peak_heap < 200 MB above baseline`
AND `post-gc_heap < 30 MB above baseline`. Also include a single-tab
WebGL canvas variant that grows to >4 GB and asserts the per-tab RSS
toast fires.
**Why:** Codex flagged that the leak's real failure mode is transient
amplification under concurrent burst, not retained leak — a steady-state
heap test misses it. The fake-page gate-tier test catches the
listener-architecture regression; the periodic real-browser test
catches the actual peak-RSS class.
**Pros:** Closes the "did we actually demonstrate the OOM is fixed"
question with hard numbers. Feeds the ANGLE_B_NUMBERS CHANGELOG
release-summary table.
**Cons:** Periodic tier costs minutes of CI time and money per run;
real-browser memory tests are inherently flaky.
**Context:** Codex outside-voice finding on the eng-review; D7
ANGLE_B_NUMBERS CHANGELOG framing needs this reproducer's numbers
before /ship time.
**Priority:** P3. **Effort:** M.
---
## design daemon: follow-ups (filed v1.45.0.0 via /ship review army)
### ✅ DONE (v1.45.0.0): Tighten daemon test coverage
**Resolved in commit `6b037c55` (same PR):** All 5 test gaps filled before
landing. Per-file totals after: serve 16, daemon 34, daemon-discovery 23,
feedback-roundtrip-daemon 4 = 77 (+10 from initial ship). Specifically:
- Idle-shutdown actually fires (spawn-based, daemon process observed exiting,
state file removed).
- Bare GET polling doesn't reset idle (hammers `/api/progress` in background,
daemon still idles out).
- Idle-with-active-boards extends, then force-shuts after MAX_EXTENSIONS
(with `DESIGN_DAEMON_EXTENSION_MS=1500` + `MAX_EXTENSIONS=2`).
- Concurrent `ensureDaemon()` race converges on one daemon (lock wins).
- Stale-lock reclaim (dead PID succeeds, alive unrelated PID refuses).
- Malformed-JSON + non-object + array-body + missing-html negatives for
`POST /api/boards` and `POST /boards/<id>/api/reload`.
### P3: Minor maintainability nits from /ship review
- `design/src/cli.ts` and `design/src/serve.ts` both have a small `openBrowser`
helper with identical darwin/linux/else branches. Extract a shared
`design/src/open-browser.ts`.
- `design/src/daemon-client.ts:320` (`AbortSignal.timeout(2000)`) and `:357`
(`delay(50)`) use bare numeric literals while sibling timeouts are named
constants. Promote to `SHUTDOWN_POST_TIMEOUT_MS` and `ALIVE_POLL_INTERVAL_MS`.
- `design/src/daemon-state.ts:21` `serverPath` field is written
(`daemon.ts:541`) but never read by production code. Either remove or
document the forensic intent.
### P3: Daemon scope deferred from v1.45.0.0 plan
Originally listed in the plan's "TODOs surfaced for later" section:
- Per-daemon scoped auth tokens (only relevant once a tunnel/share use case appears).
- Optional persistent board history on disk in
`~/.gstack/projects/$SLUG/designs/history/` so submitted boards survive
daemon restarts.
- Windows spawn branch lifted from browse (V1 daemon is macOS + Linux;
Windows users fall back to legacy `--no-daemon` per-process server).
- `$D board list` / `$D board stop <id>` per-board ops CLI (V1 has only
`$D daemon status` / `stop`).
- Cross-worktree daemon attach (conductor sibling worktrees of the same
repo currently each spawn their own daemon — matches browse; revisit
if it causes friction).
---
## browse server: terminal-agent teardown follow-ups (filed v1.41 via /plan-eng-review)
### ✅ DONE (v1.44.0.0): Identity-based terminal-agent kill (replace pkill regex with PID)
**Resolved:** Bundled into the v1.44.0.0 long-lived-sidebar PR as Commit 0.
`browse/src/terminal-agent-control.ts` is the new home for `readAgentRecord`,
`writeAgentRecord`, `clearAgentRecord`, and `killAgentByRecord`. The agent
writes `<stateDir>/terminal-agent-pid` (JSON `{pid, gen, startedAt}`) at boot
and clears it on SIGTERM/SIGINT. `cli.ts` and `server.ts` both route through
`killAgentByRecord` instead of `pkill -f terminal-agent\.ts`. The new
`browse/test/terminal-agent-pid-identity.test.ts` is the static-grep tripwire
that fails CI if `pkill ... terminal-agent` or `spawnSync('pkill', ...)`
reappears in any source file.
---
### P3: shutdown() reads module-level `config`, not `cfg.config` (composition gap)
**What:** `browse/src/server.ts:shutdown()` reads `path.dirname(config.stateFile)`
where `config` is the module-level value resolved at import time, not the
`cfg.config` passed into `buildFetchHandler`. Same gap applies to
`cleanSingletonLocks(resolveChromiumProfile())` at server.ts:1298 — should
read `cfg.chromiumProfile`.
**Why:** Embedders today happen to share state-dir resolution with the CLI
(both go through `resolveConfig()` against the same env), so this doesn't
bite. But if an embedder ever passes a divergent `cfg.config` (e.g., a test
harness pointing at a temp dir), shutdown will operate on the wrong paths.
The `ownsTerminalAgent` flag exposes the problem without fixing it.
**Pros:** Closes the embedder-composition story properly. Pairs with
`cfg.chromiumProfile` to give a single coherent "this factory teardown
respects cfg" contract.
**Cons:** Pre-existing — not a regression. Two call sites today (1285 for
terminal files, 1298 for chromium locks). Threading `cfg.config` and
`cfg.chromiumProfile` into the right closures is straightforward but
broader than the v1.41 fix.
**Context:** Flagged by both Codex and Claude subagent in the /plan-eng-review
dual voices. Documented as out-of-scope in the v1.41 plan; same shape as the
`chromiumProfile` PR-body note to the gbrowser team.
**Depends on:** None.
---
### P3: Ownership-object refactor if a 4th caller-owned teardown gate appears
**What:** Today `ServerConfig` has three caller-owned teardown gates:
`xvfb?` (presence ⇒ don't close), `proxyBridge?` (same), and now
`ownsTerminalAgent` (explicit boolean). If a 4th gate appears, collapse to
`cfg.callerOwns?: Set<'terminalAgent' | 'xvfb' | 'proxyBridge' | ...>` or
similar.
**Why:** Three independent flags is below the refactor threshold — each
field has clear, distinct semantics and the JSDoc voice is consistent. A
fourth tips the cost balance: the per-field surface gets noisy, and
"what does this factory own?" becomes a question you have to ask of three
or four scattered fields instead of one explicit set.
**Pros:** Single source of truth for "what gstack tears down". Trivial
extension surface for future caller-owned resources. Easier to assert in
tests ("the set should contain X, not Y").
**Cons:** Premature today. The polarity-inversion note in the
`ownsTerminalAgent` JSDoc only hurts a little — it's one anomaly, not a
pattern. Refactoring now to an ownership object would touch every embedder.
**Context:** Recommended by Claude subagent during /plan-ceo-review dual
voice (autoplan). Trigger: a 4th caller-owned teardown gate in this same
`ServerConfig` shape.
**Depends on:** A 4th gate to motivate the refactor.
---
## /sync-gbrain memory stage perf follow-up
### P2: Investigate `gbrain import` perf on large staging dirs
@@ -457,7 +811,24 @@ reads it yet.
**Effort:** L (human: ~1 week / CC: ~4h)
**Priority:** P0
**Depends on:** 2+ weeks of v1 dogfood, profile diversity check passing.
**Depends on:** **90+ days of v1 dogfood stable across 3+ skills** (per
`docs/designs/PLAN_TUNING_V0.md` §"Deferred to v2" E1 acceptance criteria).
Distinct from the lighter-weight diversity-display gate
(`sample_size >= 20 AND skills_covered >= 3 AND question_ids_covered >= 8
AND days_span >= 7`) used in /plan-tune to render the inferred column —
display is a UI affordance, promotion to E1 needs a much higher bar
because behavioral adaptation is consequential and hard to revert. Prior
versions of this card cited "2+ weeks" which conflicted with V0 — V0 wins.
**Substrate risk (Codex outside-voice, Phase A review 2026-05-26):** Generated
skill prose is agent-compliance-based. Tests can verify templates contain the
right reads of `~/.gstack/developer-profile.json` and the right decision
points, but tests cannot prove agents obey them at runtime. E1 ships
adaptations as **advisory annotations on AskUserQuestion recommendations**
("Recommended via your profile: <choice>") until there's a hard runtime
execution path. Do NOT gate any AUTO_DECIDE on inferred profile alone in v1
of E1; explicit per-question preferences remain the only AUTO_DECIDE
source.
### E3 — `/plan-tune narrative` + `/plan-tune vibe`
@@ -1643,6 +2014,49 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
**Priority:** P2
**Depends on:** CDP patches proving the value of anti-bot stealth first
## /spec follow-ups (deferred from v1.47.0.0 via /plan-ceo-review SCOPE EXPANSION)
### P2: `/spec --epic` mode (parent issue + child issues + dependency graph)
**Priority:** P2
**What:** Add `--epic` flag that produces an Epic issue (parent) plus N child issues with explicit dependency graph and topological order. Emits multiple `gh issue create` calls with parent linkage in child bodies.
**Why:** Multi-week initiatives often span 3-5 specs that share context but ship sequentially. Today `/spec --epic` would let users author the full initiative in one session and file all linked issues atomically. The Epic template already exists in `spec/SKILL.md.tmpl` (carried over from PR #1698); only the flag routing + multi-issue `gh` orchestration is missing.
**Pros:**
- Closes the multi-issue workflow gap that `/spec` v1 doesn't cover.
- Parent + child linkage means project boards show the full initiative at-a-glance.
- Composes cleanly with existing `--execute` (spawn an agent on the parent epic; agent files children as it works).
**Cons:**
- More gh API surface (one create per child, parent-link edit pass).
- Dependency-graph rendering in markdown is fiddly across GitHub vs GitLab renderers.
**Context:** Considered in `/plan-ceo-review` SCOPE EXPANSION (D5), deferred 2026-05-25 in favor of shipping the 5 critical-path expansions (--execute, --dedupe, archive, quality gate, --audit). Re-evaluate once v1.47 ships and we see how often users hit "this should be 3 issues" in real /spec sessions.
**Depends on:** v1.47.0.0 `/spec` lands first; need real usage data to calibrate the multi-issue surface.
### P3: `/spec --dedupe` semantic matching (LLM-based) for v1.1
**Priority:** P3
**What:** Upgrade `--dedupe`'s string match against `gh issue list --search` to LLM-based semantic similarity. Today's v1 picks string overlap on title keywords; semantic match would catch "the sidebar terminal flakes on reload" matching an existing issue titled "PTY reconnect fails after extension restart" where keyword overlap is zero.
**Why:** String match has high precision but low recall — it misses near-duplicates with different vocabulary. LLM semantic match catches more dupes but costs ~$0.01-0.05 per spec dispatch and adds 5-10s latency.
**Pros:**
- Catches dupes string match misses.
- One more reason `/spec` is more useful than freehand authoring.
**Cons:**
- Paid + slower. Most v1 users probably don't hit enough false-negatives to justify the cost.
- Adds another LLM-judged decision to a skill that already has the quality gate.
**Context:** Considered in `/plan-ceo-review` build-time decisions; chose string match for v1 to keep the dedupe path free + fast. Revisit if v1 produces a meaningful false-negative rate in real use.
**Depends on:** v1.47.0.0 ships; gather real false-negative data from the v1 string matcher.
## Completed
### Slim preamble + real-PTY plan-mode E2E harness (v1.13.1.0)
@@ -1750,3 +2164,254 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
### Auto-upgrade mode + smart update check
- Config CLI (`bin/gstack-config`), auto-upgrade via `~/.gstack/config.yaml`, 12h cache TTL, exponential snooze backoff (24h→48h→1wk), "never ask again" option, vendored copy sync on upgrade
**Completed:** v0.3.8
---
## Brain-aware planning follow-ups (filed v1.48.0.0 via /plan-ceo-review + /plan-eng-review)
These are the deferred cherry-picks (E2/E3/E4) from the v1.48 brain-aware
planning plan at `~/.claude/plans/hm-interesting-well-why-dapper-eagle.md`.
The foundation (Phase 0 entity model + Phase 0.5 cache + Phase 1 preflight
+ Phase 1.5 trust policy + Phase 2 write-back scaffolding) ships in
v1.48.0.0. These follow-ups extend it.
### P2: /gstack-reflect nightly synthesis skill (E2)
**What:** Scheduled skill that reads weekly `gstack/skill-run` + takes +
`get_recent_salience` and synthesizes a `gstack/insight` page surfaced at
next skill preflight.
**Why:** Cross-time pattern detection is the compounding move. "You ran 4
plan-ceo on infra this week, 0 on product — is product work getting
starved?" surfaces patterns the user wouldn't notice.
**Pros:** Brain compounds across TIME, not just across skills. Patterns
become actionable.
**Cons:** "You're starving product work" is high-judgment territory; needs
opt-out per project, careful insight templates.
**Context:** Deferred from v1.48.0.0 cherry-pick (D4) — wait 4-6 weeks for
real `gstack/skill-run` data to accumulate before designing the reflection
layer against real patterns instead of imagined ones.
**Effort:** L (human ~1-2 days, CC ~4-6h)
**Depends on:** Phase 0 (gstack/skill-run page type from v1.48.0.0) +
~6 weeks of accumulated data
### P3: Cross-machine brain-cache sync (E3)
**What:** Push compressed digests through the gstack-brain-sync git pipeline
so the brain-cache survives moving between Macs / Conductor workspaces.
**Why:** Eliminates the cold-miss tax on every new machine (~1-2s once per
machine per day).
**Pros:** Instant warm cache on new machines.
**Cons:** Cache poisoning risk if not designed carefully (hash invariants,
endpoint-binding, conflict resolution).
**Context:** Deferred from v1.48.0.0 cherry-pick (D5) — single-machine
cache is fine for V1; correctness risk needs its own design pass.
**Effort:** M (human ~4h, CC ~30min)
**Depends on:** Brain-cache layer from v1.48.0.0
### P3: /gstack-onboarding dedicated skill (E4)
**What:** Guided 5-minute setup skill for new gstack installs: walks user
through reading CLAUDE.md + README + recent commits to build `gstack/product`
and active goals with explicit AUQs.
**Why:** Better UX than the inline bootstrap (which only fires when a
planning skill is invoked).
**Pros:** Cleaner cold-start, explicit ceremony.
**Cons:** Inline bootstrap (in scope for v1.48) already covers the
cold-start path adequately.
**Context:** Deferred from v1.48.0.0 cherry-pick (D6) — observe inline
bootstrap performance first; add dedicated skill if friction is real.
**Effort:** S (human ~2h, CC ~15min)
**Depends on:** Inline bootstrap subcommand from v1.48.0.0
### P2: Upstream gbrain takes_add + takes_resolve MCP ops
**What:** Add `mcp__gbrain__takes_add` and `mcp__gbrain__takes_resolve`
ops in `~/git/gbrain/src/core/operations.ts`. Extract the markdown-fence
mirror logic from `commands/takes.ts:570` into a reusable
`engine.resolveTake()` helper.
**Why:** Unlocks Phase 2 calibration write-back without the fence-block
fallback. ~150 LOC. Already on gbrain's v0.31.x roadmap.
**Pros:** Clean Phase 2 path, removes the "fall back to put_page" smell.
**Cons:** Lives in upstream gbrain repo, not helsinki — separate PR.
**Context:** Phase 2 write-back is already wired in v1.48.0.0 behind the
BRAIN_CALIBRATION_WRITEBACK feature flag (default off). Flag flips to
true once upstream gbrain ships these ops. ~50 LOC follow-up in
helsinki to swap the fallback for the preferred op.
**Effort:** S (human ~1d, CC ~1h) in gbrain repo; trivial wire-up in
helsinki.
**Depends on:** None (parallel-track from v1.48.0.0)
### P3: Background-refresh hook supervision
**What:** Codex outside-voice raised that "background refresh at skill END"
is hand-wavy. Add proper process supervision: PID file, timeout, failure
log, cross-platform spawn.
**Why:** Current implementation backgrounds with `&` which works but
leaves no observability when a refresh fails.
**Context:** Deferred from v1.48.0.0 codex tension T3. Stays low priority
until users report stale digests where a background refresh silently
failed.
**Effort:** S (human ~2h, CC ~20min)
### P2: Re-verify calibration takes when gbrain v0.42+ lands
**What:** When upstream gbrain ships `takes_add` MCP op and we flip
`BRAIN_CALIBRATION_WRITEBACK` from FALSE to TRUE, re-run the manual
probe in `docs/gbrain-write-surfaces.md` against `/office-hours` and
confirm `gbrain takes_list` surfaces a `kind=bet` entry with the
expected weight (0.9 for office-hours, per
`scripts/brain-cache-spec.ts:151-157`).
**Why:** Today the calibration take path falls back to writing inside a
`gbrain put` fence block because `takes_add` isn't available yet. Once
v0.42+ ships, the agent will call `takes_add` directly — we should
confirm the new path actually persists a queryable take.
**Context:** v1.50.0.0 plan §"NOT in scope". The fence-block fallback
test (`test/takes-fence-fallback.test.ts`) covers wiring for both paths;
this TODO is about live verification of the preferred path when it
becomes available.
**Effort:** XS (human ~15min, CC ~5min)
**Depends on:** Upstream gbrain v0.42+ release shipping `takes_add` MCP
op (separate TODO above).
### P2: Extend brain-writeback E2E to the other 4 planning skills
**What:** `test/skill-e2e-office-hours-brain-writeback.test.ts` covers
the brain-writeback path for `/office-hours` only. Adding parallel
tests for `/plan-ceo-review`, `/plan-eng-review`, `/plan-design-review`,
and `/plan-devex-review` would bring per-skill agent-obedience coverage
to parity with the resolver unit test
(`test/resolvers-gbrain-save-results.test.ts`, which covers wiring for
all 5).
**Why:** The resolver test proves the right instructions get emitted;
the E2E proves the agent actually obeys. Today we only have that
end-to-end signal for one of five planning skills.
**Context:** v1.50.0.0 plan §"NOT in scope". Extract `makeFakeGbrain`
into `test/helpers/fake-gbrain.ts` when the second consumer arrives
(YAGNI for one consumer today).
**Effort:** S (human ~1d, CC ~1h). Periodic-tier (~$2-4 total for 4
runs).
**Depends on:** None.
### P2: Real-session carve canary (E3, deferred from carve-guard plan)
**What:** Wire a real-session section-Read-miss canary on top of the
carved skills. When a real user session drives a carved skill and the
agent does NOT Read a section the skeleton's STOP directive pointed it
at, log it (salted, content-free) to
`~/.gstack/analytics/section-reads.jsonl` and surface drift via
`bun run eval:summary`. Non-blocking alert, never a merge gate
(real-session data is non-deterministic).
**Why:** The static (E2) + behavioral (T2) guards prove carves are
structurally sound and that a real agent Reads sections in a controlled
eval. They do NOT see production drift — a prompt-context change that
makes live agents start skipping a section. The canary is the only
mechanism that catches that, from real usage.
**Context:** Deferred from the carve-guard-hardening plan (D5→T2, codex
outside-voice #7). `test/helpers/transcript-section-logger.ts` exists but
is built for deterministic test transcripts + ship action fingerprints,
NOT real-session drift — it needs rework before it can back this. Ship
the deterministic guards first; add this once they've proven useful. The
carved-skill set + each skill's `requiredReads` are already declared in
`test/helpers/carve-guards.ts`, so the canary reads its expectations
from there.
**Effort:** M (human ~2d, CC ~4h).
**Depends on:** `transcript-section-logger.ts` real-session-drift rework.
### P2: Harden behavioral section-loading test hermeticity
**What:** `captureSectionReads` in `test/helpers/auq-sdk-capture.ts` accepts ANY
Read whose path matches `sections/<file>.md`. The skeleton's STOP-Read directive
points at the gstack-root install path (`scripts/resolvers/sections.ts` builds it
from `ctx.paths.skillRoot`), not the planted fixture copy. So a run can satisfy
the section-read assertion by reading the GLOBAL install's section instead of the
hermetic fixture.
**Why:** A behavioral test that passes by reading the global install doesn't prove
THIS branch's carved section loads. If the fixture's section were broken but the
global install's weren't, the test would still pass.
**Context:** Codex outside-voice finding on the carve-guard ship (v1.57.0.0).
Pre-existing in `auq-sdk-capture.ts` — affects `skill-e2e-ship-section-loading`,
`skill-e2e-plan-ceo-review-section-loading`, and the new
`carve-section-loading.test.ts`. Fix: match the fixture's ABSOLUTE sections path
(the `planDir` copy), not a bare `sections/<file>.md` regex; or rewrite the STOP
path to the fixture during the run.
**Effort:** S (human ~3h, CC ~30min). **Depends on:** None.
### P3: Content-hash diagram render cache for make-pdf
**What:** Cache rendered diagram SVG/PNG in `~/.gstack/cache/diagram-render/`,
keyed on `sha256(fence source + bundle version + render options)`, so repeat
`make-pdf` runs skip the browse render tab for unchanged diagrams.
**Why:** Every run currently re-renders every fence (~150-300ms each). Docs with
10+ diagrams pay seconds per iteration during write-preview loops. Codex
outside-voice flagged the missing cache story during the eng review of the
diagram engine plan (2026-06-11, D7).
**Context:** The diagram-render bundle ships a `BUILD_INFO.json` with a content
hash (see `lib/diagram-render/`) — use that as the bundle-version cache key
component so bundle bumps invalidate cleanly. Invalidation surface is the main
risk: stale renders after a mermaid theme change must not survive. Only worth
building once users hit multi-diagram docs; wedge perf is fine without it.
**Effort:** S (human ~1d, CC ~30min). **Depends on:** diagram engine wedge
shipping (lib/diagram-render bundle versioning).
### P3: Dedupe the make-pdf e2e gate-test harness
**What:** Five e2e files (`combined-gate`, `emoji-gate`, `diagram-gate`,
`landscape-gate`, `format-gate`) each hand-roll the same prerequisite probe
(binary/browse/poppler checks with CI hard-fail vs local skip), mkdtemp/rm
lifecycle, and child-timeout constants. Extract a shared
`make-pdf/test/e2e/helpers.ts` (prerequisites(), withWorkDir(), runGenerate()).
**Why:** Review-army maintainability finding on v1.58.0.0 — the boilerplate
diverges a little more with each new gate (diagram-gate now captures stderr
via Bun.spawnSync while the others use execFileSync), and a future fix to the
CI-hard-fail contract has to land five times.
**Context:** Deferred at ship time (D8.2) because it's test-only churn across
five green files at the tail of a release. Zero user-facing value; pure DRY.
**Effort:** S (human ~3h, CC ~20min). **Depends on:** None.
+99 -6
View File
@@ -16,7 +16,16 @@ This is the full monty: every scenario, every flag, every helper bin, every trou
That's it. The skill detects your current state, asks three questions at most, and walks you through install, init, MCP registration for Claude Code, and per-repo trust policy. On a clean Mac with nothing installed it finishes in under five minutes. On a Mac where something's already set up it takes seconds (it detects the existing state and skips done work).
## The three paths
## What you get after setup
Once `/setup-gbrain` finishes, your coding agent has two retrieval surfaces it didn't have before:
- **Semantic code search across this repo.** `gbrain search "browser security canary"` returns ranked file regions, not exact-match grep hits. `gbrain code-def`, `code-refs`, `code-callers`, `code-callees` walk the call graph by symbol — useful when you don't know which file holds the implementation but you know what it does. The agent prefers these over Grep when the question is semantic; CLAUDE.md gets a `## GBrain Search Guidance` block that teaches it the routing rules.
- **Cross-session memory.** Plans, retros, decisions, and learnings from past sessions live in `~/.gstack/` and (if you opted in to artifacts sync) get pushed to a private git repo that gbrain indexes. `gbrain search "what did we decide about auth?"` actually finds the prior CEO plan instead of you re-describing context every session.
If you also enabled remote MCP (Path 4 below), brain queries route to a shared brain server that other machines can write to — your laptop, your desktop, and a teammate's machine all see the same memory.
## The four paths
You pick one when the skill asks "Where should your brain live?"
@@ -48,10 +57,25 @@ Best for: you'd rather click through supabase.com yourself than paste a PAT.
Best for: try-it-first, no account, no cloud, no sharing. Or a dedicated "this Mac's brain" that stays isolated from any cloud agent.
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls. Done in 30 seconds.
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls for the init itself. Done in 30 seconds.
**Embedding model.** When `VOYAGE_API_KEY` is set, gstack inits PGLite with `voyage-code-3` (1024-dim) — Voyage's code-specialized embedding model, which beats their general-purpose `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. Without `VOYAGE_API_KEY`, gbrain auto-selects (OpenAI 1536-dim when `OPENAI_API_KEY` is present, else falls down its provider chain). Either way, the embeddings call out to the chosen provider's API during sync — set the key for the provider you want before running `/sync-gbrain`.
This is the best first choice if you just want to see what gbrain feels like before committing to cloud. You can always migrate later with `/setup-gbrain --switch`.
### Path 4: Remote gbrain MCP (split-engine)
Best for: your brain runs on another machine you control (Tailscale, ngrok, internal LAN) or a teammate's server. You want the cross-machine memory benefit without standing up a local database, and you still want symbol-aware code search on this Mac.
**What happens:** You paste an MCP URL (e.g. `https://wintermute.tail554574.ts.net:3131/mcp`) and a bearer token. The skill verifies the URL over the wire, registers gbrain as an HTTP MCP in `~/.claude.json` at user scope, and offers to also stand up a tiny local PGLite for code search (~30 seconds, ~120 MB disk).
If you accept the local PGLite, you end up in **split-engine mode**:
- **Brain/context queries** (`mcp__gbrain__search`, `mcp__gbrain__query`, `mcp__gbrain__get_page`) route to the remote MCP. Plans, retros, learnings, cross-machine memory — all on the shared server.
- **Code queries** (`gbrain code-def`, `code-refs`, `code-callers`, `code-callees`, `gbrain search` for code) route to the local PGLite via the `.gbrain-source` pin in each worktree. Indexed locally, fast, never leaves the machine.
The two engines are independent. Wiping the local PGLite doesn't touch the remote brain; rotating the remote MCP bearer doesn't affect local code search. This is also the right configuration if your remote brain admin can't (or shouldn't) index every developer's checkout — local code stays local.
## MCP registration for Claude Code
By default the skill asks "Give Claude Code a typed tool surface for gbrain?" If you say yes, it runs:
@@ -60,7 +84,7 @@ By default the skill asks "Give Claude Code a typed tool surface for gbrain?" If
claude mcp add gbrain -- gbrain serve
```
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put_page`, `gbrain get_page`, etc. show up as first-class tools in every session, not bash shell-outs.
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put`, `gbrain get`, etc. show up as first-class tools in every session, not bash shell-outs.
**If `claude` is not on PATH**, the skill skips MCP registration gracefully with a manual-register hint. The CLI resolver still works from any skill that shells out to `gbrain` — MCP is an upgrade, not a prerequisite.
@@ -95,6 +119,35 @@ SSH and HTTPS remote variants collapse to the same key: `https://github.com/foo/
Storage: `~/.gstack/gbrain-repo-policy.json`, mode 0600, schema-versioned so future migrations stay deterministic.
## Keeping the brain current with `/sync-gbrain`
`/setup-gbrain` is one-time onboarding. `/sync-gbrain` is the verb you run every time you want gbrain to see fresh changes in this repo's code.
```bash
/sync-gbrain # incremental: mtime fast-path, ~seconds on a clean tree
/sync-gbrain --full # full reindex (~25-35 minutes on a big Mac)
/sync-gbrain --code-only # only the code stage; skip memory + brain-sync
/sync-gbrain --dry-run # preview what would sync; no writes
```
The skill runs three stages — code, memory, brain-sync — independently. A failure in one doesn't block the others. State persists to `~/.gstack/.gbrain-sync-state.json` so re-running picks up cleanly.
**What it does on a fresh worktree:**
1. **Pre-flight.** Checks `gbrain_local_status` (the local engine's health). If the engine is `broken-db` or `broken-config`, the skill STOPs with a remediation menu — it refuses to silently degrade. If the local engine is missing and you're in remote-MCP mode (Path 4), the code stage SKIPs cleanly and only brain-sync runs.
2. **Code stage.** Registers the cwd as a federated source via `gbrain sources add`, writes a `.gbrain-source` pin file in the repo root (kubectl-style context — every worktree gets its own pin, so Conductor sibling worktrees don't collide), runs `gbrain sync --strategy code`.
3. **Memory stage.** Stages your `~/.gstack/` transcripts + curated memory. In local-stdio MCP mode, ingests into the local engine. In remote-http MCP mode, persists staged markdown to `~/.gstack/transcripts/run-<pid>-<ts>/` for the remote brain admin's pull pipeline. The ingest timeout is 30 minutes by default; raise it for a big brain with `GSTACK_INGEST_TIMEOUT_MS` (accepts 1 min24h). On timeout the gbrain import checkpoint is preserved, so the next `/sync-gbrain` resumes instead of starting over.
4. **Brain-sync stage.** Pushes curated artifacts (plans, designs, retros) to your private artifacts repo if you have one configured.
5. **CLAUDE.md guidance.** Capability-checks the round-trip (write a page → search → find it). If green, writes the `## GBrain Search Guidance` block to your project's CLAUDE.md. If red, REMOVES the block — the agent should never be told to use a tool that isn't installed.
**The watermark.** Sync state advances by commit hash. If gbrain hits a file it can't index (5 MB hard limit per file, or a file vanished mid-sync), the watermark stays put and subsequent syncs retry. To acknowledge an unfixable failure and move past it:
```bash
gbrain sync --source <source-id> --skip-failed
```
Re-runnable, idempotent, safe to run from multiple terminals on the same machine (locked at `~/.gstack/.sync-gbrain.lock`).
## Switching engines later
Picked PGLite and now want to join a team brain? One command:
@@ -173,8 +226,8 @@ Gbrain itself ships with these that gstack wraps:
| `gbrain migrate --to supabase --url ...` | Move a PGLite brain to Supabase (lossless, preserves source as backup) |
| `gbrain migrate --to pglite` | Reverse migration |
| `gbrain search "query"` | Search the brain |
| `gbrain put_page --title "..." --tags "a,b" <<<"content"` | Write a page |
| `gbrain get_page "<slug>"` | Fetch a page |
| `gbrain put "<slug>" --content "<markdown-with-frontmatter>"` | Write a page (title/tags go in YAML frontmatter inside `--content`) |
| `gbrain get "<slug>"` | Fetch a page |
| `gbrain serve` | Start the MCP stdio server (used by `claude mcp add`) |
### Config files + state
@@ -200,6 +253,26 @@ Gbrain itself ships with these that gstack wraps:
| `SUPABASE_API_BASE` | `gstack-gbrain-supabase-provision` | Override the Management API host. Used by tests to point at a mock server. |
| `GBRAIN_INSTALL_DIR` | `gstack-gbrain-install` | Override default install path (`~/gbrain`) |
| `GSTACK_HOME` | every bin helper | Override `~/.gstack` state dir. Heavy test use. |
| `VOYAGE_API_KEY` | `gbrain embed` subprocess; gstack PGLite init | When set, gstack inits PGLite with `voyage-code-3` (1024-dim), Voyage's code-specialized embedding model. Beats `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. See CHANGELOG v1.43.1.0 for the A/B numbers. |
| `OPENAI_API_KEY` | `gbrain embed` subprocess | Used for embeddings during `gbrain sync` / `/sync-gbrain` when `VOYAGE_API_KEY` is not set (gbrain's auto-selected fallback, `text-embedding-3-large` 1536-dim). Without either key, pages are imported structurally (symbol tables, chunks) but semantic search degrades — you'll see `[gbrain] embedding failed for code file ...` in the sync log. |
| `ANTHROPIC_API_KEY` | `claude-agent-sdk`, paid evals | Required for `bun run test:evals` and any direct `query()` call against Claude. |
| `GSTACK_OPENAI_API_KEY` | `lib/conductor-env-shim.ts` | Conductor-injected fallback. Promoted to `OPENAI_API_KEY` when the canonical name is empty. |
| `GSTACK_ANTHROPIC_API_KEY` | `lib/conductor-env-shim.ts` | Same pattern as above for Anthropic. |
## Conductor + GSTACK_* env vars
If you run gstack inside a [Conductor](https://conductor.build) workspace, **Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from the workspace env.** Setting them in `~/.zshrc` or `.env` won't help — the strip happens after env inheritance. To get a usable API key into a workspace, set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config instead. Conductor passes those through untouched.
`lib/conductor-env-shim.ts` bridges the gap on the gstack side: when imported as a side effect (`import "../lib/conductor-env-shim";`), it promotes `GSTACK_FOO_API_KEY` to `FOO_API_KEY` for any subprocess that doesn't see the canonical name. The shim is already wired into:
- `bin/gstack-gbrain-sync.ts` — so `/sync-gbrain` picks up OpenAI for embeddings
- `bin/gstack-model-benchmark` — so `--judge` runs work without manual env mapping
- `scripts/preflight-agent-sdk.ts` — so paid-eval auth probes work
- `test/helpers/e2e-helpers.ts` — so `bun run test:evals` finds Anthropic
If you add a new TS entry point that hits a paid API or needs gbrain embeddings, add the same one-line import at the top. See [CONTRIBUTING.md "Conductor workspaces"](CONTRIBUTING.md#conductor-workspaces) for the contributor checklist.
`bin/gstack-codex-probe` is bash and doesn't read these directly — it relies on `~/.codex/` auth managed by the Codex CLI.
## Security model
@@ -267,6 +340,26 @@ You edited `~/.gstack/gbrain-repo-policy.json` by hand with legacy `allow` value
`/health` treats that as yellow, not red. Check `gbrain doctor --json | jq .checks` to see which sub-checks are warning. Typical causes: resolver MECE overlap (skill names clashing) or DB connection not yet configured.
### `/sync-gbrain` reports `OK` but `gbrain search` returns nothing semantic
Embeddings probably failed during import. Symbol queries (`code-def`, `code-refs`) still work because they don't need embeddings, but `gbrain search "<terms>"` falls back to a degraded BM25 path. Look in the sync output for lines like:
```
[gbrain] embedding failed for code file <name>: OpenAI embedding requires OPENAI_API_KEY
```
The fix is to put a provider API key in the process env before re-running. `VOYAGE_API_KEY` is preferred for code (gstack defaults PGLite to `voyage-code-3` when set); otherwise `OPENAI_API_KEY` falls back to `text-embedding-3-large`. On a bare Mac shell, source the key from `~/.zshrc` before calling. In Conductor, the `lib/conductor-env-shim.ts` shim promotes `GSTACK_ANTHROPIC_API_KEY` / `GSTACK_OPENAI_API_KEY` to their canonical names automatically; for `VOYAGE_API_KEY`, set it directly in your Conductor workspace env. Re-run `/sync-gbrain --code-only` to backfill embeddings on already-imported pages.
### `gbrain sync` blocked at a commit hash — `FILE_TOO_LARGE`
A file in your tree exceeds gbrain's 5 MB hard limit (`MAX_FILE_SIZE` in `gbrain/src/core/import-file.ts`). Common culprits: response replay caches, captured screenshots, large JSON fixtures. Gbrain doesn't honor `.gitignore`-style exclude lists for code sync; the only knob is acknowledging the failure:
```bash
gbrain sync --source <source-id> --skip-failed
```
Watermark advances past the offending commit. The same file fails again if it changes; re-skip when that happens.
### Switching PGLite → Supabase hangs
Another gstack session in a sibling Conductor workspace may be holding a lock on your local PGLite file via its preamble's `gstack-brain-sync` call. Close other workspaces, re-run `/setup-gbrain --switch`. The timeout is bounded at 180s so you'll never actually wait forever.
@@ -286,7 +379,7 @@ Another gstack session in a sibling Conductor workspace may be holding a lock on
## Related skills + next steps
- `/health` — includes a GBrain dimension (doctor status, sync queue depth, last-push age) in its 0-10 composite score. The dimension is omitted when gbrain isn't installed; running `/health` on a non-gbrain machine doesn't penalize that choice.
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. To bump gbrain, update `PINNED_COMMIT` in `bin/gstack-gbrain-install` and re-run `/setup-gbrain`.
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. gbrain installs at the latest HEAD by default; to refresh it, `git pull` in your gbrain clone (default `~/gbrain`) and re-run `/setup-gbrain`. Pin a specific commit with `gstack-gbrain-install --pinned-commit <sha>` if you need reproducibility. Installs below the minimum tested version are refused.
- `/retro` — weekly retrospective pulls learnings and plans from your gbrain when memory sync is on, letting the retro reference cross-machine history.
Run `/setup-gbrain` and see what sticks.
+1 -1
View File
@@ -1 +1 @@
1.40.0.2
1.58.1.0
+133 -122
View File
@@ -2,16 +2,7 @@
name: autoplan
preamble-tier: 3
version: 1.0.0
description: |
Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk
and runs them sequentially with auto-decisions using 6 decision principles. Surfaces
taste decisions (close approaches, borderline scope, codex disagreements) at a final
approval gate. One command, fully reviewed plan out.
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
automatically", or "make the decisions for me".
Proactively suggest when the user has a plan file and wants to run the full review
gauntlet without answering 15-30 intermediate questions. (gstack)
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
description: Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk and runs them sequentially with auto-decisions using 6 decision principles. (gstack)
benefits-from: [office-hours]
triggers:
- run all reviews
@@ -30,6 +21,19 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Surfaces
taste decisions (close approaches, borderline scope, codex disagreements) at a final
approval gate. One command, fully reviewed plan out.
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
automatically", or "make the decisions for me".
Proactively suggest when the user has a plan file and wants to run the full review
gauntlet without answering 15-30 intermediate questions.
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
## Preamble (run first)
```bash
@@ -50,6 +54,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
# variant flaky), so skills render decisions as prose instead of calling the
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
# still BLOCKs rather than rendering prose to nobody.
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
echo "CONDUCTOR_SESSION: true"
fi
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -65,7 +79,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@@ -107,6 +121,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@@ -116,7 +143,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -151,7 +178,7 @@ touch ~/.gstack/.writing-style-prompted
Skip if `WRITING_STYLE_PENDING` is `no`.
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
```bash
open https://garryslist.org/posts/boil-the-ocean
@@ -162,7 +189,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
Options:
- A) Help gstack get better! (recommended)
@@ -238,6 +265,7 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@@ -285,13 +313,39 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
"AskUserQuestion" can resolve to two tools at runtime: the **host MCP variant** (e.g. `mcp__conductor__AskUserQuestion` — appears in your tool list when the host registers it) or the **native** Claude Code tool.
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
**Conductor rule (read before the MCP rule):** if `CONDUCTOR_SESSION: true` was echoed by the preamble, do NOT call AskUserQuestion at all — neither native nor any `mcp__*__AskUserQuestion` variant. Render EVERY decision brief as the **prose form** below and STOP. This is proactive, not a reaction to a failure: Conductor disables native AUQ and its MCP variant is flaky (it returns `[Tool result missing due to internal error]`), so prose is the reliable path. **Auto-decide preferences still apply first:** if a `[plan-tune auto-decide] <id> → <option>` result has already surfaced for a question, proceed with that option (no prose). Because in Conductor you go straight to prose without ever calling the tool, this auto-decide-first ordering is enforced HERE, not only by the PreToolUse hook. When you render a Conductor prose brief, also capture it with `bin/gstack-question-log` (the PostToolUse capture hook never fires on a prose path, so `/plan-tune` history/learning depends on this call).
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
**Rule (non-Conductor):** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
### When AskUserQuestion is unavailable or a call fails
Tell three outcomes apart:
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
- `headless``BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
- `interactive`**prose fallback** (below).
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
Layout: a `D<N>` title + a one-line note to reply with a letter (in Conductor this is the normal path; elsewhere it means AskUserQuestion was unavailable or errored); the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
**Continuation — mapping a typed reply back to a brief.** Each brief carries a stable label (`D<N>`, or `D<N>.k` in a split chain). The user references it (e.g. "3.2: B"). A bare letter maps to the single most-recent UNANSWERED brief; if more than one is open (a split chain), do NOT guess — ask which `D<N>.k` it answers. Never apply a bare letter ambiguously across a chain.
**One-way / destructive confirmations in prose.** When the decision is a one-way door (irreversible or destructive — delete, force-push, drop, overwrite), prose is a WEAKER gate than the tool, so make it stronger: require an explicit typed confirmation (the exact option letter or word), state plainly what is irreversible, and NEVER proceed on a vague, partial, or ambiguous reply — re-ask instead. Treat silence or "ok"/"sure" without the explicit choice as not-yet-confirmed.
### Format
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
```
D<N> — <one-line question title>
@@ -324,25 +378,41 @@ Effort both-scales: when an option involves effort, label both human-team and CC
Net line closes the tradeoff. Per-skill instructions may add stricter rules.
12. **Non-ASCII characters — write directly, never \u-escape.** When any
string field (question, option label, option description) contains
Chinese (繁體/簡體), Japanese, Korean, or other non-ASCII text, emit
the literal UTF-8 characters in the JSON string. **Never escape them
as `\uXXXX`.** Claude Code's tool parameter pipe is UTF-8 native
and passes characters through unchanged. Manually escaping requires
recalling each codepoint from training, which is unreliable for long
CJK strings — the model regularly emits the wrong codepoint (e.g.
writes `\u3103` thinking it is 管 U+7BA1, but `\u3103` is
actually ㄃, so the user sees `管理工具` rendered as `㄃3用箱`).
The trigger is long, multi-line questions with hundreds of CJK
characters: that is exactly when reflexive escaping kicks in and
exactly when miscoding is most damaging. Long ≠ escape. Keep
characters literal.
### Handling 5+ options — split, never drop
Wrong: `"question": "請選擇\uXXXX\uXXXX\uXXXX\uXXXX"`
Right: `"question": "請選擇管理工具"`
AskUserQuestion caps every call at **4 options**. With 5+ real options, NEVER
drop, merge, or silently defer one to fit. Pick a compliant shape:
Only JSON-mandatory escapes remain allowed: `\n`, `\t`, `\"`, `\\`.
- **Batch into ≤4-groups** — for coherent alternatives (e.g. version bumps,
layout variants). One call, 5th surfaced only if first 4 don't fit.
- **Split per-option** — for independent scope items (e.g. "ship E1..E6?").
Fire N sequential calls, one per option. Default to this when unsure.
Per-option call shape: `D<N>.k` header (e.g. D3.1..D3.5), ELI10 per option,
Recommendation, kind-note (no completeness score — Include/Defer/Cut/Hold are
decision actions), and 4 buckets:
**A) Include**, **B) Defer**, **C) Cut**, **D) Hold** (stop chain, discuss).
After the chain, fire `D<N>.final` to validate the assembled set (reprompt
dependency conflicts) and confirm shipping it. Use `D<N>.revise-<k>` to
revise one option without re-running the chain.
For N>6, fire a `D<N>.0` meta-AskUserQuestion first (proceed / narrow / batch).
question_ids for split chains: `<skill>-split-<option-slug>` (kebab-case ASCII,
≤64 chars, `-2`/`-3` suffix on collision). The runtime checker
(`bin/gstack-question-preference`) refuses `never-ask` on any `*-split-*` id,
so split chains are never AUTO_DECIDE-eligible — the user's option set is sacred.
**Full rule + worked examples + Hold/dependency semantics:** see
`docs/askuserquestion-split.md` in the gstack repo. Read on demand when N>4.
**Non-ASCII characters — write directly, never \u-escape.** When any string
field contains Chinese (繁體/簡體), Japanese, Korean, or other non-ASCII text,
emit the literal UTF-8 characters; never escape them as `\uXXXX` (the pipe is
UTF-8 native, and manual escaping miscodes long CJK strings). Only `\n`,
`\t`, `\"`, `\\` remain allowed. Full rationale + worked example: see
`docs/askuserquestion-cjk.md`. Read on demand when a question contains CJK.
### Self-check before emitting
@@ -355,8 +425,11 @@ Before calling AskUserQuestion, verify:
- [ ] (recommended) label on one option (even for neutral-posture)
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
- [ ] Net line closes the decision
- [ ] You are calling the tool, not writing prose
- [ ] You are calling the tool, not writing prose — unless `CONDUCTOR_SESSION: true` (then prose is the DEFAULT, not the tool) OR the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
- [ ] If you split, you checked dependencies between options before firing the chain
- [ ] If a per-option Hold fires, you stopped the chain immediately (didn't queue)
## Artifacts Sync (skill start)
@@ -539,12 +612,19 @@ if [ -d "$_PROJ" ]; then
fi
_LATEST_CP=$(find "$_PROJ/checkpoints" -name "*.md" -type f 2>/dev/null | xargs ls -t 2>/dev/null | head -1)
[ -n "$_LATEST_CP" ] && echo "LATEST_CHECKPOINT: $_LATEST_CP"
if [ -f "$_PROJ/decisions.active.json" ]; then
echo "--- ACTIVE DECISIONS (recent, scope-relevant) ---"
~/.claude/skills/gstack/bin/gstack-decision-search --recent 5 2>/dev/null
echo "--- END DECISIONS ---"
fi
echo "--- END ARTIFACTS ---"
fi
```
If artifacts are listed, read the newest useful one. If `LAST_SESSION` or `LATEST_CHECKPOINT` appears, give a 2-sentence welcome back summary. If `RECENT_PATTERN` clearly implies a next skill, suggest it once.
**Cross-session decisions.** If `ACTIVE DECISIONS` are listed, treat them as prior settled calls with their rationale — do not silently re-litigate them; if you're about to reverse one, say so explicitly. Reach for `~/.claude/skills/gstack/bin/gstack-decision-search` whenever a question touches a past decision ("what did we decide / why / did we try"). When you or the user make a DURABLE decision (architecture, scope, tool/vendor choice, or a reversal) — NOT a turn-level or trivial choice — log it with `~/.claude/skills/gstack/bin/gstack-decision-log` (`--supersede <id>` for a reversal). Reliable and local; gbrain not required.
## Writing Style (skip entirely if `EXPLAIN_LEVEL: terse` appears in the preamble echo OR the user's current message explicitly requests terse / no-explanations output)
Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format is structure; this is prose quality.
@@ -556,89 +636,12 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
Jargon list, gloss on first use if the term appears:
- idempotent
- idempotency
- race condition
- deadlock
- cyclomatic complexity
- N+1
- N+1 query
- backpressure
- memoization
- eventual consistency
- CAP theorem
- CORS
- CSRF
- XSS
- SQL injection
- prompt injection
- DDoS
- rate limit
- throttle
- circuit breaker
- load balancer
- reverse proxy
- SSR
- CSR
- hydration
- tree-shaking
- bundle splitting
- code splitting
- hot reload
- tombstone
- soft delete
- cascade delete
- foreign key
- composite index
- covering index
- OLTP
- OLAP
- sharding
- replication lag
- quorum
- two-phase commit
- saga
- outbox pattern
- inbox pattern
- optimistic locking
- pessimistic locking
- thundering herd
- cache stampede
- bloom filter
- consistent hashing
- virtual DOM
- reconciliation
- closure
- hoisting
- tail call
- GIL
- zero-copy
- mmap
- cold start
- warm start
- green-blue deploy
- canary deploy
- feature flag
- kill switch
- dead letter queue
- fan-out
- fan-in
- debounce
- throttle (UI)
- hydration mismatch
- memory leak
- GC pause
- heap fragmentation
- stack overflow
- null pointer
- dangling pointer
- buffer overflow
Curated jargon list lives at `~/.claude/skills/gstack/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases.
## Completeness Principle — Boil the Lake
## Completeness Principle — Boil the Ocean
AI makes completeness cheap. Recommend complete lakes (tests, edge cases, error paths); flag oceans (rewrites, multi-quarter migrations).
AI makes completeness cheap, so the complete thing is the goal. Recommend full coverage (tests, edge cases, error paths) — boil the ocean one lake at a time. The only thing out of scope is genuinely unrelated work (rewrites, multi-quarter migrations); flag that as separate scope, never as an excuse for a shortcut.
When options differ in coverage, include `Completeness: X/10` (10 = all edge cases, 7 = happy path, 3 = shortcut). When options differ in kind, write: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate scores.
@@ -681,7 +684,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `~/.claude/skills/gstack/bin/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
After answer, log best-effort:
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question (the leading line or trailing line is fine; the marker doesn't render visibly to the user when wrapped in HTML-style angle brackets, but the hook strips it). Without the marker the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
```bash
~/.claude/skills/gstack/bin/gstack-question-log '{"skill":"autoplan","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
```
@@ -766,9 +773,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
## Plan Status Footer
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
PLAN MODE EXCEPTION — always allowed (it's the plan file).
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
## Step 0: Detect platform and base branch
@@ -840,7 +845,7 @@ Read the `/office-hours` skill file at `~/.claude/skills/gstack/office-hours/SKI
Follow its instructions from top to bottom, **skipping these sections** (already handled by the parent skill):
- Preamble (run first)
- AskUserQuestion Format
- Completeness Principle — Boil the Lake
- Completeness Principle — Boil the Ocean
- Search Before Building
- Contributor Mode
- Completion Status Protocol
@@ -1046,7 +1051,7 @@ Read each file using the Read tool:
(they are already handled by /autoplan):**
- Preamble (run first)
- AskUserQuestion Format
- Completeness Principle — Boil the Lake
- Completeness Principle — Boil the Ocean
- Search Before Building
- Completion Status Protocol
- Telemetry (run last)
@@ -1073,11 +1078,17 @@ workflow.
```bash
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || echo off)
_CODEX_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || echo enabled)
source ~/.claude/skills/gstack/bin/gstack-codex-probe
# Master switch first: codex_reviews=disabled turns off ALL Codex work globally,
# including autoplan's own dual-voice orchestration. Honor it before probing.
if [ "$_CODEX_CFG" = "disabled" ]; then
echo "[codex disabled by config — Claude-only voices] Re-enable: gstack-config set codex_reviews enabled"
_CODEX_AVAILABLE=false
# Check Codex binary. If missing, tag the degradation matrix and continue
# with Claude subagent only (autoplan's existing degradation fallback).
if ! command -v codex >/dev/null 2>&1; then
elif ! command -v codex >/dev/null 2>&1; then
_gstack_codex_log_event "codex_cli_missing"
echo "[codex-unavailable: binary not found] — proceeding with Claude subagent only"
_CODEX_AVAILABLE=false
+8 -2
View File
@@ -216,7 +216,7 @@ Read each file using the Read tool:
(they are already handled by /autoplan):**
- Preamble (run first)
- AskUserQuestion Format
- Completeness Principle — Boil the Lake
- Completeness Principle — Boil the Ocean
- Search Before Building
- Completion Status Protocol
- Telemetry (run last)
@@ -243,11 +243,17 @@ workflow.
```bash
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || echo off)
_CODEX_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || echo enabled)
source ~/.claude/skills/gstack/bin/gstack-codex-probe
# Master switch first: codex_reviews=disabled turns off ALL Codex work globally,
# including autoplan's own dual-voice orchestration. Honor it before probing.
if [ "$_CODEX_CFG" = "disabled" ]; then
echo "[codex disabled by config — Claude-only voices] Re-enable: gstack-config set codex_reviews enabled"
_CODEX_AVAILABLE=false
# Check Codex binary. If missing, tag the degradation matrix and continue
# with Claude subagent only (autoplan's existing degradation fallback).
if ! command -v codex >/dev/null 2>&1; then
elif ! command -v codex >/dev/null 2>&1; then
_gstack_codex_log_event "codex_cli_missing"
echo "[codex-unavailable: binary not found] — proceeding with Claude subagent only"
_CODEX_AVAILABLE=false
+42 -15
View File
@@ -2,14 +2,7 @@
name: benchmark-models
preamble-tier: 1
version: 1.0.0
description: |
Cross-model benchmark for gstack skills. Runs the same prompt through Claude,
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
and optionally quality via LLM judge. Answers "which model is actually best
for this skill?" with data instead of vibes. Separate from /benchmark, which
measures web page performance. Use when: "benchmark models", "compare models",
"which model is best for X", "cross-model comparison", "model shootout". (gstack)
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
description: Cross-model benchmark for gstack skills. (gstack)
triggers:
- cross model benchmark
- compare claude gpt gemini
@@ -23,6 +16,18 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Runs the same prompt through Claude,
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
and optionally quality via LLM judge. Answers "which model is actually best
for this skill?" with data instead of vibes. Separate from /benchmark, which
measures web page performance. Use when: "benchmark models", "compare models",
"which model is best for X", "cross-model comparison", "model shootout".
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
## Preamble (run first)
```bash
@@ -43,6 +48,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
# variant flaky), so skills render decisions as prose instead of calling the
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
# still BLOCKs rather than rendering prose to nobody.
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
echo "CONDUCTOR_SESSION: true"
fi
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -58,7 +73,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@@ -100,6 +115,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@@ -109,7 +137,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -144,7 +172,7 @@ touch ~/.gstack/.writing-style-prompted
Skip if `WRITING_STYLE_PENDING` is `no`.
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
```bash
open https://garryslist.org/posts/boil-the-ocean
@@ -155,7 +183,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
Options:
- A) Help gstack get better! (recommended)
@@ -231,6 +259,7 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@@ -475,9 +504,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
## Plan Status Footer
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
PLAN MODE EXCEPTION — always allowed (it's the plan file).
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
# /benchmark-models — Cross-Model Skill Benchmark
+41 -14
View File
@@ -2,13 +2,7 @@
name: benchmark
preamble-tier: 1
version: 1.0.0
description: |
Performance regression detection using the browse daemon. Establishes
baselines for page load times, Core Web Vitals, and resource sizes.
Compares before/after on every PR. Tracks performance trends over time.
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
"bundle size", "load time". (gstack)
Voice triggers (speech-to-text aliases): "speed test", "check performance".
description: Performance regression detection using the browse daemon. (gstack)
triggers:
- performance benchmark
- check page speed
@@ -23,6 +17,17 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Establishes
baselines for page load times, Core Web Vitals, and resource sizes.
Compares before/after on every PR. Tracks performance trends over time.
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
"bundle size", "load time".
Voice triggers (speech-to-text aliases): "speed test", "check performance".
## Preamble (run first)
```bash
@@ -43,6 +48,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
# variant flaky), so skills render decisions as prose instead of calling the
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
# still BLOCKs rather than rendering prose to nobody.
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
echo "CONDUCTOR_SESSION: true"
fi
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -58,7 +73,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@@ -100,6 +115,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@@ -109,7 +137,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -144,7 +172,7 @@ touch ~/.gstack/.writing-style-prompted
Skip if `WRITING_STYLE_PENDING` is `no`.
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
```bash
open https://garryslist.org/posts/boil-the-ocean
@@ -155,7 +183,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
Options:
- A) Help gstack get better! (recommended)
@@ -231,6 +259,7 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@@ -475,9 +504,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
## Plan Status Footer
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
PLAN MODE EXCEPTION — always allowed (it's the plan file).
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
## SETUP (run this check BEFORE any browse command)
+61 -2
View File
@@ -56,8 +56,64 @@ if [ ! -e "$AGENTS_LINK" ]; then
ln -s "$REPO_ROOT" "$AGENTS_LINK"
fi
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent
"$GSTACK_LINK/setup"
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent.
#
# Workspace/dev setup MUST be non-interactive: Conductor runs this under a
# forwarded pty, so any `read` in setup (skill-prefix prompt, plan-tune hook
# consent) would hang the workspace forever. Detaching stdin makes every setup
# prompt take its smart non-interactive default (flat skill names, etc.).
#
# `--plan-tune-hooks=prompt` is load-bearing, not redundant: stdin alone only
# suppresses the *prompt* branch. A saved `plan_tune_hooks: yes` or an exported
# GSTACK_PLAN_TUNE_HOOKS=yes would still resolve to "install" and rewrite the
# user's global ~/.claude/settings.json to point at THIS ephemeral worktree —
# which breaks once the workspace is deleted. The flag has highest precedence,
# so it pins resolution to "prompt", and closed stdin then makes prompt-mode a
# no-op skip (no install, no decline marker). A dev workspace must never mutate
# global settings.json. To install the hooks, run `./setup --plan-tune-hooks`
# directly (outside dev-setup). Saved prefix/other config preferences still apply.
#
# GSTACK_SKIP_GBRAIN_REGEN=1 is passed INLINE (not exported) so it scopes to
# exactly this nested setup call and can't leak into any other setup path. It
# tells setup NOT to regenerate the gbrain :user variant into the tracked
# worktree (that would dirty checked-in source). We render it into an untracked
# per-workspace dir below instead.
GSTACK_SKIP_GBRAIN_REGEN=1 "$GSTACK_LINK/setup" --plan-tune-hooks=prompt </dev/null
# 7. Brain-aware (gbrain) blocks — render into an untracked workspace dir.
#
# The worktree's SKILL.md files stay canonical (the guard above). If gbrain is
# installed, render the :user variant (with GBRAIN_CONTEXT_LOAD +
# GBRAIN_SAVE_RESULTS) into .claude/gstack-rendered (gitignored, per-workspace)
# and repoint the workspace's SKILL.md symlinks at it. gen-skill-docs --out-dir
# also rewrites the section-base path so section reads resolve to the render, not
# the global install. Result: this workspace gets the full gbrain experience
# while git stays clean. Other projects pick up blocks via `gstack-config
# gbrain-refresh` (printed below).
GBRAIN_DETECT="$REPO_ROOT/bin/gstack-gbrain-detect"
RENDER_DIR="$REPO_ROOT/.claude/gstack-rendered"
if [ -x "$GBRAIN_DETECT" ] && "$GBRAIN_DETECT" --is-ok 2>/dev/null; then
echo ""
echo "gbrain detected — rendering brain-aware skills into .claude/gstack-rendered (workspace-only, untracked)..."
rm -rf "$RENDER_DIR"
if ( cd "$REPO_ROOT" && bun run gen:skill-docs:user --host claude --out-dir "$RENDER_DIR" >/dev/null 2>&1 ); then
# Repoint each project-local SKILL.md symlink whose worktree target has a
# rendered counterpart. The skill DIRECTORY name (basename of the symlink
# target's dir) maps to RENDER_DIR/<dir>/SKILL.md, which is robust to
# frontmatter renames and the gstack- prefix on the link name.
repointed=0
for skill_link in "$REPO_ROOT"/.claude/skills/*/SKILL.md; do
[ -L "$skill_link" ] || continue
target="$(readlink "$skill_link")"
skilldir="$(basename "$(dirname "$target")")"
rendered="$RENDER_DIR/$skilldir/SKILL.md"
if [ -f "$rendered" ]; then ln -snf "$rendered" "$skill_link"; repointed=$((repointed + 1)); fi
done
echo " $repointed workspace skills now serve brain-aware blocks (worktree stays canonical)."
else
echo " warning: brain-aware render failed — workspace uses canonical skills."
fi
fi
echo ""
echo "Dev mode active. Skills resolve from this working tree."
@@ -65,4 +121,7 @@ echo " .claude/skills/gstack → $REPO_ROOT"
echo " .agents/skills/gstack → $REPO_ROOT"
echo "Edit any SKILL.md and test immediately — no copy/deploy needed."
echo ""
echo "To make brain-aware blocks live across your OTHER projects too, run:"
echo " gstack-config gbrain-refresh"
echo ""
echo "To tear down: bin/dev-teardown"
+8 -1
View File
@@ -24,9 +24,16 @@ if [ -d "$CLAUDE_SKILLS" ]; then
fi
rmdir "$CLAUDE_SKILLS" 2>/dev/null || true
rmdir "$REPO_ROOT/.claude" 2>/dev/null || true
fi
# ─── Clean up the untracked brain-aware render (bin/dev-setup step 7) ──
RENDER_DIR="$REPO_ROOT/.claude/gstack-rendered"
if [ -d "$RENDER_DIR" ]; then
rm -rf "$RENDER_DIR"
removed+=("claude/gstack-rendered")
fi
rmdir "$REPO_ROOT/.claude" 2>/dev/null || true
# ─── Clean up .agents/skills/ ────────────────────────────────
AGENTS_SKILLS="$REPO_ROOT/.agents/skills"
if [ -d "$AGENTS_SKILLS" ]; then
+11
View File
@@ -227,8 +227,18 @@ projects/*/ceo-plans/*.md
projects/*/ceo-plans/*/*.md
projects/*/designs/*.md
projects/*/designs/*/*.md
# Project-root design / test-plan artifacts written by /office-hours,
# /plan-eng-review, and /autoplan. The skills emit
# `{user}-{branch}-design-{datetime}.md`,
# `{user}-{branch}-test-plan-{datetime}.md`, and
# `{user}-{branch}-eng-review-test-plan-{datetime}.md` at the project
# root (not under designs/), so the existing `designs/*.md` patterns
# miss them. Without these the cross-machine pull on machine B gets
# the referencing CEO plan but not the underlying design / test plan
# (#1452).
projects/*/*-design-*.md
projects/*/*-test-plan-*.md
projects/*/*-eng-review-test-plan-*.md
projects/*/timeline.jsonl
retros/*.md
developer-profile.json
@@ -256,6 +266,7 @@ cat > "$GSTACK_HOME/.brain-privacy-map.json" <<'EOF'
{"pattern": "projects/*/designs/*/*.md", "class": "artifact"},
{"pattern": "projects/*/*-design-*.md", "class": "artifact"},
{"pattern": "projects/*/*-test-plan-*.md", "class": "artifact"},
{"pattern": "projects/*/*-eng-review-test-plan-*.md", "class": "artifact"},
{"pattern": "retros/*.md", "class": "artifact"},
{"pattern": "builder-journey.md", "class": "artifact"},
{"pattern": "projects/*/timeline.jsonl", "class": "behavioral"},
+14 -1
View File
@@ -49,6 +49,19 @@ strip_git() {
echo "${1%.git}"
}
valid_owner_repo() {
local owner_repo="$1"
case "$owner_repo" in
""|/*|*/|*//*)
return 1
;;
esac
case "$owner_repo" in
*/*) return 0 ;;
*) return 1 ;;
esac
}
# Parse to (host, owner_repo) regardless of input shape.
parse_url() {
local u="$1"
@@ -82,7 +95,7 @@ parse_url() {
exit 3
;;
esac
if [ -z "$host" ] || [ -z "$owner_repo" ] || [ "$owner_repo" = "$u" ]; then
if [ -z "$host" ] || ! valid_owner_repo "$owner_repo"; then
echo "gstack-artifacts-url: failed to parse host/owner from: $u" >&2
exit 3
fi
+965
View File
@@ -0,0 +1,965 @@
#!/usr/bin/env bun
/**
* gstack-brain-cache — three-tier cache for brain-aware planning skills.
*
* Subcommands:
* get <entity-name> [--project <slug>] — return digest content; refresh if stale
* refresh [--full] [--entity X] [--project <slug>] — force refresh one or all
* invalidate <entity-name> [--project <slug>] — mark stale; next get triggers cold
* digest <entity-slug> — compress a brain page slug to digest
* meta [--project <slug>] — print _meta.json
*
* (Later commits add: bootstrap [T2b], list [T18], purge [T18], retention sweep [T18].)
*
* Cache layout:
* ~/.gstack/brain-cache/ ← cross-project (user-profile only)
* ~/.gstack/projects/<slug>/brain-cache/ ← per-project (everything else)
*
* Atomic writes via .tmp + rename. Stale-but-usable fallback when brain
* unreachable. Concurrent-refresh dedup is a follow-up commit (T15).
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, statSync, unlinkSync, readdirSync, openSync, closeSync } from 'fs';
import { join, dirname } from 'path';
import { homedir, hostname } from 'os';
import { spawnSync } from 'child_process';
import { execGbrainJson, spawnGbrain } from '../lib/gbrain-exec';
import {
BRAIN_CACHE_ENTITIES,
CACHE_REFRESH_LOCK_TIMEOUT_MS,
GSTACK_SCHEMA_PACK_NAME,
GSTACK_SCHEMA_PACK_VERSION,
SALIENCE_DEFAULT_ALLOWLIST,
type BrainCacheEntity,
} from '../scripts/brain-cache-spec';
// ──────────────────────────────────────────────────────────────────────────
// Paths + meta
// ──────────────────────────────────────────────────────────────────────────
const GSTACK_HOME = process.env.GSTACK_HOME || join(homedir(), '.gstack');
interface CacheMeta {
/** Version of the schema pack the cache was built against. Mismatch → full rebuild. */
schema_version: string;
/** SHA8 hash of the brain MCP endpoint URL (or 'local' for on-disk engines). */
endpoint_hash: string;
/** Per-entity last-refresh epoch ms. Absent → never refreshed. */
last_refresh: Record<string, number>;
/** Per-entity last-attempt epoch ms (even if attempt failed). For stale-but-usable diagnostics. */
last_attempt?: Record<string, number>;
}
/** Returns the directory holding a given entity's cache file. */
export function entityDir(entity: BrainCacheEntity, projectSlug: string | null): string {
if (entity.scope === 'cross-project') {
return join(GSTACK_HOME, 'brain-cache');
}
if (!projectSlug) {
throw new Error(`Per-project entity needs a project slug: ${entity.file}`);
}
return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache');
}
/** Returns the path to the cache file for a given entity. */
export function entityPath(entityName: string, projectSlug: string | null): string {
const entity = BRAIN_CACHE_ENTITIES[entityName];
if (!entity) throw new Error(`Unknown brain cache entity: ${entityName}`);
return join(entityDir(entity, projectSlug), entity.file);
}
/** Returns the path to the _meta.json for a given scope. */
export function metaPath(scope: 'cross-project' | 'per-project', projectSlug: string | null): string {
if (scope === 'cross-project') {
return join(GSTACK_HOME, 'brain-cache', '_meta.json');
}
if (!projectSlug) throw new Error('Per-project meta needs a project slug');
return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache', '_meta.json');
}
function loadMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null): CacheMeta {
const path = metaPath(scope, projectSlug);
if (!existsSync(path)) {
return { schema_version: GSTACK_SCHEMA_PACK_VERSION, endpoint_hash: detectEndpointHash(), last_refresh: {}, last_attempt: {} };
}
try {
const parsed = JSON.parse(readFileSync(path, 'utf-8')) as unknown;
// #1879: a valid JSON file can still be the wrong shape. JSON.parse can return
// null/array/string/number, and a partial object can omit last_refresh — three
// consumers (isStale, cmdInvalidate, refreshEntity) dereference meta.last_refresh
// unguarded and crash with a TypeError.
if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
return { schema_version: GSTACK_SCHEMA_PACK_VERSION, endpoint_hash: detectEndpointHash(), last_refresh: {}, last_attempt: {} };
}
const meta = parsed as CacheMeta;
// Normalize ONLY the dereferenced maps. Do NOT default schema_version /
// endpoint_hash — leaving them absent makes schemaVersionMismatch() /
// endpointSwitched() correctly force a rebuild (missing identity = mismatch =
// safe). Defaulting them to current values would suppress invalidation and
// trust a stale file of unknown provenance.
meta.last_refresh = meta.last_refresh ?? {};
meta.last_attempt = meta.last_attempt ?? {};
return meta;
} catch {
// Corrupt _meta — start fresh (entries will refresh on next access).
return { schema_version: GSTACK_SCHEMA_PACK_VERSION, endpoint_hash: detectEndpointHash(), last_refresh: {}, last_attempt: {} };
}
}
function saveMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null, meta: CacheMeta): void {
const path = metaPath(scope, projectSlug);
mkdirSync(dirname(path), { recursive: true });
atomicWrite(path, JSON.stringify(meta, null, 2));
}
// ──────────────────────────────────────────────────────────────────────────
// Endpoint hash detection
// ──────────────────────────────────────────────────────────────────────────
import { createHash } from 'crypto';
function sha8(input: string): string {
return createHash('sha256').update(input).digest('hex').slice(0, 8);
}
/**
* Detects the active brain endpoint (MCP URL or 'local') and returns its
* stable identity hash. Used to detect when the user switches brains
* (different endpoint → different cache).
*/
export function detectEndpointHash(): string {
const claudeJsonPath = join(homedir(), '.claude.json');
if (existsSync(claudeJsonPath)) {
try {
const cfg = JSON.parse(readFileSync(claudeJsonPath, 'utf-8'));
const gbrainServer = cfg?.mcpServers?.gbrain;
const url = gbrainServer?.url || gbrainServer?.transport?.url;
if (typeof url === 'string' && url.length > 0) {
return sha8(url);
}
} catch { /* fall through to local */ }
}
// Local engine — no endpoint URL; use a stable literal hash.
return 'local';
}
// ──────────────────────────────────────────────────────────────────────────
// Atomic write (tmp + rename)
// ──────────────────────────────────────────────────────────────────────────
function atomicWrite(path: string, content: string): void {
mkdirSync(dirname(path), { recursive: true });
const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
writeFileSync(tmp, content, 'utf-8');
renameSync(tmp, path);
}
// ──────────────────────────────────────────────────────────────────────────
// Staleness + refresh logic
// ──────────────────────────────────────────────────────────────────────────
/** Returns true if the cached digest is past its TTL. */
function isStale(entityName: string, meta: CacheMeta): boolean {
const entity = BRAIN_CACHE_ENTITIES[entityName];
if (!entity) return true;
const last = meta.last_refresh[entityName];
if (!last) return true;
return Date.now() - last > entity.ttl_ms;
}
/** Returns true if the cache file exists on disk. */
function hasFile(entityName: string, projectSlug: string | null): boolean {
return existsSync(entityPath(entityName, projectSlug));
}
/** Returns true if schema version recorded in meta differs from current pack version. */
function schemaVersionMismatch(meta: CacheMeta): boolean {
return meta.schema_version !== GSTACK_SCHEMA_PACK_VERSION;
}
/** Returns true if endpoint hash recorded in meta differs from current detected endpoint. */
function endpointSwitched(meta: CacheMeta): boolean {
return meta.endpoint_hash !== detectEndpointHash();
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: get
// ──────────────────────────────────────────────────────────────────────────
interface GetResult {
/** Path to the digest file. */
path: string;
/** Cache state: 'warm' (fresh + valid), 'cold-refreshed' (was stale, refreshed inline), 'stale-fallback' (used stale because refresh failed), 'missing' (no cache and no refresh). */
state: 'warm' | 'cold-refreshed' | 'stale-fallback' | 'missing';
/** Optional message for diagnostics. */
message?: string;
}
export function cmdGet(entityName: string, projectSlug: string | null): GetResult {
const entity = BRAIN_CACHE_ENTITIES[entityName];
if (!entity) throw new Error(`Unknown entity: ${entityName}`);
const scope = entity.scope;
const meta = loadMeta(scope, projectSlug);
// Schema-version mismatch → full rebuild (D4 A4).
if (schemaVersionMismatch(meta) || endpointSwitched(meta)) {
rebuildAllForScope(scope, projectSlug);
// After rebuild, meta is fresh; fall through to warm path.
const newMeta = loadMeta(scope, projectSlug);
if (hasFile(entityName, projectSlug) && !isStale(entityName, newMeta)) {
return { path: entityPath(entityName, projectSlug), state: 'warm' };
}
// Rebuild may have failed for this entity specifically.
return { path: entityPath(entityName, projectSlug), state: 'missing', message: 'rebuild after schema/endpoint change' };
}
if (hasFile(entityName, projectSlug) && !isStale(entityName, meta)) {
return { path: entityPath(entityName, projectSlug), state: 'warm' };
}
// Stale or missing — try cold refresh.
const refreshed = refreshEntity(entityName, projectSlug);
if (refreshed) {
return { path: entityPath(entityName, projectSlug), state: 'cold-refreshed' };
}
// Refresh failed. Use stale-but-usable if file exists.
if (hasFile(entityName, projectSlug)) {
return { path: entityPath(entityName, projectSlug), state: 'stale-fallback', message: 'brain unreachable; using stale cache' };
}
// No cache and no refresh = missing.
return { path: entityPath(entityName, projectSlug), state: 'missing', message: 'brain unreachable; no cache available' };
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: refresh
// ──────────────────────────────────────────────────────────────────────────
// ──────────────────────────────────────────────────────────────────────────
// Lockfile dedup (T15 / D3)
// ──────────────────────────────────────────────────────────────────────────
/**
* Returns the lock file path for a project scope. Cross-project entities
* still lock per-project (the project triggering the refresh holds the lock);
* concurrent attempts from different projects on cross-project entities
* serialize naturally because they're rare and the lock window is short.
*/
function lockPath(projectSlug: string | null): string {
const dir = projectSlug
? join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache')
: join(GSTACK_HOME, 'brain-cache');
return join(dir, '.refresh.lock');
}
interface LockHandle {
fd: number;
path: string;
}
/**
* Try to acquire the refresh lock. Returns null when another process holds it
* (and the lock is fresh). Stale locks (process dead OR older than the
* timeout) are taken over.
*/
function tryAcquireLock(projectSlug: string | null): LockHandle | null {
const path = lockPath(projectSlug);
mkdirSync(dirname(path), { recursive: true });
// If a lock exists, see if it's stale
if (existsSync(path)) {
try {
const raw = readFileSync(path, 'utf-8');
const lock = JSON.parse(raw) as { pid: number; host: string; ts: number };
const age = Date.now() - lock.ts;
const sameHost = lock.host === hostname();
const processGone = sameHost && lock.pid > 0 && !isPidAlive(lock.pid);
if (age <= CACHE_REFRESH_LOCK_TIMEOUT_MS && !processGone) {
return null; // someone else holds a fresh lock
}
// Stale: take over
} catch {
// Corrupt lock file → take over
}
}
// Write our lock (best-effort O_EXCL via tmp+rename for atomic creation)
const payload = JSON.stringify({ pid: process.pid, host: hostname(), ts: Date.now() });
const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
try {
writeFileSync(tmp, payload);
renameSync(tmp, path);
} catch (err) {
return null;
}
// Race: another process may have raced us. Re-read and verify ownership.
try {
const raw = readFileSync(path, 'utf-8');
const lock = JSON.parse(raw) as { pid: number; host: string };
if (lock.pid !== process.pid || lock.host !== hostname()) {
return null;
}
} catch {
return null;
}
return { fd: -1, path };
}
function releaseLock(handle: LockHandle): void {
try { unlinkSync(handle.path); } catch { /* best effort */ }
}
function isPidAlive(pid: number): boolean {
try {
process.kill(pid, 0);
return true;
} catch (err: any) {
if (err?.code === 'EPERM') return true; // exists but we don't own it
return false;
}
}
/**
* Run a refresh callback under the project-scoped lock. If another refresh is
* already in flight, returns 'dedup' and the caller can either wait + retry
* (the resolver does this) or fall through to stale-but-usable. Stale locks
* (process dead, or older than CACHE_REFRESH_LOCK_TIMEOUT_MS) are taken over.
*/
export function withRefreshLock<T>(projectSlug: string | null, fn: () => T): T | 'dedup' {
const handle = tryAcquireLock(projectSlug);
if (!handle) return 'dedup';
try {
return fn();
} finally {
releaseLock(handle);
}
}
/** Refreshes one entity from the brain. Returns true on success. */
export function refreshEntity(entityName: string, projectSlug: string | null): boolean {
const entity = BRAIN_CACHE_ENTITIES[entityName];
if (!entity) return false;
// Mark attempt
const meta = loadMeta(entity.scope, projectSlug);
meta.last_attempt = meta.last_attempt || {};
meta.last_attempt[entityName] = Date.now();
// Fetch from brain. The actual fetch logic varies per entity — derived digests
// (recent-decisions, salience) need different queries from direct page reads.
// For T2a we implement the direct-page path; derived digests get filled in by
// the resolver / write-back paths in later commits.
const digestContent = fetchAndCompressEntity(entityName, projectSlug);
if (digestContent === null) {
saveMeta(entity.scope, projectSlug, meta);
return false;
}
// Enforce per-entity budget by truncating from end (oldest items live there
// by convention in our compressor). The per-skill budget is separately
// enforced at preflight injection time.
let final = digestContent;
if (Buffer.byteLength(final, 'utf-8') > entity.budget_bytes) {
final = truncateToBudget(final, entity.budget_bytes);
}
atomicWrite(entityPath(entityName, projectSlug), final);
meta.last_refresh[entityName] = Date.now();
// Keep schema/endpoint identity fresh.
meta.schema_version = GSTACK_SCHEMA_PACK_VERSION;
meta.endpoint_hash = detectEndpointHash();
saveMeta(entity.scope, projectSlug, meta);
return true;
}
/**
* Refresh all entities for a scope (per-project or cross-project).
* Used by --full and by schema/endpoint-change rebuilds.
*/
export function refreshAll(projectSlug: string | null): { success: number; failed: number } {
let success = 0;
let failed = 0;
for (const [name, entity] of Object.entries(BRAIN_CACHE_ENTITIES)) {
// Cross-project entities only refresh when explicitly targeted via no-slug calls
if (entity.scope === 'cross-project' && projectSlug) continue;
if (entity.scope === 'per-project' && !projectSlug) continue;
if (refreshEntity(name, projectSlug)) success++; else failed++;
}
return { success, failed };
}
/** Rebuild on schema-version mismatch or endpoint switch. Wipes affected scope first. */
function rebuildAllForScope(scope: 'cross-project' | 'per-project', projectSlug: string | null): void {
// Wipe files but preserve dir; meta gets fully rewritten by refreshes below.
for (const [name, entity] of Object.entries(BRAIN_CACHE_ENTITIES)) {
if (entity.scope !== scope) continue;
const p = entityPath(name, projectSlug);
if (existsSync(p)) {
try { unlinkSync(p); } catch { /* best effort */ }
}
}
// Fresh meta starts here
const fresh: CacheMeta = {
schema_version: GSTACK_SCHEMA_PACK_VERSION,
endpoint_hash: detectEndpointHash(),
last_refresh: {},
last_attempt: {},
};
saveMeta(scope, projectSlug, fresh);
// Refresh all entities in this scope
for (const [name, entity] of Object.entries(BRAIN_CACHE_ENTITIES)) {
if (entity.scope !== scope) continue;
refreshEntity(name, projectSlug);
}
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: invalidate
// ──────────────────────────────────────────────────────────────────────────
export function cmdInvalidate(entityName: string, projectSlug: string | null): void {
const entity = BRAIN_CACHE_ENTITIES[entityName];
if (!entity) throw new Error(`Unknown entity: ${entityName}`);
const meta = loadMeta(entity.scope, projectSlug);
delete meta.last_refresh[entityName];
saveMeta(entity.scope, projectSlug, meta);
}
// ──────────────────────────────────────────────────────────────────────────
// Fetch + compress per-entity
// ──────────────────────────────────────────────────────────────────────────
/**
* Returns the digest markdown content for an entity, or null if the brain is
* unreachable / the source page doesn't exist.
*
* For T2a we implement the entity → page-slug mapping for the simple cases.
* Derived digests (recent-decisions, salience) get specialized paths.
*/
function fetchAndCompressEntity(entityName: string, projectSlug: string | null): string | null {
switch (entityName) {
case 'user-profile':
return fetchUserProfile();
case 'product':
return fetchProduct(projectSlug);
case 'goals':
return fetchGoals(projectSlug);
case 'developer-persona':
return fetchSimplePage(`gstack/developer-persona/${projectSlug}`);
case 'brand':
return fetchSimplePage(`gstack/brand/${projectSlug}`);
case 'competitive-intel':
return fetchSimplePage(`gstack/competitive-intel/${projectSlug}`);
case 'recent-decisions':
return fetchRecentDecisions(projectSlug);
case 'salience':
// D9 salience allowlist applied in T17 commit; T2a returns raw output for now.
return fetchSalience(projectSlug);
default:
return null;
}
}
/** Generic single-page fetch via `gbrain get`. Returns null on miss/unreachable. */
function fetchSimplePage(slug: string): string | null {
const result = spawnGbrain(['get', slug, '--json'], { timeout: 10_000 });
if (result.status !== 0) return null;
try {
const page = JSON.parse(result.stdout) as { body?: string; title?: string };
if (!page?.body) return null;
return compressPage(slug, page.title || slug, page.body);
} catch {
return null;
}
}
function fetchUserProfile(): string | null {
// The user-slug discovery is implemented in T16 (D4 A3). For T2a we accept
// env GSTACK_USER_SLUG as override, fallback to $USER for direct calls.
const slug = process.env.GSTACK_USER_SLUG || process.env.USER || 'unknown';
return fetchSimplePage(`gstack/user-profile/${slug}`);
}
function fetchProduct(projectSlug: string | null): string | null {
if (!projectSlug) return null;
return fetchSimplePage(`gstack/product/${projectSlug}`);
}
/**
* Goals are LIST queries: all gstack/goal/<project>/* pages.
* Compress the top N by recency.
*/
function fetchGoals(projectSlug: string | null): string | null {
if (!projectSlug) return null;
const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string; body?: string }> }>([
'list-pages',
'--type', 'gstack/goal',
'--limit', '10',
'--json',
]);
if (!result?.pages) return null;
const goals = result.pages.filter((p) => p.slug?.startsWith(`gstack/goal/${projectSlug}/`));
if (goals.length === 0) {
// Empty digest is valid (just header + 'no active goals' line)
return `# Active goals (project: ${projectSlug})\n\n_No active goals recorded yet._\n`;
}
const lines = goals.map((g) => `- [[${g.slug}]] — ${g.title || '(untitled)'}`);
return `# Active goals (project: ${projectSlug})\n\n${lines.join('\n')}\n`;
}
/**
* recent-decisions: last 5 gstack/skill-run pages for this project, compressed
* to one-line summaries.
*/
function fetchRecentDecisions(projectSlug: string | null): string | null {
if (!projectSlug) return null;
const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
'list-pages',
'--type', 'gstack/skill-run',
'--limit', '5',
'--sort', 'updated_desc',
'--json',
]);
if (!result?.pages) {
return `# Recent decisions (project: ${projectSlug})\n\n_No prior skill runs recorded._\n`;
}
const lines = result.pages.map((p) => `- ${p.title || p.slug}`);
return `# Recent decisions (project: ${projectSlug})\n\n${lines.join('\n')}\n`;
}
/**
* Reads the user's salience allowlist override from gstack-config. If unset,
* returns SALIENCE_DEFAULT_ALLOWLIST. The override is comma-separated; we
* trim and drop empty entries.
*/
export function getSalienceAllowlist(): ReadonlyArray<string> {
// Short-circuit via env var for tests + headless callers.
const env = process.env.GSTACK_SALIENCE_ALLOWLIST;
if (typeof env === 'string' && env.length > 0) {
return env.split(',').map((s) => s.trim()).filter(Boolean);
}
// Shell out to gstack-config with a tight timeout. Falls back to defaults
// on any failure (config script missing, command non-zero, parse error).
try {
const skillRoot = join(homedir(), '.claude', 'skills', 'gstack');
const bin = join(skillRoot, 'bin', 'gstack-config');
if (!existsSync(bin)) return SALIENCE_DEFAULT_ALLOWLIST;
const result = spawnSync(bin, ['get', 'salience_allowlist'], { timeout: 2000, encoding: 'utf-8' });
if (result.status !== 0 || !result.stdout) return SALIENCE_DEFAULT_ALLOWLIST;
const trimmed = result.stdout.trim();
if (!trimmed) return SALIENCE_DEFAULT_ALLOWLIST;
const parts = trimmed.split(',').map((s) => s.trim()).filter(Boolean);
return parts.length > 0 ? parts : SALIENCE_DEFAULT_ALLOWLIST;
} catch {
return SALIENCE_DEFAULT_ALLOWLIST;
}
}
/**
* D9 salience privacy gate: returns true if the slug starts with any allowlisted
* prefix. Anything NOT matching is stripped at digest write time so that family,
* therapy, reflection, and other sensitive content never leaks into work-flow
* planning prompts by default.
*/
export function isSalienceSlugAllowed(slug: string, allowlist: ReadonlyArray<string>): boolean {
for (const prefix of allowlist) {
if (slug.startsWith(prefix)) return true;
}
return false;
}
function fetchSalience(projectSlug: string | null): string | null {
// get-recent-salience is a gbrain CLI sub-shape; we use the MCP-shape JSON
const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string; emotional_weight?: number }> }>([
'get-recent-salience',
'--days', '14',
'--limit', '10',
'--json',
]);
if (!result?.pages) return `# Recent salience\n\n_No salient pages in last 14d._\n`;
// D9 privacy gate: strip entries outside the allowlist BEFORE rendering.
// Sensitive personal content (family, therapy, reflection) is never written
// into the digest cache file, even when the brain itself ranks it salient.
const allowlist = getSalienceAllowlist();
const filtered = result.pages.filter((p) => p.slug && isSalienceSlugAllowed(p.slug, allowlist));
const stripped = result.pages.length - filtered.length;
if (filtered.length === 0) {
const header = `# Recent salience (last 14d)`;
const note = stripped > 0
? `\n_All ${stripped} salient entries stripped by allowlist gate (no work-flow content in window)._\n`
: `\n_No salient pages in last 14d._\n`;
return `${header}\n${note}`;
}
const lines = filtered.map((p) => `- [[${p.slug}]] — ${p.title || ''} (weight: ${p.emotional_weight?.toFixed(2) ?? 'n/a'})`);
const footer = stripped > 0
? `\n\n_${stripped} private entries stripped by allowlist gate._`
: '';
return `# Recent salience (last 14d)\n\n${lines.join('\n')}${footer}\n`;
}
/**
* Compress a brain page body into a digest. The compressor keeps frontmatter
* out, trims body to the first H2/H3 sections, and prepends a slug header.
* Per-entity budget enforcement happens at the caller (refreshEntity).
*/
function compressPage(slug: string, title: string, body: string): string {
const trimmed = body
.replace(/^---[\s\S]*?---\s*\n/m, '') // strip frontmatter
.trim();
return `# ${title}\nslug: ${slug}\n\n${trimmed}\n`;
}
/**
* Truncate a digest to a byte budget. Tries to cut at the last newline before
* the budget so the digest stays readable.
*/
function truncateToBudget(content: string, budgetBytes: number): string {
const buf = Buffer.from(content, 'utf-8');
if (buf.byteLength <= budgetBytes) return content;
const truncated = buf.slice(0, budgetBytes).toString('utf-8');
const lastNewline = truncated.lastIndexOf('\n');
const cleanCut = lastNewline > budgetBytes * 0.8 ? truncated.slice(0, lastNewline) : truncated;
return `${cleanCut}\n\n_(digest truncated to ${budgetBytes}-byte budget)_\n`;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: digest
// ──────────────────────────────────────────────────────────────────────────
/**
* Public: compress a brain page slug to digest format. Used by callers that
* want to know what the digest WOULD look like without writing to cache.
*/
export function cmdDigest(slug: string): string | null {
return fetchSimplePage(slug);
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: meta
// ──────────────────────────────────────────────────────────────────────────
export function cmdMeta(projectSlug: string | null): CacheMeta {
if (projectSlug) return loadMeta('per-project', projectSlug);
return loadMeta('cross-project', null);
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: bootstrap (T2b)
// ──────────────────────────────────────────────────────────────────────────
/**
* Bootstrap synthesizes draft entity content from CLAUDE.md + README +
* recent commits + learnings.jsonl for a fresh project. Emits as JSON for
* the caller (skill template) to AUQ-confirm before any write to the brain.
*
* This keeps the CLI pure (no AUQ logic) while preventing silent
* auto-extraction garbage (D10 T4 fix). The agent is responsible for the
* "Synthesized X — looks right?" prompt per entity.
*/
export interface BootstrapDraft {
product?: { slug: string; title: string; body: string };
goals?: Array<{ slug: string; title: string; body: string }>;
developer_persona?: { slug: string; title: string; body: string };
brand?: { slug: string; title: string; body: string };
competitive_intel?: { slug: string; title: string; body: string };
}
export function cmdBootstrap(projectSlug: string): BootstrapDraft {
const draft: BootstrapDraft = {};
const repoRoot = process.env.GSTACK_REPO_ROOT || process.cwd();
// Product synthesis: CLAUDE.md headline + README first paragraph
let claudeMd = '';
try { claudeMd = readFileSync(join(repoRoot, 'CLAUDE.md'), 'utf-8'); } catch { /* missing is fine */ }
let readmeMd = '';
try { readmeMd = readFileSync(join(repoRoot, 'README.md'), 'utf-8'); } catch { /* missing is fine */ }
const productLead = synthesizeProductLead(claudeMd, readmeMd, projectSlug);
if (productLead) {
draft.product = {
slug: `gstack/product/${projectSlug}`,
title: projectSlug,
body: productLead,
};
}
// Goals: try learnings.jsonl + recent commit messages mentioning "goal" or "ship"
const learningsPath = join(GSTACK_HOME, 'projects', projectSlug, 'learnings.jsonl');
const goalsHints = synthesizeGoalsHints(learningsPath, repoRoot);
if (goalsHints.length > 0) {
draft.goals = goalsHints.slice(0, 3).map((hint, idx) => ({
slug: `gstack/goal/${projectSlug}/bootstrap-${idx + 1}`,
title: hint.title,
body: hint.body,
}));
}
return draft;
}
function synthesizeProductLead(claudeMd: string, readmeMd: string, slug: string): string | null {
// First H1 in CLAUDE.md or README, plus first paragraph after it.
const source = claudeMd || readmeMd;
if (!source) return null;
const h1Match = source.match(/^#\s+(.+)$/m);
const heading = h1Match?.[1]?.trim() || slug;
// First non-heading paragraph
const paraMatch = source.match(/(?:^|\n)([^#\n][^\n]+(?:\n[^#\n][^\n]+)*)/);
const lead = paraMatch?.[1]?.trim() || '(no description found in CLAUDE.md or README)';
return [
`# ${heading}`,
'',
'## What',
lead.slice(0, 500),
'',
'## Stage',
'(fill in current stage, e.g., v1.x shipped, in development, paused)',
'',
'## Team',
'(fill in team composition + size)',
'',
'## Active goals',
'(populated by /office-hours over time)',
'',
'## Recent decisions',
'(populated by /plan-ceo-review over time)',
'',
].join('\n');
}
function synthesizeGoalsHints(learningsPath: string, repoRoot: string): Array<{ title: string; body: string }> {
const hints: Array<{ title: string; body: string }> = [];
if (existsSync(learningsPath)) {
try {
const lines = readFileSync(learningsPath, 'utf-8').split('\n').filter(Boolean);
for (const line of lines.slice(-10)) {
try {
const entry = JSON.parse(line);
if (entry?.insight && (entry?.type === 'pattern' || entry?.type === 'architecture')) {
hints.push({
title: entry.insight.slice(0, 80),
body: `Source: learnings.jsonl\nType: ${entry.type}\n\n${entry.insight}\n`,
});
}
} catch { /* skip malformed line */ }
}
} catch { /* unreadable file, skip */ }
}
return hints;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: list (T18)
// ──────────────────────────────────────────────────────────────────────────
/**
* Lists all gstack-owned pages currently in the brain for a project, grouped
* by type. Powers the user's ability to audit what gstack has written.
*/
export function cmdList(projectSlug: string | null): Array<{ type: string; slug: string; title?: string }> {
// We probe each gstack/<type>/ namespace via list-pages with a type filter.
const types = ['gstack/user-profile', 'gstack/product', 'gstack/goal', 'gstack/developer-persona', 'gstack/brand', 'gstack/competitive-intel', 'gstack/skill-run', 'gstack/take'];
const all: Array<{ type: string; slug: string; title?: string }> = [];
for (const type of types) {
const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
'list-pages',
'--type', type,
'--limit', '200',
'--json',
]);
if (!result?.pages) continue;
for (const page of result.pages) {
if (projectSlug && !page.slug?.includes(`/${projectSlug}`) && type !== 'gstack/user-profile') {
continue;
}
all.push({ type, slug: page.slug, title: page.title });
}
}
return all;
}
// ──────────────────────────────────────────────────────────────────────────
// Subcommand: purge (T18)
// ──────────────────────────────────────────────────────────────────────────
/**
* Delete one gstack-owned page from the brain. Caller (skill template) is
* responsible for the confirm prompt; this is the raw operation.
*/
export function cmdPurge(slug: string): { deleted: boolean; error?: string } {
if (!slug.startsWith('gstack/')) {
return { deleted: false, error: 'refusing to purge non-gstack page' };
}
const result = spawnGbrain(['delete-page', slug], { timeout: 10_000 });
if (result.status !== 0) {
return { deleted: false, error: result.stderr?.trim() || `exit ${result.status}` };
}
// Also invalidate any cached digests that referenced this page.
// Best-effort — derived digests may need explicit invalidate.
return { deleted: true };
}
// ──────────────────────────────────────────────────────────────────────────
// CLI dispatch
// ──────────────────────────────────────────────────────────────────────────
function parseArgs(argv: string[]): { cmd: string; positional: string[]; flags: Record<string, string | boolean> } {
const cmd = argv[2] || '';
const rest = argv.slice(3);
const positional: string[] = [];
const flags: Record<string, string | boolean> = {};
for (let i = 0; i < rest.length; i++) {
const arg = rest[i];
if (arg.startsWith('--')) {
const key = arg.slice(2);
const next = rest[i + 1];
if (next && !next.startsWith('--')) {
flags[key] = next;
i++;
} else {
flags[key] = true;
}
} else {
positional.push(arg);
}
}
return { cmd, positional, flags };
}
function projectSlugFromFlag(flags: Record<string, string | boolean>): string | null {
const v = flags.project;
return typeof v === 'string' ? v : null;
}
function printUsage(): void {
process.stderr.write(`Usage: gstack-brain-cache <subcommand>
Subcommands:
get <entity-name> [--project <slug>]
refresh [--full] [--entity X] [--project <slug>]
invalidate <entity-name> [--project <slug>]
digest <entity-slug>
meta [--project <slug>]
bootstrap --project <slug> — emit synthesized entity drafts (JSON)
list [--project <slug>] — list gstack-owned pages in brain
purge <slug> — delete a gstack-owned brain page (refuses non-gstack/ slugs)
`);
}
async function main(): Promise<number> {
const { cmd, positional, flags } = parseArgs(process.argv);
const projectSlug = projectSlugFromFlag(flags);
try {
switch (cmd) {
case 'get': {
const entityName = positional[0];
if (!entityName) { printUsage(); return 1; }
const result = cmdGet(entityName, projectSlug);
if (result.state === 'missing') {
process.stderr.write(`(${result.state}: ${result.message ?? 'no cache'})\n`);
return 2;
}
if (result.state !== 'warm') {
process.stderr.write(`(${result.state}${result.message ? ': ' + result.message : ''})\n`);
}
process.stdout.write(readFileSync(result.path, 'utf-8'));
return 0;
}
case 'refresh': {
// D3: dedup concurrent refreshes via lockfile. Skipped (dedup) when
// another process is already mid-refresh on the same project.
if (flags.entity) {
const entityName = String(flags.entity);
const result = withRefreshLock(projectSlug, () => refreshEntity(entityName, projectSlug));
if (result === 'dedup') {
process.stderr.write(`(dedup: another refresh in flight)\n`);
return 3;
}
process.stdout.write(result ? `refreshed ${entityName}\n` : `failed to refresh ${entityName}\n`);
return result ? 0 : 1;
}
const allResult = withRefreshLock(projectSlug, () => refreshAll(projectSlug));
if (allResult === 'dedup') {
process.stderr.write(`(dedup: another refresh in flight)\n`);
return 3;
}
process.stdout.write(`refreshed=${allResult.success} failed=${allResult.failed}\n`);
return allResult.failed > 0 ? 1 : 0;
}
case 'invalidate': {
const entityName = positional[0];
if (!entityName) { printUsage(); return 1; }
cmdInvalidate(entityName, projectSlug);
process.stdout.write(`invalidated ${entityName}\n`);
return 0;
}
case 'digest': {
const slug = positional[0];
if (!slug) { printUsage(); return 1; }
const content = cmdDigest(slug);
if (content === null) {
process.stderr.write('brain unreachable or page not found\n');
return 2;
}
process.stdout.write(content);
return 0;
}
case 'meta': {
const meta = cmdMeta(projectSlug);
process.stdout.write(JSON.stringify(meta, null, 2) + '\n');
return 0;
}
case 'bootstrap': {
if (!projectSlug) {
process.stderr.write('bootstrap requires --project <slug>\n');
return 1;
}
const draft = cmdBootstrap(projectSlug);
process.stdout.write(JSON.stringify(draft, null, 2) + '\n');
return 0;
}
case 'list': {
const pages = cmdList(projectSlug);
if (flags.json) {
process.stdout.write(JSON.stringify(pages, null, 2) + '\n');
} else {
for (const p of pages) {
process.stdout.write(`${p.type}\t${p.slug}\t${p.title ?? ''}\n`);
}
}
return 0;
}
case 'purge': {
const slug = positional[0];
if (!slug) { printUsage(); return 1; }
const result = cmdPurge(slug);
if (result.deleted) {
process.stdout.write(`deleted ${slug}\n`);
return 0;
}
process.stderr.write(`failed: ${result.error}\n`);
return 1;
}
case '':
case 'help':
case '--help':
case '-h':
printUsage();
return 0;
default:
process.stderr.write(`unknown subcommand: ${cmd}\n`);
printUsage();
return 1;
}
} catch (err) {
process.stderr.write(`error: ${err instanceof Error ? err.message : String(err)}\n`);
return 1;
}
}
// Only run main when invoked as a script (not when imported by tests)
if (import.meta.main) {
main().then((code) => process.exit(code));
}
+4 -1
View File
@@ -192,7 +192,10 @@ function resolveSkillFile(args: CliArgs): string | null {
function gbrainAvailable(): boolean {
try {
execFileSync("command", ["-v", "gbrain"], { stdio: "ignore" });
execFileSync("gbrain", ["--version"], {
stdio: "ignore",
timeout: MCP_TIMEOUT_MS,
});
return true;
} catch {
return false;
+57 -12
View File
@@ -136,7 +136,11 @@ def load_privacy_map(path):
allowlist_globs = load_lines(allowlist_path)
privacy_map = load_privacy_map(privacy_path)
skip_lines = set(load_lines(skip_path))
# Normalize skip entries to the POSIX form queued paths use, so a backslash
# entry in .brain-skip.txt still matches on Windows. The drain is the safety
# boundary that actually stages files, so it must normalize identically to
# discover_new — otherwise an explicitly-skipped file gets committed.
skip_lines = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
# Read queue; collect unique file paths.
queue_paths = set()
@@ -253,6 +257,8 @@ subcmd_once() {
# Stage with git add -f (forces past .gitignore=*) explicit paths only.
while IFS= read -r p; do
p="${p%$'\r'}" # Windows: compute_paths_to_stage's python print() emits CRLF;
# a trailing CR makes the pathspec match nothing (silent no-stage).
[ -z "$p" ] && continue
git -C "$GSTACK_HOME" add -f -- "$p" 2>/dev/null || true
done < "$paths_file"
@@ -376,10 +382,13 @@ subcmd_discover_new() {
exit 0
fi
# Walk allowlist globs; enqueue any file where mtime+size differs from cursor.
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" "$SCRIPT_DIR/gstack-brain-enqueue" <<'PYEOF' 2>/dev/null || true
import sys, os, json, glob, fnmatch, subprocess, hashlib
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" <<'PYEOF' 2>/dev/null || true
import sys, os, json, fnmatch
from datetime import datetime, timezone
gstack_home, allowlist_path, cursor_path, enqueue_bin = sys.argv[1:5]
gstack_home, allowlist_path, cursor_path = sys.argv[1:4]
queue_path = os.path.join(gstack_home, ".brain-queue.jsonl")
skip_path = os.path.join(gstack_home, ".brain-skip.txt")
def load_lines(path):
try:
@@ -403,8 +412,12 @@ def save_cursor(path, data):
pass
allowlist = load_lines(allowlist_path)
# Normalize skip entries to the same POSIX form as `rel` below, so a
# backslash entry in .brain-skip.txt still matches a normalized path on Windows.
skip = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
cursor = load_cursor(cursor_path)
new_cursor = dict(cursor)
to_enqueue = []
# Walk all files under gstack_home, match against allowlist.
for root, dirs, files in os.walk(gstack_home):
@@ -413,22 +426,54 @@ for root, dirs, files in os.walk(gstack_home):
continue
for name in files:
full = os.path.join(root, name)
rel = os.path.relpath(full, gstack_home)
# Repo paths are POSIX-relative. os.path.relpath yields backslash
# separators on Windows, which never match the forward-slash allowlist
# globs (e.g. "projects/*/learnings.jsonl"), so discovery silently
# enqueued nothing under projects/ on Windows. Normalize to "/".
rel = os.path.relpath(full, gstack_home).replace(os.sep, "/")
if rel.startswith(".brain-"):
continue
matched = any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist)
if not matched:
if not any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist):
continue
if rel in skip:
continue
try:
st = os.stat(full)
key = f"{int(st.st_mtime)}:{st.st_size}"
except OSError:
continue
prev = cursor.get(rel)
if prev != key:
# Enqueue via the shim (respects sync mode + skip list).
subprocess.run([enqueue_bin, rel], check=False)
new_cursor[rel] = key
if cursor.get(rel) != key:
to_enqueue.append((rel, key))
# Append to the queue directly. The previous implementation shelled out to
# gstack-brain-enqueue once per file, but Windows Python cannot exec a
# bash-shebang script (the spawn fails with a fork error), so discovery
# enqueued nothing on Windows even after the path-match fix above.
# Writing the queue line here is platform-agnostic; the drain step
# (compute_paths_to_stage) still re-applies the skip-list + privacy filters.
if to_enqueue:
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
try:
# One atomic append per record (O_APPEND, each line < PIPE_BUF), matching
# gstack-brain-enqueue's concurrency contract so a writer-shim append
# running in parallel can't interleave mid-record. Buffered text writes
# don't guarantee that. Compact separators match the shim's JSON shape.
fd = os.open(queue_path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o644)
try:
for rel, key in to_enqueue:
rec = json.dumps({"file": rel, "ts": ts}, separators=(",", ":"))
os.write(fd, (rec + "\n").encode("utf-8"))
finally:
os.close(fd)
except OSError:
# Queue write failed (disk full, AV file lock). Leave the cursor
# unadvanced so these files are retried on the next discover instead of
# being silently recorded as synced (which loses the change until the
# file next changes).
to_enqueue = []
# Advance the cursor only for records actually written.
for rel, key in to_enqueue:
new_cursor[rel] = key
save_cursor(cursor_path, new_cursor)
PYEOF
+223
View File
@@ -0,0 +1,223 @@
#!/usr/bin/env bash
# gstack-codex-session-import — backfill question-log.jsonl from Codex sessions.
#
# Codex has no AskUserQuestion tool (per docs/spikes/codex-session-format.md).
# gstack skills running on Codex emit Decision Briefs as plain agent_message
# text, and the user's response shows up in the next user_message. This
# importer reconstructs those question/answer pairs from the structured
# JSONL session files at ~/.codex/sessions/<date>/.
#
# Usage:
# gstack-codex-session-import # latest session under ~/.codex/sessions/
# gstack-codex-session-import <path/to.jsonl> # explicit session file
# gstack-codex-session-import --since <iso> # all sessions newer than <iso>
#
# Recovery strategy (two-tier per D5/T4 spike):
# 1. Marker-first: extract <gstack-qid:foo-bar> from agent_message → stable id.
# 2. Pattern fallback: detect D<N> header + numbered options → hash id
# (source=codex-import-pattern, never used as preference key per D18).
#
# Writes via bin/gstack-question-log so source tagging, dedup, and async
# derive all apply uniformly.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
CODEX_SESSIONS_ROOT="${CODEX_SESSIONS_ROOT:-$HOME/.codex/sessions}"
MODE="latest"
EXPLICIT_PATH=""
SINCE_ISO=""
if [ $# -gt 0 ]; then
case "$1" in
--since)
MODE="since"
SINCE_ISO="${2:-}"
;;
--help|-h)
sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
exit 0
;;
-*)
echo "unknown flag: $1" >&2
exit 1
;;
*)
MODE="explicit"
EXPLICIT_PATH="$1"
;;
esac
fi
# Resolve list of session files to process.
SESSION_FILES=()
case "$MODE" in
explicit)
if [ ! -f "$EXPLICIT_PATH" ]; then
echo "gstack-codex-session-import: file not found: $EXPLICIT_PATH" >&2
exit 1
fi
SESSION_FILES=("$EXPLICIT_PATH")
;;
latest)
if [ ! -d "$CODEX_SESSIONS_ROOT" ]; then
echo "NO_SESSIONS: $CODEX_SESSIONS_ROOT does not exist"
exit 0
fi
LATEST=$(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -print 2>/dev/null \
| xargs ls -t 2>/dev/null | head -1 || true)
if [ -z "$LATEST" ]; then
echo "NO_SESSIONS: no rollout-*.jsonl files under $CODEX_SESSIONS_ROOT"
exit 0
fi
SESSION_FILES=("$LATEST")
;;
since)
if [ -z "$SINCE_ISO" ]; then
echo "--since requires an ISO 8601 timestamp" >&2
exit 1
fi
while IFS= read -r f; do
SESSION_FILES+=("$f")
done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -newer <(date -u -d "$SINCE_ISO" 2>/dev/null || date -u) 2>/dev/null)
;;
esac
if [ ${#SESSION_FILES[@]} -eq 0 ]; then
echo "NO_SESSIONS: nothing to import"
exit 0
fi
# Parse + extract via bun. Emits one line per question found, ready to pipe
# into gstack-question-log. Tagged with source so downstream consumers
# (/plan-tune stats, dream cycle) can distinguish backfilled events from
# live captures.
IMPORTED=0
SKIPPED_NO_ANSWER=0
for SESSION_FILE in "${SESSION_FILES[@]}"; do
COUNT_LINE=$(SESSION_FILE_PATH="$SESSION_FILE" QLOG_BIN="$SCRIPT_DIR/gstack-question-log" bun -e '
const fs = require("fs");
const path = require("path");
const { spawnSync } = require("child_process");
const crypto = require("crypto");
const sessionPath = process.env.SESSION_FILE_PATH;
const qlogBin = process.env.QLOG_BIN;
const lines = fs.readFileSync(sessionPath, "utf-8").trim().split("\n").filter(Boolean);
let meta = null;
const stream = [];
for (const ln of lines) {
try {
const e = JSON.parse(ln);
if (e.type === "session_meta") meta = e.payload;
else stream.push(e);
} catch {}
}
if (!meta) {
console.error("WARN: no session_meta in " + sessionPath);
console.log("0 0");
process.exit(0);
}
const cwd = meta.cwd || "";
const sessionId = (meta.id || path.basename(sessionPath)).slice(0, 64);
// Walk for agent_message → next user_message pairs.
const briefs = [];
for (let i = 0; i < stream.length; i++) {
const e = stream[i];
if (e.type !== "event_msg" || e.payload?.type !== "agent_message") continue;
const text = String(e.payload?.message || "");
if (!text) continue;
// Detect D-numbered brief or marker. Markers are sufficient on their own.
const markerMatch = text.match(/<gstack-qid:([a-z0-9-]{1,64})>/i);
const dMatch = text.match(/^D\d+[\.\d]*\s*[—\-]\s*(.+?)$/m);
if (!markerMatch && !dMatch) continue;
// Find the next user_message in the stream.
let answer = null;
for (let j = i + 1; j < stream.length; j++) {
const e2 = stream[j];
if (e2.type === "event_msg" && e2.payload?.type === "user_message") {
answer = String(e2.payload?.message || "").trim();
break;
}
}
if (!answer) continue;
// Extract options A) ... B) ... from the brief.
const optMatches = [...text.matchAll(/^([A-Z])\)\s+(.+?)(?:\s+\(recommended\))?$/gm)];
const options = optMatches.map((m) => m[2].trim());
// Identify recommended option (label first, prose fallback).
let recommended;
const recLabel = [...text.matchAll(/^([A-Z])\)\s+(.+?)\s+\(recommended\)$/gm)];
if (recLabel.length === 1) recommended = recLabel[0][2].trim();
// Identify which option the user picked from their answer.
// Look for "A" / "A) ..." / option-label prefix match.
let userChoice = "__unknown__";
const letterMatch = answer.match(/^\s*([A-Z])\b/);
if (letterMatch) {
const idx = letterMatch[1].charCodeAt(0) - 65;
if (idx >= 0 && idx < options.length) userChoice = options[idx];
else userChoice = letterMatch[1];
} else if (options.length > 0) {
const lower = answer.toLowerCase();
const m = options.find((o) => lower.includes(o.toLowerCase().slice(0, 12)));
if (m) userChoice = m;
}
if (userChoice === "__unknown__") {
userChoice = answer.slice(0, 64);
}
const summary = (dMatch?.[1] || text.split("\n")[0]).slice(0, 200);
let questionId, source;
if (markerMatch) {
questionId = markerMatch[1];
source = "codex-import-marker";
} else {
const sortedOpts = [...options].sort().join("|");
const h = crypto.createHash("sha1").update("codex::" + summary + "::" + sortedOpts).digest("hex").slice(0, 10);
questionId = "hook-" + h;
source = "codex-import-pattern";
}
briefs.push({
skill: "codex",
question_id: questionId,
question_summary: summary,
options_count: options.length || 1,
user_choice: userChoice.slice(0, 64),
...(recommended ? { recommended: recommended.slice(0, 64) } : {}),
source,
session_id: sessionId,
// Use ts_nanos+ts shape from the event itself if available; else null.
ts: e.timestamp || undefined,
});
}
let imported = 0;
for (const b of briefs) {
const res = spawnSync(qlogBin, [JSON.stringify(b)], {
encoding: "utf-8",
stdio: ["ignore", "pipe", "pipe"],
// Run from the originating cwd so gstack-slug bucks events into the
// right project. Falls back to the importer cwd if the session cwd
// no longer exists.
cwd: cwd && fs.existsSync(cwd) ? cwd : undefined,
timeout: 5000,
});
if (res.status === 0) imported++;
}
console.log(imported + " 0");
' 2>&1)
IMP=$(echo "$COUNT_LINE" | awk "{print \$1}")
IMPORTED=$((IMPORTED + IMP))
done
echo "IMPORTED: $IMPORTED events from ${#SESSION_FILES[@]} session(s)"
+267 -16
View File
@@ -8,11 +8,13 @@
# gstack-config defaults — show just the defaults table
#
# Env overrides (for testing):
# GSTACK_STATE_ROOT — override ~/.gstack state directory (highest priority,
# matches D16 cathedral isolation convention)
# GSTACK_HOME — override ~/.gstack state directory (aligns with writer scripts)
# GSTACK_STATE_DIR — legacy alias for GSTACK_HOME (kept for backwards compat)
set -euo pipefail
STATE_DIR="${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}"
STATE_DIR="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}}"
CONFIG_FILE="$STATE_DIR/config.yaml"
# Annotated header for new config files. Written once on first `set`.
@@ -73,8 +75,27 @@ CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on ne
# # Set to true once the privacy gate has asked the user.
# # Flip back to false to be re-prompted.
#
# ─── Plan-tune hooks ─────────────────────────────────────────────────
# plan_tune_hooks: prompt # Controls whether ./setup installs the plan-tune
# # Claude Code hooks (PostToolUse capture +
# # PreToolUse preference enforcement).
# # prompt — ask on a real TTY, skip otherwise (default)
# # yes — install non-interactively
# # no — skip non-interactively
# # Override per-run: ./setup --plan-tune-hooks /
# # --no-plan-tune-hooks, or env GSTACK_PLAN_TUNE_HOOKS.
#
# ─── Advanced ────────────────────────────────────────────────────────
# codex_reviews: enabled # disabled = skip Codex adversarial reviews in /ship
# codex_reviews: enabled # Master switch for Codex cross-model review. enabled =
# # Codex runs as a standard step in /review, /ship,
# # /document-release, plan reviews, and /autoplan (auto
# # falls back to a Claude subagent if Codex is missing or
# # not authenticated). disabled = skip all Codex passes.
# # Asymmetry on disabled: diff-review (/review, /ship) still
# # runs the free Claude adversarial subagent; plan-review and
# # /document-release skip the outside-voice step entirely.
# # An invalid value is REJECTED (existing value preserved) so
# # a typo cannot silently turn paid Codex calls on or off.
# gstack_contributor: false # true = file field reports when gstack misbehaves
# skip_eng_review: false # true = skip eng review gate in /ship (not recommended)
#
@@ -100,6 +121,7 @@ lookup_default() {
skill_prefix) echo "false" ;;
checkpoint_mode) echo "explicit" ;;
checkpoint_push) echo "false" ;;
explain_level) echo "default" ;;
codex_reviews) echo "enabled" ;;
gstack_contributor) echo "false" ;;
skip_eng_review) echo "false" ;;
@@ -107,19 +129,145 @@ lookup_default() {
cross_project_learnings) echo "" ;; # intentionally empty → unset triggers first-time prompt
artifacts_sync_mode) echo "off" ;;
artifacts_sync_mode_prompted) echo "false" ;;
plan_tune_hooks) echo "prompt" ;; # prompt | yes | no — controls ./setup plan-tune hook install
redact_repo_visibility) echo "" ;; # empty → fall through to gh/glab detection
redact_prepush_hook) echo "false" ;;
# Brain-aware planning (v1.48 / T5+T10+T16). Defaults documented inline:
# brain_trust_policy@<hash> — unset on fresh install; setup-gbrain
# writes 'personal' for local engines,
# asks the user for remote-ambiguous.
# salience_allowlist — empty falls through to
# SALIENCE_DEFAULT_ALLOWLIST (D9).
# user_slug_at_<hash> — empty triggers resolve-user-slug
# fallback chain (D4 A3) on first call.
brain_trust_policy*) echo "unset" ;;
salience_allowlist) echo "" ;;
user_slug_at_*) echo "" ;;
*) echo "" ;;
esac
}
# ──────────────────────────────────────────────────────────────────────
# Brain-integration helpers (T5+T10+T16)
# ──────────────────────────────────────────────────────────────────────
# Compute sha8 of a string. Used for endpoint hashing.
sha8_of() {
printf '%s' "$1" | shasum -a 256 | cut -c1-8
}
# Detect the active brain endpoint hash. Reads ~/.claude.json for the gbrain
# MCP server URL. Falls back to the literal 'local' when no MCP is configured.
endpoint_hash() {
_claude_json="$HOME/.claude.json"
if [ -f "$_claude_json" ] && command -v jq >/dev/null 2>&1; then
_url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null)
if [ -n "$_url" ] && [ "$_url" != "null" ]; then
sha8_of "$_url"
return 0
fi
fi
printf '%s' "local"
}
# Detect endpoint hash collisions. When two distinct endpoints share the same
# sha8 prefix (rare but possible), escalate to sha16 by emitting the longer
# hash. Detection: scan config file for existing brain_trust_policy@<hash> or
# user_slug_at_<hash> keys; if any non-active hash equals the active sha8 but
# would differ at sha16, the active endpoint needs sha16.
endpoint_hash_with_collision_check() {
_active=$(endpoint_hash)
if [ "$_active" = "local" ]; then
printf '%s' "$_active"
return 0
fi
# If a different endpoint (different URL) shares this sha8, escalate.
# We only catch this when the config has another endpoint recorded.
_matching=$(grep -E "^(brain_trust_policy|user_slug_at)@${_active}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
_claude_json="$HOME/.claude.json"
if [ -n "$_matching" ] && [ -f "$_claude_json" ] && command -v jq >/dev/null 2>&1; then
_url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null)
_sha16=$(printf '%s' "$_url" | shasum -a 256 | cut -c1-16)
# Look for any sha16-namespaced key that conflicts. If a stored sha16 exists
# and differs from current sha16, that's the collision evidence; emit sha16.
_stored16=$(grep -E "^(brain_trust_policy|user_slug_at)@${_sha16}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
if [ -n "$_stored16" ]; then
printf '%s' "$_sha16"
return 0
fi
fi
printf '%s' "$_active"
}
# Resolve the user-slug per D4 A3 chain:
# 1. mcp__gbrain__whoami.client_name (best effort via gbrain CLI shell-out)
# 2. $USER env
# 3. sha8($(git config user.email))
# 4. anonymous-<sha8(hostname)>
# Persists result via gstack-config set user_slug_at_<endpoint-hash> on first call.
resolve_user_slug() {
_hash=$(endpoint_hash_with_collision_check)
_stored=$(grep -E "^user_slug_at_${_hash}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
if [ -n "$_stored" ]; then
printf '%s' "$_stored"
return 0
fi
_slug=""
# Layer 1: gbrain whoami
if command -v gbrain >/dev/null 2>&1; then
_whoami=$(gbrain whoami --json 2>/dev/null || true)
if [ -n "$_whoami" ] && command -v jq >/dev/null 2>&1; then
_client_name=$(printf '%s' "$_whoami" | jq -r '.client_name // .token_name // empty' 2>/dev/null || true)
if [ -n "$_client_name" ] && [ "$_client_name" != "null" ]; then
_slug=$(printf '%s' "$_client_name" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
fi
fi
fi
# Layer 2: $USER
if [ -z "$_slug" ] && [ -n "${USER:-}" ]; then
_slug=$(printf '%s' "$USER" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
fi
# Layer 3: sha8 of git email
if [ -z "$_slug" ]; then
_email=$(git config user.email 2>/dev/null || true)
if [ -n "$_email" ]; then
_slug="email-$(sha8_of "$_email")"
fi
fi
# Layer 4: anonymous-<sha8(hostname)>
if [ -z "$_slug" ]; then
_slug="anonymous-$(sha8_of "$(hostname 2>/dev/null || echo unknown)")"
fi
# Persist via direct file write (avoid recursion into gstack-config set)
mkdir -p "$STATE_DIR"
if [ ! -f "$CONFIG_FILE" ]; then
printf '%s' "$CONFIG_HEADER" > "$CONFIG_FILE"
fi
if ! grep -qE "^user_slug_at_${_hash}:" "$CONFIG_FILE" 2>/dev/null; then
echo "user_slug_at_${_hash}: ${_slug}" >> "$CONFIG_FILE"
fi
printf '%s' "$_slug"
}
case "${1:-}" in
get)
KEY="${2:?Usage: gstack-config get <key>}"
# Validate key (alphanumeric + underscore only)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
echo "Error: key must contain only alphanumeric characters and underscores" >&2
# Validate key (alphanumeric + underscore + optional @<hash> suffix for
# endpoint-namespaced keys introduced by the brain-aware planning layer)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
exit 1
fi
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
# Use literal match for keys containing @ (sha hashes), regex otherwise
VALUE=$(grep -F "${KEY}:" "$CONFIG_FILE" 2>/dev/null | grep -E "^${KEY%@*}(@[a-f0-9]+)?:" | grep -F "${KEY}:" | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
if [ -z "$VALUE" ]; then
VALUE=$(lookup_default "$KEY")
fi
@@ -128,11 +276,17 @@ case "${1:-}" in
set)
KEY="${2:?Usage: gstack-config set <key> <value>}"
VALUE="${3:?Usage: gstack-config set <key> <value>}"
# Validate key (alphanumeric + underscore only)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
echo "Error: key must contain only alphanumeric characters and underscores" >&2
# Validate key (alphanumeric + underscore + optional @<hash> suffix)
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
exit 1
fi
# Validate brain_trust_policy value domain (D4 / D11)
if printf '%s' "$KEY" | grep -qE '^brain_trust_policy(@|$)' && \
[ "$VALUE" != "personal" ] && [ "$VALUE" != "shared" ] && [ "$VALUE" != "unset" ]; then
echo "Warning: brain_trust_policy '$VALUE' not recognized. Valid values: personal, shared, unset. Using unset." >&2
VALUE="unset"
fi
# V1: whitelist values for keys with closed value domains. Unknown values warn + default.
if [ "$KEY" = "explain_level" ] && [ "$VALUE" != "default" ] && [ "$VALUE" != "terse" ]; then
echo "Warning: explain_level '$VALUE' not recognized. Valid values: default, terse. Using default." >&2
@@ -142,6 +296,28 @@ case "${1:-}" in
echo "Warning: artifacts_sync_mode '$VALUE' not recognized. Valid values: off, artifacts-only, full. Using off." >&2
VALUE="off"
fi
# redact_repo_visibility: a LOCAL override for repos gh/glab can't read (e.g.
# self-hosted GitLab). It lives in ~/.gstack/config.yaml (never committed), so
# it can't be used to weaken the gate repo-wide for other contributors.
if [ "$KEY" = "redact_repo_visibility" ] && [ "$VALUE" != "public" ] && [ "$VALUE" != "private" ] && [ "$VALUE" != "unknown" ]; then
echo "Warning: redact_repo_visibility '$VALUE' not recognized. Valid values: public, private, unknown. Using unknown." >&2
VALUE="unknown"
fi
if [ "$KEY" = "redact_prepush_hook" ] && [ "$VALUE" != "true" ] && [ "$VALUE" != "false" ]; then
echo "Warning: redact_prepush_hook '$VALUE' not recognized. Valid values: true, false. Using false." >&2
VALUE="false"
fi
if [ "$KEY" = "plan_tune_hooks" ] && [ "$VALUE" != "prompt" ] && [ "$VALUE" != "yes" ] && [ "$VALUE" != "no" ]; then
echo "Warning: plan_tune_hooks '$VALUE' not recognized. Valid values: prompt, yes, no. Using prompt." >&2
VALUE="prompt"
fi
# codex_reviews controls PAID Codex calls. Unlike the warn-and-default keys above,
# an invalid value is REJECTED and the existing setting is left unchanged — a typo
# must never silently flip the switch and turn paid Codex calls on or off.
if [ "$KEY" = "codex_reviews" ] && [ "$VALUE" != "enabled" ] && [ "$VALUE" != "disabled" ]; then
echo "Error: codex_reviews '$VALUE' not recognized. Valid values: enabled, disabled. Existing value left unchanged." >&2
exit 1
fi
mkdir -p "$STATE_DIR"
# Write annotated header on first creation
if [ ! -f "$CONFIG_FILE" ]; then
@@ -169,9 +345,9 @@ case "${1:-}" in
echo ""
echo "# ─── Active values (including defaults for unset keys) ───"
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted; do
skill_prefix checkpoint_mode checkpoint_push explain_level \
codex_reviews gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
SOURCE="default"
if [ -n "$VALUE" ]; then
@@ -185,14 +361,89 @@ case "${1:-}" in
defaults)
echo "# gstack-config defaults"
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted; do
skill_prefix checkpoint_mode checkpoint_push explain_level \
codex_reviews gstack_contributor skip_eng_review workspace_root \
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
printf ' %-24s %s\n' "$KEY:" "$(lookup_default "$KEY")"
done
;;
endpoint-hash)
# Brain integration helper (T10): print active brain endpoint sha8
endpoint_hash_with_collision_check
;;
resolve-user-slug)
# Brain integration helper (T16 / D4 A3): resolve + persist user-slug
resolve_user_slug
;;
gbrain-refresh)
# Brain integration helper: re-detect gbrain installation state and
# persist to ~/.gstack/gbrain-detection.json. gen-skill-docs reads this
# file (when invoked with --respect-detection) to decide whether to
# render GBRAIN_CONTEXT_LOAD and GBRAIN_SAVE_RESULTS blocks in
# generated SKILL.md files.
#
# Run this after installing or uninstalling gbrain so your locally
# generated SKILL.md files match your installation state.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DETECT_BIN="$SCRIPT_DIR/gstack-gbrain-detect"
DETECTION_FILE="$STATE_DIR/gbrain-detection.json"
mkdir -p "$STATE_DIR"
if [ ! -x "$DETECT_BIN" ]; then
echo "gstack-gbrain-detect not found at $DETECT_BIN" >&2
exit 1
fi
if ! "$DETECT_BIN" > "$DETECTION_FILE.tmp" 2>/dev/null; then
printf '{"gbrain_on_path":false,"gbrain_local_status":"no-cli"}\n' > "$DETECTION_FILE.tmp"
fi
mv "$DETECTION_FILE.tmp" "$DETECTION_FILE"
# Summarize for the user. Use python (already required elsewhere) to
# parse the JSON portably; fall back to grep if python is unavailable.
PYTHON_CMD=$(command -v python3 || command -v python || true)
if [ -n "$PYTHON_CMD" ]; then
STATUS=$("$PYTHON_CMD" -c "import json,sys; d=json.load(open('$DETECTION_FILE')); print(d.get('gbrain_local_status','unknown'))" 2>/dev/null || echo unknown)
VERSION=$("$PYTHON_CMD" -c "import json,sys; d=json.load(open('$DETECTION_FILE')); print(d.get('gbrain_version') or 'unknown')" 2>/dev/null || echo unknown)
else
STATUS=$(grep -o '"gbrain_local_status":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/')
VERSION=$(grep -o '"gbrain_version":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/')
[ -z "$STATUS" ] && STATUS=unknown
[ -z "$VERSION" ] && VERSION=unknown
fi
case "$STATUS" in
ok)
echo "Detected gbrain v$VERSION."
# Render brain-aware blocks INTO the global install so EVERY project's
# Claude sessions get them (other projects read SKILL.md + sections from
# ~/.claude/skills/gstack via absolute paths baked at gen time). Guards
# (never mutate an arbitrary directory): the target must exist, not be a
# symlink (a symlinked install points at a dev worktree — rendering there
# would dirty tracked source), and look like a real gstack clone.
INSTALL_DIR="$HOME/.claude/skills/gstack"
if [ ! -d "$INSTALL_DIR" ]; then
echo "No global install at $INSTALL_DIR — nothing to render. (Dev workspaces get blocks via bin/dev-setup.)"
elif [ -L "$INSTALL_DIR" ]; then
echo "Skip: $INSTALL_DIR is a symlink (likely a dev worktree). Rendering there would dirty tracked source — run bin/dev-setup in that worktree instead."
elif [ ! -f "$INSTALL_DIR/VERSION" ] || [ ! -f "$INSTALL_DIR/package.json" ]; then
echo "Skip: $INSTALL_DIR doesn't look like a gstack clone (missing VERSION/package.json) — refusing to modify it."
elif ! command -v bun >/dev/null 2>&1; then
echo "Skip: bun not on PATH — can't render. Install bun, then re-run 'gstack-config gbrain-refresh'."
elif ( cd "$INSTALL_DIR" && bun run gen:skill-docs:user --host claude >/dev/null 2>&1 ); then
echo "Rendered brain-aware blocks into $INSTALL_DIR — now live across all your projects' Claude sessions."
echo "Note: this dirties the install's git tree (generated blocks differ from main, by design)."
echo " A 'git reset --hard origin/main' there reverts them; re-run 'gstack-config gbrain-refresh' to restore."
else
echo "Warning: render failed. Run 'cd $INSTALL_DIR && bun run gen:skill-docs:user --host claude' manually to see the error."
fi
;;
*)
echo "gbrain not detected (local-status: $STATUS) → brain-aware blocks will be suppressed in planning-skill SKILL.md files."
echo "Install gbrain (see /setup-gbrain) and re-run 'gstack-config gbrain-refresh' once it's configured."
;;
esac
;;
*)
echo "Usage: gstack-config {get|set|list|defaults} [key] [value]"
echo "Usage: gstack-config {get|set|list|defaults|endpoint-hash|resolve-user-slug|gbrain-refresh} [key] [value]"
exit 1
;;
esac
+89
View File
@@ -0,0 +1,89 @@
#!/usr/bin/env bun
/**
* gstack-decision-log — append a durable decision (or supersede/redact/compact it).
*
* Usage:
* gstack-decision-log '{"decision":"...","rationale":"...","scope":"repo","source":"user"}'
* gstack-decision-log --supersede <decision-id>
* gstack-decision-log --redact <decision-id>
* gstack-decision-log --compact
*
* Event-sourced (lib/gstack-decision): every call appends an event and refreshes the
* bounded active snapshot. NON-INTERACTIVE — never prompts (agents/skills call this;
* a prompt would hang them). Validation + injection + HIGH-secret rejection happen in
* validateDecide; a rejected decision exits 1 with a message, nothing persisted.
*/
import { mkdirSync } from "fs";
import { dirname } from "path";
import { spawnSync } from "child_process";
import {
decisionPaths,
validateDecide,
makeRefEvent,
appendEvent,
rebuildSnapshot,
compact,
type DecisionEvent,
} from "../lib/gstack-decision";
import { resolveSlug, gitBranch, flagValue } from "../lib/bin-context";
const HERE = import.meta.dir;
const args = process.argv.slice(2);
const slug = resolveSlug(`${HERE}/gstack-slug`);
const paths = decisionPaths(slug);
mkdirSync(dirname(paths.log), { recursive: true });
function enqueue(): void {
// Fire-and-forget cross-machine sync (no-op when artifacts_sync is off).
spawnSync(`${HERE}/gstack-brain-enqueue`, [`projects/${slug}/decisions.jsonl`], { stdio: "ignore" });
}
if (args.includes("--compact")) {
const r = compact(paths);
if (r.skipped) {
console.log("compact skipped: a concurrent write/compact is in progress; log left intact — re-run");
process.exit(0);
}
console.log(`compacted: ${r.activeCount} active, ${r.archivedCount} archived, ${r.expungedCount} expunged`);
enqueue();
process.exit(0);
}
const supersedeId = flagValue(args, "--supersede");
const redactId = flagValue(args, "--redact");
if (supersedeId || redactId) {
const kind = supersedeId ? "supersede" : "redact";
const targetId = (supersedeId || redactId) as string;
appendEvent(paths, makeRefEvent(kind, targetId, { source: "agent" }));
rebuildSnapshot(paths);
enqueue();
console.log(`${kind}: ${targetId}`);
process.exit(0);
}
const jsonArg = args.find((a) => !a.startsWith("--"));
if (!jsonArg) {
process.stderr.write(
"gstack-decision-log: provide a JSON decision, or --supersede/--redact <id>, or --compact\n",
);
process.exit(1);
}
let obj: Partial<DecisionEvent>;
try {
obj = JSON.parse(jsonArg);
} catch {
process.stderr.write("gstack-decision-log: invalid JSON\n");
process.exit(1);
}
if (obj.scope === "branch" && !obj.branch) obj.branch = gitBranch();
const res = validateDecide(obj);
if (!res.ok) {
process.stderr.write(`gstack-decision-log: ${res.error}\n`);
process.exit(1);
}
appendEvent(paths, res.event);
rebuildSnapshot(paths);
enqueue();
console.log(res.event.id);
+108
View File
@@ -0,0 +1,108 @@
#!/usr/bin/env bun
/**
* gstack-decision-search — read active decisions (the curated "what did we decide" view).
*
* Usage:
* gstack-decision-search [--query KW] [--scope repo|branch|issue]
* [--branch B] [--issue I] [--recent N] [--all] [--json]
* [--semantic]
*
* Reads the BOUNDED active snapshot (decisions.active.json) — O(active), not a full
* history scan — and rebuilds it from the event log if missing. Scope-filtered to the
* current branch/issue context (recency != relevance). NON-INTERACTIVE. `--all` shows
* superseded decisions too (from the full log). Exit 0 silently when there are none.
*
* `--semantic` (with `--query`) appends an OPTIONAL "related from memory" block from
* gbrain semantic recall. It is a pure enhancement: when gbrain is off/unconfigured/
* empty it degrades silently to the reliable file results above. The reliable path
* never loads gbrain code (the semantic module is imported lazily only here).
*/
import { existsSync } from "fs";
import {
decisionPaths,
readSnapshot,
rebuildSnapshot,
readEvents,
filterByScope,
datamark,
type ActiveDecision,
} from "../lib/gstack-decision";
import { resolveSlug, gitBranch, flagValue } from "../lib/bin-context";
const HERE = import.meta.dir;
const args = process.argv.slice(2);
const slug = resolveSlug(`${HERE}/gstack-slug`);
const paths = decisionPaths(slug);
const queryRaw = flagValue(args, "--query");
const query = queryRaw?.toLowerCase();
const scope = flagValue(args, "--scope");
const branch = flagValue(args, "--branch") ?? gitBranch();
const issue = flagValue(args, "--issue");
const recentRaw = flagValue(args, "--recent");
const recent = recentRaw ? parseInt(recentRaw, 10) : undefined;
const showAll = args.includes("--all");
const asJson = args.includes("--json");
const semantic = args.includes("--semantic");
let rows: ActiveDecision[];
if (showAll) {
// --all includes SUPERSEDED decisions (history), but NEVER redacted ones — a redact
// is an expunge, so it must remove the text from every read path, not just active.
const events = readEvents(paths);
const redacted = new Set(
events.filter((e) => e.kind === "redact" && e.supersedes).map((e) => e.supersedes as string),
);
rows = events.filter((e): e is ActiveDecision => e.kind === "decide" && !redacted.has(e.id));
} else {
rows = readSnapshot(paths);
// Rebuild only when a snapshot is absent but a log exists (don't write a snapshot
// into a nonexistent store on an empty read — just return nothing).
if (!rows.length && existsSync(paths.log)) rows = rebuildSnapshot(paths);
}
rows = filterByScope(rows, { branch, issue });
if (scope) rows = rows.filter((d) => d.scope === scope);
if (query) {
rows = rows.filter((d) =>
[d.decision, d.rationale, d.alternatives_considered]
.filter((s): s is string => typeof s === "string")
.some((s) => s.toLowerCase().includes(query)),
);
}
rows.sort((a, b) => (a.date < b.date ? 1 : a.date > b.date ? -1 : 0)); // newest first
if (recent && recent > 0) rows = rows.slice(0, recent);
if (asJson) {
// --json stays reliable-only (semantic recall is a human-facing supplement).
console.log(JSON.stringify(rows));
process.exit(0);
}
for (const d of rows) {
// Datamark all stored free-text (decision, rationale, branch/issue) — it lands in
// agent context via Context Recovery, so treat it as DATA, not instructions.
const branchTag = d.branch ? `:${datamark(d.branch)}` : "";
const issueTag = d.issue ? `:${datamark(d.issue)}` : "";
const scopeTag = d.scope === "repo" ? "" : ` [${d.scope}${branchTag}${issueTag}]`;
console.log(`- ${datamark(d.decision ?? "")}${scopeTag} (${d.source}, ${d.date.slice(0, 10)})`);
if (d.rationale) console.log(` why: ${datamark(d.rationale)}`);
}
// OPTIONAL gbrain enhancement. Lazy import so the reliable path above never loads
// gbrain code. Degrades silently: null (gbrain off) or [] (nothing found) leaves the
// reliable results above as the answer.
if (semantic && queryRaw) {
const { semanticRecall } = await import("../lib/gstack-decision-semantic");
const hits = semanticRecall(queryRaw);
if (hits && hits.length) {
console.log("\nRelated from memory (gbrain semantic recall):");
for (const h of hits) {
// gbrain hits are EXTERNAL corpus content — datamark slug + snippet too so they
// can't spoof role markers / fences when printed into agent context.
const snip = datamark(h.snippet.length > 100 ? `${h.snippet.slice(0, 100)}…` : h.snippet);
console.log(` [${h.score.toFixed(2)}] ${datamark(h.slug)}: ${snip}`);
}
}
}
+167
View File
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""gstack-detach — run a long agent job (evals, benchmarks, syncs) robustly.
Agent-launched long jobs on a shared dev box keep dying to environmental
killers. This tool bakes in the fixes so gstack (and every gstack user) runs
them properly:
* SIGTERM-proof: fork + setsid puts the job in its OWN session, so the
harness's "polite quit" SIGTERM to the launching process group can't reach
it (observed: `script "test:gate" was terminated by signal SIGTERM`).
* No idle-sleep death (macOS): wraps the command in `caffeinate -i`.
* No cross-worktree API saturation: `--lock NAME` takes a machine-wide
advisory lock so concurrent Conductor worktrees SERIALIZE their eval runs
instead of saturating the shared model API (which mass-times-out E2E suites).
* No shared-/tmp collision: a run-scoped log path by default
(~/.gstack-dev/eval-runs/<label>-<slug>-<branch>-<ts>-<pid>.log), so
concurrent runs never clobber or contaminate each other's logs.
* No silent hang: `--timeout SECS` watchdog kills a stalled run, and a
`### gstack-detach EXIT=<code> ###` sentinel is ALWAYS appended on a
terminal path so a poller can tell finished-vs-died (silence != success).
Usage:
gstack-detach [--log PATH] [--lock NAME] [--timeout SECS] [--label LBL] -- CMD [ARGS...]
Prints `gstack-detach LOG <path>` and returns immediately. Poll the log; break
on `### gstack-detach EXIT=` (both success and failure are marked).
Secrets are inherited from the environment ONLY — never pass an API key in argv.
"""
import argparse
import os
import shutil
import signal
import subprocess
import sys
import time
from datetime import datetime, timezone
def _now():
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _git(*args):
try:
return subprocess.check_output(["git", *args], stderr=subprocess.DEVNULL, text=True).strip()
except Exception:
return ""
def run_scoped_log(label):
base = os.path.expanduser("~/.gstack-dev/eval-runs")
os.makedirs(base, exist_ok=True)
root = _git("rev-parse", "--show-toplevel")
slug = os.path.basename(root) if root else "unknown"
branch = (_git("branch", "--show-current") or "nobranch").replace("/", "-")
stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
return os.path.join(base, f"{label}-{slug}-{branch}-{stamp}-{os.getpid()}.log")
def log_line(path, msg):
with open(path, "ab", buffering=0) as f:
f.write((msg + "\n").encode("utf-8", "replace"))
def acquire_lock(name, log):
"""Machine-wide advisory lock via fcntl (portable on macOS + Linux). Blocks
until free so concurrent worktrees serialize rather than saturate the API.
Returns the held fd (kept open for the process lifetime)."""
import fcntl
d = os.path.expanduser("~/.gstack/locks")
os.makedirs(d, exist_ok=True)
fd = open(os.path.join(d, f"{name}.lock"), "w")
try:
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
log_line(log, f"### gstack-detach WAITING for lock '{name}' (another run holds it) ### {_now()}")
fcntl.flock(fd, fcntl.LOCK_EX) # block until released
fd.write(f"{os.getpid()} {_now()}\n")
fd.flush()
log_line(log, f"### gstack-detach LOCK '{name}' ACQUIRED ### {_now()}")
return fd
def child_run(args, log):
lock_fd = acquire_lock(args.lock, log) if args.lock else None
cmd = args.cmd
if shutil.which("caffeinate"): # macOS: block idle-sleep for the run
cmd = ["caffeinate", "-i", *cmd]
log_line(log, f"### gstack-detach START label={args.label} pgid={os.getpgid(0)} ### {_now()}")
with open(log, "ab", buffering=0) as f:
# start_new_session: the command runs in its OWN process group so the
# watchdog can killpg() it without also killing this supervisor (which
# must survive to write the EXIT sentinel).
proc = subprocess.Popen(
cmd, stdout=f, stderr=subprocess.STDOUT, stdin=subprocess.DEVNULL, start_new_session=True
)
if args.timeout and args.timeout > 0:
try:
code = proc.wait(timeout=args.timeout)
except subprocess.TimeoutExpired:
log_line(log, f"### gstack-detach WATCHDOG fired after {args.timeout}s — killing ### {_now()}")
try:
os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
except Exception:
pass
time.sleep(5)
try:
proc.kill()
except Exception:
pass
code = "timeout"
else:
code = proc.wait()
log_line(log, f"### gstack-detach EXIT={code} ### {_now()}")
if lock_fd:
try:
lock_fd.close()
except Exception:
pass
def main():
ap = argparse.ArgumentParser(add_help=True)
ap.add_argument("--log")
ap.add_argument("--lock")
ap.add_argument("--timeout", type=int, default=0)
ap.add_argument("--label", default="job")
ap.add_argument("cmd", nargs=argparse.REMAINDER)
args = ap.parse_args()
cmd = args.cmd
if cmd and cmd[0] == "--":
cmd = cmd[1:]
if not cmd:
print("gstack-detach: no command given (usage: gstack-detach [opts] -- CMD...)", file=sys.stderr)
sys.exit(2)
args.cmd = cmd
log = args.log or run_scoped_log(args.label)
os.makedirs(os.path.dirname(log) or ".", exist_ok=True)
open(log, "ab").close()
# Detach: fork so the launching shell returns immediately, then setsid in the
# child to escape the harness's process group / controlling terminal.
if os.fork() > 0:
# flush BEFORE os._exit — os._exit skips stdio buffer flush, which would
# otherwise drop this line and leave the caller without the log path.
print(f"gstack-detach LOG {log}", flush=True)
os._exit(0)
os.setsid()
devnull = os.open(os.devnull, os.O_RDWR)
os.dup2(devnull, 0)
lf = os.open(log, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o644)
os.dup2(lf, 1)
os.dup2(lf, 2)
try:
child_run(args, log)
except Exception as e: # never leave the log without a terminal marker
log_line(log, f"### gstack-detach ERROR {e!r} ### {_now()}")
log_line(log, f"### gstack-detach EXIT=error ### {_now()}")
os._exit(0)
if __name__ == "__main__":
main()
+74 -5
View File
@@ -17,6 +17,9 @@
# --check-mismatch detect meaningful gaps between declared and observed.
# --migrate migrate builder-profile.jsonl → developer-profile.json.
# Idempotent; archives the source file on success.
# --log-session append a session entry (from /office-hours) to
# sessions[] and update aggregates. Required fields:
# date, mode. Silent skip on invalid input.
#
# Profile file: ~/.gstack/developer-profile.json (unified schema — see
# docs/designs/PLAN_TUNING_V0.md). Event file: ~/.gstack/projects/{SLUG}/
@@ -25,7 +28,8 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"
LEGACY_FILE="$GSTACK_HOME/builder-profile.jsonl"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
@@ -154,6 +158,65 @@ ensure_profile() {
EOF
}
# -----------------------------------------------------------------------
# Record session: append a session entry from /office-hours to sessions[]
# and update aggregates (signals_accumulated, resources_shown, topics).
# Fix for #1671: the writer side of the v1.0.0.0 migration. Reader and
# writer now share the same file.
# Silent skip on invalid input (matches gstack-timeline-log:22-26 pattern).
# -----------------------------------------------------------------------
do_log_session() {
local INPUT="${1:-}"
if [ -z "$INPUT" ]; then
return 0
fi
# Validate: input must be parseable JSON with required fields (date, mode).
if ! printf '%s' "$INPUT" | bun -e "
const j = JSON.parse(await Bun.stdin.text());
if (!j.date || !j.mode) process.exit(1);
" 2>/dev/null; then
return 0
fi
ensure_profile
local TMPOUT
TMPOUT=$(mktemp "$GSTACK_HOME/developer-profile.json.XXXXXX.tmp")
trap 'rm -f "$TMPOUT"' EXIT
PROFILE_FILE_PATH="$PROFILE_FILE" RECORD_INPUT="$INPUT" TMPOUT_PATH="$TMPOUT" bun -e "
const fs = require('fs');
const entry = JSON.parse(process.env.RECORD_INPUT);
if (!entry.ts) entry.ts = new Date().toISOString();
const profile = JSON.parse(fs.readFileSync(process.env.PROFILE_FILE_PATH, 'utf-8'));
profile.sessions = profile.sessions || [];
profile.sessions.push(entry);
profile.signals_accumulated = profile.signals_accumulated || {};
for (const s of (entry.signals || [])) {
profile.signals_accumulated[s] = (profile.signals_accumulated[s] || 0) + 1;
}
profile.resources_shown = profile.resources_shown || [];
const resSet = new Set(profile.resources_shown);
for (const r of (entry.resources_shown || [])) resSet.add(r);
profile.resources_shown = Array.from(resSet);
profile.topics = profile.topics || [];
const topicSet = new Set(profile.topics);
for (const t of (entry.topics || [])) topicSet.add(t);
profile.topics = Array.from(topicSet);
fs.writeFileSync(process.env.TMPOUT_PATH, JSON.stringify(profile, null, 2));
"
mv "$TMPOUT" "$PROFILE_FILE"
trap - EXIT
"$SCRIPT_DIR/gstack-brain-enqueue" "developer-profile.json" 2>/dev/null &
}
# -----------------------------------------------------------------------
# Read: emit legacy KEY: VALUE output for /office-hours compat.
# -----------------------------------------------------------------------
@@ -168,14 +231,19 @@ do_read() {
else if (count >= 4) tier = 'regular';
else if (count >= 1) tier = 'welcome_back';
const last = sessions[count - 1] || {};
const prev = sessions[count - 2] || {};
// LAST_* / CROSS_PROJECT must reflect real sessions, not resource-tracking
// events (the Phase 6 auto-append). Without this filter, a session's
// resources entry written immediately after the real session would clobber
// LAST_PROJECT/LAST_ASSIGNMENT/LAST_DESIGN_TITLE.
const realSessions = sessions.filter(e => e.mode !== 'resources');
const last = realSessions[realSessions.length - 1] || {};
const prev = realSessions[realSessions.length - 2] || {};
const crossProject = prev.project_slug && last.project_slug
? prev.project_slug !== last.project_slug
: false;
const designs = sessions.map(e => e.design_doc || '').filter(Boolean);
const designTitles = sessions
const designs = realSessions.map(e => e.design_doc || '').filter(Boolean);
const designTitles = realSessions
.map(e => (e.design_doc ? (e.project_slug || 'unknown') : ''))
.filter(Boolean);
@@ -441,6 +509,7 @@ case "$CMD" in
--vibe) do_vibe ;;
--check-mismatch) do_check_mismatch ;;
--migrate) do_migrate ;;
--log-session) do_log_session "$@" ;;
--help|-h) sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||' ;;
*)
echo "gstack-developer-profile: unknown subcommand '$CMD'" >&2
+5 -2
View File
@@ -57,7 +57,7 @@ while IFS= read -r f; do
*.md) DOCS=true ;;
# Config
package.json|package-lock.json|yarn.lock|bun.lockb) CONFIG=true ;;
package.json|package-lock.json|yarn.lock|bun.lock|bun.lockb) CONFIG=true ;;
Gemfile|Gemfile.lock) CONFIG=true ;;
*.yml|*.yaml) CONFIG=true ;;
.github/*) CONFIG=true ;;
@@ -75,7 +75,10 @@ while IFS= read -r f; do
# Backend: everything else that's code (excluding views/components already matched)
*.rb|*.py|*.go|*.rs|*.java|*.php|*.ex|*.exs) BACKEND=true ;;
*.ts|*.js) BACKEND=true ;; # Non-component TS/JS is backend
# Non-component TS/JS is backend. Include ESM/CJS (.mjs/.cjs) and
# explicit-module TS (.mts/.cts) — #1810: these matched no category, so an
# ESM/CJS-only PR skipped the backend reviewer entirely.
*.ts|*.js|*.mjs|*.cjs|*.mts|*.cts) BACKEND=true ;;
esac
done <<< "$FILES"
+181
View File
@@ -0,0 +1,181 @@
#!/usr/bin/env bash
# gstack-distill-apply — apply a single distillation proposal after user Y.
#
# Plan-tune cathedral T11. Reads distillation-proposals.json, applies the
# Nth proposal to the right surface:
#
# preference → gstack-question-preference --write
# declared-nudge → atomic update to ~/.gstack/developer-profile.json declared
# memory-nugget → append to ~/.gstack/free-text-memory.json (local fallback)
#
# Always confirm before calling this from the skill — the bin assumes the user
# already approved (Codex #15 trust boundary). The skill template (/plan-tune
# distill review section) handles the confirm UX.
#
# gbrain integration: when gbrain is configured, the skill template ALSO
# invokes mcp__gbrain__put_page / extract_facts / add_tag in the same turn
# (those are MCP tools, not CLI-callable). Pass --gbrain-published true to
# mark the proposal as mirrored to gbrain. The local file always gets the
# write so it's the durable source-of-truth even on machines without gbrain.
#
# Usage:
# gstack-distill-apply --proposal <N> # apply Nth proposal
# gstack-distill-apply --proposal <N> --gbrain-published true
# gstack-distill-apply --list # show pending proposals
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
MEMORY_FILE="$GSTACK_HOME/free-text-memory.json"
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"
ACTION="apply"
PROPOSAL_IDX=""
GBRAIN_PUBLISHED="false"
while [ $# -gt 0 ]; do
case "$1" in
--proposal) PROPOSAL_IDX="$2"; shift 2 ;;
--gbrain-published) GBRAIN_PUBLISHED="$2"; shift 2 ;;
--list) ACTION="list"; shift ;;
--help|-h)
sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
exit 0
;;
*) echo "unknown arg: $1" >&2; exit 1 ;;
esac
done
if [ ! -f "$PROPOSAL_FILE" ]; then
echo "NO_PROPOSALS: $PROPOSAL_FILE missing — run gstack-distill-free-text first"
exit 0
fi
if [ "$ACTION" = "list" ]; then
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" bun -e '
const fs = require("fs");
const p = JSON.parse(fs.readFileSync(process.env.PROPOSAL_FILE_PATH, "utf-8"));
const proposals = p.proposals || [];
if (proposals.length === 0) { console.log("(no proposals)"); process.exit(0); }
console.log("GENERATED: " + p.generated_at);
console.log("SOURCE_EVENTS: " + (p.source_event_count || 0));
proposals.forEach((pr, i) => {
console.log("");
console.log("[" + i + "] " + (pr.kind || "?") + " (confidence: " + (pr.confidence || "?") + ")");
if (pr.rationale) console.log(" rationale: " + pr.rationale);
if (pr.kind === "preference") {
console.log(" question_id: " + pr.question_id);
console.log(" preference: " + pr.preference);
} else if (pr.kind === "declared-nudge") {
console.log(" dimension: " + pr.dimension);
console.log(" direction: " + pr.direction + " (" + (pr.magnitude || "?") + ")");
} else if (pr.kind === "memory-nugget") {
console.log(" nugget: " + pr.nugget);
console.log(" signal_keys: " + JSON.stringify(pr.applies_to_signal_keys || []));
}
if (pr.source_quotes && pr.source_quotes.length) {
console.log(" quotes:");
pr.source_quotes.forEach((q) => console.log(" - \"" + q + "\""));
}
});
'
exit 0
fi
if [ -z "$PROPOSAL_IDX" ]; then
echo "--proposal <N> required" >&2
exit 1
fi
# Apply via bun. Each kind has its own surface.
mkdir -p "$PROJECT_DIR"
PROPOSAL_IDX="$PROPOSAL_IDX" \
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" \
MEMORY_FILE_PATH="$MEMORY_FILE" \
PROFILE_FILE_PATH="$PROFILE_FILE" \
PREF_BIN="$SCRIPT_DIR/gstack-question-preference" \
GBRAIN_PUBLISHED="$GBRAIN_PUBLISHED" \
bun -e '
const fs = require("fs");
const { spawnSync } = require("child_process");
const idx = parseInt(process.env.PROPOSAL_IDX, 10);
const p = JSON.parse(fs.readFileSync(process.env.PROPOSAL_FILE_PATH, "utf-8"));
const proposals = p.proposals || [];
if (!Number.isInteger(idx) || idx < 0 || idx >= proposals.length) {
process.stderr.write("invalid --proposal index " + idx + " (have " + proposals.length + ")\n");
process.exit(1);
}
const pr = proposals[idx];
const stamp = new Date().toISOString();
// Memory-nugget: always write to local file (durable source-of-truth even
// when gbrain is configured — gbrain is mirror, file is canon for the
// PreToolUse hook injection path in Layer 8).
if (pr.kind === "memory-nugget") {
const memPath = process.env.MEMORY_FILE_PATH;
let mem = { nuggets: [] };
try { mem = JSON.parse(fs.readFileSync(memPath, "utf-8")); } catch {}
if (!Array.isArray(mem.nuggets)) mem.nuggets = [];
mem.nuggets.push({
nugget: pr.nugget,
applies_to_signal_keys: pr.applies_to_signal_keys || [],
applied_at: stamp,
gbrain_published: process.env.GBRAIN_PUBLISHED === "true",
source_quotes: pr.source_quotes || [],
});
const tmp = memPath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(mem, null, 2));
fs.renameSync(tmp, memPath);
console.log("APPLIED: memory-nugget appended to " + memPath);
}
// Preference: route through gstack-question-preference for the user-origin
// gate + event audit trail. source=plan-tune is the allowed value since
// the user opt-in came from inside /plan-tune.
if (pr.kind === "preference") {
const res = spawnSync(process.env.PREF_BIN, [
"--write",
JSON.stringify({
question_id: pr.question_id,
preference: pr.preference,
source: "plan-tune",
free_text: (pr.source_quotes || []).join(" | ").slice(0, 300),
}),
], { encoding: "utf-8", stdio: ["ignore", "pipe", "pipe"], timeout: 5000 });
if (res.status !== 0) {
process.stderr.write("preference apply failed: " + (res.stderr || res.stdout) + "\n");
process.exit(1);
}
console.log("APPLIED: preference " + pr.question_id + " → " + pr.preference);
}
// Declared-nudge: atomic update to developer-profile.json declared. Magnitude
// tiers: small=0.05, medium=0.10, large=0.15. Clamp to [0, 1].
if (pr.kind === "declared-nudge") {
const mag = { small: 0.05, medium: 0.10, large: 0.15 }[pr.magnitude || "small"] || 0.05;
const delta = pr.direction === "down" ? -mag : mag;
const profilePath = process.env.PROFILE_FILE_PATH;
let profile = {};
try { profile = JSON.parse(fs.readFileSync(profilePath, "utf-8")); } catch {}
profile.declared = profile.declared || {};
const cur = typeof profile.declared[pr.dimension] === "number" ? profile.declared[pr.dimension] : 0.5;
const next = Math.max(0, Math.min(1, cur + delta));
profile.declared[pr.dimension] = +next.toFixed(3);
profile.declared_at = stamp;
const tmp = profilePath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(profile, null, 2));
fs.renameSync(tmp, profilePath);
console.log("APPLIED: declared." + pr.dimension + " " + cur + " → " + profile.declared[pr.dimension]);
}
// Mark the proposal as applied so /plan-tune list shows it consumed.
pr.applied_at = stamp;
pr.gbrain_published = process.env.GBRAIN_PUBLISHED === "true";
const tmp = process.env.PROPOSAL_FILE_PATH + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(p, null, 2));
fs.renameSync(tmp, process.env.PROPOSAL_FILE_PATH);
'
+272
View File
@@ -0,0 +1,272 @@
#!/usr/bin/env bash
# gstack-distill-free-text — Layer 8 "dream cycle" batch distiller.
#
# Reads auq-other free-text events from this project's question-log.jsonl,
# sends them to Claude via the Anthropic SDK, and writes structured proposals
# the user can review via /plan-tune distill. Proposals require explicit
# user Y before applying — never autonomous (Codex #15 trust boundary).
#
# Usage:
# gstack-distill-free-text # sync, prompts at end
# gstack-distill-free-text --background # spawn detached; results
# # surface on next /plan-tune
# gstack-distill-free-text --dry-run # show prompt, no API call
# gstack-distill-free-text --status # show last-run stats
#
# No rate cap — the natural rate of free-text events (rare; user has to type
# "Other" then content) bounds this loop already. Each Haiku call is ~$0.01,
# so even a runaway at one-per-minute would be ~$14/day worst case. The
# cumulative cost log at $GSTACK_STATE_ROOT/distill-cost.jsonl gives full
# auditability via --status when you want it.
# Per D6: Anthropic SDK direct call, fail-loud on missing ANTHROPIC_API_KEY.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
LOG_FILE="$PROJECT_DIR/question-log.jsonl"
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
COST_LOG="$GSTACK_HOME/distill-cost.jsonl"
mkdir -p "$PROJECT_DIR"
MODE="sync"
case "${1:-}" in
--background) MODE="background" ;;
--dry-run) MODE="dry-run" ;;
--status) MODE="status" ;;
--help|-h)
sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
exit 0
;;
'') ;;
*) echo "unknown arg: $1" >&2; exit 1 ;;
esac
# --- Status subcommand --------------------------------------------------
if [ "$MODE" = "status" ]; then
COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
const fs = require("fs");
const slug = process.env.SLUG_PATH;
const path = process.env.COST_LOG_PATH;
if (!fs.existsSync(path)) { console.log("no distill runs yet"); process.exit(0); }
const lines = fs.readFileSync(path, "utf-8").trim().split("\n").filter(Boolean);
const mine = lines.map((l) => JSON.parse(l)).filter((e) => e.slug === slug);
if (mine.length === 0) { console.log("no distill runs yet for slug=" + slug); process.exit(0); }
const totalUsd = mine.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
const todayIso = new Date().toISOString().slice(0, 10);
const today = mine.filter((e) => (e.ts || "").startsWith(todayIso));
const todayUsd = today.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
console.log("RUNS: " + mine.length);
console.log("TODAY: " + today.length + " run(s), $" + todayUsd.toFixed(4));
console.log("ESTIMATED_TOTAL_USD: $" + totalUsd.toFixed(4));
const last = mine[mine.length - 1];
console.log("LAST_RUN: " + (last.ts || "?") + " | " + (last.proposals_count || 0) + " proposals");
'
exit 0
fi
# --- Background mode: detach + invoke self synchronously ---------------
if [ "$MODE" = "background" ]; then
nohup "$0" >/dev/null 2>&1 &
echo "DISTILL_SPAWNED: pid=$!"
exit 0
fi
# No rate cap. Natural input rate (free-text events are rare) + Haiku price
# (~$0.01/run) keep this bounded. Use --status to audit spend.
# --- Gather unprocessed auq-other events from this project -------------
if [ ! -f "$LOG_FILE" ]; then
echo "NO_LOG: no question-log.jsonl in $PROJECT_DIR"
exit 0
fi
EVENTS_JSON=$(LOG_FILE_PATH="$LOG_FILE" bun -e '
const fs = require("fs");
const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").filter(Boolean);
const out = [];
for (const l of lines) {
try {
const e = JSON.parse(l);
if (e.source === "auq-other" && !e.distilled_at && e.free_text) {
out.push({
ts: e.ts,
question_id: e.question_id,
question_summary: e.question_summary,
free_text: e.free_text,
session_id: e.session_id,
});
}
} catch {}
}
process.stdout.write(JSON.stringify(out));
')
EVENT_COUNT=$(printf '%s' "$EVENTS_JSON" | bun -e 'const a = JSON.parse(await Bun.stdin.text()); console.log(a.length);')
if [ "$EVENT_COUNT" -eq 0 ]; then
echo "NO_FREE_TEXT: nothing to distill"
exit 0
fi
# --- Build distill prompt ---------------------------------------------
# Heredoc into temp file (avoids $(cat <<'PROMPT'...) which choked the
# bash parser on apostrophes elsewhere in the script).
DISTILL_PROMPT_FILE=$(mktemp)
trap 'rm -f "$DISTILL_PROMPT_FILE"' EXIT
cat > "$DISTILL_PROMPT_FILE" <<'PROMPT'
You are gstack dream-cycle distiller. Below are free-text responses the
user typed into AskUserQuestion prompts (option "Other") across recent gstack
sessions. For each response, extract structured signal that should update the
user plan-tune profile or preferences.
Return strict JSON with this shape:
{
"proposals": [
{
"kind": "preference" | "declared-nudge" | "memory-nugget",
"confidence": 0.0-1.0,
"source_quotes": ["<verbatim quote 1>", "<verbatim quote 2>"],
"question_id": "<id>",
"preference": "never-ask" | "always-ask" | "ask-only-for-one-way",
"dimension": "scope_appetite | risk_tolerance | detail_preference | autonomy | architecture_care",
"direction": "up | down",
"magnitude": "small | medium | large",
"rationale": "<one sentence>",
"nugget": "<one-line memory>",
"applies_to_signal_keys": ["scope-appetite", "..."]
}
]
}
Rules:
- Reject any proposal where confidence < 0.7.
- Quote VERBATIM from the user free_text. Never paraphrase a source quote.
- A single user response may produce multiple proposals.
- If nothing meaningful to extract, return {"proposals": []}.
- No commentary outside the JSON.
PROMPT
DISTILL_PROMPT=$(cat "$DISTILL_PROMPT_FILE")
# --- Dry-run: emit prompt + events, exit ------------------------------
if [ "$MODE" = "dry-run" ]; then
echo "=== DISTILL PROMPT ==="
echo "$DISTILL_PROMPT"
echo
echo "=== EVENTS ($EVENT_COUNT) ==="
echo "$EVENTS_JSON" | bun -e 'console.log(JSON.stringify(JSON.parse(await Bun.stdin.text()), null, 2));'
exit 0
fi
# --- SDK call: fail-loud on missing key -------------------------------
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
cat <<EOF >&2
gstack-distill-free-text: ANTHROPIC_API_KEY not set.
Dream-cycle distillation needs an API key for the SDK call. Set
ANTHROPIC_API_KEY in your environment, or run with --dry-run to see
what would be sent without actually calling.
Note: this is a separate billing/auth surface from your interactive
Claude Code session (per Codex correction in D6).
EOF
exit 1
fi
# Run the SDK call in bun. Emits JSON: {proposals_count, cost_usd_est}.
RESULT=$(EVENTS_JSON="$EVENTS_JSON" DISTILL_PROMPT="$DISTILL_PROMPT" \
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" LOG_FILE_PATH="$LOG_FILE" \
ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
bun --cwd "$ROOT_DIR" -e '
const fs = require("fs");
const Anthropic = require("@anthropic-ai/sdk").default;
const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
const events = JSON.parse(process.env.EVENTS_JSON);
const prompt = process.env.DISTILL_PROMPT + "\n\nFREE-TEXT RESPONSES (JSON array):\n" + JSON.stringify(events, null, 2);
// Pricing (Haiku 4.5 — cheap, fast, sufficient for structured extraction).
// Per token, USD: input $0.001/1k = 1e-6, output $0.005/1k = 5e-6.
const INPUT_PER_TOKEN = 1e-6;
const OUTPUT_PER_TOKEN = 5e-6;
const resp = await client.messages.create({
model: "claude-haiku-4-5-20251001",
max_tokens: 4096,
messages: [{ role: "user", content: prompt }],
});
const text = resp.content.map((b) => (b.type === "text" ? b.text : "")).join("");
// Strip optional fenced code blocks the model may wrap JSON in.
const stripped = text.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
let parsed;
try { parsed = JSON.parse(stripped); } catch (e) {
process.stderr.write("DISTILL: model returned non-JSON: " + text.slice(0, 200) + "\n");
process.exit(1);
}
const proposals = Array.isArray(parsed.proposals) ? parsed.proposals : [];
// Keep only proposals with confidence >= 0.7 (model is told this rule;
// double-check in case it slipped).
const filtered = proposals.filter((p) => typeof p.confidence === "number" && p.confidence >= 0.7);
// Write proposals file (overwrite — only the latest run is reviewable).
fs.writeFileSync(process.env.PROPOSAL_FILE_PATH, JSON.stringify({
generated_at: new Date().toISOString(),
source_event_count: events.length,
proposals: filtered,
}, null, 2));
// Mark source events as distilled_at so they do not re-propose.
// Update question-log.jsonl in place: read all, rewrite with distilled_at
// set on the matching events. Match by ts + question_id.
const logPath = process.env.LOG_FILE_PATH;
const distilledAt = new Date().toISOString();
const matchKeys = new Set(events.map((e) => (e.ts || "") + "::" + (e.question_id || "")));
const lines = fs.readFileSync(logPath, "utf-8").split("\n");
const out = [];
for (const ln of lines) {
if (!ln.trim()) { out.push(ln); continue; }
try {
const e = JSON.parse(ln);
const key = (e.ts || "") + "::" + (e.question_id || "");
if (matchKeys.has(key)) {
e.distilled_at = distilledAt;
out.push(JSON.stringify(e));
} else {
out.push(ln);
}
} catch { out.push(ln); }
}
fs.writeFileSync(logPath, out.join("\n"));
// Cost estimate from usage tokens.
const usage = resp.usage || {};
const inTok = usage.input_tokens || 0;
const outTok = usage.output_tokens || 0;
const cost = inTok * INPUT_PER_TOKEN + outTok * OUTPUT_PER_TOKEN;
process.stdout.write(JSON.stringify({
proposals_count: filtered.length,
rejected_low_confidence: proposals.length - filtered.length,
input_tokens: inTok,
output_tokens: outTok,
cost_usd_est: cost,
}));
')
# Append cost log line.
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
echo "{\"ts\":\"$TS\",\"slug\":\"$SLUG\",$(echo "$RESULT" | sed 's/^{//; s/}$//')}" >> "$COST_LOG"
echo "DISTILL_COMPLETE:"
echo " proposals_file: $PROPOSAL_FILE"
echo " $RESULT"
+25 -1
View File
@@ -18,7 +18,8 @@
* "gstack_brain_sync_mode": "off"|"artifacts-only"|"full",
* "gstack_brain_git": true|false,
* "gstack_artifacts_remote": "https://..." | "",
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db"
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db",
* "gbrain_pooler_mode": "transaction"|"session"|null
* }
*
* Backward compatibility (per plan codex #5): the 9 pre-existing fields stay
@@ -42,6 +43,7 @@ import {
resolveGbrainBin,
readGbrainVersion,
} from "../lib/gbrain-local-status";
import { isTransactionModePooler } from "../lib/gbrain-exec";
const STATE_DIR = process.env.GSTACK_HOME || join(userHome(), ".gstack");
const SCRIPT_DIR = __dirname;
@@ -98,6 +100,17 @@ function detectConfig(): { exists: boolean; engine: "pglite" | "postgres" | null
return { exists: true, engine: null };
}
// --- pooler mode detection (#1435) ---
//
// Reads DATABASE_URL from ~/.gbrain/config.json and checks whether it targets
// a PgBouncer transaction-mode pooler (port 6543). Surfaced so /sync-gbrain
// and /setup-gbrain can advise users when search may require GBRAIN_PREPARE.
function detectPoolerMode(): "transaction" | "session" | "unknown" | null {
const parsed = tryReadJSON(GBRAIN_CONFIG) as { database_url?: string } | null;
if (!parsed?.database_url) return null;
return isTransactionModePooler(parsed.database_url) ? "transaction" : "session";
}
// --- gbrain doctor health (any nonzero exit or non-"ok"/"warnings" status → false) ---
//
// Uses --fast to avoid hanging on a dead DB. Per the local-status classifier
@@ -215,9 +228,20 @@ function main(): void {
gstack_brain_git: detectBrainGit(),
gstack_artifacts_remote: detectArtifactsRemote(),
gbrain_local_status: localEngineStatus({ noCache }),
gbrain_pooler_mode: detectPoolerMode(),
};
process.stdout.write(JSON.stringify(out, null, 2) + "\n");
}
// --is-ok: live engine-status gate. Exits 0 iff gbrain is usable ("ok"), 1
// otherwise. Runs detection live (never reads the possibly-stale
// gbrain-detection.json), so callers — setup, bin/dev-setup, and
// `gstack-config gbrain-refresh` — can decide whether to render the gbrain
// :user variant without duplicating the JSON grep. Prints nothing on stdout.
if (process.argv.includes("--is-ok")) {
const noCache = process.env.GSTACK_DETECT_NO_CACHE === "1";
process.exit(localEngineStatus({ noCache }) === "ok" ? 0 : 1);
}
main();
+110 -11
View File
@@ -19,9 +19,14 @@
# - git
# - network reachability to https://github.com
#
# The pinned commit is declared here rather than resolved dynamically so
# upgrades are explicit and reviewable. Update PINNED_COMMIT when gstack
# verifies compatibility with a new gbrain release.
# gbrain installs at the latest default-branch HEAD by default — the hard pin
# was removed in #1744 (it had drifted ~23 versions behind). Pass
# --pinned-commit <sha> to install a specific commit for reproducibility. A
# minimum-version floor (MIN_GBRAIN_VERSION) hard-fails the install when the
# resulting gbrain is too old for gstack's sync integration, and a fast
# `gbrain doctor` self-test hard-fails a broken install when gbrain is already
# configured. This keeps the version gate that the pin used to provide without
# freezing users 23 releases behind.
#
# Env:
# GBRAIN_INSTALL_DIR — override default install path (~/gbrain)
@@ -33,8 +38,14 @@
set -euo pipefail
# --- defaults ---
PINNED_COMMIT="08b3698e90532b7b66c445e6b1d8cdfe71822802" # gbrain v0.18.2
PINNED_TAG="v0.18.2"
# No version pin by default — install the latest default-branch HEAD (#1744).
# --pinned-commit <sha> overrides for reproducibility.
PINNED_COMMIT=""
PINNED_TAG=""
# Minimum gbrain version gstack's integration is known to work with. The
# `sources list --json` wrapped-object shape + federated sources landed by 0.20;
# older predates the surface gstack drives. Hard-fail below this floor (#1744).
MIN_GBRAIN_VERSION="0.20.0"
GBRAIN_REPO_URL="https://github.com/garrytan/gbrain.git"
DEFAULT_INSTALL_DIR="${GBRAIN_INSTALL_DIR:-$HOME/gbrain}"
INSTALL_DIR="$DEFAULT_INSTALL_DIR"
@@ -113,7 +124,7 @@ elif [ -n "$DETECTED_CLONE" ]; then
else
# Fresh clone path.
if $DRY_RUN; then
log "DRY RUN: would clone $GBRAIN_REPO_URL @ $PINNED_COMMIT → $INSTALL_DIR"
log "DRY RUN: would clone $GBRAIN_REPO_URL ${PINNED_COMMIT:+@ $PINNED_COMMIT }→ $INSTALL_DIR (latest HEAD unless --pinned-commit)"
exit 0
fi
if [ -d "$INSTALL_DIR" ]; then
@@ -121,8 +132,12 @@ else
fi
log "cloning $GBRAIN_REPO_URL → $INSTALL_DIR"
git clone --quiet "$GBRAIN_REPO_URL" "$INSTALL_DIR"
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
log "pinned to $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
if [ -n "$PINNED_COMMIT" ]; then
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
log "checked out pinned commit $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
else
log "installed latest gbrain (default-branch HEAD)"
fi
fi
if $DRY_RUN; then
@@ -131,9 +146,24 @@ if $DRY_RUN; then
fi
# --- install + link ---
# On Windows MSYS/Cygwin shells, bun's postinstall scripts (notably gbrain's
# native-bindings setup) fail to parse path arguments correctly and abort
# `bun install` with a non-zero exit. The package itself installs fine
# without scripts, so detect Windows and pass --ignore-scripts there. The
# `bun link` step below is unaffected.
IS_WINDOWS=0
case "$(uname -s)" in
MINGW*|MSYS*|CYGWIN*|Windows_NT) IS_WINDOWS=1 ;;
esac
if ! $VALIDATE_ONLY; then
log "running bun install in $INSTALL_DIR"
( cd "$INSTALL_DIR" && bun install --silent )
if [ "$IS_WINDOWS" -eq 1 ]; then
log "running bun install --ignore-scripts in $INSTALL_DIR (Windows shell detected)"
( cd "$INSTALL_DIR" && bun install --silent --ignore-scripts )
else
log "running bun install in $INSTALL_DIR"
( cd "$INSTALL_DIR" && bun install --silent )
fi
log "running bun link in $INSTALL_DIR"
( cd "$INSTALL_DIR" && bun link --silent )
fi
@@ -179,5 +209,74 @@ if [ "$actual_norm" != "$expected_norm" ]; then
fi
log "installed gbrain $actual_version from $INSTALL_DIR"
# --- minimum-version floor (#1744) ---
# Unpinning means new installs track gbrain HEAD. Hard-fail if the resulting
# version is below the floor gstack's sync integration needs — same exit-3 posture
# as the PATH-shadow / version-mismatch failures above. A warning here is exactly
# how the data-loss class slipped through, so this gate fails closed.
version_lt() {
# 0 (true) when $1 < $2 by version sort; equal versions are NOT less-than.
[ "$1" = "$2" ] && return 1
[ "$(printf '%s\n%s\n' "$1" "$2" | sort -V | head -1)" = "$1" ]
}
if version_lt "$actual_norm" "$MIN_GBRAIN_VERSION"; then
echo "" >&2
echo "gstack-gbrain-install: gbrain $actual_version is below the minimum gstack-tested version ($MIN_GBRAIN_VERSION)." >&2
echo " gstack's sync integration needs the v0.20+ source/list surface." >&2
echo " Fix: update the gbrain clone at $INSTALL_DIR to a newer release (git pull), then" >&2
echo " re-run /setup-gbrain. Or pass --pinned-commit <sha> to install a specific newer commit." >&2
echo "" >&2
exit 3
fi
# --- functional self-test when gbrain is already configured (#1744) ---
# When a brain config exists (re-install / detected clone), run a fast doctor as
# a hard gate so a broken gbrain is caught at setup, not at data-loss time.
# Pre-init installs skip this (config not written yet); the full
# `/sync-gbrain --dry-run` self-test runs from /setup-gbrain after `gbrain init`.
_GBRAIN_HOME_CHECK="${GBRAIN_HOME:-$HOME/.gbrain}"
if [ -f "$_GBRAIN_HOME_CHECK/config.json" ]; then
if ! gbrain doctor --fast >/dev/null 2>&1; then
echo "" >&2
echo "gstack-gbrain-install: gbrain $actual_version installed but 'gbrain doctor --fast' failed." >&2
echo " Refusing to leave a broken gbrain in place. Run 'gbrain doctor' to see what's wrong," >&2
echo " fix it, then re-run /setup-gbrain." >&2
echo "" >&2
exit 3
fi
log "gbrain doctor --fast passed"
fi
# v1.40.0.0 post-install validation (T6 / codex review #19): --ignore-scripts
# may skip artifacts gbrain needs at runtime, especially on Windows
# MSYS/MINGW where we DID pass --ignore-scripts. `gbrain --version` above
# already confirmed the binary runs; this second probe checks that the
# subcommand surface is reachable (`sources` is the entry point the sync
# stage hits first). If the probe fails, we warn but don't exit non-zero —
# the user may still be able to use other commands.
if ! gbrain sources --help >/dev/null 2>&1; then
echo "" >&2
echo "gstack-gbrain-install: WARNING — gbrain installed but 'gbrain sources --help' did not exit 0." >&2
if [ "$IS_WINDOWS" -eq 1 ]; then
echo " Windows shells skip bun postinstall scripts; some gbrain features may need native build tools." >&2
echo " If /sync-gbrain fails to find subcommands, install gbrain from a non-MSYS shell," >&2
echo " or run: cd $INSTALL_DIR && bun install (without --ignore-scripts)" >&2
else
echo " This may be a transient gbrain CLI issue or a missing native dependency." >&2
echo " If /sync-gbrain fails, re-run: cd $INSTALL_DIR && bun install" >&2
fi
echo "" >&2
fi
echo ""
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"
if [ -n "${VOYAGE_API_KEY:-}" ]; then
echo "Next: gbrain init --pglite --embedding-model voyage:voyage-code-3 --embedding-dimensions 1024"
echo " (or run /setup-gbrain for the full setup flow)"
else
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"
echo ""
echo "Tip: set VOYAGE_API_KEY before init to use voyage-code-3 (best embedding"
echo "model for code retrieval on Voyage). Without it, gbrain falls back to its"
echo "auto-selected provider (OpenAI when OPENAI_API_KEY is set, etc.)."
fi
+14
View File
@@ -27,8 +27,22 @@
# restore), D16 (pooler URL paste hygiene with redacted preview).
# _gstack_gbrain_validate_varname <name> — returns 0 if usable, 2 otherwise.
# `local LC_ALL=C` is load-bearing twice over:
# 1. In many macOS shells the default locale (e.g. en_US.UTF-8) makes `case`
# glob brackets like `[A-Z]` match lowercase letters too. Without the
# LC_ALL=C pin, names like `lower-case` pass validation and then trip
# `printf -v "$varname"` and `export "$varname"` with "not a valid
# identifier" errors the caller can't easily distinguish from other
# failures.
# 2. `local` is required because this file is documented as a sourced helper
# (see header), so a bare `LC_ALL=C` would mutate the caller's locale for
# the rest of the process — silently affecting downstream `sort`, `tr`,
# and any locale-aware glob in the same shell.
# Together they give ASCII-only bracket semantics on both macOS and Linux
# (matching the documented `[A-Z_][A-Z0-9_]*` contract) without leaking.
_gstack_gbrain_validate_varname() {
local name="$1"
local LC_ALL=C
case "$name" in
[A-Z_][A-Z0-9_]*) return 0 ;;
*) return 2 ;;
+17 -1
View File
@@ -339,7 +339,7 @@ cmd_pooler_url() {
# Prefer the singular Session Pooler config when Supabase returns an
# array (response shape can vary by project state). Fall back to the
# first PRIMARY entry if no "session" pool_mode is present.
local db_user db_host db_port db_name
local db_user db_host db_port db_name pool_mode
local first_or_session
if printf '%s' "$resp" | jq -e 'type == "array"' >/dev/null 2>&1; then
first_or_session=$(printf '%s' "$resp" | jq '[.[] | select(.pool_mode == "session")][0] // .[0]')
@@ -351,11 +351,27 @@ cmd_pooler_url() {
db_host=$(printf '%s' "$first_or_session" | jq -r '.db_host // empty')
db_port=$(printf '%s' "$first_or_session" | jq -r '.db_port // empty')
db_name=$(printf '%s' "$first_or_session" | jq -r '.db_name // empty')
pool_mode=$(printf '%s' "$first_or_session" | jq -r '.pool_mode // empty')
if [ -z "$db_user" ] || [ -z "$db_host" ] || [ -z "$db_port" ] || [ -z "$db_name" ]; then
die "pooler-url: missing pooler config fields (db_user/db_host/db_port/db_name); re-poll or check project state"
fi
# Issue #1301: New Supabase projects' Management API returns a single
# transaction-mode pooler at port 6543, but the shared pooler tenant
# for fresh projects only listens on the session port 5432. Trusting
# db_port verbatim makes `gbrain init` hang to TCP timeout (transaction
# port unreachable) before falling into "tenant not found"-style errors
# that look like auth bugs. Rewrite transaction/6543 -> session/5432.
# Override with GSTACK_SUPABASE_TRUST_API_PORT=1 if a future API version
# starts returning a working transaction port and this rewrite is wrong.
if [ "${GSTACK_SUPABASE_TRUST_API_PORT:-0}" != "1" ] \
&& [ "$pool_mode" = "transaction" ] && [ "$db_port" = "6543" ]; then
echo "pooler-url: API returned transaction pooler (port 6543); shared pooler for new projects listens on session port 5432 — rewriting (set GSTACK_SUPABASE_TRUST_API_PORT=1 to disable)" >&2
db_port=5432
pool_mode="session"
fi
local url="postgresql://${db_user}:${DB_PASS}@${db_host}:${db_port}/${db_name}"
if $json_mode; then
+948 -70
View File
File diff suppressed because it is too large Load Diff
+13 -6
View File
@@ -273,16 +273,23 @@ function resolveClaudeCodeCwd(
return null;
}
function extractCwdFromJsonl(filePath: string): string | null {
export function extractCwdFromJsonl(filePath: string): string | null {
// Read a capped prefix so huge JSONL files don't blow up memory. 64KB
// comfortably fits the largest observed session headers; the old 8KB cap
// would sometimes fall inside a single long line and silently drop the
// project (JSON.parse failure on the truncated tail).
const MAX_BYTES = 64 * 1024;
const MAX_LINES = 30;
try {
// Read only the first 8KB to avoid loading huge JSONL files into memory
const fd = openSync(filePath, "r");
const buf = Buffer.alloc(8192);
const bytesRead = readSync(fd, buf, 0, 8192, 0);
const buf = Buffer.alloc(MAX_BYTES);
const bytesRead = readSync(fd, buf, 0, MAX_BYTES, 0);
closeSync(fd);
const text = buf.toString("utf-8", 0, bytesRead);
const lines = text.split("\n").slice(0, 15);
for (const line of lines) {
// Drop the final segment — it may be an incomplete line at the cap boundary.
const parts = text.split("\n");
const completeLines = parts.length > 1 ? parts.slice(0, -1) : parts;
for (const line of completeLines.slice(0, MAX_LINES)) {
if (!line.trim()) continue;
try {
const obj = JSON.parse(line);
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# gstack-ios-qa-daemon — Mac-side daemon that brokers tailnet/loopback traffic
# to a connected iPhone running the in-app StateServer over the CoreDevice USB
# tunnel. Single-instance via flock on ~/.gstack/ios-qa-daemon.pid.
#
# Usage:
# gstack-ios-qa-daemon # loopback-only (local USB)
# gstack-ios-qa-daemon --tailnet # additionally open tailnet listener
#
# Environment:
# GSTACK_IOS_DAEMON_PORT — loopback listener port (default 9099)
# GSTACK_IOS_TARGET_UDID — target iOS device UDID (optional; otherwise
# the first paired connected device is used)
# GSTACK_IOS_TARGET_BUNDLE_ID — bundle ID of the iOS app hosting StateServer
# (default com.gstack.iosqa.fixture)
#
# Readiness protocol: prints `READY: port=<n> pid=<pid>` to stdout once both
# listeners are bound. Spawners read stdin with a ~5s timeout to confirm.
#
# Exits cleanly when no active loopback clients are connected AND no remote
# session tokens are outstanding.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
GSTACK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
ENTRY="$GSTACK_DIR/ios-qa/daemon/src/index.ts"
if [ ! -f "$ENTRY" ]; then
echo "gstack-ios-qa-daemon: missing $ENTRY (gstack install incomplete?)" >&2
exit 1
fi
if ! command -v bun >/dev/null 2>&1; then
echo "gstack-ios-qa-daemon: bun runtime not on PATH — install from https://bun.sh" >&2
exit 1
fi
exec bun run "$ENTRY" "$@"
+28
View File
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# gstack-ios-qa-mint — manage the tailnet allowlist for remote iOS QA agents.
#
# This is the owner-grant path: it writes identities into the local allowlist
# so a remote agent on the tailnet can self-service mint a session token via
# POST /auth/mint against the daemon.
#
# Run `gstack-ios-qa-mint --help` for full usage.
#
# Allowlist file: ~/.gstack/ios-qa-allowlist.json (mode 0600).
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
GSTACK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
ENTRY="$GSTACK_DIR/ios-qa/daemon/src/cli-mint.ts"
if [ ! -f "$ENTRY" ]; then
echo "gstack-ios-qa-mint: missing $ENTRY (gstack install incomplete?)" >&2
exit 1
fi
if ! command -v bun >/dev/null 2>&1; then
echo "gstack-ios-qa-mint: bun runtime not on PATH — install from https://bun.sh" >&2
exit 1
fi
exec bun run "$ENTRY" "$@"
+10 -3
View File
@@ -53,18 +53,25 @@ for path in paths:
continue
if line in seen:
continue
# Prefer ISO ts field for sort; fall back to SHA-256.
# Prefer ISO ts field for sort; fall back to SHA-256. The line
# content is the final tiebreaker so the order is total: two
# entries sharing a ts must resolve identically regardless of
# which side they arrive on. Without it, equal-ts entries fall
# back to insertion order (base, ours, theirs), and since ours
# and theirs are swapped depending on which machine runs the
# merge, the two sides produce divergent files that never
# converge.
sort_key = None
try:
obj = json.loads(line)
ts = obj.get('ts') or obj.get('timestamp')
if isinstance(ts, str):
sort_key = (0, ts)
sort_key = (0, ts, line)
except (json.JSONDecodeError, ValueError, TypeError):
pass
if sort_key is None:
h = hashlib.sha256(line.encode('utf-8')).hexdigest()
sort_key = (1, h)
sort_key = (1, h, line)
seen[line] = sort_key
except FileNotFoundError:
# Absent base / absent ours / absent theirs are all valid.
+6 -21
View File
@@ -15,6 +15,7 @@ INPUT="$1"
# Validate and sanitize input
VALIDATED=$(printf '%s' "$INPUT" | bun -e "
import { hasInjection } from '$SCRIPT_DIR/../lib/jsonl-store.ts';
const raw = await Bun.stdin.text();
let j;
try { j = JSON.parse(raw); } catch { process.stderr.write('gstack-learnings-log: invalid JSON, skipping\n'); process.exit(1); }
@@ -47,27 +48,11 @@ if (j.source && !ALLOWED_SOURCES.includes(j.source)) {
process.exit(1);
}
// Content sanitization: strip instruction-like patterns from insight field
// These patterns could be used for prompt injection when learnings are loaded into agent context
if (j.insight) {
const INJECTION_PATTERNS = [
/ignore\s+(all\s+)?previous\s+(instructions|context|rules)/i,
/you\s+are\s+now\s+/i,
/always\s+output\s+no\s+findings/i,
/skip\s+(all\s+)?(security|review|checks)/i,
/override[:\s]/i,
/\bsystem\s*:/i,
/\bassistant\s*:/i,
/\buser\s*:/i,
/do\s+not\s+(report|flag|mention)/i,
/approve\s+(all|every|this)/i,
];
for (const pat of INJECTION_PATTERNS) {
if (pat.test(j.insight)) {
process.stderr.write('gstack-learnings-log: insight contains suspicious instruction-like content, rejected\n');
process.exit(1);
}
}
// Content sanitization: shared injection patterns (lib/jsonl-store.ts, D2A) —
// one audited list across learnings + decisions, no drift.
if (j.insight && hasInjection(j.insight)) {
process.stderr.write('gstack-learnings-log: insight contains suspicious instruction-like content, rejected\n');
process.exit(1);
}
// Inject timestamp if not present
+39 -15
View File
@@ -27,35 +27,53 @@ done
LEARNINGS_FILE="$GSTACK_HOME/projects/$SLUG/learnings.jsonl"
# Collect all JSONL files to search
FILES=()
[ -f "$LEARNINGS_FILE" ] && FILES+=("$LEARNINGS_FILE")
# Collect cross-project JSONL files separately so the trust gate can distinguish
# current-project rows from rows loaded from other projects.
CROSS_FILES=()
if [ "$CROSS_PROJECT" = true ]; then
# Add other projects' learnings (max 5, sorted by mtime)
for f in $(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null | head -5); do
FILES+=("$f")
done
# Add other projects' learnings (max 5)
while IFS= read -r f; do
CROSS_FILES+=("$f")
[ ${#CROSS_FILES[@]} -ge 5 ] && break
done < <(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null)
fi
if [ ${#FILES[@]} -eq 0 ]; then
if [ ! -f "$LEARNINGS_FILE" ] && [ ${#CROSS_FILES[@]} -eq 0 ]; then
exit 0
fi
emit_tagged_file() {
local tag="$1"
local file="$2"
local line
while IFS= read -r line || [ -n "$line" ]; do
[ -n "$line" ] && printf '%s\t%s\n' "$tag" "$line"
done < "$file"
}
# Process all files through bun for JSON parsing, decay, dedup, filtering
GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" \
cat "${FILES[@]}" 2>/dev/null | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
{
[ -f "$LEARNINGS_FILE" ] && emit_tagged_file current "$LEARNINGS_FILE"
if [ ${#CROSS_FILES[@]} -gt 0 ]; then
for f in "${CROSS_FILES[@]}"; do
emit_tagged_file cross "$f"
done
fi
} | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
const now = Date.now();
const type = process.env.GSTACK_SEARCH_TYPE || '';
const queryRaw = (process.env.GSTACK_SEARCH_QUERY || '').toLowerCase();
const queryTokens = queryRaw.split(/\s+/).filter(Boolean);
const limit = parseInt(process.env.GSTACK_SEARCH_LIMIT || '10', 10);
const slug = process.env.GSTACK_SEARCH_SLUG || '';
const entries = [];
for (const line of lines) {
for (const taggedLine of lines) {
try {
const tabIndex = taggedLine.indexOf('\t');
const sourceTag = tabIndex === -1 ? 'current' : taggedLine.slice(0, tabIndex);
const line = tabIndex === -1 ? taggedLine : taggedLine.slice(tabIndex + 1);
const e = JSON.parse(line);
if (!e.key || !e.type) continue;
@@ -69,13 +87,19 @@ for (const line of lines) {
// Determine if this is from the current project or cross-project
// Cross-project entries are tagged for display
const isCrossProject = !line.includes(slug) && process.env.GSTACK_SEARCH_CROSS === 'true';
const isCrossProject = sourceTag === 'cross';
e._crossProject = isCrossProject;
// Trust gate: cross-project learnings only loaded if trusted (user-stated)
// Trust gate: cross-project learnings only loaded if trusted (user-stated).
// This prevents prompt injection from one project's AI-generated learnings
// silently influencing reviews in another project.
if (isCrossProject && e.trusted === false) continue;
// #1745: this is an ALLOWLIST, not a denylist. The old equals-false check
// admitted any row where trusted is missing/undefined (legacy rows written
// before the field existed, hand-edited rows, rows from other tools).
// Require trusted to be exactly true. NOTE: this whole block is a
// double-quoted bun -e string, so bash still does command substitution
// inside it. Keep backticks and dollar-paren out of these comments.
if (isCrossProject && e.trusted !== true) continue;
entries.push(e);
} catch {}
+228 -39
View File
@@ -54,7 +54,7 @@ import {
rmSync,
} from "fs";
import { join, basename, dirname } from "path";
import { execSync, execFileSync, spawnSync, spawn, type ChildProcess } from "child_process";
import { execFileSync, spawnSync, spawn, type ChildProcess } from "child_process";
import { homedir } from "os";
import { createHash } from "crypto";
@@ -64,6 +64,8 @@ import {
detectEngineTier,
withErrorContext,
} from "../lib/gstack-memory-helpers";
import { execGbrainText, spawnGbrainAsync } from "../lib/gbrain-exec";
import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard";
// ── Types ──────────────────────────────────────────────────────────────────
@@ -193,7 +195,7 @@ Options:
--all-history Walk transcripts older than 90 days too.
--sources <list> Comma-separated subset: ${ALL_TYPES.join(",")}
--limit <N> Stop after N pages written (smoke testing).
--no-write Skip gbrain put_page calls (still updates state file).
--no-write Skip gbrain put calls (still updates state file).
Used by tests + dry runs without actual ingest.
--scan-secrets Opt-in per-file gitleaks scan during prepare. Off by
default; gstack-brain-sync already gates the git-push
@@ -809,16 +811,14 @@ let _gbrainAvailability: boolean | null = null;
function gbrainAvailable(): boolean {
if (_gbrainAvailability !== null) return _gbrainAvailability;
try {
execSync("command -v gbrain", { stdio: "ignore" });
// Probe `--help` for the `import` subcommand. gbrain v0.20.0+ ships
// `import <dir>` (batch markdown import via path-authoritative slugs).
// If absent, we surface a single clean error here rather than failing
// the whole stage with a confusing usage message from gbrain itself.
const help = execFileSync("gbrain", ["--help"], {
encoding: "utf-8",
timeout: 5000,
stdio: ["ignore", "pipe", "pipe"],
});
// `gbrain --help` probes only CLI availability, not DB connectivity, so
// it doesn't strictly need DATABASE_URL. But routing through the helper
// keeps the invariant test from chasing exceptions per call site.
const help = execGbrainText(["--help"], { timeout: 5000 });
_gbrainAvailability = /^\s+import\s/m.test(help);
} catch {
_gbrainAvailability = false;
@@ -908,13 +908,23 @@ interface StagingResult {
* Filename = `${slug}.md`. mkdir is recursive. Existing files overwrite.
* Errors per-file are collected; the whole batch is best-effort.
*/
/**
* Staging-relative path for a prepared page's slug. Single source of truth so
* writeStaged() (which mints the map) and the resume-path reconstruction (#1802
* C4) compute identical keys if they diverge, readNewFailures() silently stops
* mapping gbrain's failures back to sources and failed files get marked ingested.
*/
export function stagedRelPath(slug: string): string {
return `${slug}.md`;
}
function writeStaged(prepared: PreparedPage[], stagingDir: string): StagingResult {
mkdirSync(stagingDir, { recursive: true });
const stagedPathToSource = new Map<string, string>();
const errors: Array<{ slug: string; error: string }> = [];
let written = 0;
for (const p of prepared) {
const relPath = `${p.slug}.md`;
const relPath = stagedRelPath(p.slug);
const absPath = join(stagingDir, relPath);
try {
mkdirSync(dirname(absPath), { recursive: true });
@@ -979,7 +989,7 @@ function parseImportJson(stdout: string): ImportJsonResult | null {
* staging-dir-relative filename gbrain saw (e.g. "transcripts/foo.md").
* stagedPathToSource maps that back to the original source file.
*/
function readNewFailures(
export function readNewFailures(
syncFailuresPath: string,
preImportOffset: number,
stagedPathToSource: Map<string, string>,
@@ -1062,7 +1072,7 @@ async function probeMode(args: CliArgs): Promise<ProbeReport> {
}
// Per ED2: ~25-35 min for ~11.7K transcripts = ~150ms/page synchronous
// (gitleaks + render + put_page + embedding). Scale linearly.
// (gitleaks + render + put + embedding). Scale linearly.
const estimateMinutes = Math.max(1, Math.round((newCount + updatedCount) * 0.15 / 60));
return {
@@ -1199,6 +1209,17 @@ function preparePages(
function makeStagingDir(): string {
const dir = join(GSTACK_HOME, `.staging-ingest-${process.pid}-${Date.now()}`);
mkdirSync(dir, { recursive: true });
// Mint the ownership marker (#1802) so cleanupStagingDir() and decideResume()
// can prove this dir was created by us before any recursive delete or resume.
// #1802 C5: fail hard if the marker can't be written — a marker-less dir would
// be refused by the guard forever (leaked, never cleaned). Tear down the
// partial dir and rethrow so the caller fails loudly instead of leaking.
try {
writeFileSync(join(dir, STAGING_MARKER), `${process.pid}\n${Date.now()}\n`, "utf-8");
} catch (err) {
try { rmSync(dir, { recursive: true, force: true }); } catch { /* best-effort */ }
throw err;
}
return dir;
}
@@ -1260,8 +1281,21 @@ function isRemoteHttpMcpMode(): boolean {
* cleanup failure.
*/
function cleanupStagingDir(dir: string): void {
// #1802 deletion chokepoint: never recurse-delete a path we cannot PROVE we
// own. A poisoned resume could otherwise route the repo root here.
const verdict = checkOwnedStagingDir(dir, GSTACK_HOME);
if (!verdict.ok) {
console.error(
`[gbrain] staging cleanup REFUSED: "${dir}" is not an owned staging dir ` +
`(${verdict.reason}). Skipping rm -rf to prevent data loss (#1802).`,
);
return;
}
try {
rmSync(dir, { recursive: true, force: true });
// #1802 C5: delete the realpath-resolved dir the guard validated, not the
// raw input — closes the TOCTOU gap where `dir` is a symlink swapped between
// the check above and this rmSync. canonicalPath is always set when ok.
rmSync(verdict.canonicalPath ?? dir, { recursive: true, force: true });
} catch {
// best-effort
}
@@ -1273,13 +1307,39 @@ function cleanupStagingDir(dir: string): void {
* 1. forward the signal to the child (otherwise gbrain orphans, holds the
* PGLite write lock, and burns CPU observed during 2026-05-10 cold-run
* testing)
* 2. synchronously clean up the staging dir BEFORE process.exit (otherwise
* finally blocks in async callers don't run after process.exit from
* inside a signal handler, leaking the staging dir on every interrupt)
* 2. PRESERVE the staging dir when gbrain has written an import-checkpoint
* pointing at it (the next /sync-gbrain run can resume from
* processedIndex+1). Otherwise synchronously clean up before
* process.exit, since `finally` blocks in ingestPass never run after
* process.exit fires from inside a signal handler.
*
* Resume semantics added for #1611: prior behavior unconditionally cleaned
* up the staging dir on SIGTERM, so the gbrain checkpoint always pointed at
* a missing dir and the next run had to restage from scratch.
*/
let _activeImportChild: ChildProcess | null = null;
let _activeStagingDir: string | null = null;
let _signalHandlersInstalled = false;
/**
* Returns true if gbrain has written ~/.gbrain/import-checkpoint.json with
* `dir` matching the current active staging dir. Indicates the next run
* can resume against this staging dir.
*/
function stagingDirIsCheckpointed(stagingDir: string): boolean {
try {
// Read HOME from env so tests can redirect; homedir() caches.
const home = process.env.HOME || homedir();
const cpPath = join(home, ".gbrain", "import-checkpoint.json");
if (!existsSync(cpPath)) return false;
const raw = readFileSync(cpPath, "utf-8");
const cp = JSON.parse(raw) as { dir?: string };
return cp.dir === stagingDir;
} catch {
return false;
}
}
function installSignalForwarder(): void {
if (_signalHandlersInstalled) return;
_signalHandlersInstalled = true;
@@ -1291,11 +1351,24 @@ function installSignalForwarder(): void {
// child may have already exited between the alive-check and the kill
}
}
// Synchronously clean up the active staging dir before exiting. The async
// `finally` blocks in ingestPass never run after process.exit fires from
// inside this handler, so cleanup has to happen here.
if (_activeStagingDir) {
cleanupStagingDir(_activeStagingDir);
if (stagingDirIsCheckpointed(_activeStagingDir)) {
// Preserve for next-run resume. The orchestrator's decideResume()
// (in gstack-gbrain-sync.ts) will see the checkpoint + dir and
// re-invoke gbrain import against this same staging dir, picking
// up from processedIndex+1. See #1611.
try {
process.stderr.write(
`[memory-ingest] ${signal} received — preserving staging dir for resume: ${_activeStagingDir}\n`,
);
} catch {
// best-effort: stderr may be closed already
}
} else {
// No checkpoint pointing here — the import never reached gbrain or
// crashed before writing one. Clean up so we don't leak the dir.
cleanupStagingDir(_activeStagingDir);
}
_activeStagingDir = null;
}
// Re-raise to default action so the parent actually exits. Without this,
@@ -1311,17 +1384,39 @@ function installSignalForwarder(): void {
* that kill the child on parent SIGTERM/SIGINT. Returns the same shape as
* spawnSync's result so the caller doesn't care which mode was used.
*/
/**
* #1611: the `gbrain import` is the long pole on big brains. Its timeout is
* configurable via GSTACK_INGEST_TIMEOUT_MS (default 30 min, 1min24h) so large
* memory corpora aren't SIGTERM'd mid-import. On timeout we SIGTERM the child,
* which preserves gbrain's import-checkpoint.json (see installSignalForwarder)
* so the next run resumes instead of restarting from scratch.
*/
const DEFAULT_IMPORT_TIMEOUT_MS = 30 * 60 * 1000;
export function resolveImportTimeoutMs(
raw: string | undefined = process.env.GSTACK_INGEST_TIMEOUT_MS,
): number {
if (raw === undefined || raw === "") return DEFAULT_IMPORT_TIMEOUT_MS;
const n = Number.parseInt(raw, 10);
if (!Number.isFinite(n) || Number.isNaN(n) || n < 60_000 || n > 86_400_000) {
console.error(
`[memory-ingest] GSTACK_INGEST_TIMEOUT_MS="${raw}" invalid (need 6000086400000ms); using ${DEFAULT_IMPORT_TIMEOUT_MS}ms`,
);
return DEFAULT_IMPORT_TIMEOUT_MS;
}
return n;
}
function runGbrainImport(
stagingDir: string,
timeoutMs: number,
): Promise<{ status: number | null; stdout: string; stderr: string }> {
): Promise<{ status: number | null; stdout: string; stderr: string; timedOut: boolean }> {
installSignalForwarder();
return new Promise((resolve) => {
const child = spawn(
"gbrain",
["import", stagingDir, "--no-embed", "--json"],
{ stdio: ["ignore", "pipe", "pipe"] },
);
// Seed DATABASE_URL from gbrain's own config so this stage works
// inside Next.js / Prisma / Rails projects with their own
// .env.local (codex review #7 — defense in depth on top of the
// parent gstack-gbrain-sync seeding the bun grandchild's env).
const child = spawnGbrainAsync(["import", stagingDir, "--no-embed", "--json"]);
_activeImportChild = child;
let stdout = "";
let stderr = "";
@@ -1347,6 +1442,7 @@ function runGbrainImport(
status: timedOut ? null : status,
stdout,
stderr,
timedOut,
});
});
child.on("error", (err) => {
@@ -1356,6 +1452,7 @@ function runGbrainImport(
status: null,
stdout,
stderr: stderr + `\n[spawn-error] ${(err as Error).message}`,
timedOut,
});
});
});
@@ -1375,7 +1472,7 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
if (args.noWrite) {
// --no-write: skip the gbrain import call but still record state for
// prepared pages (treat them as ingested for dedup purposes). Matches
// the prior contract from --help: "Skip gbrain put_page calls (still
// the prior contract from --help: "Skip gbrain put calls (still
// updates state file)".
const nowIso = new Date().toISOString();
for (const p of prep.prepared) {
@@ -1445,19 +1542,69 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
// entirely. gstack-brain-sync push will pick the dir up via its allowlist
// and the brain admin's pull job will index transcripts into the remote
// brain. Local PGLite (if any) stays code-only.
//
// Resume branch for #1611: when the orchestrator sets
// GSTACK_INGEST_RESUME_DIR (because gbrain's import-checkpoint.json points
// at an existing dir from a prior SIGTERM'd run), reuse that staging dir
// and skip the prepare/writeStaged phase entirely. gbrain's checkpoint
// tells it where to resume.
const remoteHttpMode = isRemoteHttpMcpMode();
const stagingDir = remoteHttpMode
? makePersistentTranscriptDir()
: makeStagingDir();
const resumeDir = process.env.GSTACK_INGEST_RESUME_DIR;
// #1802 second entry point: this binary is runnable directly, so it must not
// trust GSTACK_INGEST_RESUME_DIR just because it exists — a stale/poisoned env
// could make us `gbrain import` (and later clean up) an arbitrary directory.
// Prove ownership here too, independently of the orchestrator's decideResume.
const resuming = !remoteHttpMode
&& typeof resumeDir === "string"
&& resumeDir.length > 0
&& existsSync(resumeDir)
&& checkOwnedStagingDir(resumeDir, GSTACK_HOME).ok;
if (!remoteHttpMode && resumeDir && resumeDir.length > 0 && !resuming) {
console.error(
`[memory-ingest] ignoring GSTACK_INGEST_RESUME_DIR="${resumeDir}" — not a proven staging dir (#1802); staging fresh.`,
);
}
const stagingDir = resuming
? resumeDir!
: remoteHttpMode
? makePersistentTranscriptDir()
: makeStagingDir();
// Register staging dir with the signal forwarder so SIGTERM/SIGINT can
// synchronously clean it up before process.exit (the async finally block
// below does NOT run after a signal-handler exit). In remote-http mode we
// skip registration — the dir is meant to persist.
// either preserve (when gbrain checkpointed it) or synchronously clean up.
// The async finally block below does NOT run after a signal-handler exit.
// In remote-http mode we skip registration — the dir is meant to persist.
if (!remoteHttpMode) {
_activeStagingDir = stagingDir;
}
// #1802 C3: set when the import-timeout branch leaves a resumable checkpoint
// pointing at this staging dir, so the finally preserves it for the next run
// instead of deleting it (the SIGTERM forwarder's preserve branch only runs
// when the PARENT is signalled, which an internal timeout never does).
let preserveStaging = false;
try {
const staging = writeStaged(prep.prepared, stagingDir);
let staging: StagingResult;
if (resuming) {
// Pages are already on disk from the previous run. Skip writeStaged.
// The "written" count for the verdict reflects what's on disk now;
// gbrain's import will skip already-completed entries via its own
// checkpoint (processedIndex+1).
if (!args.quiet) {
console.error(
`[memory-ingest] resuming previous staging dir ${stagingDir} (skipping prepare phase)`,
);
}
// #1802 C4: reconstruct stagedPathToSource from the prepared pages so
// readNewFailures() can still map gbrain's per-file failures back to
// sources on resume. An empty map made every failed file fall through to
// state-recording — i.e. silently marked ingested despite failing.
const stagedPathToSource = new Map<string, string>();
for (const p of prep.prepared) {
stagedPathToSource.set(stagedRelPath(p.slug), p.source_path);
}
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource };
} else {
staging = writeStaged(prep.prepared, stagingDir);
}
failed += staging.errors.length;
if (!args.quiet && staging.errors.length > 0) {
for (const e of staging.errors.slice(0, 5)) {
@@ -1543,13 +1690,42 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
// spawn, parent termination orphans the gbrain process (observed
// during 2026-05-10 cold-run testing — gbrain kept running 15 min
// after the orchestrator timed out).
const importResult = await runGbrainImport(stagingDir, 30 * 60 * 1000);
const importResult = await runGbrainImport(stagingDir, resolveImportTimeoutMs());
const stdout = importResult.stdout || "";
const stderr = importResult.stderr || "";
const importJson = parseImportJson(stdout);
if (importResult.status !== 0) {
// #1611/#1802 C3: on timeout, gbrain may have written
// import-checkpoint.json so the next /sync-gbrain can resume. But an
// INTERNAL timeout (runGbrainImport kills the child and returns here)
// never signals the parent, so the SIGTERM forwarder's preserve branch
// doesn't run — and the finally would otherwise delete the staging dir
// despite a "checkpoint preserved" message. Mirror the forwarder: preserve
// only when gbrain actually checkpointed against this dir; otherwise let
// the finally clean up (nothing to resume) and say so honestly.
if (importResult.timedOut) {
const mins = Math.round(resolveImportTimeoutMs() / 60000);
const checkpointed = stagingDirIsCheckpointed(stagingDir);
const msg = checkpointed
? `gbrain import timed out after ${mins}min; checkpoint preserved — re-run ` +
`/sync-gbrain to resume (raise GSTACK_INGEST_TIMEOUT_MS for big brains)`
: `gbrain import timed out after ${mins}min before writing a checkpoint; ` +
`re-run /sync-gbrain to restage (raise GSTACK_INGEST_TIMEOUT_MS for big brains)`;
if (checkpointed) preserveStaging = true;
console.error(`[memory-ingest] ${msg}`);
return {
written: 0,
skipped_secret: prep.skippedSecret,
skipped_dedup: prep.skippedDedup,
skipped_unattributed: prep.skippedUnattributed,
failed,
duration_ms: Date.now() - t0,
partial_pages: prep.partialPages,
system_error: msg,
};
}
const tail = (stderr.trim().split("\n").pop() || "").slice(0, 300);
const msg = `gbrain import exited ${importResult.status}: ${tail}`;
console.error(`[memory-ingest] ERR: ${msg}`);
@@ -1645,7 +1821,15 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
);
}
} finally {
cleanupStagingDir(stagingDir);
// #1802 D1: in remote-http mode `stagingDir` is the PERSISTENT transcript
// dir (makePersistentTranscriptDir, under ~/.gstack/transcripts/) that
// gstack-brain-sync push must pick up — it is NOT a `.staging-ingest-*` dir
// and must never be deleted here. The remote-http branch above already
// documents this intent ("Skip the ... cleanupStagingDir paths"), but a
// `finally` runs on its `return`, so the gate has to live here. Gating on
// mode (rather than widening the ownership guard) keeps checkOwnedStagingDir
// strict: it only ever sees `.staging-ingest-*` dirs.
if (!remoteHttpMode && !preserveStaging) cleanupStagingDir(stagingDir);
_activeStagingDir = null;
}
@@ -1745,7 +1929,12 @@ async function main(): Promise<void> {
if (result.system_error) process.exit(1);
}
main().catch((err) => {
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
});
// Guard so the module is import-safe for unit tests (e.g. resolveImportTimeoutMs).
// The orchestrator runs it as `bun gstack-memory-ingest.ts ...`, where
// import.meta.main is true, so the CLI path is unaffected.
if (import.meta.main) {
main().catch((err) => {
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
process.exit(1);
});
}
+31 -6
View File
@@ -24,6 +24,7 @@
* gstack-model-benchmark --prompt "hi" --models claude,gpt,gemini --dry-run
*/
import '../lib/conductor-env-shim';
import * as fs from 'fs';
import * as path from 'path';
import { runBenchmark, formatTable, formatJson, formatMarkdown, type BenchmarkInput } from '../test/helpers/benchmark-runner';
@@ -39,16 +40,40 @@ const ADAPTER_FACTORIES = {
type OutputFormat = 'table' | 'json' | 'markdown';
const CLI_ARGS = process.argv.slice(2);
const VALUE_FLAGS = new Set(['--models', '--prompt', '--workdir', '--timeout-ms', '--output']);
function arg(name: string, def?: string): string | undefined {
const idx = process.argv.findIndex(a => a === name || a.startsWith(name + '='));
const idx = CLI_ARGS.findIndex(a => a === name || a.startsWith(name + '='));
if (idx < 0) return def;
const eqIdx = process.argv[idx].indexOf('=');
if (eqIdx >= 0) return process.argv[idx].slice(eqIdx + 1);
return process.argv[idx + 1];
const eqIdx = CLI_ARGS[idx].indexOf('=');
if (eqIdx >= 0) return CLI_ARGS[idx].slice(eqIdx + 1);
return CLI_ARGS[idx + 1];
}
function flag(name: string): boolean {
return process.argv.includes(name);
return CLI_ARGS.includes(name);
}
function positionalArgs(args: string[]): string[] {
const positional: string[] = [];
for (let i = 0; i < args.length; i++) {
const current = args[i];
if (current === '--') {
positional.push(...args.slice(i + 1));
break;
}
if (current.startsWith('--')) {
const eqIdx = current.indexOf('=');
const flagName = eqIdx >= 0 ? current.slice(0, eqIdx) : current;
if (eqIdx < 0 && VALUE_FLAGS.has(flagName) && i + 1 < args.length) {
i++;
}
continue;
}
positional.push(current);
}
return positional;
}
function parseProviders(s: string | undefined): Array<'claude' | 'gpt' | 'gemini'> {
@@ -78,7 +103,7 @@ function resolvePrompt(positional: string | undefined): string {
}
async function main(): Promise<void> {
const positional = process.argv.slice(2).find(a => !a.startsWith('--'));
const positional = positionalArgs(CLI_ARGS)[0];
const prompt = resolvePrompt(positional);
const providers = parseProviders(arg('--models'));
const workdir = arg('--workdir', process.cwd())!;
+61 -20
View File
@@ -10,7 +10,14 @@
//
// Usage:
// gstack-next-version --base <branch> --bump <major|minor|patch|micro> \
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] [--json]
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] \
// [--version-path <path>] [--json]
//
// VERSION path resolution (monorepo support):
// 1. --version-path <path> CLI flag (highest priority)
// 2. .gstack/version-path file at the repo root (single-line relative path,
// committed so all collaborators benefit)
// 3. "VERSION" at the repo root (default, backward-compatible)
//
// Exit codes:
// 0 — emitted JSON successfully (may include "offline":true or "host":"unknown")
@@ -45,6 +52,7 @@ type Output = {
version: string;
current_version: string;
base_version: string;
version_path: string;
bump: Bump;
host: "github" | "gitlab" | "unknown";
offline: boolean;
@@ -114,6 +122,28 @@ function runCommand(cmd: string, args: string[], timeoutMs = 15000): { ok: boole
};
}
// VERSION-path resolution for monorepos. Priority: CLI flag > .gstack/version-path
// at repo root > "VERSION". Pure function; takes the repo root as an argument so
// tests can drive it with a fixture dir without mocking git.
function resolveVersionPath(override: string | undefined, repoRoot: string): string {
if (override) return override.trim();
const configFile = join(repoRoot, ".gstack", "version-path");
if (existsSync(configFile)) {
try {
const firstLine = readFileSync(configFile, "utf8").split("\n")[0]?.trim() ?? "";
if (firstLine) return firstLine;
} catch {
// fall through to default
}
}
return "VERSION";
}
function repoToplevel(): string {
const r = runCommand("git", ["rev-parse", "--show-toplevel"]);
return r.ok ? r.stdout.trim() : process.cwd();
}
function detectHost(): "github" | "gitlab" | "unknown" {
const remote = runCommand("git", ["remote", "get-url", "origin"]);
if (remote.ok) {
@@ -128,19 +158,19 @@ function detectHost(): "github" | "gitlab" | "unknown" {
return "unknown";
}
function readBaseVersion(base: string, warnings: string[]): string {
function readBaseVersion(base: string, versionPath: string, warnings: string[]): string {
// git fetch is best-effort; we tolerate failure and fall back to whatever
// origin/<base> currently points at.
runCommand("git", ["fetch", "origin", base, "--quiet"], 10000);
const r = runCommand("git", ["show", `origin/${base}:VERSION`]);
const r = runCommand("git", ["show", `origin/${base}:${versionPath}`]);
if (!r.ok) {
warnings.push(`could not read VERSION at origin/${base}; assuming 0.0.0.0`);
warnings.push(`could not read ${versionPath} at origin/${base}; assuming 0.0.0.0`);
return "0.0.0.0";
}
return r.stdout.trim();
}
async function fetchGithubClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
async function fetchGithubClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
const list = runCommand("gh", [
"pr",
"list",
@@ -187,14 +217,18 @@ async function fetchGithubClaimed(base: string, excludePR: number | null, warnin
const pr = queue.shift();
if (!pr) return;
// gh passes branch name via argv, not shell — safe.
// encodeURI handles spaces in subproject paths (e.g. "Tinas Second Brain/...")
// while leaving "/" untouched so the GitHub Contents API gets the path intact.
const content = runCommand("gh", [
"api",
`repos/{owner}/{repo}/contents/VERSION?ref=${encodeURIComponent(pr.headRefName)}`,
`repos/{owner}/{repo}/contents/${encodeURI(versionPath)}?ref=${encodeURIComponent(pr.headRefName)}`,
"-q",
".content",
]);
if (!content.ok) {
warnings.push(`PR #${pr.number}: could not fetch VERSION (fork or private)`);
warnings.push(
`PR #${pr.number}: could not fetch ${versionPath} (fork, private, or wrong path — try --version-path or .gstack/version-path)`,
);
continue;
}
let versionStr: string;
@@ -215,7 +249,7 @@ async function fetchGithubClaimed(base: string, excludePR: number | null, warnin
return { claimed: results, offline: false };
}
async function fetchGitlabClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
async function fetchGitlabClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
const list = runCommand("glab", [
"mr",
"list",
@@ -243,12 +277,15 @@ async function fetchGitlabClaimed(base: string, excludePR: number | null, warnin
}
const results: ClaimedPR[] = [];
for (const mr of mrs) {
// GitLab files API takes the full path URL-encoded (slashes become %2F).
const content = runCommand("glab", [
"api",
`projects/:id/repository/files/VERSION?ref=${encodeURIComponent(mr.source_branch)}`,
`projects/:id/repository/files/${encodeURIComponent(versionPath)}?ref=${encodeURIComponent(mr.source_branch)}`,
]);
if (!content.ok) {
warnings.push(`MR !${mr.iid}: could not fetch VERSION`);
warnings.push(
`MR !${mr.iid}: could not fetch ${versionPath} (wrong path? — try --version-path or .gstack/version-path)`,
);
continue;
}
try {
@@ -285,7 +322,7 @@ function currentRepoSlug(): string {
return m ? m[1] : "";
}
function scanSiblings(root: string | null, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
function scanSiblings(root: string | null, versionPath: string, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
if (!root || !existsSync(root)) return [];
const mySlug = currentRepoSlug();
if (!mySlug) {
@@ -308,7 +345,7 @@ function scanSiblings(root: string | null, claimed: ClaimedPR[], warnings: strin
continue;
}
if (!existsSync(join(p, ".git")) && !existsSync(join(p, ".git/HEAD"))) continue;
const versionFile = join(p, "VERSION");
const versionFile = join(p, versionPath);
if (!existsSync(versionFile)) continue;
let version: string;
try {
@@ -346,12 +383,13 @@ function markActiveSiblings(siblings: Sibling[], baseVersion: Version): Sibling[
});
}
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; help: boolean } {
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; versionPath?: string; help: boolean } {
let base = "";
let bump: Bump | "" = "";
let current = "";
let workspaceRoot: string | undefined;
let excludePR: number | null = null;
let versionPath: string | undefined;
let help = false;
for (let i = 0; i < argv.length; i++) {
const a = argv[i];
@@ -359,6 +397,7 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
else if (a === "--bump") bump = (argv[++i] ?? "") as Bump;
else if (a === "--current-version") current = argv[++i] ?? "";
else if (a === "--workspace-root") workspaceRoot = argv[++i];
else if (a === "--version-path") versionPath = argv[++i];
else if (a === "--exclude-pr") {
const n = Number(argv[++i]);
excludePR = Number.isFinite(n) && n > 0 ? n : null;
@@ -375,7 +414,7 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
console.error(`Error: --bump must be major|minor|patch|micro (got ${bump})`);
process.exit(2);
}
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, help: false };
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, versionPath, help: false };
}
// Auto-detect: if --exclude-pr wasn't passed, check whether the current branch
@@ -392,13 +431,14 @@ async function main() {
const args = parseArgs(process.argv.slice(2));
if (args.help) {
console.log(
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>]",
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>] [--version-path <path>]",
);
process.exit(0);
}
const warnings: string[] = [];
const host = detectHost();
const baseVersion = args.current || readBaseVersion(args.base, warnings);
const versionPath = resolveVersionPath(args.versionPath, repoToplevel());
const baseVersion = args.current || readBaseVersion(args.base, versionPath, warnings);
const baseParsed = parseVersion(baseVersion);
if (!baseParsed) {
console.error(`Error: could not parse base version '${baseVersion}'`);
@@ -413,9 +453,9 @@ async function main() {
let claimed: ClaimedPR[] = [];
let offline = false;
if (host === "github") {
({ claimed, offline } = await fetchGithubClaimed(args.base, excludePR, warnings));
({ claimed, offline } = await fetchGithubClaimed(args.base, versionPath, excludePR, warnings));
} else if (host === "gitlab") {
({ claimed, offline } = await fetchGitlabClaimed(args.base, excludePR, warnings));
({ claimed, offline } = await fetchGitlabClaimed(args.base, versionPath, excludePR, warnings));
} else {
warnings.push("host unknown; queue-awareness unavailable");
}
@@ -433,7 +473,7 @@ async function main() {
const { version: picked, reason } = pickNextSlot(baseParsed, claimedVersions, args.bump);
const workspaceRoot = resolveWorkspaceRoot(args.workspaceRoot);
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, claimed, warnings), baseParsed);
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, versionPath, claimed, warnings), baseParsed);
const activeSiblings = siblings.filter((s) => s.is_active);
// If an active sibling outranks our pick, bump past it (same bump level).
@@ -453,6 +493,7 @@ async function main() {
version: fmtVersion(finalVersion),
current_version: args.current || baseVersion,
base_version: baseVersion,
version_path: versionPath,
bump: args.bump,
host,
offline,
@@ -466,7 +507,7 @@ async function main() {
}
// Pure-function exports for testing
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings };
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings, resolveVersionPath };
// Only run main() when invoked as a script, not when imported by tests.
if (import.meta.main) {
+6 -2
View File
@@ -9,7 +9,7 @@
# CI / container env where HOME may be unset.
#
# Chains:
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA -> $HOME/.gstack -> .gstack
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA (only when CLAUDE_PLUGIN_ROOT=*gstack*) -> $HOME/.gstack -> .gstack
# PLAN_ROOT: GSTACK_PLAN_DIR -> CLAUDE_PLANS_DIR -> $HOME/.claude/plans -> .claude/plans
# TMP_ROOT: TMPDIR -> TMP -> .gstack/tmp (and mkdir -p, best-effort)
#
@@ -21,7 +21,11 @@ set -u
# State root: where gstack writes projects/, sessions/, analytics/.
if [ -n "${GSTACK_HOME:-}" ]; then
_state_root="$GSTACK_HOME"
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ]; then
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ] && echo "${CLAUDE_PLUGIN_ROOT:-}" | grep -qi "gstack"; then
# Guard: only trust CLAUDE_PLUGIN_DATA when CLAUDE_PLUGIN_ROOT confirms we are
# running as the gstack plugin. Without this, a CLAUDE_PLUGIN_DATA from another
# plugin (e.g. codex) that leaked into the session env via CLAUDE_ENV_FILE would
# be picked up, writing all gstack state into the wrong directory.
_state_root="$CLAUDE_PLUGIN_DATA"
elif [ -n "${HOME:-}" ]; then
_state_root="$HOME/.gstack"
+82 -3
View File
@@ -28,7 +28,8 @@
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null)"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
mkdir -p "$GSTACK_HOME/projects/$SLUG"
INPUT="$1"
@@ -49,12 +50,48 @@ if (!j.skill || !/^[a-z0-9-]+\$/.test(j.skill)) {
process.exit(1);
}
// Required: question_id (kebab-case, <=64 chars)
// Required: question_id (kebab-case, <=64 chars).
// Cathedral T5: hook-sourced events use 'hook-<10-char-hash>' which is
// kebab-case-compatible and passes the same regex.
if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
process.exit(1);
}
// Optional: source — tags which writer produced this event.
// 'agent' (default) — preamble-driven write from inside the running agent
// 'hook' — PostToolUse hook captured it deterministically (T5)
// 'auq-other' — user picked 'Other' and typed free text (Layer 8)
// 'auto-decided' — PreToolUse enforcement hook substituted the answer (T6)
// 'codex-import-marker' / 'codex-import-pattern' — T9 backfill from Codex
const ALLOWED_SOURCES = ['agent', 'hook', 'auq-other', 'auto-decided', 'codex-import-marker', 'codex-import-pattern'];
if (j.source !== undefined) {
if (!ALLOWED_SOURCES.includes(j.source)) {
process.stderr.write('gstack-question-log: invalid source, must be one of: ' + ALLOWED_SOURCES.join(', ') + '\n');
process.exit(1);
}
} else {
j.source = 'agent';
}
// Optional: tool_use_id — Claude Code hook stdin field; used for dedup.
if (j.tool_use_id !== undefined) {
if (typeof j.tool_use_id !== 'string' || j.tool_use_id.length > 128) {
process.stderr.write('gstack-question-log: tool_use_id must be string <=128 chars\n');
process.exit(1);
}
}
// Optional: free_text — sanitize (no newlines, <=300 chars).
if (j.free_text !== undefined) {
if (typeof j.free_text !== 'string') {
process.stderr.write('gstack-question-log: free_text must be string\n');
process.exit(1);
}
if (j.free_text.length > 300) j.free_text = j.free_text.slice(0, 300);
j.free_text = j.free_text.replace(/\n+/g, ' ');
}
// Required: question_summary (non-empty, <=200 chars, no newlines)
if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
process.stderr.write('gstack-question-log: question_summary required\n');
@@ -164,7 +201,49 @@ if [ $VALIDATE_RC -ne 0 ] || [ -z "$VALIDATED" ]; then
exit 1
fi
echo "$VALIDATED" >> "$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
LOG_FILE="$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
# Cathedral T5: composite-source dedup. If this exact (source, tool_use_id)
# was already logged within the last 100 lines, skip — protects against
# hook + agent both writing the same fire (D3 plan-tune cathedral decision).
# Lookup is bounded so the bin stays cheap on hot paths.
DEDUP_SKIP=""
if [ -f "$LOG_FILE" ]; then
DEDUP_SKIP=$(VALIDATED_JSON="$VALIDATED" LOG_FILE_PATH="$LOG_FILE" bun -e '
const fs = require("fs");
const j = JSON.parse(process.env.VALIDATED_JSON);
if (!j.tool_use_id) { console.log(""); process.exit(0); }
const want = j.source + ":" + j.tool_use_id;
const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").slice(-100);
for (const ln of lines) {
try {
const p = JSON.parse(ln);
if (p.source && p.tool_use_id && (p.source + ":" + p.tool_use_id) === want) {
console.log("dup");
process.exit(0);
}
} catch {}
}
console.log("");
' 2>/dev/null)
fi
if [ "$DEDUP_SKIP" = "dup" ]; then
echo "DEDUP: skipped (source=$(echo "$VALIDATED" | bun -e 'const j=JSON.parse(await Bun.stdin.text()); console.log(j.source);'), tool_use_id duplicate)"
exit 0
fi
echo "$VALIDATED" >> "$LOG_FILE"
# Cathedral T5: fire-and-forget --derive so inferred dimensions stay current
# without per-event latency (D17). Sub-second op; output suppressed; never
# blocks the hook caller. Skipped via GSTACK_QUESTION_LOG_NO_DERIVE=1 for
# tests that don't want the side effect.
if [ -z "${GSTACK_QUESTION_LOG_NO_DERIVE:-}" ]; then
(
nohup "$SCRIPT_DIR/gstack-developer-profile" --derive >/dev/null 2>&1 &
) >/dev/null 2>&1
fi
# NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
# Per Codex v2 review, audit/derivation data stays local alongside the
+17 -1
View File
@@ -23,7 +23,8 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"
PREF_FILE="$GSTACK_HOME/projects/$SLUG/question-preferences.json"
@@ -68,6 +69,21 @@ do_check() {
return;
}
// Split-chain carve-out: per-option calls in N-option splits emit
// question_ids of the form <skill>-split-<option-slug>. These are
// NEVER AUTO_DECIDE-eligible regardless of stored preferences — the
// whole point of splitting is restoring user sovereignty over the
// option set. See scripts/resolvers/preamble/generate-ask-user-format.ts
// \"Handling 5+ options — split, never drop\" for the surrounding
// mechanism that generates these ids.
if (/-split-/.test(qid)) {
console.log('ASK_NORMALLY');
if (pref === 'never-ask' || pref === 'ask-only-for-one-way') {
console.log('NOTE: split-chain per-option calls always ASK_NORMALLY; your ' + pref + ' preference does not apply to options inside a sequential split.');
}
return;
}
switch (pref) {
case 'never-ask':
console.log('AUTO_DECIDE');
+241
View File
@@ -0,0 +1,241 @@
#!/usr/bin/env bun
/**
* gstack-redact — scan text for secrets/PII/legal content via the shared engine.
*
* Skill-facing CLI over lib/redact-engine.ts. Reads from stdin (default) or
* --from-file, scans, and prints findings as JSON (--json) or a human table.
*
* Exit codes (consumed by skill bash to gate dispatch/file/edit/commit):
* 0 clean (no HIGH, no MEDIUM)
* 2 MEDIUM present (no HIGH) — skill runs the per-finding AskUserQuestion
* 3 HIGH present — skill blocks
*
* WARN findings (tool-fence-degraded credentials) never change the exit code.
*
* Flags:
* --json Emit JSON {findings, counts, repoVisibility, oversize}
* --repo-visibility V public | private | unknown (default unknown=public-strict wording)
* --from-file PATH Read input from PATH instead of stdin
* --allowlist PATH Newline-delimited exact spans to suppress
* --self-email EMAIL Suppress this email (the invoking user's own)
* --repo-public-emails PATH Newline-delimited repo-public emails to suppress
* --auto-redact IDS Comma-separated finding ids to auto-redact;
* prints the redacted body to stdout + diff to stderr.
* --max-bytes N Override the fail-closed size cap (default 1 MiB).
*
* Security note: this is a GUARDRAIL, not airtight enforcement. A determined
* user can always bypass it (direct gh/git). It catches accidents.
*/
import * as fs from "fs";
import * as path from "path";
import { spawnSync } from "child_process";
import {
scan,
applyRedactions,
exitCodeFor,
type RepoVisibility,
type ScanOptions,
type Finding,
} from "../lib/redact-engine";
const MAX_STDIN_BYTES = 16 * 1024 * 1024; // hard ceiling before the engine cap
// ── pre-push hook install/uninstall (chains any existing hook) ────────────────
const MANAGED_MARKER = "# gstack-redact pre-push (managed)";
function hooksPath(): string {
const r = spawnSync("git", ["rev-parse", "--git-path", "hooks"], { encoding: "utf8" });
if (r.status !== 0) {
process.stderr.write("gstack-redact: not in a git repo\n");
process.exit(1);
}
return r.stdout.trim();
}
function installPrepushHook(): void {
const dir = hooksPath();
fs.mkdirSync(dir, { recursive: true });
const hookPath = path.join(dir, "pre-push");
const prepushBin = path.join(import.meta.dir, "gstack-redact-prepush");
// If a non-managed hook exists, preserve it as pre-push.local and chain it.
if (fs.existsSync(hookPath)) {
const existing = fs.readFileSync(hookPath, "utf8");
if (existing.includes(MANAGED_MARKER)) {
process.stdout.write("gstack-redact: pre-push hook already installed.\n");
return;
}
const localPath = path.join(dir, "pre-push.local");
fs.renameSync(hookPath, localPath);
fs.chmodSync(localPath, 0o755);
process.stdout.write("gstack-redact: preserved existing hook as pre-push.local (chained).\n");
}
// stdin is single-consume: capture it once, feed both the chained hook and ours.
const wrapper = `#!/usr/bin/env bash
${MANAGED_MARKER}
set -euo pipefail
_input="$(cat)"
_local="$(git rev-parse --git-path hooks/pre-push.local)"
if [ -x "$_local" ]; then
printf '%s' "$_input" | "$_local" "$@" || exit $?
fi
printf '%s' "$_input" | bun "${prepushBin}" "$@"
`;
fs.writeFileSync(hookPath, wrapper, { mode: 0o755 });
fs.chmodSync(hookPath, 0o755);
process.stdout.write(`gstack-redact: installed pre-push hook at ${hookPath}\n`);
}
function uninstallPrepushHook(): void {
const dir = hooksPath();
const hookPath = path.join(dir, "pre-push");
const localPath = path.join(dir, "pre-push.local");
if (!fs.existsSync(hookPath) || !fs.readFileSync(hookPath, "utf8").includes(MANAGED_MARKER)) {
process.stdout.write("gstack-redact: no managed pre-push hook to remove.\n");
return;
}
if (fs.existsSync(localPath)) {
fs.renameSync(localPath, hookPath); // restore the chained original
process.stdout.write("gstack-redact: removed managed hook, restored pre-push.local.\n");
} else {
fs.unlinkSync(hookPath);
process.stdout.write("gstack-redact: removed managed pre-push hook.\n");
}
}
function arg(name: string): string | undefined {
const i = process.argv.indexOf(name);
return i >= 0 ? process.argv[i + 1] : undefined;
}
function flag(name: string): boolean {
return process.argv.includes(name);
}
function readInput(): string {
const file = arg("--from-file");
if (file) {
const st = fs.statSync(file);
if (st.size > MAX_STDIN_BYTES) {
// Don't even read it — fail closed at the CLI boundary.
process.stderr.write(`gstack-redact: input file too large (${st.size} bytes)\n`);
process.exit(3);
}
return fs.readFileSync(file, "utf8");
}
// stdin
const chunks: Buffer[] = [];
let total = 0;
const fd = 0;
const buf = Buffer.alloc(65536);
while (true) {
let n = 0;
try {
n = fs.readSync(fd, buf, 0, buf.length, null);
} catch (e: any) {
if (e.code === "EAGAIN") continue;
if (e.code === "EOF") break;
throw e;
}
if (n === 0) break;
total += n;
if (total > MAX_STDIN_BYTES) {
process.stderr.write("gstack-redact: stdin too large\n");
process.exit(3);
}
chunks.push(Buffer.from(buf.subarray(0, n)));
}
return Buffer.concat(chunks).toString("utf8");
}
function readLines(path: string | undefined): string[] | undefined {
if (!path || !fs.existsSync(path)) return undefined;
return fs
.readFileSync(path, "utf8")
.split("\n")
.map((l) => l.trim())
.filter(Boolean);
}
function buildOpts(): ScanOptions {
const vis = (arg("--repo-visibility") as RepoVisibility) || "unknown";
const maxBytes = arg("--max-bytes");
// #1824: validate the RAW string, not the parse result. parseInt("123abc")
// is 123 and parseInt("foo") is NaN — both silently corrupt the fail-closed
// oversize guard. Require a clean positive integer or reject before scanning.
let maxBytesOpt: number | undefined;
if (maxBytes !== undefined) {
if (!/^\d+$/.test(maxBytes) || Number(maxBytes) <= 0) {
process.stderr.write(
`gstack-redact: --max-bytes must be a positive integer (got "${maxBytes}")\n`,
);
process.exit(1);
}
maxBytesOpt = Number(maxBytes);
}
return {
repoVisibility: ["public", "private", "unknown"].includes(vis) ? vis : "unknown",
allowlist: readLines(arg("--allowlist")),
selfEmail: arg("--self-email"),
repoPublicEmails: readLines(arg("--repo-public-emails")),
...(maxBytesOpt !== undefined ? { maxBytes: maxBytesOpt } : {}),
};
}
function humanTable(findings: Finding[]): string {
if (!findings.length) return " (no findings)";
const rows = findings.map(
(f) =>
` ${f.severity.padEnd(6)} ${f.id.padEnd(24)} ${String(f.line).padStart(4)}:${String(
f.col,
).padEnd(3)} ${f.preview}`,
);
return rows.join("\n");
}
function main() {
// Subcommands (positional, not flags).
const sub = process.argv[2];
if (sub === "install-prepush-hook") return installPrepushHook();
if (sub === "uninstall-prepush-hook") return uninstallPrepushHook();
const opts = buildOpts();
const input = readInput();
// Auto-redact mode: print redacted body to stdout, diff to stderr, exit 0.
const autoIds = arg("--auto-redact");
if (autoIds) {
const { body, diff, skipped } = applyRedactions(input, autoIds.split(","), opts);
process.stdout.write(body);
if (diff) process.stderr.write(diff + "\n");
if (skipped.length) {
process.stderr.write(
`\ngstack-redact: ${skipped.length} finding(s) could not be auto-redacted (structural) — edit manually:\n` +
skipped.map((f) => ` ${f.id} @ ${f.line}:${f.col}`).join("\n") +
"\n",
);
}
process.exit(0);
}
const result = scan(input, opts);
const code = exitCodeFor(result);
if (flag("--json")) {
process.stdout.write(JSON.stringify(result, null, 2) + "\n");
} else {
const vis = result.repoVisibility.toUpperCase();
process.stdout.write(`gstack-redact scan — repo ${vis}\n`);
if (result.oversize) {
process.stdout.write(" BLOCKED — input too large to scan safely (fail-closed)\n");
} else {
process.stdout.write(humanTable(result.findings) + "\n");
const { HIGH, MEDIUM, LOW, WARN } = result.counts;
process.stdout.write(` HIGH=${HIGH} MEDIUM=${MEDIUM} LOW=${LOW} WARN=${WARN}\n`);
}
}
process.exit(code);
}
main();
+146
View File
@@ -0,0 +1,146 @@
#!/usr/bin/env bun
/**
* gstack-redact-prepush — git pre-push hook that scans the diff being pushed for
* HIGH-severity credentials and blocks the push on a hit.
*
* THIS IS A GUARDRAIL, NOT ENFORCEMENT. `git push --no-verify` bypasses it, as
* does `GSTACK_REDACT_PREPUSH=skip`. It catches accidental credential pushes,
* the most common real-world leak. It does NOT scan history, binary/LFS/submodule
* files, or non-added lines. History scanning is /cso's job.
*
* Git pre-push interface: refs are read from STDIN, one per line:
* <local ref> <local sha> <remote ref> <remote sha>
* We scan the ADDED lines of <remote sha>..<local sha> per ref (what's being
* pushed). Special cases:
* - remote sha all-zeroes → new branch: diff against merge-base with the
* remote's default branch (fallback: scan all commits unique to local ref).
* - local sha all-zeroes → branch delete: nothing to scan, skip.
* - force-push → remote..local still gives the net new content.
*
* Behavior:
* - HIGH finding in added lines → print + exit 1 (block), for public AND private.
* - MEDIUM → warn (non-blocking). LOW/WARN → silent.
* - GSTACK_REDACT_PREPUSH=skip → log + exit 0 (escape valve).
*
* Installed/uninstalled via `gstack-redact install-prepush-hook` (see the
* gstack-redact CLI), which chains any pre-existing hook.
*/
import { spawnSync } from "child_process";
import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import { scan, type Finding } from "../lib/redact-engine";
const ZERO = /^0+$/;
// The canonical empty-tree object; diffing against it yields all content as added.
const EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
function git(args: string[]): string {
const r = spawnSync("git", args, { encoding: "utf8", maxBuffer: 64 * 1024 * 1024 });
return r.status === 0 ? (r.stdout ?? "") : "";
}
function defaultRemoteBranch(): string {
// origin/HEAD → origin/main, fall back to main/master.
const sym = git(["symbolic-ref", "refs/remotes/origin/HEAD"]).trim();
if (sym) return sym.replace("refs/remotes/", "");
for (const b of ["origin/main", "origin/master"]) {
if (git(["rev-parse", "--verify", b]).trim()) return b;
}
return "origin/main";
}
/** Return the added-line text for a ref update being pushed. */
function addedLinesFor(localSha: string, remoteSha: string): string {
let range: string;
if (ZERO.test(remoteSha)) {
// New branch: prefer what's unique to localSha vs the remote default branch.
// With no merge-base (e.g. no remote yet), diff against the empty tree so ALL
// branch content is scanned as added — fail-safe (scans more, never less).
const base = git(["merge-base", localSha, defaultRemoteBranch()]).trim();
range = base ? `${base}..${localSha}` : `${EMPTY_TREE}..${localSha}`;
} else {
// Existing branch (incl. force-push): net new content remote..local.
range = `${remoteSha}..${localSha}`;
}
// -U0: only changed lines; we keep lines starting with '+' (added), drop the
// +++ file header. Unified diff added lines start with a single '+'.
const diff = git(["diff", "--unified=0", "--no-color", range]);
const added: string[] = [];
for (const line of diff.split("\n")) {
if (line.startsWith("+") && !line.startsWith("+++")) {
added.push(line.slice(1));
}
}
return added.join("\n");
}
function logSkip(reason: string): void {
try {
const home = process.env.GSTACK_HOME || path.join(os.homedir(), ".gstack");
const dir = path.join(home, "security");
fs.mkdirSync(dir, { recursive: true });
fs.appendFileSync(
path.join(dir, "prepush-skip.jsonl"),
JSON.stringify({ ts: new Date().toISOString(), reason }) + "\n",
);
} catch {
// best-effort; never block a push because logging failed
}
}
function main() {
if ((process.env.GSTACK_REDACT_PREPUSH || "").toLowerCase() === "skip") {
logSkip(process.env.GSTACK_REDACT_PREPUSH_REASON || "env-skip");
process.stderr.write("gstack-redact-prepush: skipped via GSTACK_REDACT_PREPUSH=skip\n");
process.exit(0);
}
const stdin = fs.readFileSync(0, "utf8");
const refs = stdin
.split("\n")
.map((l) => l.trim())
.filter(Boolean)
.map((l) => l.split(/\s+/));
const allHigh: Finding[] = [];
let mediumCount = 0;
for (const [, localSha, , remoteSha] of refs) {
if (!localSha || ZERO.test(localSha)) continue; // branch delete → nothing pushed
const added = addedLinesFor(localSha, remoteSha || "0");
if (!added.trim()) continue;
// Visibility doesn't change HIGH behavior; pass private so nothing is treated
// as public-strict (HIGH blocks regardless either way).
const result = scan(added, { repoVisibility: "private" });
for (const f of result.findings) {
if (f.severity === "HIGH") allHigh.push(f);
else if (f.severity === "MEDIUM") mediumCount++;
}
}
if (mediumCount > 0) {
process.stderr.write(
`gstack-redact-prepush: ${mediumCount} MEDIUM finding(s) in pushed diff (PII/internal). ` +
"Not blocking. Review before this becomes public.\n",
);
}
if (allHigh.length > 0) {
process.stderr.write(
"\n⛔ gstack-redact-prepush BLOCKED the push — credential(s) in the pushed diff:\n\n",
);
for (const f of allHigh) {
process.stderr.write(` HIGH ${f.id} ${f.preview}\n`);
}
process.stderr.write(
"\nRotate the credential (a pushed secret is compromised) and remove it from the diff.\n" +
"This is a guardrail: `git push --no-verify` or `GSTACK_REDACT_PREPUSH=skip git push` bypass it.\n",
);
process.exit(1);
}
process.exit(0);
}
main();
+11
View File
@@ -46,6 +46,17 @@ _cleanup_skill_entry() {
fi
}
_link_root_skill_alias() {
local target="$SKILLS_DIR/_gstack-command"
[ -f "$INSTALL_DIR/SKILL.md" ] || return 0
[ -L "$target" ] && rm -f "$target"
mkdir -p "$target"
ln -snf "$INSTALL_DIR/SKILL.md" "$target/SKILL.md"
}
_link_root_skill_alias
# Discover skills (directories with SKILL.md, excluding meta dirs)
SKILL_COUNT=0
for skill_dir in "$INSTALL_DIR"/*/; do
+53
View File
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
# gstack-session-kind — classify the current agent session so skills know whether
# a human can answer an interactive prompt (AskUserQuestion).
#
# Usage: gstack-session-kind → prints one of: spawned | headless | interactive
#
# Used by the preamble (generate-preamble-bash.ts) which echoes
# SESSION_KIND: <value>
# so the AskUserQuestion-failure fallback rule can branch without a shell-out at
# failure time:
# spawned → orchestrator session (OpenClaw). Auto-choose recommended option
# per the skill's SPAWNED_SESSION block. Never prose, never BLOCKED.
# headless → no human present (claude -p evals / CI). BLOCK on AUQ failure.
# interactive → a human is present. Prose-fallback on AUQ failure.
#
# Detection is best-effort. On ANY ambiguity it prints `interactive` — BLOCK only on
# a positive headless signal, since a stray prose message in an unmarked one-shot
# `-p` run just ends the turn (harmless), whereas wrongly BLOCKING a real human is not.
#
# Why env vars and not TTY/entrypoint: an interactive Conductor session reports
# CLAUDE_CODE_ENTRYPOINT=sdk-ts with no TTY — identical to a headless SDK eval. The
# signals that actually discriminate are the host/orchestrator/CI env markers below.
set -euo pipefail
# 1. Orchestrator-spawned session (OpenClaw). Authoritative block lives in the skill;
# we only surface the classification.
if [ -n "${OPENCLAW_SESSION:-}" ]; then
echo "spawned"
exit 0
fi
# 2. Explicit headless override (set by the eval/E2E harness for determinism).
if [ -n "${GSTACK_HEADLESS:-}" ]; then
echo "headless"
exit 0
fi
# 3. Positive interactive-host signals: a human-driven host is present.
# - Conductor app sets CONDUCTOR_* workspace vars.
# - Plain interactive `claude` CLI sets CLAUDE_CODE_ENTRYPOINT=cli.
if [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ] || [ "${CLAUDE_CODE_ENTRYPOINT:-}" = "cli" ]; then
echo "interactive"
exit 0
fi
# 4. CI / automation markers with no interactive host → headless.
if [ -n "${CI:-}" ] || [ -n "${GITHUB_ACTIONS:-}" ]; then
echo "headless"
exit 0
fi
# 5. No positive headless signal → assume a human is present (degrade-safe default).
echo "interactive"
+237 -34
View File
@@ -1,21 +1,44 @@
#!/usr/bin/env bash
# gstack-settings-hook — add/remove SessionStart hooks in Claude Code settings.json
# gstack-settings-hook — manage Claude Code hooks in ~/.claude/settings.json
#
# Usage:
# gstack-settings-hook add <hook-command> # add SessionStart hook
# gstack-settings-hook remove <hook-command> # remove SessionStart hook
# Two shapes:
#
# 1. Legacy (SessionStart only — used by setup --team and gstack-uninstall):
# gstack-settings-hook add <cmd> # adds SessionStart hook
# gstack-settings-hook remove <cmd> # removes matching SessionStart hook
#
# 2. Schema-aware (plan-tune cathedral T3 — supports PreToolUse + PostToolUse):
# gstack-settings-hook add-event --event <SessionStart|PreToolUse|PostToolUse> \
# --command <cmd> --source <tag> [--matcher <regex>] [--timeout <s>]
# gstack-settings-hook remove-source --source <tag>
# gstack-settings-hook diff-event --event ... --command ... --source ... [--matcher ...]
# gstack-settings-hook rollback # restore latest backup
# gstack-settings-hook list-sources # show all gstack-tagged hook entries
#
# Every add-event/remove-source writes a backup to ~/.claude/settings.json.bak.<ts>
# before mutating (Codex correction — silent settings.json mutation is wrong).
#
# Dedup: legacy `add`/`remove` dedupe by the historical `gstack-session-update`
# substring. Schema-aware `add-event` dedupes by (event, matcher, _gstack_source) so
# multiple gstack registrations (plan-tune, ...) don't collide.
#
# Requires: bun (already a gstack hard dependency)
# Writes atomically: .tmp + rename to prevent corruption on crash/disk-full.
set -euo pipefail
ACTION="${1:-}"
HOOK_CMD="${2:-}"
SETTINGS_FILE="${GSTACK_SETTINGS_FILE:-$HOME/.claude/settings.json}"
if [ -z "$ACTION" ] || [ -z "$HOOK_CMD" ]; then
echo "Usage: gstack-settings-hook {add|remove} <hook-command>" >&2
if [ -z "$ACTION" ]; then
cat <<EOF >&2
Usage:
gstack-settings-hook add <hook-command> # legacy SessionStart add
gstack-settings-hook remove <hook-command> # legacy SessionStart remove
gstack-settings-hook add-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
gstack-settings-hook remove-source --source <tag>
gstack-settings-hook diff-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
gstack-settings-hook rollback
gstack-settings-hook list-sources
EOF
exit 1
fi
@@ -24,59 +47,239 @@ if ! command -v bun >/dev/null 2>&1; then
exit 1
fi
backup_settings() {
if [ -f "$SETTINGS_FILE" ]; then
local ts
ts=$(date +%Y%m%d-%H%M%S)
cp "$SETTINGS_FILE" "$SETTINGS_FILE.bak.$ts"
echo "$SETTINGS_FILE.bak.$ts" > "$SETTINGS_FILE.bak-latest"
fi
}
# --- legacy SessionStart add/remove (backwards compat) -----------------
case "$ACTION" in
add)
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e "
const fs = require('fs');
HOOK_CMD="${2:-}"
if [ -z "$HOOK_CMD" ]; then
echo "Usage: gstack-settings-hook add <hook-command>" >&2
exit 1
fi
backup_settings
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e '
const fs = require("fs");
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
const hookCmd = process.env.GSTACK_HOOK_CMD;
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch {}
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
if (!settings.hooks) settings.hooks = {};
if (!settings.hooks.SessionStart) settings.hooks.SessionStart = [];
// Dedup: check if hook command already registered
const exists = settings.hooks.SessionStart.some(entry =>
entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update'))
entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update"))
);
if (!exists) {
settings.hooks.SessionStart.push({
hooks: [{ type: 'command', command: hookCmd }]
hooks: [{ type: "command", command: hookCmd }]
});
}
const tmp = settingsPath + '.tmp';
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
fs.renameSync(tmp, settingsPath);
" 2>/dev/null
' 2>/dev/null
;;
remove)
HOOK_CMD="${2:-}"
if [ -z "$HOOK_CMD" ]; then
echo "Usage: gstack-settings-hook remove <hook-command>" >&2
exit 1
fi
[ -f "$SETTINGS_FILE" ] || exit 1
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e "
const fs = require('fs');
backup_settings
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
const fs = require("fs");
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch { process.exit(0); }
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
if (settings.hooks && settings.hooks.SessionStart) {
settings.hooks.SessionStart = settings.hooks.SessionStart.filter(entry =>
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update')))
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update")))
);
if (settings.hooks.SessionStart.length === 0) delete settings.hooks.SessionStart;
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
}
const tmp = settingsPath + '.tmp';
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
fs.renameSync(tmp, settingsPath);
" 2>/dev/null
' 2>/dev/null
;;
add-event|diff-event)
EVENT=""
COMMAND=""
SOURCE=""
MATCHER=""
TIMEOUT=""
shift
while [ $# -gt 0 ]; do
case "$1" in
--event) EVENT="$2"; shift 2 ;;
--command) COMMAND="$2"; shift 2 ;;
--source) SOURCE="$2"; shift 2 ;;
--matcher) MATCHER="$2"; shift 2 ;;
--timeout) TIMEOUT="$2"; shift 2 ;;
*) echo "unknown flag: $1" >&2; exit 1 ;;
esac
done
if [ -z "$EVENT" ] || [ -z "$COMMAND" ] || [ -z "$SOURCE" ]; then
echo "add-event/diff-event require --event, --command, --source" >&2
exit 1
fi
case "$EVENT" in
SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification) ;;
*) echo "invalid --event '$EVENT'; must be one of SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification" >&2; exit 1 ;;
esac
if [ "$ACTION" = "add-event" ]; then
backup_settings
fi
DIFF_ONLY=""
if [ "$ACTION" = "diff-event" ]; then DIFF_ONLY=1; fi
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" \
GSTACK_EVENT="$EVENT" \
GSTACK_COMMAND="$COMMAND" \
GSTACK_SOURCE="$SOURCE" \
GSTACK_MATCHER="$MATCHER" \
GSTACK_TIMEOUT="$TIMEOUT" \
GSTACK_DIFF_ONLY="$DIFF_ONLY" \
bun -e '
const fs = require("fs");
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
const event = process.env.GSTACK_EVENT;
const cmd = process.env.GSTACK_COMMAND;
const source = process.env.GSTACK_SOURCE;
const matcher = process.env.GSTACK_MATCHER || "";
const timeoutRaw = process.env.GSTACK_TIMEOUT || "";
const diffOnly = process.env.GSTACK_DIFF_ONLY === "1";
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
const before = JSON.stringify(settings, null, 2);
if (!settings.hooks) settings.hooks = {};
if (!settings.hooks[event]) settings.hooks[event] = [];
const matchesEntry = (entry) => {
const sameMatcher = (entry.matcher || "") === matcher;
const sameSource = entry._gstack_source === source;
return sameMatcher && sameSource;
};
let existing = settings.hooks[event].find(matchesEntry);
const hookEntry = { type: "command", command: cmd };
if (timeoutRaw) {
const n = Number(timeoutRaw);
if (Number.isFinite(n) && n > 0) hookEntry.timeout = n;
}
if (existing) {
existing.hooks = [hookEntry];
} else {
const newEntry = { _gstack_source: source, hooks: [hookEntry] };
if (matcher) newEntry.matcher = matcher;
settings.hooks[event].push(newEntry);
}
const after = JSON.stringify(settings, null, 2);
if (diffOnly) {
console.log("--- BEFORE");
console.log(before);
console.log("--- AFTER");
console.log(after);
process.exit(0);
}
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, after + "\n");
fs.renameSync(tmp, settingsPath);
console.log("OK: " + event + " hook registered (source: " + source + ")");
'
;;
remove-source)
SOURCE=""
shift
while [ $# -gt 0 ]; do
case "$1" in
--source) SOURCE="$2"; shift 2 ;;
*) echo "unknown flag: $1" >&2; exit 1 ;;
esac
done
if [ -z "$SOURCE" ]; then
echo "remove-source requires --source <tag>" >&2
exit 1
fi
[ -f "$SETTINGS_FILE" ] || exit 0
backup_settings
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_SOURCE="$SOURCE" bun -e '
const fs = require("fs");
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
const source = process.env.GSTACK_SOURCE;
let settings = {};
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
if (!settings.hooks) { process.exit(0); }
let removed = 0;
for (const event of Object.keys(settings.hooks)) {
const before = settings.hooks[event].length;
settings.hooks[event] = settings.hooks[event].filter(entry => entry._gstack_source !== source);
removed += before - settings.hooks[event].length;
if (settings.hooks[event].length === 0) delete settings.hooks[event];
}
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
const tmp = settingsPath + ".tmp";
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
fs.renameSync(tmp, settingsPath);
console.log("OK: removed " + removed + " hook entry/entries tagged source=" + source);
'
;;
rollback)
if [ ! -f "$SETTINGS_FILE.bak-latest" ]; then
echo "rollback: no backup pointer at $SETTINGS_FILE.bak-latest" >&2
exit 1
fi
LATEST=$(cat "$SETTINGS_FILE.bak-latest")
if [ ! -f "$LATEST" ]; then
echo "rollback: pointer references missing backup $LATEST" >&2
exit 1
fi
cp "$LATEST" "$SETTINGS_FILE"
echo "OK: restored $SETTINGS_FILE from $LATEST"
;;
list-sources)
[ -f "$SETTINGS_FILE" ] || { echo "(no settings file)"; exit 0; }
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
const fs = require("fs");
let settings = {};
try { settings = JSON.parse(fs.readFileSync(process.env.GSTACK_SETTINGS_PATH, "utf8")); } catch { process.exit(0); }
const hooks = settings.hooks || {};
let any = false;
for (const event of Object.keys(hooks)) {
for (const entry of hooks[event]) {
if (entry._gstack_source) {
any = true;
console.log(event + "\t" + entry._gstack_source + "\t" + (entry.matcher || "(no matcher)"));
}
}
}
if (!any) console.log("(no gstack-tagged hooks)");
'
;;
*)
echo "Unknown action: $ACTION (expected add or remove)" >&2
echo "Unknown action: $ACTION" >&2
exit 1
;;
esac
+8
View File
@@ -31,6 +31,14 @@ fi
# 3. Fallback to basename only when there's truly no git remote configured
SLUG="${SLUG:-$(basename "$PWD" | tr -cd 'a-zA-Z0-9._-')}"
# 3b. Re-sanitize unconditionally before the value is echoed into `eval`/`source`
# output. The compute (2) and fallback (3) paths already filter, but a value
# read straight from the cache file (1) does NOT — a poisoned
# ~/.gstack/slug-cache/<key> would otherwise inject shell into
# `eval "$(gstack-slug)"`. Filtering here honors the [a-zA-Z0-9._-] invariant
# promised in the header on every path, and heals a poisoned cache on write (4).
SLUG=$(printf '%s' "$SLUG" | tr -cd 'a-zA-Z0-9._-')
# 4. Cache the slug for future sessions (atomic write, fail silently)
if [[ -n "$SLUG" ]]; then
mkdir -p "$CACHE_DIR" 2>/dev/null || true
+7 -1
View File
@@ -107,7 +107,13 @@ BATCH="$BATCH]"
[ "$COUNT" -eq 0 ] && exit 0
# ─── POST to edge function ───────────────────────────────────
RESP_FILE="$(mktemp /tmp/gstack-sync-XXXXXX 2>/dev/null || echo "/tmp/gstack-sync-$$")"
# Create response file atomically. If mktemp fails, refuse to continue rather
# than fall back to a predictable $$-based path (race + overwrite footgun).
RESP_FILE="$(mktemp "${TMPDIR:-/tmp}/gstack-sync-XXXXXX")" || {
echo "gstack-telemetry-sync: mktemp failed — skipping this run" >&2
exit 0
}
trap 'rm -f "$RESP_FILE"' EXIT
HTTP_CODE="$(curl -s -w '%{http_code}' --max-time 10 \
-X POST "${SUPABASE_URL}/functions/v1/telemetry-ingest" \
-H "Content-Type: application/json" \
+6 -4
View File
@@ -29,11 +29,13 @@ if [ ! -f "$TIMELINE_FILE" ]; then
exit 0
fi
cat "$TIMELINE_FILE" 2>/dev/null | bun -e "
cat "$TIMELINE_FILE" 2>/dev/null | GSTACK_TIMELINE_SINCE="$SINCE" GSTACK_TIMELINE_BRANCH="$BRANCH" GSTACK_TIMELINE_LIMIT="$LIMIT" bun -e "
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
const since = '${SINCE}';
const branch = '${BRANCH}';
const limit = ${LIMIT};
const since = process.env.GSTACK_TIMELINE_SINCE || '';
const branch = process.env.GSTACK_TIMELINE_BRANCH || '';
const limitRaw = process.env.GSTACK_TIMELINE_LIMIT || '20';
const parsedLimit = Number.parseInt(limitRaw, 10);
const limit = Number.isSafeInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 20;
let sinceMs = 0;
if (since) {
+4
View File
@@ -232,6 +232,10 @@ SETTINGS_HOOK="$(dirname "$0")/gstack-settings-hook"
SESSION_UPDATE="$(dirname "$0")/gstack-session-update"
if [ -x "$SETTINGS_HOOK" ]; then
"$SETTINGS_HOOK" remove "$SESSION_UPDATE" 2>/dev/null && REMOVED+=("SessionStart hook") || true
# Cathedral T8 cleanup: also remove plan-tune PreToolUse + PostToolUse hooks.
if "$SETTINGS_HOOK" remove-source --source plan-tune-cathedral 2>/dev/null | grep -q "removed [1-9]"; then
REMOVED+=("plan-tune cathedral hooks")
fi
fi
# ─── Remove global state ────────────────────────────────────
+212
View File
@@ -0,0 +1,212 @@
#!/usr/bin/env bun
// gstack-version-bump — deterministic version-state classifier + writer for /ship.
//
// Extracted from ship Step 12 prose (v2 plan T9, hybrid CLI extraction). The
// idempotency classification and the dual-write to VERSION + package.json are
// pure deterministic logic; running them as tested code removes the single
// worst /ship footgun — re-bumping an already-shipped branch — from prose the
// agent could skip or misread when the step lives in a lazy-loaded section.
//
// What STAYS agent judgment (NOT here): the bump-LEVEL decision (micro/patch vs
// minor/major, which may AskUserQuestion on feature signals) and the queue
// collision prompt. The slot pick itself is bin/gstack-next-version. This CLI
// only answers "what state am I in?" and "write this exact version".
//
// Subcommands:
// classify --base <branch> [--version-path <p>]
// Compares VERSION vs origin/<base>:VERSION vs package.json.version.
// Emits JSON: { state, baseVersion, currentVersion, pkgVersion, pkgExists }
// state ∈ FRESH | ALREADY_BUMPED | DRIFT_STALE_PKG | DRIFT_UNEXPECTED
// Exit 0 on a decidable state (incl. DRIFT_UNEXPECTED — it's a real state
// the caller must handle), exit 2 on bad args / unresolvable base.
//
// write --version <X.Y.Z.W> [--version-path <p>]
// Validates the 4-digit pattern, writes VERSION + package.json.version.
// Use for the FRESH bump (or an approved queue rebump). Exit 3 on a
// half-write (VERSION written, package.json failed) so the caller knows
// drift exists; the next classify() will report DRIFT_STALE_PKG.
//
// repair [--version-path <p>]
// DRIFT_STALE_PKG path: sync package.json.version to the current VERSION
// file. No bump. Validates the VERSION pattern first.
//
// Contract: classify NEVER writes. write/repair mutate VERSION + package.json
// only. No git mutation, no network. Mirrors gstack-next-version's reader/writer
// split so /ship composes them.
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { execFileSync } from "node:child_process";
import { join } from "node:path";
const VERSION_RE = /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/;
const DEFAULT = "0.0.0.0";
type State = "FRESH" | "ALREADY_BUMPED" | "DRIFT_STALE_PKG" | "DRIFT_UNEXPECTED";
function fail(msg: string, code = 2): never {
process.stderr.write(`gstack-version-bump: ${msg}\n`);
process.exit(code);
}
function argVal(args: string[], flag: string): string | undefined {
const i = args.indexOf(flag);
return i >= 0 && i + 1 < args.length ? args[i + 1] : undefined;
}
/** Resolve the VERSION file path: --version-path, else .gstack/version-path, else "VERSION". */
function resolveVersionPath(cwd: string, explicit?: string): string {
if (explicit) return join(cwd, explicit);
const pin = join(cwd, ".gstack", "version-path");
if (existsSync(pin)) {
const p = readFileSync(pin, "utf-8").trim();
if (p) return join(cwd, p);
}
return join(cwd, "VERSION");
}
function readVersionFile(p: string): string {
try {
const v = readFileSync(p, "utf-8").replace(/[\r\n\s]/g, "");
return v || DEFAULT;
} catch {
return DEFAULT;
}
}
/** package.json version + existence, parsed without spawning node. */
function readPkgVersion(cwd: string): { exists: boolean; version: string } {
const pkgPath = join(cwd, "package.json");
if (!existsSync(pkgPath)) return { exists: false, version: "" };
let raw: string;
try {
raw = readFileSync(pkgPath, "utf-8");
} catch {
return { exists: true, version: "" };
}
let parsed: unknown;
try {
parsed = JSON.parse(raw);
} catch {
fail("package.json is not valid JSON. Fix the file before re-running /ship.", 2);
}
const version = (parsed as { version?: unknown })?.version;
return { exists: true, version: typeof version === "string" ? version : "" };
}
function writePkgVersion(cwd: string, version: string): void {
const pkgPath = join(cwd, "package.json");
const raw = readFileSync(pkgPath, "utf-8");
const parsed = JSON.parse(raw) as Record<string, unknown>;
parsed.version = version;
writeFileSync(pkgPath, JSON.stringify(parsed, null, 2) + "\n");
}
function baseVersion(cwd: string, base: string, versionRel: string): string {
// Verify the base ref resolves, mirroring the Step 12 guard.
try {
execFileSync("git", ["rev-parse", "--verify", `origin/${base}`], { cwd, stdio: "ignore" });
} catch {
fail(`Unable to resolve origin/${base}. Run 'git fetch origin' or verify the base branch exists.`, 2);
}
try {
const out = execFileSync("git", ["show", `origin/${base}:${versionRel}`], { cwd }).toString();
const v = out.replace(/[\r\n\s]/g, "");
return v || DEFAULT;
} catch {
// VERSION absent on base (new repo / new file) → treat as 0.0.0.0.
return DEFAULT;
}
}
function classifyState(current: string, base: string, pkgExists: boolean, pkgVersion: string): State {
if (current === base) {
// VERSION unchanged vs base. A diverging package.json means someone hand-edited
// package.json bypassing /ship — unsafe to guess which is authoritative.
if (pkgExists && pkgVersion && pkgVersion !== current) return "DRIFT_UNEXPECTED";
return "FRESH";
}
// VERSION already moved past base.
if (pkgExists && pkgVersion && pkgVersion !== current) return "DRIFT_STALE_PKG";
return "ALREADY_BUMPED";
}
function cmdClassify(args: string[], cwd: string): void {
const base = argVal(args, "--base");
if (!base) fail("classify requires --base <branch>", 2);
const versionPath = resolveVersionPath(cwd, argVal(args, "--version-path"));
const versionRel = argVal(args, "--version-path") ?? "VERSION";
const current = readVersionFile(versionPath);
const baseV = baseVersion(cwd, base!, versionRel);
const pkg = readPkgVersion(cwd);
const state = classifyState(current, baseV, pkg.exists, pkg.version);
process.stdout.write(
JSON.stringify({
state,
baseVersion: baseV,
currentVersion: current,
pkgVersion: pkg.version || null,
pkgExists: pkg.exists,
}) + "\n",
);
// DRIFT_UNEXPECTED is a real, decidable state — the caller stops on it, but the
// classification itself succeeded, so exit 0. (Bad args / unresolvable base are
// the only exit-2 cases.)
}
function cmdWrite(args: string[], cwd: string): void {
const version = argVal(args, "--version");
if (!version) fail("write requires --version <X.Y.Z.W>", 2);
if (!VERSION_RE.test(version!)) {
fail(`NEW_VERSION (${version}) does not match MAJOR.MINOR.PATCH.MICRO. Aborting.`, 2);
}
const versionPath = resolveVersionPath(cwd, argVal(args, "--version-path"));
writeFileSync(versionPath, version + "\n");
if (existsSync(join(cwd, "package.json"))) {
try {
writePkgVersion(cwd, version!);
} catch {
fail(
"failed to update package.json. VERSION was written but package.json is now stale. " +
"Re-run — classify will report DRIFT_STALE_PKG and repair will sync it.",
3,
);
}
}
process.stdout.write(JSON.stringify({ wrote: version, packageJson: existsSync(join(cwd, "package.json")) }) + "\n");
}
function cmdRepair(args: string[], cwd: string): void {
const versionPath = resolveVersionPath(cwd, argVal(args, "--version-path"));
const current = readVersionFile(versionPath);
if (!VERSION_RE.test(current)) {
fail(
`VERSION file contents (${current}) do not match MAJOR.MINOR.PATCH.MICRO. ` +
"Refusing to propagate invalid semver into package.json. Fix VERSION, then re-run /ship.",
2,
);
}
if (!existsSync(join(cwd, "package.json"))) {
fail("repair: no package.json to sync.", 2);
}
try {
writePkgVersion(cwd, current);
} catch {
fail("drift repair failed — could not update package.json.", 3);
}
process.stdout.write(JSON.stringify({ repaired: current }) + "\n");
}
// Exported for unit tests (pure logic, no I/O).
export { classifyState, VERSION_RE, type State };
if (import.meta.main) {
const [sub, ...rest] = process.argv.slice(2);
const cwd = process.cwd();
switch (sub) {
case "classify": cmdClassify(rest, cwd); break;
case "write": cmdWrite(rest, cwd); break;
case "repair": cmdRepair(rest, cwd); break;
default:
fail("usage: gstack-version-bump <classify|write|repair> [flags]", 2);
}
}
+97 -16
View File
@@ -2,13 +2,7 @@
name: browse
preamble-tier: 1
version: 1.1.0
description: |
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
elements, verify page state, diff before/after actions, take annotated screenshots, check
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
site", "take a screenshot", or "dogfood this". (gstack)
description: Fast headless browser for QA testing and site dogfooding. (gstack)
triggers:
- browse a page
- headless browser
@@ -22,6 +16,16 @@ allowed-tools:
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->
## When to invoke this skill
Navigate any URL, interact with
elements, verify page state, diff before/after actions, take annotated screenshots, check
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
site", "take a screenshot", or "dogfood this".
## Preamble (run first)
```bash
@@ -42,6 +46,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
REPO_MODE=${REPO_MODE:-unknown}
echo "REPO_MODE: $REPO_MODE"
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
echo "SESSION_KIND: $_SESSION_KIND"
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
# variant flaky), so skills render decisions as prose instead of calling the
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
# still BLOCKs rather than rendering prose to nobody.
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
echo "CONDUCTOR_SESSION: true"
fi
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
echo "LAKE_INTRO: $_LAKE_SEEN"
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
@@ -57,7 +71,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
echo "QUESTION_TUNING: $_QUESTION_TUNING"
mkdir -p ~/.gstack/analytics
if [ "$_TEL" != "off" ]; then
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
fi
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
if [ -f "$_PF" ]; then
@@ -99,6 +113,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
# Claude Code exposes plan mode via system reminders; we detect best-effort
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
# fall back to "inactive". Codex hosts and Claude execution mode both end up
# inactive, which is the safe default (defaults to file+execute pipeline).
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
export GSTACK_PLAN_MODE="active"
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
export GSTACK_PLAN_MODE="active"
else
export GSTACK_PLAN_MODE="inactive"
fi
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
```
@@ -108,7 +135,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
## Skill Invocation During Plan Mode
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
@@ -143,7 +170,7 @@ touch ~/.gstack/.writing-style-prompted
Skip if `WRITING_STYLE_PENDING` is `no`.
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
```bash
open https://garryslist.org/posts/boil-the-ocean
@@ -154,7 +181,7 @@ Only run `open` if yes. Always run `touch`.
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
Options:
- A) Help gstack get better! (recommended)
@@ -230,6 +257,7 @@ Key routing rules:
- Ship/deploy/PR → invoke /ship or /land-and-deploy
- Save progress → invoke /context-save
- Resume context → invoke /context-restore
- Author a backlog-ready spec/issue → invoke /spec
```
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
@@ -474,9 +502,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
## Plan Status Footer
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
PLAN MODE EXCEPTION — always allowed (it's the plan file).
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
# browse: QA Testing & Dogfooding
@@ -625,6 +651,51 @@ $B screenshot /tmp/out.png --selector .tweet-card
```
Scale must be 1-3 (gstack policy cap). Changing `--scale` recreates the browser context; refs from `snapshot` are invalidated (rerun `snapshot`), but `load-html` content is replayed automatically. Not supported in headed mode.
### 14. Offline render mode (rasterize your own HTML/JSON, zero network)
This is the blessed path for "I just want to turn my own local HTML or JSON into a
PNG/PDF/bytes on disk" — Excalidraw diagrams, tweet/quote cards, og-images,
report rasterization. It is **plain headless, shared Chromium, no proxy, no Xvfb,
no anti-bot stealth**. Default `$B` is already exactly this; you do not pass
`--headed` or `--proxy`. One Chromium per box, shared by every skill — **do not
`npm i puppeteer` and ship a second browser** (see the note under the cheatsheet).
Two output shapes, pick by what you have:
**A) Visual output → `screenshot --selector` (preferred).** If the thing you want
is a picture of something on the page, screenshot it. The PNG is written from the
browser process straight to disk — the image bytes never cross the CDP wire.
```bash
echo '<div id="card" style="width:400px;height:200px;background:#1da1f2;color:#fff;padding:20px">hi</div>' > /tmp/card.html
$B viewport 480x600 --scale 2
$B load-html /tmp/card.html
$B screenshot /tmp/card.png --selector '#card' # disk path — no megabytes over CDP
```
(Use the disk path, NOT `screenshot --base64` — base64 serializes the bytes back
through the command channel, which is the cost you're trying to avoid.)
**B) Bytes a function returns → `js --out` / `eval --out`.** When a library hands
you the result as a return value (a base64 data URL, a blob, computed JSON) rather
than painting a stable element — e.g. Excalidraw's export function returns a PNG
data URL — write the evaluate result straight to disk. `--out` decodes a
`data:*;base64,...` result to raw bytes automatically (pass `--raw` to write the
literal string). The payload is written by the daemon and never serialized back
out to the CLI/stdout.
```bash
# Load the render bundle, signal readiness, then render-to-file.
$B load-html /tmp/excalidraw-export.html # bundle sets window.__render + a #done flag
$B wait '#done' # deterministic ready handshake
$B js "window.__render(SCENE_JSON)" --out /tmp/diagram.png # data URL → decoded PNG on disk
```
`--out` is a WRITE: it needs the `write` scope and is never allowed over the
pair-agent tunnel (a remote agent can't write to your disk). Parent directories
are created; malformed base64 errors instead of writing corrupt bytes. Pick A when
you can (no CDP transfer at all); reach for B only when the bytes come back as a
return value.
## Puppeteer → browse cheatsheet
Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
@@ -638,6 +709,8 @@ Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
| `await (await page.$('.x')).screenshot({path})` | `$B screenshot <path> --selector .x` |
| `await page.screenshot({fullPage: true, path})` | `$B screenshot <path>` (full page default) |
| `await page.screenshot({clip: {x, y, w, h}, path})` | `$B screenshot <path> --clip x,y,w,h` |
| `const r = await page.evaluate(fn)` | `$B js "<expr>"` (result to stdout) |
| `fs.writeFileSync(out, Buffer.from(dataUrl.split(',')[1],'base64'))` | `$B js "<expr>" --out <file>` (data URL auto-decoded) |
Worked example (the tweet-renderer flow — Puppeteer → browse):
@@ -652,6 +725,13 @@ $B screenshot /tmp/out.png --selector .tweet-card
Aliases: typing `setcontent` or `set-content` routes to `load-html` automatically. Typing a typo (`load-htm`) returns `Did you mean 'load-html'?`.
**Don't bundle your own puppeteer/Chromium.** `browse` is the one shared Chromium
per box. Skills that need to rasterize local HTML/JSON (diagrams, cards, og-images)
should route through `browse``screenshot --selector` for visual output,
`load-html` + `js --out` for bytes a function returns — instead of
`npm i puppeteer` and downloading a second Chromium that drifts out of version sync.
One install to pin, one daemon's lifecycle to manage.
## User Handoff
When you hit something you can't handle in headless mode (CAPTCHA, complex auth, multi-factor
@@ -856,10 +936,10 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero
| `cookies` | All cookies as JSON |
| `css <sel> <prop>` | Computed CSS value |
| `dialog [--clear]` | Dialog messages |
| `eval <file>` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. |
| `eval <file> [--out <file>] [--raw]` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
| `inspect [selector] [--all] [--history]` | Deep CSS inspection via CDP — full rule cascade, box model, computed styles |
| `is <prop> <sel|@ref>` | State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected. |
| `js <expr>` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. |
| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
| `network [--clear]` | Network requests |
| `perf` | Page load timings |
| `storage | storage set <key> <value>` | Read both localStorage and sessionStorage as JSON. With "set <key> <value>", write to localStorage only (sessionStorage is read-only via this command — set it with `js sessionStorage.setItem(...)`). |
@@ -905,6 +985,7 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero
| `disconnect` | Disconnect headed browser, return to headless mode |
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
| `handoff [message]` | Open visible Chrome at current page for user takeover |
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
| `restart` | Restart server |
| `resume` | Re-snapshot after user takeover, return control to AI |
| `state save|load <name>` | Save/load browser state (cookies + URLs) |
+54
View File
@@ -135,6 +135,51 @@ $B screenshot /tmp/out.png --selector .tweet-card
```
Scale must be 1-3 (gstack policy cap). Changing `--scale` recreates the browser context; refs from `snapshot` are invalidated (rerun `snapshot`), but `load-html` content is replayed automatically. Not supported in headed mode.
### 14. Offline render mode (rasterize your own HTML/JSON, zero network)
This is the blessed path for "I just want to turn my own local HTML or JSON into a
PNG/PDF/bytes on disk" — Excalidraw diagrams, tweet/quote cards, og-images,
report rasterization. It is **plain headless, shared Chromium, no proxy, no Xvfb,
no anti-bot stealth**. Default `$B` is already exactly this; you do not pass
`--headed` or `--proxy`. One Chromium per box, shared by every skill — **do not
`npm i puppeteer` and ship a second browser** (see the note under the cheatsheet).
Two output shapes, pick by what you have:
**A) Visual output → `screenshot --selector` (preferred).** If the thing you want
is a picture of something on the page, screenshot it. The PNG is written from the
browser process straight to disk — the image bytes never cross the CDP wire.
```bash
echo '<div id="card" style="width:400px;height:200px;background:#1da1f2;color:#fff;padding:20px">hi</div>' > /tmp/card.html
$B viewport 480x600 --scale 2
$B load-html /tmp/card.html
$B screenshot /tmp/card.png --selector '#card' # disk path — no megabytes over CDP
```
(Use the disk path, NOT `screenshot --base64` — base64 serializes the bytes back
through the command channel, which is the cost you're trying to avoid.)
**B) Bytes a function returns → `js --out` / `eval --out`.** When a library hands
you the result as a return value (a base64 data URL, a blob, computed JSON) rather
than painting a stable element — e.g. Excalidraw's export function returns a PNG
data URL — write the evaluate result straight to disk. `--out` decodes a
`data:*;base64,...` result to raw bytes automatically (pass `--raw` to write the
literal string). The payload is written by the daemon and never serialized back
out to the CLI/stdout.
```bash
# Load the render bundle, signal readiness, then render-to-file.
$B load-html /tmp/excalidraw-export.html # bundle sets window.__render + a #done flag
$B wait '#done' # deterministic ready handshake
$B js "window.__render(SCENE_JSON)" --out /tmp/diagram.png # data URL → decoded PNG on disk
```
`--out` is a WRITE: it needs the `write` scope and is never allowed over the
pair-agent tunnel (a remote agent can't write to your disk). Parent directories
are created; malformed base64 errors instead of writing corrupt bytes. Pick A when
you can (no CDP transfer at all); reach for B only when the bytes come back as a
return value.
## Puppeteer → browse cheatsheet
Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
@@ -148,6 +193,8 @@ Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
| `await (await page.$('.x')).screenshot({path})` | `$B screenshot <path> --selector .x` |
| `await page.screenshot({fullPage: true, path})` | `$B screenshot <path>` (full page default) |
| `await page.screenshot({clip: {x, y, w, h}, path})` | `$B screenshot <path> --clip x,y,w,h` |
| `const r = await page.evaluate(fn)` | `$B js "<expr>"` (result to stdout) |
| `fs.writeFileSync(out, Buffer.from(dataUrl.split(',')[1],'base64'))` | `$B js "<expr>" --out <file>` (data URL auto-decoded) |
Worked example (the tweet-renderer flow — Puppeteer → browse):
@@ -162,6 +209,13 @@ $B screenshot /tmp/out.png --selector .tweet-card
Aliases: typing `setcontent` or `set-content` routes to `load-html` automatically. Typing a typo (`load-htm`) returns `Did you mean 'load-html'?`.
**Don't bundle your own puppeteer/Chromium.** `browse` is the one shared Chromium
per box. Skills that need to rasterize local HTML/JSON (diagrams, cards, og-images)
should route through `browse` — `screenshot --selector` for visual output,
`load-html` + `js --out` for bytes a function returns — instead of
`npm i puppeteer` and downloading a second Chromium that drifts out of version sync.
One install to pin, one daemon's lifecycle to manage.
## User Handoff
When you hit something you can't handle in headless mode (CAPTCHA, complex auth, multi-factor
+211 -21
View File
@@ -18,9 +18,12 @@
import { chromium, type Browser, type BrowserContext, type BrowserContextOptions, type Page, type Locator, type Cookie } from 'playwright';
import { writeSecureFile, mkdirSecure } from './file-permissions';
import { addConsoleEntry, addNetworkEntry, addDialogEntry, networkBuffer, type DialogEntry } from './buffers';
import { emitActivity } from './activity';
import { validateNavigationUrl } from './url-validation';
import { TabSession, type RefEntry } from './tab-session';
import { resolveChromiumProfile, cleanSingletonLocks } from './config';
import { withCdpSession } from './cdp-bridge';
import type { MemorySnapshot, MemoryStructureStats, MemoryTabSnapshot, MemoryProcess } from './memory-snapshot';
/**
* Detect whether GSTACK_CHROMIUM_PATH points at a custom Chromium build that
@@ -59,6 +62,13 @@ export function isCustomChromium(): boolean {
*/
export function shouldEnableChromiumSandbox(): boolean {
if (process.platform === 'win32') return false;
// Explicit user override for Ubuntu/AppArmor and similar environments where
// unprivileged Chromium sandboxing is blocked even for normal users (the
// sandbox needs unprivileged user namespaces that the host policy denies,
// so /qa hangs without --no-sandbox). Setting GSTACK_CHROMIUM_NO_SANDBOX=1
// forces the sandbox off without changing the default for everyone else.
// See #1562.
if (process.env.GSTACK_CHROMIUM_NO_SANDBOX === '1') return false;
const isRoot = typeof process.getuid === 'function' && process.getuid() === 0;
return !(process.env.CI || process.env.CONTAINER || isRoot);
}
@@ -187,11 +197,60 @@ export class BrowserManager {
private connectionMode: 'launched' | 'headed' = 'launched';
private intentionalDisconnect = false;
// ─── Tab Count Guardrail (D5 + Codex single-tab flag) ───────
// Idempotent threshold trackers: each guardrail fires exactly once per
// upward crossing of its threshold and re-arms when the tab count drops
// back below. Pre-guardrail, nothing tracked tab count growth and a
// user could accumulate hundreds of tabs (each holding 50300 MB of
// Chromium-side RSS) without warning until the OS OOM-killer fired.
// The toast UX lives in the sidebar (extension/sidepanel.js); the
// server-side responsibility is the audit-trail activity entry that
// appears in the activity feed even when the sidebar is closed.
private static readonly TAB_GUARDRAIL_SOFT = 50;
private static readonly TAB_GUARDRAIL_HARD = 200;
private tabGuardrailSoftHit = false;
private tabGuardrailHardHit = false;
/**
* Called from context.on('page') after a new tab is tracked. Emits at
* most one activity entry per upward crossing of each threshold.
*/
private checkTabGuardrails(): void {
const total = this.pages.size;
if (!this.tabGuardrailSoftHit && total >= BrowserManager.TAB_GUARDRAIL_SOFT) {
this.tabGuardrailSoftHit = true;
const msg = `Tab count crossed ${BrowserManager.TAB_GUARDRAIL_SOFT} (now ${total}). Consider closing unused tabs — each Chromium tab holds 50300 MB.`;
console.warn(`[browse] ${msg}`);
emitActivity({ type: 'error', command: 'tab-guardrail', error: msg, tabs: total });
}
if (!this.tabGuardrailHardHit && total >= BrowserManager.TAB_GUARDRAIL_HARD) {
this.tabGuardrailHardHit = true;
const msg = `Tab count crossed ${BrowserManager.TAB_GUARDRAIL_HARD} (now ${total}). OOM risk imminent. Open the sidebar to see top RAM consumers.`;
console.error(`[browse] ${msg}`);
emitActivity({ type: 'error', command: 'tab-guardrail', error: msg, tabs: total });
}
}
/** Called from page.on('close') so the guardrails re-arm. */
private recheckTabGuardrailsOnClose(): void {
const total = this.pages.size;
if (this.tabGuardrailSoftHit && total < BrowserManager.TAB_GUARDRAIL_SOFT) {
this.tabGuardrailSoftHit = false;
}
if (this.tabGuardrailHardHit && total < BrowserManager.TAB_GUARDRAIL_HARD) {
this.tabGuardrailHardHit = false;
}
}
// Called when the headed browser disconnects without intentional teardown
// (user closed the window). Wired up by server.ts to run full cleanup
// (sidebar-agent, state file, profile locks) before exiting with code 2.
// Returns void or a Promise; rejections are caught and fall back to exit(2).
public onDisconnect: (() => void | Promise<void>) | null = null;
// `exitCode` is the resolved process exit code from the disconnect cause:
// 0 on clean user-initiated quit (e.g., Cmd+Q on headed Chromium), 2 on
// crash/signal-kill. Callers (server.ts) forward it to their shutdown
// pipeline so process supervisors (gbrowser's gbd) read the right signal.
public onDisconnect: ((exitCode?: number) => void | Promise<void>) | null = null;
getConnectionMode(): 'launched' | 'headed' { return this.connectionMode; }
@@ -296,12 +355,16 @@ export class BrowserManager {
}
if (extensionsDir) {
launchArgs.push(
`--disable-extensions-except=${extensionsDir}`,
`--load-extension=${extensionsDir}`,
'--window-position=-9999,-9999',
'--window-size=1,1',
);
// Skip --load-extension when running against a custom Chromium build that
// already bakes the extension in (e.g., GBrowser / GStack Browser.app).
// Loading it twice causes a ServiceWorkerState::SetWorkerId DCHECK crash.
if (!isCustomChromium()) {
launchArgs.push(
`--disable-extensions-except=${extensionsDir}`,
`--load-extension=${extensionsDir}`,
);
}
launchArgs.push('--window-position=-9999,-9999', '--window-size=1,1');
useHeadless = false; // extensions require headed mode; off-screen window simulates headless
console.log(`[browse] Extensions loaded from: ${extensionsDir}`);
}
@@ -621,6 +684,7 @@ export class BrowserManager {
// Inject indicator on the new tab
page.evaluate(indicatorScript).catch(() => {});
console.log(`[browse] New tab detected (id=${id}, total=${this.pages.size})`);
this.checkTabGuardrails();
});
// Persistent context opens a default page — adopt it instead of creating a new one
@@ -666,7 +730,7 @@ export class BrowserManager {
return;
}
try {
const result = this.onDisconnect();
const result = this.onDisconnect(exitCode);
if (result && typeof (result as Promise<void>).catch === 'function') {
(result as Promise<void>).catch((err) => {
console.error('[browse] onDisconnect rejected:', err);
@@ -1005,6 +1069,116 @@ export class BrowserManager {
}
}
/**
* Diagnostic for `$B memory` and the /memory endpoint.
*
* Collects:
* - Bun process memory (cross-platform, accurate, no shelling).
* - Per-tab JS heap via CDP Performance.getMetrics the most portable
* per-tab signal CDP exposes. Misses native/GPU/Skia/cache memory
* (Codex flag on the eng-review; see follow-up TODO "native/GPU
* memory breakdown").
* - Chromium process tree via SystemInfo.getProcessInfo PID + type
* + CPU time. Per-process RSS is NOT exposed via CDP and the eng
* review (D2 USE_CDP) explicitly chose CDP over shelling to `ps`,
* so RSS columns are absent and `notes[]` says why.
*
* `structures` is passed in by the caller (read-commands / server) so
* browser-manager doesn't take a hard dep on every buffer-owning module.
*/
async getMemorySnapshot(structures: MemoryStructureStats): Promise<MemorySnapshot> {
const bunMem = process.memoryUsage();
const notes: string[] = [];
// Per-tab JS heap. Lazy: only the pages we already track. A target
// that died mid-snapshot is omitted, never throws.
const tabs: MemoryTabSnapshot[] = [];
for (const [id, page] of this.pages) {
try {
const url = (() => { try { return page.url(); } catch { return ''; } })();
const title = await page.title().catch(() => '');
const metrics = await withCdpSession(page, async (session) => {
await session.send('Performance.enable').catch(() => undefined);
const result = await session.send('Performance.getMetrics');
return ((result as { metrics?: Array<{ name: string; value: number }> }).metrics) ?? [];
});
const mm: Record<string, number> = {};
for (const m of metrics) mm[m.name] = m.value;
tabs.push({
id,
url,
title,
jsHeapUsed: mm.JSHeapUsedSize ?? 0,
jsHeapTotal: mm.JSHeapTotalSize ?? 0,
documents: mm.Documents ?? 0,
nodes: mm.Nodes ?? 0,
listeners: mm.JSEventListeners ?? 0,
});
} catch {
// Target died or CDP unavailable mid-snapshot — skip this tab.
}
}
// Chromium process tree. Browser handle may be on the `browser` field
// (launched mode) or accessible via `context.browser()` (persistent
// context / headed mode); try both.
let processes: MemoryProcess[] | null = null;
const browser: Browser | null = this.browser ?? (this.context ? this.context.browser() : null);
if (browser) {
try {
// `newBrowserCDPSession` is browser-wide. Not exposed on every
// Playwright TypeScript surface, but present at runtime on the
// Browser instance — use a typed cast to avoid the @ts-expect-error.
type BrowserWithCDP = Browser & {
newBrowserCDPSession?: () => Promise<{
send: (method: string, params?: unknown) => Promise<unknown>;
detach: () => Promise<void>;
}>;
};
const maybeFactory = (browser as BrowserWithCDP).newBrowserCDPSession;
if (typeof maybeFactory === 'function') {
const browserSession = await maybeFactory.call(browser);
try {
const info = (await browserSession.send('SystemInfo.getProcessInfo')) as {
processInfo?: Array<{ id: number; type: string; cpuTime: number }>;
};
processes = (info.processInfo ?? []).map((p) => ({
id: p.id,
type: p.type,
cpuTime: p.cpuTime,
}));
notes.push(
'Per-Chromium-process RSS not collected — SystemInfo.getProcessInfo exposes PID+type+CPU only. ' +
'See follow-up TODO "native/GPU memory breakdown" for the deferred fix.',
);
} finally {
await browserSession.detach().catch(() => undefined);
}
} else {
notes.push('Playwright build does not expose newBrowserCDPSession; per-process info skipped.');
}
} catch (err: any) {
notes.push(`CDP browser session unavailable: ${err?.message ?? String(err)}`);
}
} else {
notes.push('Browser handle unavailable (server connection mode); per-process info skipped.');
}
return {
bunServer: {
rss: bunMem.rss,
heapUsed: bunMem.heapUsed,
heapTotal: bunMem.heapTotal,
external: bunMem.external,
},
tabs,
processes,
structures,
capturedAt: Date.now(),
notes,
};
}
// ─── Ref Map (delegates to active session) ──────────────────
setRefMap(refs: Map<string, RefEntry>) {
this.getActiveSession().setRefMap(refs);
@@ -1533,6 +1707,7 @@ export class BrowserManager {
break;
}
}
this.recheckTabGuardrailsOnClose();
});
// Clear ref map on navigation — refs point to stale elements after page change
@@ -1601,23 +1776,38 @@ export class BrowserManager {
}
});
// Capture response sizes via response finished
// Capture response sizes via requestfinished — but DO NOT call
// response.body() here. Pre-fix, this listener materialized every
// response body across CDP just to read .length: multi-GB/hour of
// Buffer churn on long-lived headed Chromium with media-heavy
// pages, the primary Bun-side accelerant on the gbrowser-OOM
// investigation. req.sizes() pulls from the Network.loadingFinished
// event Chromium already emits — accurate for chunked transfer,
// gzip-compressed responses, and streaming media, all the cases
// where the previous Content-Length-header approach would have
// missed the size.
//
// The "single context-level CDP listener" architecture (D10's
// stretch goal — would reduce per-page listener count from N to 1
// via Target.setAutoAttach) is deferred. TODOS.md tracks it.
page.on('requestfinished', async (req) => {
try {
const res = await req.response();
if (res) {
const url = req.url();
const body = await res.body().catch(() => null);
const size = body ? body.length : 0;
for (let i = networkBuffer.length - 1; i >= 0; i--) {
const entry = networkBuffer.get(i);
if (entry && entry.url === url && !entry.size) {
networkBuffer.set(i, { ...entry, size });
break;
}
const sizes = await req.sizes().catch(() => null);
if (!sizes) return;
const url = req.url();
const size = sizes.responseBodySize ?? 0;
for (let i = networkBuffer.length - 1; i >= 0; i--) {
const entry = networkBuffer.get(i);
if (entry && entry.url === url && !entry.size) {
networkBuffer.set(i, { ...entry, size });
break;
}
}
} catch {}
} catch {
// Best-effort: requestfinished fires for aborted/cached requests too,
// where sizes() is unavailable. Missing size is acceptable; an
// unbounded throw would noise the console for every cache hit.
}
});
}
}
+75 -9
View File
@@ -25,18 +25,84 @@ import { logTelemetry } from './telemetry';
const CDP_TIMEOUT_MS = 5000;
const CDP_ACQUIRE_TIMEOUT_MS = 5000;
// Per-page CDPSession cache. Created lazily on first allow-listed call,
// cleaned up when the page closes.
// ─── CDP session lifecycle helpers ─────────────────────────────
//
// Every direct `newCDPSession(page)` call needs a matching `session.detach()`
// to release the Chromium-side CDP target. Forgetting the detach leaves the
// target attached until the underlying transport drops (often process exit),
// which on a long-lived headed browser shows up as steadily-climbing
// browser-process RSS. To make the leak class unforgettable, callers should
// go through one of these two helpers and a static-grep test
// (browse/test/cdp-session-cleanup.test.ts) fails CI if any source file
// calls `newCDPSession(` outside this module.
/**
* Ephemeral CDP session with try/finally detach. Use for one-shot CDP work
* where the caller doesn't need session reuse e.g. archive snapshots,
* `$B memory`, a single `Page.captureScreenshot`. The session is detached
* in `finally` regardless of whether `fn` threw, so the Chromium target
* doesn't leak on the error path.
*
* For repeated use of the same page (e.g. the `$B cdp` bridge or the
* inspector), use `getOrCreateCdpSession` instead it caches and detaches
* on page close.
*/
export async function withCdpSession<T>(
page: Page,
fn: (session: any) => Promise<T>,
): Promise<T> {
const session = await page.context().newCDPSession(page);
try {
return await fn(session);
} finally {
try {
await session.detach();
} catch {
// Best-effort cleanup. Session may already be detached (target closed,
// context recreated, browser disconnect). Swallowing all errors is the
// correct cleanup posture per CLAUDE.md "best-effort cleanup paths".
}
}
}
/**
* Cached long-lived CDP session keyed by Page. First call creates the
* session and registers a `page.once('close', ...)` hook that removes the
* cache entry AND calls `session.detach()`. Pre-helper code only removed
* the cache entry, leaving the Chromium-side target attached.
*
* Pass a caller-owned WeakMap so this helper doesn't impose a single global
* cache the `$B cdp` bridge and the inspector each keep their own session
* pool with different invariants (e.g. the inspector also detaches on
* `framenavigated` because DOM/CSS domain state is tied to the document).
*/
export async function getOrCreateCdpSession(
page: Page,
cache: WeakMap<Page, any>,
): Promise<any> {
let session = cache.get(page);
if (session) return session;
session = await page.context().newCDPSession(page);
cache.set(page, session);
page.once('close', () => {
cache.delete(page);
session.detach().catch(() => {
// Best-effort cleanup — see withCdpSession finally block.
});
});
return session;
}
// ─── $B cdp bridge ─────────────────────────────────────────────
// Per-page CDPSession cache. Lifecycle delegated to getOrCreateCdpSession
// which registers a close hook that BOTH removes the cache entry AND calls
// session.detach() — pre-helper code only did the former, leaving the
// Chromium-side target attached.
const sessionCache: WeakMap<Page, any> = new WeakMap();
async function getCdpSession(page: Page): Promise<any> {
let s = sessionCache.get(page);
if (s) return s;
s = await page.context().newCDPSession(page);
sessionCache.set(page, s);
// Clear cache on detach so we don't hold a stale handle.
page.once('close', () => sessionCache.delete(page));
return s;
return getOrCreateCdpSession(page, sessionCache);
}
export interface CdpDispatchInput {
+75 -9
View File
@@ -13,6 +13,7 @@
*/
import type { Page } from 'playwright';
import { getOrCreateCdpSession } from './cdp-bridge';
// ─── Types ──────────────────────────────────────────────────────
@@ -106,15 +107,23 @@ async function getOrCreateSession(page: Page): Promise<any> {
}
}
session = await page.context().newCDPSession(page);
cdpSessions.set(page, session);
session = await getOrCreateCdpSession(page, cdpSessions);
// Enable DOM and CSS domains
await session.send('DOM.enable');
await session.send('CSS.enable');
initializedPages.add(page);
// Enable DOM and CSS domains on first init for this page. The session
// itself is cached + close-detached by getOrCreateCdpSession; the
// initializedPages WeakSet is inspector-layer state that needs its
// own close hook to stay in sync.
if (!initializedPages.has(page)) {
await session.send('DOM.enable');
await session.send('CSS.enable');
initializedPages.add(page);
page.once('close', () => initializedPages.delete(page));
}
// Auto-detach on navigation
// Auto-detach on navigation — DOM/CSS domain state is tied to the
// document. Close-detach (from getOrCreateCdpSession) handles the
// tab-close case; framenavigated catches in-tab navigation that
// invalidates inspector state without closing the tab.
page.once('framenavigated', () => {
try {
session.detach().catch(() => {});
@@ -130,7 +139,41 @@ async function getOrCreateSession(page: Page): Promise<any> {
// ─── Modification History ───────────────────────────────────────
// Bounded FIFO of style modifications. Pre-cap, this was an unbounded
// module-scoped array that grew for every CSS edit made through $B css
// across the whole browser session — small per-entry footprint but no
// upper bound, the kind of slow leak that compounds over multi-day
// inspector use. The cap is 200 because per-session undo workflows
// rarely walk back more than a handful of edits, and a user who really
// wants to roll a long change back can `$B css reset` to revert all of
// them. totalPushed is monotonic across the session so undoModification
// can tell the user when their target index has been evicted, instead
// of just "no modification at index N".
const MOD_HISTORY_CAP = 200;
const modificationHistory: StyleModification[] = [];
let modHistoryTotalPushed = 0;
function pushModification(mod: StyleModification): void {
modificationHistory.push(mod);
modHistoryTotalPushed++;
while (modificationHistory.length > MOD_HISTORY_CAP) {
modificationHistory.shift();
}
}
// Test-only entry: exposes the history-cap mechanics (push, reset, cap value)
// without requiring a CDP-driven Page. Production code must go through
// modifyStyle / undoModification / resetModifications.
export const __testInternals = {
pushModification,
MOD_HISTORY_CAP,
getRawHistory: () => modificationHistory.slice(),
getTotalPushed: () => modHistoryTotalPushed,
resetForTest: () => {
modificationHistory.length = 0;
modHistoryTotalPushed = 0;
},
};
// ─── Specificity Calculation ────────────────────────────────────
@@ -559,7 +602,7 @@ export async function modifyStyle(
method,
};
modificationHistory.push(modification);
pushModification(modification);
return modification;
}
@@ -569,7 +612,12 @@ export async function modifyStyle(
export async function undoModification(page: Page, index?: number): Promise<void> {
const idx = index ?? modificationHistory.length - 1;
if (idx < 0 || idx >= modificationHistory.length) {
throw new Error(`No modification at index ${idx}. History has ${modificationHistory.length} entries.`);
const evictedNote = modHistoryTotalPushed > MOD_HISTORY_CAP
? ` (most recent ${MOD_HISTORY_CAP} only — ${modHistoryTotalPushed - MOD_HISTORY_CAP} earlier entries evicted at the cap)`
: '';
throw new Error(
`No modification at index ${idx}. History has ${modificationHistory.length} entries${evictedNote}.`,
);
}
const mod = modificationHistory[idx];
@@ -622,6 +670,23 @@ export function getModificationHistory(): StyleModification[] {
return [...modificationHistory];
}
/**
* Diagnostic accessor for the $B memory snapshot. Returns current buffer
* occupancy, the cap, and how many entries have been evicted since the
* last reset.
*/
export function getModificationHistoryStats(): {
current: number;
cap: number;
evicted: number;
} {
return {
current: modificationHistory.length,
cap: MOD_HISTORY_CAP,
evicted: Math.max(0, modHistoryTotalPushed - MOD_HISTORY_CAP),
};
}
/**
* Reset all modifications, restoring original values.
*/
@@ -648,6 +713,7 @@ export async function resetModifications(page: Page): Promise<void> {
}
}
modificationHistory.length = 0;
modHistoryTotalPushed = 0;
}
/**
+262 -92
View File
@@ -11,11 +11,13 @@
import * as fs from 'fs';
import * as path from 'path';
import { spawn as nodeSpawn } from 'child_process';
import { safeUnlink, safeUnlinkQuiet, safeKill, isProcessAlive } from './error-handling';
import { writeSecureFile, mkdirSecure } from './file-permissions';
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
import { parseProxyConfig, computeConfigHash, ProxyConfigError } from './proxy-config';
import { redactProxyUrl } from './proxy-redact';
import { spawnTerminalAgent } from './terminal-agent-control';
const config = resolveConfig();
const IS_WINDOWS = process.platform === 'win32';
@@ -209,6 +211,86 @@ function cleanupLegacyState(): void {
}
}
// ─── Chromium profile lock helpers (#1781) ─────────────────────
/** Profile dir used by headed/connect Chromium sessions. */
function chromiumProfileDir(): string {
return path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
}
/** Remove Chromium SingletonLock/Socket/Cookie so a relaunch can acquire the
* profile. Safe to call when absent. */
function cleanChromiumProfileLocks(profileDir: string = chromiumProfileDir()): void {
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
safeUnlinkQuiet(path.join(profileDir, lockFile));
}
}
/** Kill an orphaned Chromium that still holds the profile's SingletonLock. The
* lock symlink target is "hostname-PID"; killing that PID tears down its
* renderer tree so the next launch starts clean. No-op when absent/stale. */
async function killOrphanChromium(profileDir: string = chromiumProfileDir()): Promise<void> {
try {
const lockTarget = fs.readlinkSync(path.join(profileDir, 'SingletonLock')); // "hostname-12345"
const orphanPid = parseInt(lockTarget.split('-').pop() || '', 10);
if (orphanPid && isProcessAlive(orphanPid)) {
safeKill(orphanPid, 'SIGTERM');
await new Promise(r => setTimeout(r, 1000));
if (isProcessAlive(orphanPid)) {
safeKill(orphanPid, 'SIGKILL');
await new Promise(r => setTimeout(r, 500));
}
}
} catch (err: any) {
if (err?.code !== 'ENOENT' && err?.code !== 'EINVAL') throw err;
}
}
/** Bounded /health probe. Returns true if the server answers within `attempts`
* tries spaced `backoffMs` apart distinguishes a busy-but-alive daemon from a
* dead one (#1781) so a slow server isn't killed and restarted into a crash-loop. */
async function probeHealthWithBackoff(port: number, attempts = 3, backoffMs = 250): Promise<boolean> {
for (let i = 0; i < attempts; i++) {
if (await isServerHealthy(port)) return true;
if (i < attempts - 1) await Bun.sleep(backoffMs);
}
return false;
}
/**
* Build the env for an auto-restart after a crash. headed/proxy/configHash are
* reapplied from THIS invocation OR the persisted server state, so a restart
* triggered by a plain command (goto/status, no --headed flag) never silently
* downgrades a headed session to headless (#1781). Pure + exported for tests.
*/
export function buildRestartEnv(
globalFlags: GlobalFlags | null | undefined,
oldState: ServerState | null,
): Record<string, string> {
const env: Record<string, string> = {};
if (globalFlags?.proxyUrl) env.BROWSE_PROXY_URL = globalFlags.proxyUrl;
if (globalFlags?.headed || oldState?.mode === 'headed') env.BROWSE_HEADED = '1';
const configHash = globalFlags?.configHash || oldState?.configHash;
if (configHash) env.BROWSE_CONFIG_HASH = configHash;
return env;
}
/** macOS only: pull the headed Chromium window to the user's current Space.
* "Google Chrome for Testing" frequently opens behind the active window or on
* another Space the first thing users read as "I can't see the browser"
* (#1781). Best-effort, fire-and-forget, never throws. The app name is a fixed
* literal (no interpolation). */
function raiseHeadedWindowMacOS(): void {
if (process.platform !== 'darwin') return;
try {
nodeSpawn('osascript', ['-e', 'tell application "Google Chrome for Testing" to activate'], {
stdio: 'ignore',
detached: true,
}).unref();
} catch {
// osascript missing or app not present — non-fatal
}
}
// ─── Server Lifecycle ──────────────────────────────────────────
async function startServer(extraEnv?: Record<string, string>): Promise<ServerState> {
ensureStateDir(config);
@@ -217,7 +299,12 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
safeUnlink(config.stateFile);
safeUnlink(path.join(config.stateDir, 'browse-startup-error.log'));
let proc: any = null;
// #1781: clear a stale Chromium profile lock (and kill the orphan still
// holding it) before launch, so an auto-restart after an abrupt kill isn't
// blocked by the previous Chromium's SingletonLock — the self-inflicted
// crash-loop. Previously only the manual connect preamble did this.
await killOrphanChromium();
cleanChromiumProfileLocks();
// Allow the caller to opt out of the parent-process watchdog by setting
// BROWSE_PARENT_PID=0 in the environment. Useful for CI, non-interactive
@@ -240,12 +327,22 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
`${extraEnvStr})}).unref()`;
Bun.spawnSync(['node', '-e', launcherCode], { stdio: ['ignore', 'ignore', 'ignore'] });
} else {
// macOS/Linux: Bun.spawn + unref works correctly
proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], {
stdio: ['ignore', 'pipe', 'pipe'],
// macOS/Linux: Bun.spawn().unref() only removes the child from Bun's event
// loop — it does NOT call setsid(), so the spawned server stays in the
// parent's process session. When the CLI runs inside a session-managed
// shell (e.g. Claude Code's per-command Bash sandbox, Conductor, CI
// step runners), the session leader's exit sends SIGHUP to every PID in
// the session, killing the bun server (and its Chromium grandchildren).
// Even with BROWSE_PARENT_PID=0 disabling the watchdog, SIGHUP still
// reaps the server. Use Node's child_process.spawn with detached:true,
// which calls setsid() so the server becomes its own session leader
// (PPID=1, STAT=Ss) and survives the spawning shell's exit. Mirrors
// the Windows path's rationale — same root cause, different OS API.
nodeSpawn('bun', ['run', SERVER_SCRIPT], {
detached: true,
stdio: ['ignore', 'ignore', 'ignore'],
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile, BROWSE_PARENT_PID: parentPid, ...extraEnv },
});
proc.unref();
}).unref();
}
// Wait for server to become healthy.
@@ -260,27 +357,17 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
await Bun.sleep(100);
}
// Server didn't start in time — try to get error details
if (proc?.stderr) {
// macOS/Linux: read stderr from the spawned process
const reader = proc.stderr.getReader();
const { value } = await reader.read();
if (value) {
const errText = new TextDecoder().decode(value);
throw new Error(`Server failed to start:\n${errText}`);
}
} else {
// Windows: check startup error log (server writes errors to disk since
// stderr is unavailable due to stdio: 'ignore' for detachment)
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
try {
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
if (errorLog) {
throw new Error(`Server failed to start:\n${errorLog}`);
}
} catch (e: any) {
if (e.code !== 'ENOENT') throw e;
// Server didn't start in time — check the on-disk startup error log.
// Both platforms now spawn with stdio: 'ignore', so the server writes
// errors to disk for the CLI to read (see server.ts start().catch).
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
try {
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
if (errorLog) {
throw new Error(`Server failed to start:\n${errorLog}`);
}
} catch (e: any) {
if (e.code !== 'ENOENT') throw e;
}
throw new Error(`Server failed to start within ${MAX_START_WAIT / 1000}s`);
}
@@ -486,26 +573,42 @@ async function sendCommand(state: ServerState, command: string, args: string[],
}
} catch (err: any) {
if (err.name === 'AbortError') {
console.error('[browse] Command timed out after 30s');
// #1781: a 30s timeout on a heavy page usually means busy, not dead.
// Don't kill a live server (that's what triggered the crash-loop) — report
// and exit so the user can retry rather than losing their (headed) window.
const ts = readState();
const alive = ts?.pid ? isProcessAlive(ts.pid) : false;
console.error(alive
? '[browse] Command timed out after 30s (server still alive — busy, not restarting). Retry, or raise load.'
: '[browse] Command timed out after 30s');
process.exit(1);
}
// Connection error — server may have crashed
// Connection error — server may have crashed, OR may just be busy.
if (err.code === 'ECONNREFUSED' || err.code === 'ECONNRESET' || err.message?.includes('fetch failed')) {
const oldState = readState();
// #1781 busy-vs-dead: a single-threaded daemon under beacon/extension load
// can briefly stop answering HTTP while still alive. Before declaring a
// crash, if the process is alive give /health a bounded chance to recover
// and just retry the command — never kill+restart a live-but-busy server.
if (oldState?.pid && isProcessAlive(oldState.pid) && await probeHealthWithBackoff(oldState.port)) {
if (retries >= 1) throw new Error('[browse] Server unresponsive after retry — aborting');
console.error('[browse] Server was briefly unresponsive (busy); retrying command...');
return sendCommand(oldState, command, args, retries + 1);
}
// Truly dead (or health never recovered) → restart.
if (retries >= 1) throw new Error('[browse] Server crashed twice in a row — aborting');
console.error('[browse] Server connection lost. Restarting...');
// Kill the old server to avoid orphaned chromium processes
const oldState = readState();
if (oldState && oldState.pid) {
await killServer(oldState.pid);
}
// Reapply --proxy / --headed flags from this invocation when restarting
// after a crash. Without this, a proxied daemon that dies mid-command
// would silently restart in default direct/headless mode and bypass
// the SOCKS bridge.
const restartEnv: Record<string, string> = {};
if (_globalFlags?.proxyUrl) restartEnv.BROWSE_PROXY_URL = _globalFlags.proxyUrl;
if (_globalFlags?.headed) restartEnv.BROWSE_HEADED = '1';
if (_globalFlags?.configHash) restartEnv.BROWSE_CONFIG_HASH = _globalFlags.configHash;
// startServer() now clears the Chromium SingletonLock + reaps the orphan,
// so the relaunch isn't blocked by the dead Chromium's profile lock (#1781).
//
// Reapply --proxy / --headed when restarting. headed comes from THIS
// invocation OR the persisted server mode, so a restart triggered by a
// plain command (goto/status, no --headed) never silently downgrades a
// headed session to headless (#1781). Same for proxy/configHash.
const restartEnv = buildRestartEnv(_globalFlags, oldState);
const newState = await startServer(Object.keys(restartEnv).length ? restartEnv : undefined);
return sendCommand(newState, command, args, retries + 1);
}
@@ -966,30 +1069,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
}
}
// Kill orphaned Chromium processes that may still hold the profile lock.
// The server PID is the Bun process; Chromium is a child that can outlive it
// if the server is killed abruptly (SIGKILL, crash, manual rm of state file).
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
try {
const singletonLock = path.join(profileDir, 'SingletonLock');
const lockTarget = fs.readlinkSync(singletonLock); // e.g. "hostname-12345"
const orphanPid = parseInt(lockTarget.split('-').pop() || '', 10);
if (orphanPid && isProcessAlive(orphanPid)) {
safeKill(orphanPid, 'SIGTERM');
await new Promise(resolve => setTimeout(resolve, 1000));
if (isProcessAlive(orphanPid)) {
safeKill(orphanPid, 'SIGKILL');
await new Promise(resolve => setTimeout(resolve, 500));
}
}
} catch (err: any) {
if (err?.code !== 'ENOENT' && err?.code !== 'EINVAL') throw err;
}
// Clean up Chromium profile locks (can persist after crashes)
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
safeUnlinkQuiet(path.join(profileDir, lockFile));
}
// Kill an orphaned Chromium still holding the profile lock (the Bun server
// PID's Chromium child can outlive an abrupt kill/crash), then clear the
// lock files so the launch is clean. Shared with the auto-restart path (#1781).
await killOrphanChromium();
cleanChromiumProfileLocks();
// Delete stale state file
safeUnlinkQuiet(config.stateFile);
@@ -1027,38 +1111,29 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
});
const status = await resp.text();
console.log(`Connected to real Chrome\n${status}`);
// #1781: surface the window — it often opens behind/on another Space.
raiseHeadedWindowMacOS();
if (process.platform === 'darwin') {
console.log('(If you still don\'t see it, check Mission Control / other Spaces.)');
}
// sidebar-agent.ts spawn was here. Ripped alongside the chat queue —
// the Terminal pane runs an interactive PTY now, no more one-shot
// claude -p subprocesses to multiplex.
// Auto-start terminal agent (non-compiled bun process). Owns the PTY
// WebSocket for the sidebar Terminal pane.
let termAgentScript = path.resolve(__dirname, 'terminal-agent.ts');
if (!fs.existsSync(termAgentScript)) {
termAgentScript = path.resolve(path.dirname(process.execPath), '..', 'src', 'terminal-agent.ts');
}
// WebSocket for the sidebar Terminal pane. Routes through the shared
// spawnTerminalAgent helper so the CLI cold-start path and the
// server.ts watchdog respawn path share one implementation. The
// helper handles prior-PID cleanup, script lookup, and env wiring.
try {
if (fs.existsSync(termAgentScript)) {
// Kill old terminal-agents so a stale port file can't trick the
// server into routing /pty-session at a dead listener.
try {
const { spawnSync } = require('child_process');
spawnSync('pkill', ['-f', 'terminal-agent\\.ts'], { stdio: 'ignore', timeout: 3000 });
} catch (err: any) {
if (err?.code !== 'ENOENT') throw err;
}
const termProc = Bun.spawn(['bun', 'run', termAgentScript], {
cwd: config.projectDir,
env: {
...process.env,
BROWSE_STATE_FILE: config.stateFile,
BROWSE_SERVER_PORT: String(newState.port),
},
stdio: ['ignore', 'ignore', 'ignore'],
});
termProc.unref();
console.log(`[browse] Terminal agent started (PID: ${termProc.pid})`);
const newPid = spawnTerminalAgent({
stateFile: config.stateFile,
serverPort: newState.port,
cwd: config.projectDir,
});
if (newPid) {
console.log(`[browse] Terminal agent started (PID: ${newPid})`);
}
} catch (err: any) {
// Non-fatal: chat still works without the terminal agent.
@@ -1068,6 +1143,96 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
console.error(`[browse] Connect failed: ${err.message}`);
process.exit(1);
}
// ─── Outer Supervisor (v1.44+, opt-in) ──────────────────────────
//
// Default: fire-and-forget (CLI exits, server runs detached). This is
// the contract every existing call site relies on, including Claude
// Code's Bash tool which expects `$B connect` to return promptly.
//
// Opt-in via `--supervise` flag or BROWSE_SUPERVISE=1 env: the CLI
// stays attached, polls the spawned server's PID every 30s, and
// respawns it through the same headed-mode startServer path on
// unexpected exit. Crash-loop guard: 5 respawns inside 5 min →
// give up and exit 1 with a clear error. SIGINT / SIGTERM cleanly
// tear down the supervised server before exit.
//
// Out of scope for v1.44 minimum: routing the Chromium-disconnect
// exit-code-1 path back through this supervisor. The terminal-agent
// watchdog (T5) already covers the highest-frequency restart case;
// Chromium-crash-respawn is documented as a follow-up so the
// supervisor stays a tight, testable primitive.
const superviseRequested = commandArgs.includes('--supervise')
|| process.env.BROWSE_SUPERVISE === '1';
if (!superviseRequested) {
process.exit(0);
}
console.log('[browse] Supervisor mode: monitoring server. Ctrl-C to stop.');
let supervisorExiting = false;
const teardownAndExit = (signal: string) => {
if (supervisorExiting) return;
supervisorExiting = true;
console.log(`\n[browse] ${signal} received — stopping server.`);
const state = readState();
if (state?.pid && isProcessAlive(state.pid)) {
safeKill(state.pid, 'SIGTERM');
}
process.exit(0);
};
process.on('SIGINT', () => teardownAndExit('SIGINT'));
process.on('SIGTERM', () => teardownAndExit('SIGTERM'));
const SUPERVISOR_TICK_MS = parseInt(
process.env.GSTACK_SUPERVISOR_TICK_MS || '30000',
10,
);
const SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000;
const SUPERVISOR_GUARD_MAX = 5;
const SUPERVISOR_BACKOFF_MS = (process.env.GSTACK_SUPERVISOR_BACKOFF || '1000,2000,4000,8000,30000')
.split(',').map(s => parseInt(s.trim(), 10)).filter(n => Number.isFinite(n));
const respawns: number[] = [];
while (!supervisorExiting) {
await new Promise(resolve => setTimeout(resolve, SUPERVISOR_TICK_MS));
if (supervisorExiting) break;
const state = readState();
if (state?.pid && isProcessAlive(state.pid)) continue;
// Server died. Prune rolling window and check guard.
const now = Date.now();
while (respawns.length && now - respawns[0] > SUPERVISOR_GUARD_WINDOW_MS) {
respawns.shift();
}
if (respawns.length >= SUPERVISOR_GUARD_MAX) {
console.error(
`[browse] Supervisor: ${SUPERVISOR_GUARD_MAX} crashes in ${SUPERVISOR_GUARD_WINDOW_MS / 1000}s — giving up.`,
);
process.exit(1);
}
const attempt = respawns.length;
respawns.push(now);
const backoff = SUPERVISOR_BACKOFF_MS[Math.min(attempt, SUPERVISOR_BACKOFF_MS.length - 1)] ?? 30_000;
console.warn(`[browse] Supervisor: server PID gone — respawning in ${backoff}ms (attempt ${attempt + 1}/${SUPERVISOR_GUARD_MAX})...`);
await new Promise(resolve => setTimeout(resolve, backoff));
if (supervisorExiting) break;
try {
const respawned = await startServer(serverEnv);
console.log(`[browse] Supervisor: server respawned (PID ${respawned.pid}, port ${respawned.port}).`);
// Re-spawn the terminal-agent too; same env wiring as the initial connect.
try {
spawnTerminalAgent({
stateFile: config.stateFile,
serverPort: respawned.port,
cwd: config.projectDir,
});
} catch (err: any) {
console.warn(`[browse] Supervisor: terminal-agent respawn failed: ${err?.message || err}`);
}
} catch (err: any) {
console.error(`[browse] Supervisor: server respawn failed: ${err?.message || err}`);
// Let the next tick try again — the crash-loop guard already
// bounded the retries via the rolling window.
}
}
process.exit(0);
}
@@ -1118,11 +1283,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
safeKill(existingState.pid, 'SIGKILL');
}
}
// Clean profile locks and state file
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
safeUnlinkQuiet(path.join(profileDir, lockFile));
}
// #1781: killing the daemon can orphan its Chromium child tree, which keeps
// holding the SingletonLock and makes the next `connect` fail to launch.
// Reap the orphan via the lock, then clear the lock files + state.
await killOrphanChromium();
cleanChromiumProfileLocks();
// Xvfb orphan cleanup: if the recorded PID still matches our Xvfb (by
// cmdline AND start-time), kill it. PID-only would risk killing a
// recycled PID belonging to an unrelated process.
@@ -1182,6 +1347,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
}
await sendCommand(state, command, commandArgs);
// #1781: `focus` means "show me the window". The server-side focus activates
// the page via CDP, but on macOS the app can still sit on another Space — pull
// it to the user's current Space too.
if (command === 'focus') raiseHeadedWindowMacOS();
}
if (import.meta.main) {
+4 -2
View File
@@ -45,6 +45,7 @@ export const META_COMMANDS = new Set([
'domain-skill',
'skill',
'cdp',
'memory',
]);
export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]);
@@ -89,6 +90,7 @@ export function wrapUntrustedContent(result: string, url: string): string {
export const COMMAND_DESCRIPTIONS: Record<string, { category: string; description: string; usage?: string }> = {
// Navigation
'memory': { category: 'Server', description: 'Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json.', usage: 'memory [--json]' },
'goto': { category: 'Navigation', description: 'Navigate to URL (http://, https://, or file:// scoped to cwd/TEMP_DIR)', usage: 'goto <url>' },
'load-html': { category: 'Navigation', description: 'Load HTML via setContent. Accepts a file path under safe-dirs (validated), OR --from-file <payload.json> with {"html":"...","waitUntil":"..."} for large inline HTML (Windows argv safe).', usage: 'load-html <file> [--wait-until load|domcontentloaded|networkidle] [--tab-id <N>] | load-html --from-file <payload.json> [--tab-id <N>]' },
'back': { category: 'Navigation', description: 'History back' },
@@ -104,8 +106,8 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
'media': { category: 'Reading', description: 'All media elements (images, videos, audio) with URLs, dimensions, types', usage: 'media [--images|--videos|--audio] [selector]' },
'data': { category: 'Reading', description: 'Structured data: JSON-LD, Open Graph, Twitter Cards, meta tags', usage: 'data [--jsonld|--og|--meta|--twitter]' },
// Inspection
'js': { category: 'Inspection', description: 'Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file.', usage: 'js <expr>' },
'eval': { category: 'Inspection', description: 'Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners.', usage: 'eval <file>' },
'js': { category: 'Inspection', description: 'Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel).', usage: 'js <expr> [--out <file>] [--raw]' },
'eval': { category: 'Inspection', description: 'Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel).', usage: 'eval <file> [--out <file>] [--raw]' },
'css': { category: 'Inspection', description: 'Computed CSS value', usage: 'css <sel> <prop>' },
'attrs': { category: 'Inspection', description: 'Element attributes as JSON', usage: 'attrs <sel|@ref>' },
'is': { category: 'Inspection', description: 'State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected.', usage: 'is <prop> <sel|@ref>' },
+44 -3
View File
@@ -5,7 +5,7 @@
* Outputs the absolute path to the browse binary on stdout, or exits 1 if not found.
*/
import { existsSync } from 'fs';
import { accessSync, constants } from 'fs';
import { join } from 'path';
import { homedir } from 'os';
@@ -24,6 +24,35 @@ function getGitRoot(): string | null {
}
}
// Probe a path for executability. accessSync(X_OK) checks the executable
// bit on Linux/macOS and degrades to an existence check on Windows (no
// true execute bit). Mirrors make-pdf/src/browseClient.ts:159 /
// make-pdf/src/pdftotext.ts:117.
function isExecutable(p: string): boolean {
try {
accessSync(p, constants.X_OK);
return true;
} catch {
return false;
}
}
// Resolve a bare binary path to the actual file on disk. On Windows, `bun
// build --compile` appends `.exe` to the output filename, so `browse` on
// disk is actually `browse.exe`. After a bare-path probe, try the Windows
// extensions. Linux/macOS behavior is unchanged. Mirrors the helper in
// make-pdf/src/browseClient.ts:89 and make-pdf/src/pdftotext.ts:52.
function findExecutable(base: string): string | null {
if (isExecutable(base)) return base;
if (process.platform === 'win32') {
for (const ext of ['.exe', '.cmd', '.bat']) {
const withExt = base + ext;
if (isExecutable(withExt)) return withExt;
}
}
return null;
}
export function locateBinary(): string | null {
const root = getGitRoot();
const home = homedir();
@@ -33,14 +62,26 @@ export function locateBinary(): string | null {
if (root) {
for (const m of markers) {
const local = join(root, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
if (existsSync(local)) return local;
const found = findExecutable(local);
if (found) return found;
}
// Source-checkout fallback (no installed skill layout — the binary
// lives directly at <repo>/browse/dist/browse[.exe]). Hit by:
// - gstack repo dev workflow before `./setup` runs
// - the windows-setup-e2e.yml CI workflow which builds binaries
// in place but never installs them under a marker dir
// - make-pdf consumers running from a sibling source checkout
const sourceCheckout = join(root, 'browse', 'dist', 'browse');
const sourceFound = findExecutable(sourceCheckout);
if (sourceFound) return sourceFound;
}
// Global fallback
for (const m of markers) {
const global = join(home, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
if (existsSync(global)) return global;
const found = findExecutable(global);
if (found) return found;
}
return null;
+78
View File
@@ -0,0 +1,78 @@
/**
* find-security-sidecar resolve the Node entry that runs the L4 ML
* classifier sidecar.
*
* The sidecar can't be bundled into the compiled browse binary because
* onnxruntime-node fails to dlopen from Bun's compile extract dir. It runs
* as a separate Node subprocess instead. This module resolves the right
* path + interpreter on each platform:
*
* 1. Prefer node on PATH + a bundled JS entry at
* browse/dist/security-sidecar.js (built by package.json's
* build:security-sidecar script).
* 2. Dev fallback: node + browse/src/security-sidecar-entry.ts via tsx
* (only available in the source checkout, not the compiled install).
* 3. If Node is missing or no entry resolves, return null. The /pty-inject-scan
* endpoint then responds with l4 { available: false } and the extension
* degrades to WARN+confirm (D7).
*/
import { existsSync } from "fs";
import { join, dirname } from "path";
import { execFileSync } from "child_process";
export interface SidecarLocation {
node: string;
entry: string;
/** "compiled" if running from browse/dist/, "dev" if running from src */
mode: "compiled" | "dev";
}
function nodeOnPath(): string | null {
try {
execFileSync("node", ["--version"], { stdio: "ignore", timeout: 2000 });
return "node";
} catch {
return null;
}
}
function browseRoot(): string {
// When running compiled, __dirname (via import.meta.dir) points at the
// Bun extract temp. Walk up until we find a directory containing
// browse/dist/ or browse/src/.
let candidate = dirname(import.meta.path || "");
for (let i = 0; i < 6; i += 1) {
if (existsSync(join(candidate, "browse", "dist", "security-sidecar.js"))) {
return candidate;
}
if (existsSync(join(candidate, "src", "security-sidecar-entry.ts"))) {
return candidate;
}
const next = dirname(candidate);
if (next === candidate) break;
candidate = next;
}
return process.cwd();
}
export function findSecuritySidecar(): SidecarLocation | null {
const node = nodeOnPath();
if (!node) return null;
const root = browseRoot();
const compiled = join(root, "browse", "dist", "security-sidecar.js");
if (existsSync(compiled)) {
return { node, entry: compiled, mode: "compiled" };
}
// Dev fallback. Compiled installs won't have src/ on disk so this only
// resolves when running from the source checkout.
const devEntry = join(root, "src", "security-sidecar-entry.ts");
if (existsSync(devEntry)) {
return { node, entry: devEntry, mode: "dev" };
}
return null;
}
+115
View File
@@ -0,0 +1,115 @@
// `$B memory` — diagnostic snapshot of Bun heap + per-tab JS heap +
// Chromium process tree + bounded buffer sizes. Lives in its own file
// because the meta-commands dispatcher imports it lazily — projects
// that never run the diagnostic don't pay the import-graph cost (CDP
// bridge, memory-snapshot types, buffer accessors).
import type { BrowserManager } from './browser-manager';
import { formatBytes, type MemorySnapshot, type MemoryStructureStats } from './memory-snapshot';
import { getModificationHistoryStats } from './cdp-inspector';
import { getSubscriberCount as getActivitySubscriberCount } from './activity';
import { getInspectorSubscriberCount } from './server';
import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers';
import { getCaptureBuffer } from './network-capture';
/**
* Assemble the MemoryStructureStats from the modules that own each buffer.
* Browser-manager doesn't take a hard dep on every buffer-owning module
* the snapshot caller passes them in.
*/
function collectStructureStats(): MemoryStructureStats {
return {
modificationHistory: getModificationHistoryStats(),
activitySubscribers: getActivitySubscriberCount(),
inspectorSubscribers: getInspectorSubscriberCount(),
consoleBufferLen: consoleBuffer.length,
networkBufferLen: networkBuffer.length,
dialogBufferLen: dialogBuffer.length,
captureBufferBytes: getCaptureBuffer().byteSize,
};
}
/**
* Pretty-print the snapshot for terminal output. JSON mode (--json) goes
* straight through JSON.stringify so the extension footer and any test
* harness can consume it programmatically.
*/
function formatSnapshotText(s: MemorySnapshot): string {
const lines: string[] = [];
lines.push(
`Bun server: RSS: ${formatBytes(s.bunServer.rss)} ` +
`heap: ${formatBytes(s.bunServer.heapUsed)} / ${formatBytes(s.bunServer.heapTotal)} ` +
`external: ${formatBytes(s.bunServer.external)}`,
);
if (s.processes && s.processes.length > 0) {
// Group by type so the user sees "renderer: 12" vs listing 12 separate rows.
const byType: Record<string, number> = {};
for (const p of s.processes) byType[p.type] = (byType[p.type] ?? 0) + 1;
const typeSummary = Object.entries(byType)
.map(([t, n]) => `${t}=${n}`)
.join(' ');
lines.push(`Chromium processes: ${s.processes.length} total (${typeSummary})`);
} else if (s.processes === null) {
lines.push('Chromium processes: (unavailable — see notes)');
} else {
lines.push('Chromium processes: 0');
}
if (s.tabs.length > 0) {
// Sort by JS heap descending; show top 10 plus "...N more" tail.
const sorted = [...s.tabs].sort((a, b) => b.jsHeapUsed - a.jsHeapUsed);
const shown = sorted.slice(0, 10);
lines.push(`Renderers: ${s.tabs.length} tabs (top by JS heap):`);
for (const t of shown) {
const urlShort = t.url.length > 80 ? t.url.slice(0, 77) + '...' : t.url;
lines.push(
` [${formatBytes(t.jsHeapUsed).padStart(8)} JS, ` +
`${String(t.nodes).padStart(6)} nodes, ` +
`${String(t.listeners).padStart(5)} listeners] ` +
`tab #${t.id}${urlShort}`,
);
}
if (sorted.length > shown.length) {
lines.push(` ...and ${sorted.length - shown.length} more`);
}
} else {
lines.push('Renderers: (no tabs tracked)');
}
lines.push('─────────────────────────────────────────────────');
lines.push('In-memory structures (Bun side):');
const m = s.structures.modificationHistory;
lines.push(
` modificationHistory: ${m.current} / ${m.cap} entries` +
(m.evicted > 0 ? ` (${m.evicted} evicted since reset)` : ''),
);
lines.push(` inspectorSubscribers: ${s.structures.inspectorSubscribers}`);
lines.push(` activitySubscribers: ${s.structures.activitySubscribers}`);
lines.push(` consoleBuffer: ${s.structures.consoleBufferLen} entries`);
lines.push(` networkBuffer: ${s.structures.networkBufferLen} entries`);
lines.push(` dialogBuffer: ${s.structures.dialogBufferLen} entries`);
lines.push(` captureBuffer: ${formatBytes(s.structures.captureBufferBytes)}`);
if (s.notes.length > 0) {
lines.push('');
lines.push('Notes:');
for (const n of s.notes) lines.push(` - ${n}`);
}
return lines.join('\n');
}
export async function handleMemoryCommand(args: string[], bm: BrowserManager): Promise<string> {
const jsonMode = args.includes('--json');
const structures = collectStructureStats();
const snapshot = await bm.getMemorySnapshot(structures);
if (jsonMode) return JSON.stringify(snapshot);
return formatSnapshotText(snapshot);
}
/** Entry point used by the /memory HTTP endpoint — same data, always JSON. */
export async function buildMemorySnapshotJson(bm: BrowserManager): Promise<MemorySnapshot> {
const structures = collectStructureStats();
return bm.getMemorySnapshot(structures);
}
+73
View File
@@ -0,0 +1,73 @@
// Shared types for the $B memory diagnostic command and the /memory
// endpoint. Lives in its own module so server.ts, read-commands.ts, and
// the extension footer poll can import without taking a circular dep on
// browser-manager.ts.
//
// Background: the gbrowser-OOM investigation (160 GB Activity Monitor
// reading on a friend's machine) needed a diagnostic that could land
// before the next incident — measurement comes first, fixes come after.
// $B memory is that diagnostic.
/** Counts/bytes for the bounded in-memory structures on the Bun side. */
export interface MemoryStructureStats {
modificationHistory: { current: number; cap: number; evicted: number };
activitySubscribers: number;
inspectorSubscribers: number;
consoleBufferLen: number;
networkBufferLen: number;
dialogBufferLen: number;
captureBufferBytes: number;
}
/** Per-tab JS heap snapshot (CDP Performance.getMetrics). */
export interface MemoryTabSnapshot {
id: number;
url: string;
title: string;
jsHeapUsed: number;
jsHeapTotal: number;
documents: number;
nodes: number;
listeners: number;
}
/** Chromium process metadata via CDP SystemInfo.getProcessInfo. */
export interface MemoryProcess {
/** Chromium-internal process id (not OS PID). */
id: number;
/** 'browser' | 'renderer' | 'gpu' | 'utility' | 'extension' | ... */
type: string;
/** CPU time accumulated since process start (seconds). */
cpuTime: number;
}
export interface MemorySnapshot {
bunServer: {
rss: number;
heapUsed: number;
heapTotal: number;
external: number;
};
tabs: MemoryTabSnapshot[];
/**
* Chromium process tree. `null` when no browser handle is available
* (server in connection mode, or browser not yet launched).
*
* Per-process RSS is NOT included: SystemInfo.getProcessInfo returns
* id+type+cpuTime but Chromium does not expose RSS via CDP. The
* `notes[]` field tells the caller why see the follow-up TODO
* "native/GPU memory breakdown" for the deferred fix.
*/
processes: MemoryProcess[] | null;
structures: MemoryStructureStats;
capturedAt: number;
notes: string[];
}
/** Format bytes as a short human string ("1.4 GB", "312 MB", "84 KB"). */
export function formatBytes(n: number): string {
if (n < 1024) return `${n} B`;
if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KB`;
if (n < 1024 * 1024 * 1024) return `${(n / 1024 / 1024).toFixed(1)} MB`;
return `${(n / 1024 / 1024 / 1024).toFixed(2)} GB`;
}
+25 -2
View File
@@ -11,6 +11,7 @@ import { handleSkillCommand } from './browser-skill-commands';
import { validateNavigationUrl } from './url-validation';
import { checkScope, type TokenInfo } from './token-registry';
import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security';
import { guardScreenshotBuffer, guardScreenshotPath } from './screenshot-size-guard';
// Re-export for backward compatibility (tests import from meta-commands)
export { validateOutputPath, escapeRegExp } from './path-security';
import * as Diff from 'diff';
@@ -136,7 +137,7 @@ function parsePdfArgs(args: string[]): ParsedPdfArgs {
return result;
}
function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
export function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
// Parity with load-html --from-file (browse/src/write-commands.ts) and
// the direct load-html <file> path: every caller-supplied file path
// must pass validateReadPath so the safe-dirs policy can't be skirted
@@ -149,7 +150,16 @@ function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
);
}
const raw = fs.readFileSync(payloadPath, 'utf8');
const json = JSON.parse(raw);
let json: any;
try {
json = JSON.parse(raw);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
throw new Error(`pdf: --from-file ${payloadPath} is not valid JSON (${msg}).`);
}
if (json === null || typeof json !== 'object' || Array.isArray(json)) {
throw new Error(`pdf: --from-file ${payloadPath} must be a JSON object, got ${Array.isArray(json) ? 'array' : typeof json}.`);
}
const out: ParsedPdfArgs = {
output: json.output || `${TEMP_DIR}/browse-page.pdf`,
format: json.format,
@@ -497,6 +507,10 @@ export async function handleMetaCommand(
buffer = await page.screenshot({ clip: clipRect });
} else {
buffer = await page.screenshot({ fullPage: !viewportOnly });
// Guard the most common API-bricking case (fullPage). Element /
// clip captures usually stay within the cap; we still guard the
// path-mode below for fullPage writes.
({ buffer } = await guardScreenshotBuffer(buffer));
}
if (buffer.length > 10 * 1024 * 1024) {
throw new Error('Screenshot too large for --base64 (>10MB). Use disk path instead.');
@@ -517,6 +531,7 @@ export async function handleMetaCommand(
}
await page.screenshot({ path: outputPath, fullPage: !viewportOnly });
if (!viewportOnly) await guardScreenshotPath(outputPath);
return `Screenshot saved${viewportOnly ? ' (viewport)' : ''}: ${outputPath}`;
}
@@ -567,6 +582,7 @@ export async function handleMetaCommand(
const screenshotPath = `${prefix}-${vp.name}.png`;
validateOutputPath(screenshotPath);
await page.screenshot({ path: screenshotPath, fullPage: true });
await guardScreenshotPath(screenshotPath);
results.push(`${vp.name} (${vp.width}x${vp.height}): ${screenshotPath}`);
}
@@ -1145,6 +1161,13 @@ export async function handleMetaCommand(
return await handleCdpCommand(args, bm);
}
case 'memory': {
// Lazy import — pulls in cdp-bridge + memory-snapshot + buffer accessors
// that aren't useful for projects that never run the diagnostic.
const { handleMemoryCommand } = await import('./memory-command');
return await handleMemoryCommand(args, bm);
}
default:
throw new Error(`Unknown meta command: ${command}`);
}
+137
View File
@@ -0,0 +1,137 @@
/**
* PTY session lease registry (v1.44+).
*
* Separates two concerns that pre-v1.44 were conflated under one token:
*
* - **sessionId** stable, non-secret identifier for a single PTY session.
* Safe to log, safe to include in URLs and server access logs, safe to
* keep in DevTools. Identifies "this terminal," not "you're allowed to
* use this terminal."
*
* - **attachToken** secret, short-lived (30 s) bearer credential that
* grants the WS upgrade for ONE attach attempt against a session. Minted
* on every /pty-session and /pty-session/reattach call; revoked when
* the WS upgrade consumes it. Kept out of logs.
*
* - **lease** server-side bookkeeping that maps sessionId expiresAt.
* Re-attach within the lease window resumes the same PTY (and replays
* the ring buffer from terminal-agent). Lease expiry tears down the
* session.
*
* Codex outside-voice (T1 of the eng review) pushed for this separation:
* "the auth token IS the session id" collapsed identity into a secret,
* meaning re-attach URLs and logs carry the bearer credential. The lease
* model fixes that without changing the user experience.
*
* Mint cadence:
* - Initial /pty-session: mint sessionId + lease + attachToken (one round trip).
* - /pty-session/reattach: validate sessionId/lease, mint fresh attachToken.
* - /pty-restart: revoke old lease, mint fresh sessionId + lease + attachToken.
* - /pty-dispose: revoke lease (and the terminal-agent disposes the PTY).
*
* Lease TTL is env-overridable so v1.44 e2e tests can compress detach
* windows to 1 s instead of waiting 30 minutes per assertion.
*/
import * as crypto from 'crypto';
interface Lease {
createdAt: number;
expiresAt: number;
}
const LEASE_TTL_MS = parseInt(
process.env.GSTACK_PTY_LEASE_TTL_MS || `${30 * 60 * 1000}`,
10,
); // 30 minutes default; covers idle-but-engaged user sessions
const MAX_LEASES = 10_000;
const leases = new Map<string, Lease>();
/**
* Mint a fresh sessionId + lease. Returns the non-secret sessionId and
* the expiry timestamp (caller surfaces both to the client). Never throws.
*/
export function mintLease(): { sessionId: string; expiresAt: number } {
const sessionId = crypto.randomBytes(32).toString('base64url');
const now = Date.now();
const expiresAt = now + LEASE_TTL_MS;
leases.set(sessionId, { createdAt: now, expiresAt });
pruneExpired(now);
return { sessionId, expiresAt };
}
/**
* Check whether a lease is still valid (exists AND not expired). Returns
* the current expiresAt for valid leases; null otherwise. Lazily prunes
* stale entries.
*/
export function validateLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
if (!sessionId) return { ok: false };
const lease = leases.get(sessionId);
if (!lease) {
pruneExpired(Date.now());
return { ok: false };
}
if (Date.now() > lease.expiresAt) {
leases.delete(sessionId);
pruneExpired(Date.now());
return { ok: false };
}
return { ok: true, expiresAt: lease.expiresAt };
}
/**
* Extend the lease's expiresAt to `now + LEASE_TTL_MS`. Caller should
* gate refresh on `expiresAt - now < REFRESH_THRESHOLD` (D10 lazy
* refresh: avoid refreshing on every keepalive when the lease is
* comfortably far from expiry).
*
* Returns `{ ok: true, expiresAt }` on success, `{ ok: false }` if the
* lease is unknown or already expired (the agent must close the WS and
* surface auth-invalid). Critical security invariant: never resurrect
* an expired lease the 30-min TTL is what bounds blast radius for a
* leaked attach token whose lease should have been GC'd.
*/
export function refreshLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
if (!sessionId) return { ok: false };
const lease = leases.get(sessionId);
if (!lease) return { ok: false };
const now = Date.now();
if (now > lease.expiresAt) {
leases.delete(sessionId);
return { ok: false };
}
lease.expiresAt = now + LEASE_TTL_MS;
return { ok: true, expiresAt: lease.expiresAt };
}
/**
* Drop a lease. Called on explicit dispose (/pty-dispose, /pty-restart,
* WS close with code 4001) and on session timeout in terminal-agent.
*/
export function revokeLease(sessionId: string | null | undefined): void {
if (!sessionId) return;
leases.delete(sessionId);
}
/** Returns the lease count — test + observability helper. */
export function leaseCount(): number {
return leases.size;
}
/** Test-only reset. */
export function __resetLeases(): void {
leases.clear();
}
function pruneExpired(now: number): void {
let checked = 0;
for (const [sessionId, lease] of leases) {
if (checked++ >= 20) break;
if (lease.expiresAt <= now) leases.delete(sessionId);
}
while (leases.size > MAX_LEASES) {
const first = leases.keys().next().value;
if (!first) break;
leases.delete(first);
}
}
+130 -7
View File
@@ -13,7 +13,7 @@ import * as fs from 'fs';
import * as path from 'path';
import { TEMP_DIR } from './platform';
import { inspectElement, formatInspectorResult, getModificationHistory } from './cdp-inspector';
import { validateReadPath } from './path-security';
import { validateReadPath, validateOutputPath } from './path-security';
import { stripLoneSurrogates } from './sanitize';
// Re-export for backward compatibility (tests import from read-commands)
export { validateReadPath } from './path-security';
@@ -46,6 +46,117 @@ function wrapForEvaluate(code: string): string {
: `(async()=>(${trimmed}))()`;
}
/** Flags split out of `js`/`eval` args by parseOutArgs. */
export interface OutArgs {
outPath?: string;
raw: boolean;
rest: string[];
}
/**
* Parse `--out <path>` / `--out=<path>` and `--raw` / `--raw=true|false` out of an
* arg list, returning the flags plus the remaining positional args (`rest`).
*
* Single source of truth shared by the js/eval handlers and the write-capability
* gate in server.ts, so the two never disagree on what counts as an `--out`
* invocation. Throws on malformed usage (repeated `--out`, missing value, bad
* `--raw` value) so the user gets a clear error instead of a silent misparse.
*/
export function parseOutArgs(args: string[]): OutArgs {
let outPath: string | undefined;
let raw = false;
const rest: string[] = [];
for (let i = 0; i < args.length; i++) {
const a = args[i];
if (a === '--out') {
if (outPath !== undefined) throw new Error('--out specified more than once');
const val = args[i + 1];
if (val === undefined || val.startsWith('--')) throw new Error('--out requires a file path');
outPath = val;
i++;
} else if (a.startsWith('--out=')) {
if (outPath !== undefined) throw new Error('--out specified more than once');
const val = a.slice('--out='.length);
if (val === '') throw new Error('--out requires a file path');
outPath = val;
} else if (a === '--raw') {
raw = true;
} else if (a.startsWith('--raw=')) {
const v = a.slice('--raw='.length).toLowerCase();
if (v !== 'true' && v !== 'false') throw new Error('--raw must be true or false');
raw = v === 'true';
} else {
rest.push(a);
}
}
return { outPath, raw, rest };
}
/**
* True iff an arg list contains an `--out` flag in any accepted form
* (`--out <path>` or `--out=<path>`). Used by the write-capability gate to
* decide whether an otherwise-read command (`js`/`eval`) is actually a write
* invocation. Mirrors parseOutArgs's `--out` recognition exactly. Never throws
* a malformed `--out=` still counts as an out attempt (fail safe: gate it).
*/
export function hasOutArg(args: string[]): boolean {
return args.some(a => a === '--out' || a.startsWith('--out='));
}
/**
* Convert an evaluate() result to its string form the exact conversion `js`/`eval`
* used inline before `--out` existed. Kept byte-for-byte: `typeof === 'object'`
* (which includes `null`) goes through JSON.stringify (so `null` `"null"`);
* everything else via `String(result ?? '')` (so `undefined` `''`). JSON.stringify
* still throws on circular / BigInt-bearing results, same as before.
*/
export function resultToString(result: unknown): string {
return typeof result === 'object'
? JSON.stringify(result, null, 2)
: String(result ?? '');
}
/**
* Write an evaluate result string to disk for `--out`, returning bytes written.
*
* When the result is a base64 data URL (`data:<type>;...;base64,<payload>`) and
* `raw` is false, decode the payload to raw bytes this is the Excalidraw / og-image
* path where a render function returns a PNG data URL. The header is parsed
* case-insensitively and split on the FIRST comma (data URLs can contain commas in
* the payload). The payload is validated against the base64 charset before decoding,
* because `Buffer.from(_, 'base64')` silently drops invalid characters and would
* otherwise write corrupted bytes. `--raw` forces a literal write even for data URLs.
*
* Non-base64 strings are surrogate-sanitized (matching what the stdout egress path
* did before) and written as UTF-8. Parent directories are created validateOutputPath
* gates the location but does not mkdir.
*/
export function writeEvalResult(outPath: string, str: string, opts: { raw: boolean }): number {
validateOutputPath(outPath);
fs.mkdirSync(path.dirname(path.resolve(outPath)), { recursive: true });
if (!opts.raw && str.startsWith('data:')) {
const comma = str.indexOf(',');
if (comma !== -1) {
const header = str.slice('data:'.length, comma);
const tokens = header.split(';').map(t => t.trim().toLowerCase());
if (tokens.includes('base64')) {
const payload = str.slice(comma + 1).replace(/\s+/g, '');
if (!/^[A-Za-z0-9+/]*={0,2}$/.test(payload)) {
throw new Error('--out: malformed base64 in data URL (decode would corrupt output)');
}
const buf = Buffer.from(payload, 'base64');
fs.writeFileSync(outPath, buf);
return buf.length;
}
}
}
const buf = Buffer.from(stripLoneSurrogates(str), 'utf-8');
fs.writeFileSync(outPath, buf);
return buf.length;
}
/**
* Extract clean text from a page (strips script/style/noscript/svg).
* Exported for DRY reuse in meta-commands (diff).
@@ -179,24 +290,36 @@ export async function handleReadCommand(
}
case 'js': {
const expr = args[0];
if (!expr) throw new Error('Usage: browse js <expression>');
const { outPath, raw, rest } = parseOutArgs(args);
const expr = rest[0];
if (!expr) throw new Error('Usage: browse js <expression> [--out <file>] [--raw]');
if (bm) assertJsOriginAllowed(bm, page.url());
const wrapped = wrapForEvaluate(expr);
const result = await target.evaluate(wrapped);
return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
const str = resultToString(result);
if (outPath) {
const n = writeEvalResult(outPath, str, { raw });
return `JS result written: ${outPath} (${n} bytes)`;
}
return str;
}
case 'eval': {
const filePath = args[0];
if (!filePath) throw new Error('Usage: browse eval <js-file>');
const { outPath, raw, rest } = parseOutArgs(args);
const filePath = rest[0];
if (!filePath) throw new Error('Usage: browse eval <js-file> [--out <file>] [--raw]');
if (bm) assertJsOriginAllowed(bm, page.url());
validateReadPath(filePath);
if (!fs.existsSync(filePath)) throw new Error(`File not found: ${filePath}`);
const code = fs.readFileSync(filePath, 'utf-8');
const wrapped = wrapForEvaluate(code);
const result = await target.evaluate(wrapped);
return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
const str = resultToString(result);
if (outPath) {
const n = writeEvalResult(outPath, str, { raw });
return `Eval result written: ${outPath} (${n} bytes)`;
}
return str;
}
case 'css': {
+106
View File
@@ -0,0 +1,106 @@
/**
* Screenshot size guard keep full-page screenshots 2000px max-dim.
*
* The Anthropic vision API rejects images whose longest dimension exceeds
* 2000 image-pixels (post deviceScaleFactor). Full-page screenshots of long
* pages routinely exceed that, silently bricking the session: the agent
* burns turns on a base64 blob that errors model-side with no useful
* stderr surfacing on the browse side.
*
* This module centralizes the "after page.screenshot, check dimensions and
* downscale if too big" path so every full-page caller in browse/src can
* share the same enforcement. The cap is image-pixels, not CSS pixels,
* matching the Anthropic API's own threshold.
*
* Used by: snapshot.ts (annotated, heatmap), meta-commands.ts (screenshot),
* write-commands.ts (prettyscreenshot). See test/snapshot-meta-write-guard.test.ts.
*
* Closes #1214.
*/
import { writeFileSync, readFileSync } from "fs";
const MAX_DIMENSION_PX = 2000;
export interface SizeGuardResult {
/** True if the input image exceeded MAX_DIMENSION_PX and was downscaled. */
resized: boolean;
/** Final width and height (pixels) of the image as written/returned. */
width: number;
height: number;
/** Original dimensions before any downscale. */
originalWidth: number;
originalHeight: number;
}
/**
* Inspect an image buffer and downscale if its longest side exceeds the
* 2000px Anthropic vision API cap. Preserves aspect ratio. Encodes back
* to PNG. Returns the resulting buffer plus a diagnostic shape.
*
* Imports sharp lazily so the module load cost only hits screenshot paths
* (sharp's native binding is non-trivial to initialize).
*/
export async function guardScreenshotBuffer(input: Buffer): Promise<{ buffer: Buffer; result: SizeGuardResult }> {
const sharpModule = await import("sharp");
const sharp = sharpModule.default ?? sharpModule;
const image = sharp(input);
const metadata = await image.metadata();
const width = metadata.width ?? 0;
const height = metadata.height ?? 0;
const longest = Math.max(width, height);
if (longest <= MAX_DIMENSION_PX) {
return {
buffer: input,
result: {
resized: false,
width,
height,
originalWidth: width,
originalHeight: height,
},
};
}
const scale = MAX_DIMENSION_PX / longest;
const newWidth = Math.round(width * scale);
const newHeight = Math.round(height * scale);
const resized = await image
.resize(newWidth, newHeight, { fit: "inside" })
.png()
.toBuffer();
process.stderr.write(
`[screenshot-size-guard] image ${width}x${height} exceeded ${MAX_DIMENSION_PX}px max-dim; ` +
`downscaled to ${newWidth}x${newHeight} to fit Anthropic vision API\n`,
);
return {
buffer: resized,
result: {
resized: true,
width: newWidth,
height: newHeight,
originalWidth: width,
originalHeight: height,
},
};
}
/**
* File-mode variant: read the image at the given path, downscale if
* needed, and write the result back to the same path. Returns the
* diagnostic shape. Use this after `await page.screenshot({ path, ... })`.
*/
export async function guardScreenshotPath(filePath: string): Promise<SizeGuardResult> {
const input = readFileSync(filePath);
const { buffer, result } = await guardScreenshotBuffer(input);
if (result.resized) {
writeFileSync(filePath, buffer);
}
return result;
}
export const SCREENSHOT_MAX_DIMENSION_PX = MAX_DIMENSION_PX;
+24 -10
View File
@@ -135,7 +135,7 @@ export function getClassifierStatus(): ClassifierStatus {
// ─── Model download + staging ────────────────────────────────
async function downloadFile(url: string, dest: string): Promise<void> {
export async function downloadFile(url: string, dest: string): Promise<void> {
const res = await fetch(url);
if (!res.ok || !res.body) {
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
@@ -144,16 +144,30 @@ async function downloadFile(url: string, dest: string): Promise<void> {
const writer = fs.createWriteStream(tmp);
// @ts-ignore — Node stream compat
const reader = res.body.getReader();
let done = false;
while (!done) {
const chunk = await reader.read();
if (chunk.done) { done = true; break; }
writer.write(chunk.value);
try {
let done = false;
while (!done) {
const chunk = await reader.read();
if (chunk.done) { done = true; break; }
writer.write(chunk.value);
}
await new Promise<void>((resolve, reject) => {
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
});
fs.renameSync(tmp, dest);
} catch (err) {
// Drop the half-written tmp so we don't ship a truncated model file to
// a retry's renameSync. Wait for the writer to close fully before
// unlinking: Node's createWriteStream lazily opens the FD and flushes
// buffered writes during destroy(), so a naive unlinkSync hits ENOENT
// first and the writer re-creates the file on the next tick.
await new Promise<void>((resolve) => {
writer.once('close', () => resolve());
writer.destroy();
});
try { fs.unlinkSync(tmp); } catch { /* nothing to clean */ }
throw err;
}
await new Promise<void>((resolve, reject) => {
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
});
fs.renameSync(tmp, dest);
}
async function ensureTestsavantStaged(onProgress?: (msg: string) => void): Promise<void> {
+231
View File
@@ -0,0 +1,231 @@
/**
* Security sidecar client IPC layer for the Node L4 classifier subprocess.
*
* Spawn model: lazy. First call to scan() spawns the sidecar, warms it (the
* sidecar's loadTestsavant call on first scan-page-content), and reuses
* the same process for every subsequent scan. The process dies when the
* browse server exits (Node's stdin-close behavior).
*
* Reliability:
* - 5s default timeout per scan. Caller can override per-call.
* - 64KB request cap. Larger payloads short-circuit with `payload-too-large`.
* - Respawn capped at 3 failures within 10 minutes; further failures
* trip a circuit breaker that returns `available: false` until reset.
* - Parent-exit cleanup: process.on('exit') sends SIGTERM to the child.
*
* Failure semantics:
* - Node not on PATH available() returns false; caller (the
* /pty-inject-scan endpoint) returns l4: { available: false } and the
* extension degrades to WARN + user confirm.
* - Scan throws or times out caller treats as L4-unavailable for that
* request and falls through to L1-L3-only verdict.
*
* Single-process singleton. Multiple callers within the same browse
* process share one sidecar.
*/
import { ChildProcessByStdio, spawn } from "child_process";
import { Readable, Writable } from "stream";
import { findSecuritySidecar } from "./find-security-sidecar";
const REQUEST_CAP_BYTES = 64 * 1024;
const DEFAULT_TIMEOUT_MS = 5000;
const RESPAWN_WINDOW_MS = 10 * 60 * 1000;
const RESPAWN_LIMIT = 3;
interface PendingRequest {
resolve: (response: unknown) => void;
reject: (err: Error) => void;
timer: ReturnType<typeof setTimeout>;
}
interface SidecarState {
child: ChildProcessByStdio<Writable, Readable, Readable> | null;
pending: Map<string, PendingRequest>;
buffer: string;
failures: number[]; // timestamps of recent failures
available: boolean;
/** True after circuit-breaker tripped; stays true until reset() */
brokenCircuit: boolean;
nextId: number;
}
let state: SidecarState | null = null;
function getState(): SidecarState {
if (!state) {
state = {
child: null,
pending: new Map(),
buffer: "",
failures: [],
available: true,
brokenCircuit: false,
nextId: 1,
};
}
return state;
}
function recordFailure(): void {
const s = getState();
const now = Date.now();
s.failures = s.failures.filter((t) => now - t < RESPAWN_WINDOW_MS);
s.failures.push(now);
if (s.failures.length >= RESPAWN_LIMIT) {
s.brokenCircuit = true;
s.available = false;
}
}
function processBuffer(): void {
const s = getState();
let idx = s.buffer.indexOf("\n");
while (idx !== -1) {
const line = s.buffer.slice(0, idx).trim();
s.buffer = s.buffer.slice(idx + 1);
idx = s.buffer.indexOf("\n");
if (!line) continue;
let parsed: { id?: string; ok?: boolean; verdict?: unknown; status?: unknown; error?: string };
try {
parsed = JSON.parse(line);
} catch {
// Malformed line — record as failure but don't reject any specific
// pending request (we don't know which one this was meant for).
recordFailure();
continue;
}
const id = typeof parsed.id === "string" ? parsed.id : null;
if (!id) continue;
const pending = s.pending.get(id);
if (!pending) continue;
s.pending.delete(id);
clearTimeout(pending.timer);
if (parsed.ok) {
pending.resolve(parsed);
} else {
recordFailure();
pending.reject(new Error(parsed.error ?? "sidecar-error"));
}
}
}
function shutdownChild(): void {
const s = getState();
if (!s.child) return;
try {
s.child.kill("SIGTERM");
} catch {
// Already dead.
}
s.child = null;
for (const [, p] of s.pending) {
clearTimeout(p.timer);
p.reject(new Error("sidecar-died"));
}
s.pending.clear();
}
function spawnSidecar(): boolean {
const s = getState();
if (s.brokenCircuit) return false;
const location = findSecuritySidecar();
if (!location) {
s.available = false;
return false;
}
try {
const child = spawn(location.node, [location.entry], {
stdio: ["pipe", "pipe", "pipe"],
detached: false,
});
child.stdout.on("data", (chunk: Buffer) => {
s.buffer += chunk.toString("utf-8");
processBuffer();
});
child.on("exit", () => {
shutdownChild();
});
child.on("error", () => {
recordFailure();
shutdownChild();
});
s.child = child;
s.available = true;
return true;
} catch {
recordFailure();
return false;
}
}
// Best-effort parent-exit cleanup. Node's "exit" event blocks async work, so
// we send SIGTERM synchronously and let the OS reap the child.
process.on("exit", () => shutdownChild());
export interface SidecarAvailability {
available: boolean;
reason?: string;
}
export function isSidecarAvailable(): SidecarAvailability {
const s = getState();
if (s.brokenCircuit) return { available: false, reason: "circuit-broken" };
if (s.child) return { available: true };
// Probe via findSecuritySidecar without spawning. If the resolver returns
// null (no node on PATH, no entry on disk), we're permanently unavailable
// until a setup re-run.
const location = findSecuritySidecar();
if (!location) return { available: false, reason: "no-node-or-entry" };
return { available: true };
}
export async function scanWithSidecar(text: string, opts?: { timeoutMs?: number }): Promise<{ verdict: unknown }> {
const s = getState();
if (s.brokenCircuit) {
throw new Error("sidecar-circuit-broken");
}
if (Buffer.byteLength(text, "utf-8") > REQUEST_CAP_BYTES) {
throw new Error("payload-too-large");
}
if (!s.child) {
if (!spawnSidecar()) {
throw new Error("sidecar-spawn-failed");
}
}
const id = String(s.nextId++);
const timeoutMs = opts?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
return new Promise((resolve, reject) => {
const timer = setTimeout(() => {
s.pending.delete(id);
recordFailure();
reject(new Error("sidecar-timeout"));
}, timeoutMs);
s.pending.set(id, {
resolve: (response: unknown) => {
const r = response as { verdict?: unknown };
resolve({ verdict: r.verdict });
},
reject,
timer,
});
const payload = JSON.stringify({ id, op: "scan-page-content", text }) + "\n";
try {
s.child!.stdin.write(payload);
} catch (err) {
clearTimeout(timer);
s.pending.delete(id);
recordFailure();
reject(err instanceof Error ? err : new Error(String(err)));
}
});
}
/** Reset the circuit breaker. Test-only escape hatch. */
export function resetSidecarForTests(): void {
shutdownChild();
state = null;
}
+120
View File
@@ -0,0 +1,120 @@
/**
* Security sidecar entry Node script that hosts the L4 ML classifier on
* behalf of the compiled browse server.
*
* Why a sidecar:
* - browse/src/security-classifier.ts depends on @huggingface/transformers
* which loads onnxruntime-node, a native module that fails to `dlopen`
* from Bun's compile-binary temp extraction dir (CLAUDE.md "Sidebar
* security stack" section). Importing the classifier into server.ts
* would brick the compiled binary at startup.
* - sidebar-agent.ts (the previous host of the classifier) was removed
* when the PTY proved out. The classifier file still ships but had no
* caller exactly the gap codex flagged in #1370.
*
* This entry runs under plain Node (resolved by find-security-sidecar.ts).
* It reads NDJSON requests from stdin and writes NDJSON responses to stdout.
*
* Protocol (one JSON object per line, both directions):
* request: { id: string, op: "scan-page-content" | "ping", text?: string }
* response: { id: string, ok: true, verdict: LayerSignal } |
* { id: string, ok: false, error: string }
*
* Lifecycle:
* - Spawned lazily by security-sidecar-client.ts on first /pty-inject-scan
* - Exits when stdin closes (parent gone) standard Node behavior
* - Exits on SIGTERM cleanly
*
* Failure modes:
* - Model download fails reply { ok: false, error: "model-load" } and
* keep the loop alive for the next request (caller decides whether to
* retry or fail-safe to L1-L3-only)
*/
import * as readline from "readline";
import { scanPageContent, getClassifierStatus, loadTestsavant } from "./security-classifier";
interface Request {
id: string;
op: "scan-page-content" | "ping" | "status";
text?: string;
}
interface OkResponse {
id: string;
ok: true;
verdict?: unknown;
status?: unknown;
}
interface ErrResponse {
id: string;
ok: false;
error: string;
}
function write(obj: OkResponse | ErrResponse): void {
process.stdout.write(JSON.stringify(obj) + "\n");
}
async function handle(req: Request): Promise<void> {
if (!req || typeof req.id !== "string") {
// Drop unidentifiable requests silently — protocol invariant.
return;
}
try {
if (req.op === "ping") {
write({ id: req.id, ok: true, verdict: { layer: "ping", verdict: "alive", score: 0 } });
return;
}
if (req.op === "status") {
write({ id: req.id, ok: true, status: getClassifierStatus() });
return;
}
if (req.op === "scan-page-content") {
if (typeof req.text !== "string") {
write({ id: req.id, ok: false, error: "missing-text" });
return;
}
// Warm the classifier once per process; subsequent scans are fast.
await loadTestsavant().catch(() => {
// loadTestsavant degrades gracefully; scanPageContent below will
// return a fail-open verdict if the model never loaded.
});
const verdict = await scanPageContent(req.text);
write({ id: req.id, ok: true, verdict });
return;
}
write({ id: req.id, ok: false, error: `unknown-op:${(req as { op?: unknown }).op}` });
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
write({ id: req.id, ok: false, error: msg });
}
}
function main(): void {
// readline buffers stdin into one-line chunks. Stay alive until stdin
// closes (parent gone) — Node exits naturally then.
const rl = readline.createInterface({ input: process.stdin });
rl.on("line", (line) => {
if (!line.trim()) return;
let req: Request;
try {
req = JSON.parse(line) as Request;
} catch {
// Malformed line — write a generic error without an id, callers can
// detect via missing id and trip the circuit breaker.
write({ id: "<malformed>", ok: false, error: "malformed-json" });
return;
}
// Fire-and-forget; concurrent requests get id-correlated responses.
void handle(req);
});
rl.on("close", () => {
process.exit(0);
});
process.on("SIGTERM", () => process.exit(0));
process.on("SIGINT", () => process.exit(0));
}
main();
+750 -173
View File
File diff suppressed because it is too large Load Diff
+3
View File
@@ -23,6 +23,7 @@ import * as Diff from 'diff';
import { TEMP_DIR, isPathWithin } from './platform';
import { escapeEnvelopeSentinels } from './content-security';
import { stripLoneSurrogates } from './sanitize';
import { guardScreenshotPath } from './screenshot-size-guard';
// Roles considered "interactive" for the -i flag
const INTERACTIVE_ROLES = new Set([
@@ -418,6 +419,7 @@ export async function handleSnapshot(
}, boxes);
await page.screenshot({ path: screenshotPath, fullPage: true });
await guardScreenshotPath(screenshotPath);
// Always remove overlays
await page.evaluate(() => {
@@ -538,6 +540,7 @@ export async function handleSnapshot(
}, boxes);
await page.screenshot({ path: heatmapPath, fullPage: true });
await guardScreenshotPath(heatmapPath);
// Remove heatmap overlays
await page.evaluate(() => {
+154
View File
@@ -0,0 +1,154 @@
// SSE endpoint helper — shared cleanup contract for stream endpoints.
//
// Pre-helper, /activity/stream and /inspector/events implemented the same
// pattern in parallel and both leaked subscribers when enqueue failed
// without a corresponding abort signal (e.g. Chromium MV3 service-worker
// suspend dropped the TCP without an abort edge). The subscriber closure
// stayed in the Set, capturing the ReadableStreamDefaultController plus
// any payloads queued behind it. Over a multi-day sidebar session this
// compounded into multi-MB of retained controllers per dead connection.
//
// Centralizing the cleanup contract here means any future SSE endpoint
// inherits the invariant — cleanup runs on abort, enqueue failure, AND
// heartbeat failure, exactly once, regardless of which edge fires first.
import { stripLoneSurrogates } from './sanitize';
/**
* JSON.stringify replacer that strips lone UTF-16 surrogates from string
* values before they get escape-encoded. Pair with stringify when the
* consumer will JSON.parse the payload back into JS strings (SSE clients
* do this). Required at every SSE egress that ships page-content-derived
* fields see CLAUDE.md "Unicode sanitization at server egress".
*/
function sanitizeReplacer(_key: string, value: unknown): unknown {
return typeof value === 'string' ? stripLoneSurrogates(value) : value;
}
/** Send an SSE event. Handles JSON encoding + lone-surrogate sanitization. */
export type SseSender = (event: string, data: unknown) => void;
export interface SseEndpointConfig<T> {
/**
* Optional. Runs once after the stream opens, before subscribing for live
* events. Use for initial event replay (activity gap detection, history
* burst) or a current-state snapshot (inspector). The `send` helper
* handles JSON encoding with sanitizeReplacer and SSE framing; pass
* any event name and any payload object.
*/
initialReplay?: (send: SseSender) => void;
/**
* Subscribe to the live event source. Receives a `notify` callback;
* returns an unsubscribe function. The callback routes through the
* helper's safeEnqueue + cleanup-on-throw, so a dead consumer ends up
* removed from the subscriber set on the very next event (instead of
* waiting for an abort that may never fire).
*/
subscribe: (notify: (entry: T) => void) => () => void;
/**
* SSE event name for live events. `data: <JSON.stringify(entry)>\n\n`
* is wrapped automatically. /activity/stream uses 'activity';
* /inspector/events uses 'inspector'.
*/
liveEventName: string;
/** Heartbeat interval in ms. Default: 15000. */
heartbeatMs?: number;
}
/**
* Build a streaming Response that owns the cleanup contract:
* - safeEnqueue catches enqueue throws cleanup
* - 15s heartbeat catches dead peers; failure cleanup
* - req.signal abort cleanup
* - cleanup is idempotent (clearInterval + unsubscribe + try close)
*/
export function createSseEndpoint<T>(
req: Request,
config: SseEndpointConfig<T>,
): Response {
const heartbeatMs = config.heartbeatMs ?? 15000;
const encoder = new TextEncoder();
const stream = new ReadableStream({
start(controller) {
let cleanedUp = false;
let heartbeat: ReturnType<typeof setInterval> | null = null;
let unsubscribe: (() => void) | null = null;
const cleanup = (): void => {
if (cleanedUp) return;
cleanedUp = true;
if (heartbeat !== null) {
clearInterval(heartbeat);
heartbeat = null;
}
if (unsubscribe !== null) {
unsubscribe();
unsubscribe = null;
}
try {
controller.close();
} catch {
// Expected: stream already closed by the consumer.
}
};
const send: SseSender = (event, data) => {
if (cleanedUp) return;
try {
controller.enqueue(
encoder.encode(
`event: ${event}\ndata: ${JSON.stringify(data, sanitizeReplacer)}\n\n`,
),
);
} catch {
// Consumer disconnected mid-write. Tear down so this subscriber
// doesn't sit in the set forever.
cleanup();
}
};
// Initial replay (caller-provided).
if (config.initialReplay) {
try {
config.initialReplay(send);
} catch {
cleanup();
return;
}
if (cleanedUp) return;
}
// Subscribe for live events.
unsubscribe = config.subscribe((entry) => {
send(config.liveEventName, entry);
});
// Heartbeat keeps NAT boxes and proxies from dropping idle SSE,
// and serves as a liveness probe: an enqueue failure here is the
// cheapest way to learn the consumer is gone without waiting for
// an abort signal that may never arrive.
heartbeat = setInterval(() => {
if (cleanedUp) return;
try {
controller.enqueue(encoder.encode(`: heartbeat\n\n`));
} catch {
cleanup();
}
}, heartbeatMs);
req.signal.addEventListener('abort', cleanup);
},
});
return new Response(stream, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
},
});
}
+146 -3
View File
@@ -239,18 +239,156 @@ export function buildStealthScript(hw: HostProfile): string {
})();`;
}
/**
* Extended-mode init script six detection-vector patches. Applied
* AFTER the default mask, so the property-getter version remains in
* place if any of the deletion paths fail.
*
* Self-contained string so it can be passed to addInitScript({ content })
* without bundling concerns.
*/
export const EXTENDED_STEALTH_SCRIPT = `
(() => {
try {
// 1. Fully delete navigator.webdriver from the prototype so
// \`"webdriver" in navigator\` returns false (not just falsy).
delete Object.getPrototypeOf(navigator).webdriver;
} catch {}
try {
// 2. WebGL renderer spoof — SwiftShader is the canonical software-GPU
// tell. Spoof to a plausible Apple M1 Pro string.
const getParameter = WebGLRenderingContext.prototype.getParameter;
WebGLRenderingContext.prototype.getParameter = function (parameter) {
// UNMASKED_VENDOR_WEBGL (37445) → 'Apple Inc.'
if (parameter === 37445) return 'Apple Inc.';
// UNMASKED_RENDERER_WEBGL (37446) → realistic Apple silicon string
if (parameter === 37446) return 'Apple M1 Pro, OpenGL 4.1';
return getParameter.call(this, parameter);
};
} catch {}
try {
// 3. navigator.plugins: real PluginArray with MimeType objects.
const makePlugin = (name, filename, desc, mimes) => {
const p = Object.create(Plugin.prototype);
Object.defineProperties(p, {
name: { get: () => name },
filename: { get: () => filename },
description: { get: () => desc },
length: { get: () => mimes.length },
});
mimes.forEach((m, i) => { p[i] = m; });
p.item = (i) => mimes[i];
p.namedItem = (n) => mimes.find((m) => m.type === n);
return p;
};
const makeMime = (type, suffixes, desc) => {
const m = Object.create(MimeType.prototype);
Object.defineProperties(m, {
type: { get: () => type },
suffixes: { get: () => suffixes },
description: { get: () => desc },
});
return m;
};
const pdfMime = makeMime('application/pdf', 'pdf', '');
const cpdfMime = makeMime('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');
const plugins = [
makePlugin('PDF Viewer', 'internal-pdf-viewer', '', [pdfMime]),
makePlugin('Chrome PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
makePlugin('Chromium PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
];
Object.defineProperty(navigator, 'plugins', {
get: () => {
const arr = Object.create(PluginArray.prototype);
Object.defineProperty(arr, 'length', { get: () => plugins.length });
plugins.forEach((p, i) => { arr[i] = p; });
arr.item = (i) => plugins[i];
arr.namedItem = (n) => plugins.find((p) => p.name === n);
arr.refresh = () => {};
return arr;
},
});
} catch {}
try {
// 4. window.chrome shape — chrome.app + chrome.runtime + loadTimes/csi.
if (!window.chrome) {
window.chrome = {};
}
if (!window.chrome.runtime) {
window.chrome.runtime = { OnInstalledReason: {}, OnRestartRequiredReason: {} };
}
if (!window.chrome.app) {
window.chrome.app = {
isInstalled: false,
InstallState: { DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' },
RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },
};
}
if (!window.chrome.loadTimes) {
window.chrome.loadTimes = function () {
return { commitLoadTime: Date.now() / 1000, finishLoadTime: Date.now() / 1000 };
};
}
if (!window.chrome.csi) {
window.chrome.csi = function () {
return { startE: Date.now(), onloadT: Date.now(), pageT: 0, tran: 15 };
};
}
} catch {}
try {
// 5. mediaDevices — some headless builds drop it entirely.
if (!navigator.mediaDevices) {
Object.defineProperty(navigator, 'mediaDevices', {
get: () => ({ enumerateDevices: () => Promise.resolve([]) }),
});
}
} catch {}
try {
// 6. CDP cdc_* property cleanup. Chromium under CDP sets cdc_*-prefixed
// globals (driver injection markers); a bot detector finds them by
// iterating window keys. Strip all matching keys.
for (const k of Object.keys(window)) {
if (k.startsWith('cdc_')) {
try { delete window[k]; } catch {}
}
}
} catch {}
})();
`;
function extendedModeEnabled(): boolean {
const v = process.env.GSTACK_STEALTH;
return v === 'extended' || v === '1' || v === 'true';
}
/**
* Apply stealth patches to a fresh BrowserContext (or persistent context).
* Called by browser-manager.launch() and launchHeaded().
*
* Resolves the host profile from process.env at call time so per-install
* Always applies the always-on Layer C stealth script (built from the
* per-install host profile) the consistency-first default. When
* GSTACK_STEALTH=extended is set, layers the opt-in EXTENDED_STEALTH_SCRIPT
* on top: its window.chrome.* patches are `if (!...)`-guarded, so Layer C's
* richer shapes win, while the extended-only additions (WebGL spoof, faked
* navigator.plugins, mediaDevices, cdc_* cleanup) apply on top. Extended
* mode actively LIES about the browser and can break sites that reflect on
* these properties, so it stays off by default.
*
* Host profile is resolved from process.env at call time so per-install
* values bake into the script before Playwright sends it to Chromium via
* Page.addScriptToEvaluateOnNewDocument.
*/
export async function applyStealth(context: BrowserContext): Promise<void> {
const hw = readHostProfile();
const script = buildStealthScript(hw);
await context.addInitScript({ content: script });
await context.addInitScript({ content: buildStealthScript(hw) });
if (extendedModeEnabled()) {
await context.addInitScript({ content: EXTENDED_STEALTH_SCRIPT });
}
}
/**
@@ -359,3 +497,8 @@ export const STEALTH_IGNORE_DEFAULT_ARGS = [
'--disable-component-update',
'--disable-default-apps',
];
/** Test-only helper: report whether extended mode is currently active. */
export function isExtendedStealthEnabled(): boolean {
return extendedModeEnabled();
}
+143
View File
@@ -0,0 +1,143 @@
/**
* terminal-agent process-control primitives shared by cli.ts spawn site,
* server.ts shutdown teardown, and the v1.44 watchdog/respawn loop.
*
* Why this exists: pre-v1.44 used `pkill -f terminal-agent\.ts`, which
* matches any process whose argv contains the string and would kill
* sibling gstack sessions on the same host. The agent now writes a
* structured `terminal-agent-pid` record (`{pid, gen, startedAt}`) and
* every kill site routes through `killAgentByRecord` here identity-based,
* no regex.
*
* The `gen` field is a per-boot generation counter. Loopback /internal/*
* calls from the parent server include `X-Browse-Gen` so a slow agent that
* the watchdog respawned around can't accidentally service a stale grant
* from the old generation.
*/
import * as fs from 'fs';
import * as path from 'path';
import { safeUnlink, safeKill, isProcessAlive } from './error-handling';
import { writeSecureFile, mkdirSecure } from './file-permissions';
/**
* Locate the terminal-agent script on disk. In dev (cli.ts running via
* `bun run`), it lives next to this file in browse/src. In a compiled
* binary, Bun's --compile bakes the source into the executable and
* exposes it relative to process.execPath. Either path must work or
* the agent can't be spawned at all.
*/
export function resolveTerminalAgentScript(searchHints: { metaDir?: string; execPath?: string } = {}): string | null {
const meta = searchHints.metaDir || __dirname;
const exec = searchHints.execPath || process.execPath;
const candidates = [
path.resolve(meta, 'terminal-agent.ts'),
path.resolve(path.dirname(exec), '..', 'src', 'terminal-agent.ts'),
];
for (const c of candidates) {
if (fs.existsSync(c)) return c;
}
return null;
}
/**
* Spawn a fresh terminal-agent as a detached child. Handles the standard
* three steps: kill any prior agent recorded at `<stateDir>/terminal-agent-pid`,
* clear the stale record, then `Bun.spawn(['bun', 'run', script], ...)` with
* env wiring. Returns the PID of the new agent on success, null when the
* agent script can't be located.
*
* Used by both the CLI cold-start path (cli.ts) and the v1.44 watchdog in
* server.ts. Centralizing here removes a copy-paste between them and means
* future spawn-env additions (e.g. BROWSE_OWNER_PID for the generation
* counter rollout) land in one place.
*/
export function spawnTerminalAgent(opts: {
stateFile: string;
serverPort: number;
cwd?: string;
/** Optional extra env vars to add to the agent's process env. */
extraEnv?: Record<string, string>;
/** Override script lookup for tests. */
scriptPath?: string;
}): number | null {
const stateDir = path.dirname(opts.stateFile);
const prior = readAgentRecord(stateDir);
if (prior) {
killAgentByRecord(prior, 'SIGTERM');
clearAgentRecord(stateDir);
}
const script = opts.scriptPath || resolveTerminalAgentScript();
if (!script || !fs.existsSync(script)) return null;
const proc = (Bun as any).spawn(['bun', 'run', script], {
cwd: opts.cwd || process.cwd(),
env: {
...process.env,
BROWSE_STATE_FILE: opts.stateFile,
BROWSE_SERVER_PORT: String(opts.serverPort),
...(opts.extraEnv || {}),
},
stdio: ['ignore', 'ignore', 'ignore'],
});
proc.unref?.();
return proc.pid ?? null;
}
export interface AgentRecord {
pid: number;
/** Random per-boot identifier. Loopback /internal/* sees X-Browse-Gen: <gen>. */
gen: string;
/** ms since epoch. Reserved for future PID-reuse guards. */
startedAt: number;
}
export function agentRecordPath(stateDir: string): string {
return path.join(stateDir, 'terminal-agent-pid');
}
/** Read the current record. Returns null on missing/malformed file. */
export function readAgentRecord(stateDir: string): AgentRecord | null {
try {
const raw = fs.readFileSync(agentRecordPath(stateDir), 'utf-8');
const j = JSON.parse(raw);
if (typeof j?.pid === 'number' && typeof j?.gen === 'string' && typeof j?.startedAt === 'number') {
return j as AgentRecord;
}
return null;
} catch {
return null;
}
}
/** Atomic write. Caller must ensure stateDir exists; agent does this at boot. */
export function writeAgentRecord(stateDir: string, record: AgentRecord): void {
try { mkdirSecure(stateDir); } catch {}
const target = agentRecordPath(stateDir);
const tmp = `${target}.tmp-${process.pid}`;
writeSecureFile(tmp, JSON.stringify(record));
fs.renameSync(tmp, target);
}
export function clearAgentRecord(stateDir: string): void {
safeUnlink(agentRecordPath(stateDir));
}
/**
* Kill the agent identified by `record`. Signal defaults to SIGTERM (give
* the agent a chance to run its own SIGTERM cleanup). Returns true if a
* signal was actually sent to a live PID; false if the PID was already
* dead (no-op). Never throws ESRCH is swallowed by safeKill.
*
* Validates liveness BEFORE signaling so a PID-reuse race (the recorded
* PID was reaped and a brand-new unrelated process now holds it) can't
* cause us to kill the wrong process. This is a best-effort defense:
* Linux/macOS don't expose process-start-time cheaply, and the gap
* between record-write and watchdog-tick is small (60s max).
*/
export function killAgentByRecord(
record: AgentRecord,
signal: NodeJS.Signals = 'SIGTERM',
): boolean {
if (!isProcessAlive(record.pid)) return false;
safeKill(record.pid, signal);
return true;
}
+509 -73
View File
@@ -25,16 +25,47 @@ import * as path from 'path';
import * as crypto from 'crypto';
import { writeSecureFile, mkdirSecure } from './file-permissions';
import { safeUnlink } from './error-handling';
import { writeAgentRecord, clearAgentRecord } from './terminal-agent-control';
const STATE_FILE = process.env.BROWSE_STATE_FILE || path.join(process.env.HOME || '/tmp', '.gstack', 'browse.json');
const PORT_FILE = path.join(path.dirname(STATE_FILE), 'terminal-port');
const BROWSE_SERVER_PORT = parseInt(process.env.BROWSE_SERVER_PORT || '0', 10);
const EXTENSION_ID = process.env.BROWSE_EXTENSION_ID || ''; // optional: tighten Origin check
const INTERNAL_TOKEN = crypto.randomBytes(32).toString('base64url'); // shared with parent server via env at spawn
/**
* Per-boot generation identifier. Loopback /internal/* callers include
* `X-Browse-Gen: <CURRENT_GEN>` so a slow agent the watchdog respawned
* around can't service a stale grant from the prior generation. Absent
* header means "legacy caller" and is accepted (backward compat); a
* present-but-mismatched header returns 409 stale generation.
*/
const CURRENT_GEN = crypto.randomBytes(16).toString('base64url');
// In-memory cookie token registry. Parent posts /internal/grant after
// /pty-session; we validate WS cookies against this set.
const validTokens = new Set<string>();
// In-memory attach-token registry. Parent posts /internal/grant after
// /pty-session; we validate WS upgrades against this map.
//
// v1.44+: each token is bound to a v1.44 sessionId (the stable, non-secret
// identifier from browse/src/pty-session-lease.ts). The token grants ONE
// attach for ONE session — re-attach within the lease window comes through
// /pty-session/reattach, which mints a fresh token for the same sessionId.
//
// Legacy callers can still pass `{token}` without sessionId (the value
// stays null and the WS upgrade still works); those callers don't get
// re-attach because there's no stable identifier to match against.
const validTokens = new Map<string, string | null>(); // token → sessionId
/**
* Reverse index for re-attach lookups: sessionId live PtySession.
* Populated when a WS first attaches with a known sessionId; cleared when
* the session is disposed or the lease expires. Used by:
* - /ws upgrade: if the incoming attachToken maps to a sessionId that
* already has a live session, REPLACE its ws ref instead of spawning.
* - /internal/restart: enumerate by sessionId, dispose that one session.
*
* Kept separate from the WeakMap<ws,PtySession> so re-attach can find the
* session by id even after the original ws has gone.
*/
const sessionsById = new Map<string, PtySession>();
// Active PTY session per WS. One terminal per connection. Codex finding #4:
// uncaught handlers below catch bugs in framing/cleanup so they don't kill
@@ -46,12 +77,154 @@ process.on('unhandledRejection', (reason) => {
console.error('[terminal-agent] unhandledRejection:', reason);
});
interface PtySession {
export interface PtySession {
proc: any | null; // Bun.Subprocess once spawned
cols: number;
rows: number;
cookie: string;
/**
* Current attached websocket. Swapped on re-attach (Commit 3): when a new
* WS upgrade matches this session's sessionId, the old liveWs is gone
* and the new ws takes its place. The PTY on-data callback closes over
* `session`, not the original `ws`, so it always writes to the current
* liveWs (or skips the write when detached and liveWs is null).
*/
liveWs: any | null;
/**
* v1.44+ stable session identifier (from pty-session-lease). Null for
* legacy /internal/grant callers that didn't pass one. Used for
* targeted /internal/restart and Commit 3 re-attach lookups.
*/
sessionId: string | null;
spawned: boolean;
/**
* 25s server-side WS keepalive interval (v1.44+). Set in the WS `open`
* handler, cleared in `close`. We send `{type:"ping",ts}` text frames so
* NAT boxes, proxies, and Chrome's MV3 panel-suspend heuristics see the
* connection as active; the client either replies with `{type:"pong"}`
* or fires its own 25s `{type:"keepalive"}` cycle. Either path keeps
* the underlying TCP from being silently dropped.
*/
pingInterval: ReturnType<typeof setInterval> | null;
/**
* Commit 3 scrollback ring buffer. Each PTY write appends a frame; the
* total byte count is capped at RING_BUFFER_MAX_BYTES with oldest frames
* evicted first. On re-attach, the surviving frames are replayed as a
* single binary frame (prefixed with the v1.44 reset sequence) so the
* user sees their last screen of output. Frame boundaries preserve UTF-8
* + ANSI-CSI boundaries because each frame is the exact buffer that
* spawnClaude's on-data callback emitted.
*/
ringBuffer: Buffer[];
ringBufferBytes: number;
/**
* Tracks whether the PTY is currently in xterm alt-screen mode. claude's
* TUI enters alt-screen (CSI ?1049h) during tool calls and exits (CSI
* ?1049l) when returning to the main prompt. On re-attach, the replay
* prelude must re-enter alt-screen if the original PTY left it active,
* otherwise the replay renders against the main screen and the cursor
* + colors end up in the wrong place.
*/
altScreenActive: boolean;
/**
* Detach state machine (Commit 3). When the WS closes for a reason OTHER
* than the v1.44 intentional-restart code (4001), we keep the PtySession
* alive for the detach window (default 60s) so a re-attach within the
* window can resume the same PTY and replay the ring buffer. The timer
* disposes the session if no re-attach arrives in time.
*/
detached: boolean;
detachTimer: ReturnType<typeof setTimeout> | null;
}
/**
* WS keepalive interval. 25s is comfortably under the lowest common NAT
* idle timeout (typically 30-60s) and shorter than Chromium's WebSocket
* dead-peer threshold. Test-overridable via env so the v1.44 e2e tests
* can compress idle-window assertions to <1s without waiting half a
* minute per assertion.
*/
const KEEPALIVE_INTERVAL_MS = parseInt(
process.env.GSTACK_PTY_KEEPALIVE_INTERVAL_MS || '25000',
10,
);
/**
* Commit 3 scrollback ring buffer cap. 1 MB is enough for a full screen
* of dense claude output (including a recent tool result), small enough
* that a worst-case 10 detached sessions only cost ~10 MB of RSS.
* Env-overridable so e2e tests can verify eviction without writing 1 MB
* of fixture data per assertion.
*/
const RING_BUFFER_MAX_BYTES = parseInt(
process.env.GSTACK_PTY_RING_BUFFER_BYTES || `${1024 * 1024}`,
10,
);
/**
* Commit 3 detach window how long to keep a session alive after WS
* close (with any code other than 4001 intentional-restart) so a
* re-attach can resume the same PTY. 60s is long enough to cover a
* Chrome MV3 service-worker suspend cycle, a wifi blip, or a brief
* laptop sleep; short enough that genuinely-closed sessions don't
* stack up unbounded.
*/
const DETACH_WINDOW_MS = parseInt(
process.env.GSTACK_PTY_DETACH_WINDOW_MS || '60000',
10,
);
/**
* Append a frame to a session's ring buffer, evicting oldest frames if
* the total byte count exceeds RING_BUFFER_MAX_BYTES. Eviction is at
* frame boundaries (one PTY write = one frame), so we never cut a
* multi-byte UTF-8 sequence or a partial ANSI CSI in half claude's
* on-data callback emits coherent frames.
*
* Side effect: scans the appended chunk for alt-screen enter/exit
* sequences (CSI ?1049h / CSI ?1049l) and updates session.altScreenActive
* so the re-attach prelude knows whether to re-enter alt-screen.
*/
export function appendToRingBuffer(session: PtySession, frame: Buffer): void {
session.ringBuffer.push(frame);
session.ringBufferBytes += frame.length;
while (session.ringBufferBytes > RING_BUFFER_MAX_BYTES && session.ringBuffer.length > 1) {
const evicted = session.ringBuffer.shift()!;
session.ringBufferBytes -= evicted.length;
}
// Alt-screen tracking. Scan for the canonical xterm enter/exit pairs.
// We do this on every append (not just on attach) so the state is
// correct even if many frames have flowed since the last attach.
const ascii = frame.toString('latin1'); // single-byte view is enough — the codes are 7-bit ASCII
// Use lastIndexOf so trailing state wins when both appear in one frame
// (e.g., a quick tool-call open+close inside one render pass).
const enterIdx = ascii.lastIndexOf('\x1b[?1049h');
const exitIdx = ascii.lastIndexOf('\x1b[?1049l');
if (enterIdx >= 0 && enterIdx > exitIdx) session.altScreenActive = true;
else if (exitIdx >= 0 && exitIdx > enterIdx) session.altScreenActive = false;
}
/**
* Build the re-attach replay payload: server-side reset prelude + the
* accumulated ring buffer. The client side writes RIS (`\x1bc`) to xterm
* BEFORE feeding this payload in, so the layout is:
*
* 1. Client: `\x1bc` (RIS full reset, clears pre-blip xterm content)
* 2. Server: `\x1b[!p` (DECSTR soft reset re-defaults char attributes)
* 3. Server: optional `\x1b[?1049h` if we were in alt-screen at detach
* 4. Server: ring buffer contents, in append order
*
* The client coordinates the order by waiting for a `{type:"reattach-begin"}`
* text frame before treating the next binary frame as replay. That separation
* is what lets us prepend reset codes without clobbering the live stream
* that resumes immediately after.
*/
export function buildReplayPayload(session: PtySession): Buffer {
const parts: Buffer[] = [];
parts.push(Buffer.from('\x1b[!p'));
if (session.altScreenActive) parts.push(Buffer.from('\x1b[?1049h'));
for (const frame of session.ringBuffer) parts.push(frame);
return Buffer.concat(parts);
}
const sessions = new WeakMap<any, PtySession>(); // ws -> session
@@ -201,6 +374,118 @@ function disposeSession(session: PtySession): void {
*
* Everything else returns 404. The listener binds 127.0.0.1 only.
*/
/**
* Validate a loopback /internal/* request. Returns null when the request
* is allowed; otherwise returns the Response to send back. Centralizes
* bearer auth + the v1.44 X-Browse-Gen generation check so adding a new
* /internal/* route is a one-liner.
*/
function checkInternalAuth(req: Request): Response | null {
const auth = req.headers.get('authorization');
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
return new Response('forbidden', { status: 403 });
}
const headerGen = req.headers.get('x-browse-gen');
if (headerGen && headerGen !== CURRENT_GEN) {
return new Response('stale generation', { status: 409 });
}
return null;
}
/**
* Wrap a JSON-bodied /internal/* handler with the standard bearer-auth +
* generation-check + json-parse + error-response boilerplate. The handler
* `fn` is called with the parsed body; whatever it returns is JSON-stringified
* into a 200 Response, or the handler can return a Response directly to
* customize status / headers. Throwing from `fn` collapses to a 400 "bad".
*
* Centralizing the dance kills the copy-paste pattern of bearer + gen check
* + req.json().then(...).catch(...) that every /internal/* route needs.
* New routes become a single call to internalHandler.
*/
async function internalHandler<T>(
req: Request,
fn: (body: any) => T | Promise<T> | Response | Promise<Response>,
): Promise<Response> {
const denied = checkInternalAuth(req);
if (denied) return denied;
let body: any;
try {
body = await req.json();
} catch {
return new Response('bad', { status: 400 });
}
try {
const result = await fn(body);
if (result instanceof Response) return result;
if (result === undefined || result === null) return new Response('ok');
return new Response(JSON.stringify(result), {
status: 200,
headers: { 'Content-Type': 'application/json' },
});
} catch {
return new Response('bad', { status: 400 });
}
}
/**
* Spawn the claude PTY for a session if it hasn't been spawned yet.
* Used by both the legacy binary-frame spawn trigger and the v1.44 explicit
* `{type:"start"}` text-frame trigger. Idempotent on `session.spawned`.
*
* Returns true if claude is now running, false if spawn failed (e.g. claude
* binary not on PATH). On failure, the caller is expected to have already
* surfaced the error to the client (or will via the next frame).
*/
function maybeSpawnPty(ws: any, session: PtySession): boolean {
if (session.spawned) return true;
session.spawned = true;
let leftover = Buffer.alloc(0);
const proc = spawnClaude(session.cols, session.rows, (chunk) => {
const combined = Buffer.concat([leftover, Buffer.from(chunk)]);
// UTF-8 boundary detection (issue #1272). Look back at most 3 bytes
// for the start of an incomplete multibyte sequence and defer it.
let safeEnd = combined.length;
for (let i = combined.length - 1; i >= Math.max(0, combined.length - 3); i--) {
const b = combined[i];
if ((b & 0x80) === 0) { safeEnd = i + 1; break; }
if ((b & 0xC0) === 0x80) continue;
const expected = (b & 0xE0) === 0xC0 ? 2 : (b & 0xF0) === 0xE0 ? 3 : 4;
safeEnd = (combined.length - i >= expected) ? combined.length : i;
break;
}
const flush = combined.slice(0, safeEnd);
leftover = combined.slice(safeEnd);
if (flush.length) {
// Always record into the ring buffer (Commit 3) so re-attach can
// replay. session.liveWs is what changes across re-attaches — we
// close over `session`, not the original `ws`, so the write always
// goes to whichever ws is currently attached (or is skipped when
// detached and liveWs is null).
appendToRingBuffer(session, flush);
if (session.liveWs) {
try { session.liveWs.sendBinary(flush); } catch {}
}
}
});
if (!proc) {
try {
ws.send(JSON.stringify({
type: 'error',
code: 'CLAUDE_NOT_FOUND',
message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
}));
ws.close(4404, 'claude not found');
} catch {}
return false;
}
session.proc = proc;
proc.exited?.then?.(() => {
try { session.liveWs?.close(1000, 'pty exited'); } catch {}
});
return true;
}
function buildServer() {
return Bun.serve({
hostname: '127.0.0.1',
@@ -211,29 +496,66 @@ function buildServer() {
const url = new URL(req.url);
// /internal/grant — loopback-only handshake from parent server.
// v1.44+: accepts `{token, sessionId?}`. The sessionId binding lets
// the agent route re-attach attempts (same sessionId, fresh token)
// back to the same PtySession. Legacy callers passing just `{token}`
// still work — sessionId becomes null and re-attach is unavailable
// for that grant.
if (url.pathname === '/internal/grant' && req.method === 'POST') {
const auth = req.headers.get('authorization');
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
return new Response('forbidden', { status: 403 });
}
return req.json().then((body: any) => {
return internalHandler(req, (body) => {
if (typeof body?.token === 'string' && body.token.length > 16) {
validTokens.add(body.token);
const sid = typeof body?.sessionId === 'string' && body.sessionId.length > 0
? body.sessionId
: null;
validTokens.set(body.token, sid);
}
return new Response('ok');
}).catch(() => new Response('bad', { status: 400 }));
});
}
// /internal/revoke — drop a token (called on WS close or bootstrap reload)
if (url.pathname === '/internal/revoke' && req.method === 'POST') {
const auth = req.headers.get('authorization');
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
return new Response('forbidden', { status: 403 });
}
return req.json().then((body: any) => {
return internalHandler(req, (body) => {
if (typeof body?.token === 'string') validTokens.delete(body.token);
return new Response('ok');
}).catch(() => new Response('bad', { status: 400 }));
});
}
// /internal/restart — dispose the PtySession for a specific sessionId.
// Scoped to one caller (not enumerate-all). Server.ts /pty-restart
// posts here with the caller's sessionId; we kill ONLY that PTY,
// leaving any other live sidebar tabs untouched. Codex T2 of the
// eng review caught this gap — pre-spec the route would have
// disposed all sessions.
if (url.pathname === '/internal/restart' && req.method === 'POST') {
return internalHandler(req, (body) => {
const sid = typeof body?.sessionId === 'string' ? body.sessionId : null;
if (!sid) return { killed: 0 };
const session = sessionsById.get(sid);
if (!session) return { killed: 0 };
// Cancel any pending detach timer before disposal — otherwise it
// would fire later against an already-disposed session.
if (session.detachTimer) {
clearTimeout(session.detachTimer);
session.detachTimer = null;
}
disposeSession(session);
sessionsById.delete(sid);
return { killed: 1 };
});
}
// /internal/healthz — liveness probe used by the v1.44 watchdog.
// Returns this agent's pid + gen + active session count without
// touching claude binary lookup (which can fail for non-process
// reasons and isn't a useful liveness signal). GET — no body to parse,
// so it stays on the bare checkInternalAuth gate.
if (url.pathname === '/internal/healthz' && req.method === 'GET') {
const denied = checkInternalAuth(req);
if (denied) return denied;
return new Response(JSON.stringify({
pid: process.pid,
gen: CURRENT_GEN,
sessions: validTokens.size,
}), { status: 200, headers: { 'Content-Type': 'application/json' } });
}
// /claude-available — bootstrap card hits this when user clicks "I installed it".
@@ -305,8 +627,13 @@ function buildServer() {
return new Response('unauthorized', { status: 401 });
}
// v1.44+: surface the token's sessionId binding to the upgraded ws.
// open() reads it via ws.data and registers the session in
// sessionsById so /internal/restart and (Commit 3) re-attach
// lookups can find it.
const sessionId = validTokens.get(token) ?? null;
const upgraded = server.upgrade(req, {
data: { cookie: token },
data: { cookie: token, sessionId },
// Echo the protocol back so the browser accepts the upgrade.
// Required when the client sends Sec-WebSocket-Protocol — the
// server MUST select one of the offered protocols, otherwise
@@ -320,22 +647,105 @@ function buildServer() {
},
websocket: {
/**
* Spawn the claude PTY for `session` if it hasn't been spawned yet.
* Called from both message paths: the legacy binary-frame trigger
* (any keystroke) AND the v1.44 explicit `{type:"start"}` trigger
* (forceRestart sends this on every fresh WS to get an eager prompt
* without requiring the user to type). Idempotent a second call
* after `spawned: true` is a no-op.
*/
open(ws) {
const sessionId = (ws.data as any)?.sessionId ?? null;
const cookie = (ws.data as any)?.cookie || '';
// Commit 3 re-attach: if this sessionId already has a detached
// PtySession in sessionsById, REPLACE its liveWs ref and replay
// the ring buffer. The PTY process is unchanged — claude keeps
// running through the wifi blip / panel-suspend cycle.
if (sessionId) {
const existing = sessionsById.get(sessionId);
if (existing) {
if (existing.detachTimer) {
clearTimeout(existing.detachTimer);
existing.detachTimer = null;
}
existing.detached = false;
existing.liveWs = ws;
existing.cookie = cookie;
// Re-bind the WS-keyed map so resize/close/message handlers
// can still find this session via the new ws.
sessions.set(ws, existing);
// Restart keepalive on the new ws.
if (existing.pingInterval) clearInterval(existing.pingInterval);
existing.pingInterval = setInterval(() => {
try { ws.send(JSON.stringify({ type: 'ping', ts: Date.now() })); } catch {}
}, KEEPALIVE_INTERVAL_MS);
// Tell the client to prep its xterm (write RIS) before the
// replay binary arrives. Order matters — the binary frame
// immediately after this text frame IS the replay.
try { ws.send(JSON.stringify({ type: 'reattach-begin', sessionId })); } catch {}
try { ws.sendBinary(buildReplayPayload(existing)); } catch {}
return;
}
}
const session: PtySession = {
proc: null,
cols: 80,
rows: 24,
cookie,
liveWs: ws,
sessionId,
spawned: false,
pingInterval: null,
ringBuffer: [],
ringBufferBytes: 0,
altScreenActive: false,
detached: false,
detachTimer: null,
};
session.pingInterval = setInterval(() => {
try {
ws.send(JSON.stringify({ type: 'ping', ts: Date.now() }));
} catch {
// ws likely closed mid-tick; close handler clears the interval.
}
}, KEEPALIVE_INTERVAL_MS);
sessions.set(ws, session);
// Index by sessionId for /internal/restart + Commit 3 re-attach.
if (sessionId) sessionsById.set(sessionId, session);
},
message(ws, raw) {
let session = sessions.get(ws);
if (!session) {
// Fallback for any path where open() didn't fire (shouldn't happen
// in Bun.serve but keeps the spawn path safe). No keepalive on
// this branch — open() is the supported entry point.
session = {
proc: null,
cols: 80,
rows: 24,
cookie: (ws.data as any)?.cookie || '',
liveWs: ws,
sessionId: (ws.data as any)?.sessionId ?? null,
spawned: false,
pingInterval: null,
ringBuffer: [],
ringBufferBytes: 0,
altScreenActive: false,
detached: false,
detachTimer: null,
};
sessions.set(ws, session);
if (session.sessionId) sessionsById.set(session.sessionId, session);
}
// Text frames are control messages: {type: "resize", cols, rows} or
// {type: "tabSwitch", tabId, url, title}. Binary frames are raw input
// bytes destined for the PTY stdin.
// Text frames are control messages: {type: "resize", cols, rows},
// {type: "tabSwitch", tabId, url, title}, {type: "tabState", ...},
// or v1.44 keepalive frames: {type: "pong", ts}, {type: "keepalive"}.
// Binary frames are raw input bytes destined for the PTY stdin.
if (typeof raw === 'string') {
let msg: any;
try { msg = JSON.parse(raw); } catch { return; }
@@ -355,50 +765,32 @@ function buildServer() {
handleTabState(msg);
return;
}
if (msg?.type === 'pong' || msg?.type === 'keepalive' || msg?.type === 'ping') {
// Keepalive frames — accepted and silently dropped. The mere
// fact that the WS carried this frame is the liveness signal;
// there's no application-level state to update at this layer.
// `ping` is acknowledged here too in case the client (or a
// future agent peer) mirrors our server-side ping shape.
return;
}
if (msg?.type === 'start') {
// v1.44 explicit spawn trigger. forceRestart sends this
// immediately on every fresh WS so claude boots without the
// user having to type a keystroke (pre-v1.44, the lazy-binary
// spawn made restart look stuck until the user typed). No-op
// if already spawned.
maybeSpawnPty(ws, session);
return;
}
// Unknown text frame — ignore.
return;
}
// Binary input. Lazy-spawn claude on the first byte.
// Binary input. Lazy-spawn claude on the first byte if `start`
// wasn't sent first. Both paths land in the same maybeSpawnPty
// helper for behavior parity.
if (!session.spawned) {
session.spawned = true;
// UTF-8 boundary detection to prevent splitting multi-byte characters (issue #1272).
// Buffer incomplete UTF-8 sequences until the next chunk completes them.
let leftover = Buffer.alloc(0);
const proc = spawnClaude(session.cols, session.rows, (chunk) => {
const combined = Buffer.concat([leftover, Buffer.from(chunk)]);
// Find the last index where a UTF-8 codepoint ends. Look back at most 3 bytes.
let safeEnd = combined.length;
for (let i = combined.length - 1; i >= Math.max(0, combined.length - 3); i--) {
const b = combined[i];
if ((b & 0x80) === 0) { safeEnd = i + 1; break; } // ASCII
if ((b & 0xC0) === 0x80) continue; // continuation byte
const expected = (b & 0xE0) === 0xC0 ? 2 : (b & 0xF0) === 0xE0 ? 3 : 4;
safeEnd = (combined.length - i >= expected) ? combined.length : i;
break;
}
const flush = combined.slice(0, safeEnd);
leftover = combined.slice(safeEnd);
if (flush.length) {
try { ws.sendBinary(flush); } catch {}
}
});
if (!proc) {
try {
ws.send(JSON.stringify({
type: 'error',
code: 'CLAUDE_NOT_FOUND',
message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
}));
ws.close(4404, 'claude not found');
} catch {}
return;
}
session.proc = proc;
// Watch for child exit so the WS closes cleanly when claude exits.
proc.exited?.then?.(() => {
try { ws.close(1000, 'pty exited'); } catch {}
});
if (!maybeSpawnPty(ws, session)) return;
}
try {
// raw is a Uint8Array; Bun.Terminal.write accepts string|Buffer.
@@ -409,16 +801,49 @@ function buildServer() {
}
},
close(ws) {
close(ws, code, _reason) {
const session = sessions.get(ws);
if (session) {
disposeSession(session);
if (session.cookie) {
// Drop the cookie so it can't be replayed against a new PTY.
validTokens.delete(session.cookie);
}
sessions.delete(ws);
if (!session) return;
// Always drop the WS-keyed map entry and the per-attach
// attachToken — the attach grant was single-use.
sessions.delete(ws);
if (session.cookie) validTokens.delete(session.cookie);
// Keepalive lives with the WS — every attach starts a fresh one.
if (session.pingInterval) {
clearInterval(session.pingInterval);
session.pingInterval = null;
}
// Commit 3 detach state machine. If the close was intentional
// (code 4001 = restart, 4404 = no-claude error), dispose
// immediately — there's no value in keeping the PTY alive.
// Otherwise enter the detach window: claude keeps running, the
// ring buffer keeps accumulating, and a re-attach with the same
// sessionId within DETACH_WINDOW_MS picks back up. If the timer
// fires without a re-attach, the session is disposed normally.
//
// Sessions without a sessionId (legacy single-shot grants) can't
// re-attach by definition — fall through to immediate dispose.
const intentional = code === 4001 || code === 4404 || code === 1000;
if (intentional || !session.sessionId) {
disposeSession(session);
if (session.sessionId) sessionsById.delete(session.sessionId);
return;
}
// Mark detached and start the disposal timer. The session stays
// in sessionsById so the next /ws upgrade with the same
// sessionId can find and reattach to it.
session.detached = true;
session.liveWs = null;
session.detachTimer = setTimeout(() => {
if (!session.detached) return; // re-attached in the meantime
disposeSession(session);
if (session.sessionId) sessionsById.delete(session.sessionId);
}, DETACH_WINDOW_MS);
// setTimeout returns a Bun Timer; unref so the detach window
// doesn't keep the process alive past natural shutdown.
(session.detachTimer as any)?.unref?.();
},
},
});
@@ -548,14 +973,25 @@ function main() {
writeSecureFile(tmp, String(port));
fs.renameSync(tmp, PORT_FILE);
// Write identity-based agent record (pid + per-boot gen). Replaces the
// v1.43- `pkill -f terminal-agent\.ts` regex teardown that could kill
// sibling gstack sessions. Callers (cli.ts spawn site, server.ts
// shutdown, the v1.44 watchdog) now route through killAgentByRecord in
// terminal-agent-control.ts.
writeAgentRecord(dir, { pid: process.pid, gen: CURRENT_GEN, startedAt: Date.now() });
// Hand the parent the internal token so it can call /internal/grant.
// Parent learns INTERNAL_TOKEN via env (TERMINAL_AGENT_INTERNAL_TOKEN below).
// We just print it on stdout for the supervising process to pick up if it's
// not already in env. Defense against env races at spawn time.
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid}`);
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid} gen=${CURRENT_GEN}`);
// Cleanup port file on exit.
const cleanup = () => { safeUnlink(PORT_FILE); process.exit(0); };
// Cleanup port file + agent record on exit.
const cleanup = () => {
safeUnlink(PORT_FILE);
clearAgentRecord(dir);
process.exit(0);
};
process.on('SIGTERM', cleanup);
process.on('SIGINT', cleanup);
}
+10 -3
View File
@@ -11,12 +11,14 @@ import { findInstalledBrowsers, importCookies, importCookiesViaCdp, hasV20Cookie
import { generatePickerCode } from './cookie-picker-routes';
import { validateNavigationUrl } from './url-validation';
import { validateOutputPath, validateReadPath } from './path-security';
import { guardScreenshotPath } from './screenshot-size-guard';
import * as fs from 'fs';
import * as path from 'path';
import type { SetContentWaitUntil } from './tab-session';
import { TEMP_DIR, isPathWithin } from './platform';
import { SAFE_DIRECTORIES } from './path-security';
import { modifyStyle, undoModification, resetModifications, getModificationHistory } from './cdp-inspector';
import { withCdpSession } from './cdp-bridge';
/**
* Aggressive page cleanup selectors and heuristics.
@@ -1123,6 +1125,10 @@ export async function handleWriteCommand(
// Take screenshot
await page.screenshot({ path: outputPath, fullPage: !scrollTo });
// Guard against Anthropic vision API >2000px brick (#1214). Only
// applies to fullPage captures; scrollTo viewport-bound shots are
// already capped by the viewport size.
if (!scrollTo) await guardScreenshotPath(outputPath);
// Restore viewport
if (viewportWidth && originalViewport) {
@@ -1404,9 +1410,10 @@ export async function handleWriteCommand(
validateOutputPath(outputPath);
try {
const cdp = await page.context().newCDPSession(page);
const { data } = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
await cdp.detach();
const data = await withCdpSession(page, async (cdp) => {
const result = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
return (result as { data: string }).data;
});
fs.writeFileSync(outputPath, data);
return `Archive saved: ${outputPath} (${Math.round(data.length / 1024)}KB, MHTML)`;
} catch (err: any) {
+64
View File
@@ -29,17 +29,20 @@ describe('shouldEnableChromiumSandbox', () => {
const origPlatform = process.platform;
const origCI = process.env.CI;
const origContainer = process.env.CONTAINER;
const origNoSandbox = process.env.GSTACK_CHROMIUM_NO_SANDBOX;
const origGetuid = process.getuid;
beforeEach(() => {
delete process.env.CI;
delete process.env.CONTAINER;
delete process.env.GSTACK_CHROMIUM_NO_SANDBOX;
});
afterEach(() => {
Object.defineProperty(process, 'platform', { value: origPlatform });
if (origCI === undefined) delete process.env.CI; else process.env.CI = origCI;
if (origContainer === undefined) delete process.env.CONTAINER; else process.env.CONTAINER = origContainer;
if (origNoSandbox === undefined) delete process.env.GSTACK_CHROMIUM_NO_SANDBOX; else process.env.GSTACK_CHROMIUM_NO_SANDBOX = origNoSandbox;
process.getuid = origGetuid;
});
@@ -90,6 +93,31 @@ describe('shouldEnableChromiumSandbox', () => {
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
expect(shouldEnableChromiumSandbox()).toBe(false);
});
// #1562 — Ubuntu/AppArmor opt-in override
it('linux + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (Ubuntu/AppArmor opt-out)', async () => {
setPlatform('linux');
process.env.GSTACK_CHROMIUM_NO_SANDBOX = '1';
process.getuid = (() => 1000) as typeof process.getuid;
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
expect(shouldEnableChromiumSandbox()).toBe(false);
});
it('darwin + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (env override wins on any platform)', async () => {
setPlatform('darwin');
process.env.GSTACK_CHROMIUM_NO_SANDBOX = '1';
process.getuid = (() => 501) as typeof process.getuid;
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
expect(shouldEnableChromiumSandbox()).toBe(false);
});
it('GSTACK_CHROMIUM_NO_SANDBOX=0 → does NOT trigger override (must be exactly "1")', async () => {
setPlatform('linux');
process.env.GSTACK_CHROMIUM_NO_SANDBOX = '0';
process.getuid = (() => 1000) as typeof process.getuid;
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
expect(shouldEnableChromiumSandbox()).toBe(true);
});
});
// ─── resolveDisconnectCause ──────────────────────────────────────
@@ -163,3 +191,39 @@ describe('resolveDisconnectCause', () => {
expect(await resolveDisconnectCause(null)).toBe('crash');
});
});
// ─── onDisconnect exit-code propagation (regression test) ──────────
//
// The contract: BrowserManager.onDisconnect is called with the resolved
// exit code (0 for clean Cmd+Q, 2 for crash). server.ts then forwards
// that code to activeShutdown(), which exits the process.
//
// Without this propagation, the headed-mode user-visible Cmd+Q respawn
// bug returns: server.ts hardcoded `activeShutdown?.(2)` ignores the
// resolved 0 and gbrowser's gbd HealthMonitor treats the clean quit as
// a crash, restarting the window.
describe('BrowserManager.onDisconnect exit-code propagation', () => {
it('signature accepts an optional exitCode argument', async () => {
const { BrowserManager } = await import('../src/browser-manager');
const bm = new BrowserManager();
const calls: Array<number | undefined> = [];
bm.onDisconnect = (code?: number) => { calls.push(code); };
bm.onDisconnect(0);
bm.onDisconnect(2);
bm.onDisconnect(undefined);
expect(calls).toEqual([0, 2, undefined]);
});
it('server.ts callback forwards exitCode when provided, falls back to 2', async () => {
// Mirror the production wiring in browse/src/server.ts so a refactor
// that drops the forward (e.g. reverting to `() => activeShutdown?.(2)`)
// fails CI before the user-visible bug returns.
const shutdownCalls: number[] = [];
const activeShutdown = (code: number) => { shutdownCalls.push(code); };
const onDisconnect = (code?: number) => activeShutdown(code ?? 2);
onDisconnect(0);
onDisconnect(2);
onDisconnect(undefined);
expect(shutdownCalls).toEqual([0, 2, 2]);
});
});
+26 -3
View File
@@ -178,7 +178,17 @@ describe('buildSpawnEnv', () => {
process.env.LANG = 'en_US.UTF-8';
});
afterEach(() => {
process.env = origEnv;
// process.env = origEnv replaces only the reference; the underlying
// env stays mutated and leaks to later test files in the same Bun
// process (e.g., breaks Bun.which('bash') in security.test.ts and
// bun-spawn in pair-agent-tunnel-eval.test.ts). Delete every current
// key then re-assign from the snapshot — restores the actual env.
for (const k of Object.keys(process.env)) {
if (!(k in origEnv)) delete process.env[k];
}
for (const [k, v] of Object.entries(origEnv)) {
if (v !== undefined) process.env[k] = v;
}
});
it('untrusted: drops $HOME and secrets', () => {
@@ -293,7 +303,15 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
expect(parsed.gh).toBeNull();
expect(parsed.gstack).toBeNull();
} finally {
process.env = origEnv;
// See afterEach comment in `buildSpawnEnv` describe — direct
// reassignment of process.env doesn't actually restore the
// underlying env in Bun. Delete + re-assign instead.
for (const k of Object.keys(process.env)) {
if (!(k in origEnv)) delete process.env[k];
}
for (const [k, v] of Object.entries(origEnv)) {
if (v !== undefined) process.env[k] = v;
}
}
});
@@ -312,7 +330,12 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
const parsed = JSON.parse(result.stdout);
expect(parsed.home).toBe('/Users/test-user');
} finally {
process.env = origEnv;
for (const k of Object.keys(process.env)) {
if (!(k in origEnv)) delete process.env[k];
}
for (const [k, v] of Object.entries(origEnv)) {
if (v !== undefined) process.env[k] = v;
}
}
});
@@ -0,0 +1,95 @@
import { describe, test, expect, beforeEach } from 'bun:test';
import type { Page } from 'playwright';
import {
__testInternals,
undoModification,
} from '../src/cdp-inspector';
// Regression tests for the modificationHistory cap (D6 / smoking gun #2).
// Pre-cap, the module-scoped array grew unbounded across the session. Cap is
// 200 entries, oldest evicted on push past the cap. undoModification reports
// "evicted at the cap" in the error message so a user who asks for a
// no-longer-available index understands what happened (instead of seeing the
// pre-cap "No modification at index 500" with no context).
const { pushModification, MOD_HISTORY_CAP, getRawHistory, getTotalPushed, resetForTest } = __testInternals;
function fakeMod(id: number) {
return {
selector: `#node-${id}`,
property: 'color',
oldValue: 'red',
newValue: 'blue',
source: 'inline' as const,
timestamp: id,
method: 'setProperty' as 'setProperty',
};
}
beforeEach(() => {
resetForTest();
});
describe('modificationHistory cap', () => {
test('1. push under cap keeps every entry', () => {
for (let i = 0; i < 50; i++) pushModification(fakeMod(i));
expect(getRawHistory().length).toBe(50);
expect(getTotalPushed()).toBe(50);
expect(getRawHistory()[0].timestamp).toBe(0);
expect(getRawHistory()[49].timestamp).toBe(49);
});
test('2. push exactly cap keeps every entry', () => {
for (let i = 0; i < MOD_HISTORY_CAP; i++) pushModification(fakeMod(i));
expect(getRawHistory().length).toBe(MOD_HISTORY_CAP);
expect(getTotalPushed()).toBe(MOD_HISTORY_CAP);
expect(getRawHistory()[0].timestamp).toBe(0);
});
test('3. push past cap evicts oldest, keeps length at cap', () => {
const total = MOD_HISTORY_CAP + 50;
for (let i = 0; i < total; i++) pushModification(fakeMod(i));
expect(getRawHistory().length).toBe(MOD_HISTORY_CAP);
expect(getTotalPushed()).toBe(total);
// Oldest 50 dropped — entry that was #0 is gone; new oldest is #50.
expect(getRawHistory()[0].timestamp).toBe(50);
expect(getRawHistory()[MOD_HISTORY_CAP - 1].timestamp).toBe(total - 1);
});
test('4. resetForTest clears both buffer and totalPushed', () => {
for (let i = 0; i < 10; i++) pushModification(fakeMod(i));
resetForTest();
expect(getRawHistory().length).toBe(0);
expect(getTotalPushed()).toBe(0);
});
});
describe('undoModification eviction-aware error', () => {
// Stub Page: undoModification throws before any await when idx is out of
// range, so the stub never actually gets called.
const stubPage = {} as unknown as Page;
test('5. out-of-range BEFORE any eviction → no evicted note', async () => {
for (let i = 0; i < 5; i++) pushModification(fakeMod(i));
await expect(undoModification(stubPage, 99)).rejects.toThrow(
'No modification at index 99. History has 5 entries.',
);
});
test('6. out-of-range AFTER eviction → message names the evicted count', async () => {
const total = MOD_HISTORY_CAP + 73;
for (let i = 0; i < total; i++) pushModification(fakeMod(i));
// 273 pushed, 200 in buffer, 73 evicted. Ask for idx=400 (above buffer).
await expect(undoModification(stubPage, 400)).rejects.toThrow(
`No modification at index 400. History has ${MOD_HISTORY_CAP} entries ` +
`(most recent ${MOD_HISTORY_CAP} only — 73 earlier entries evicted at the cap).`,
);
});
test('7. negative explicit index throws cleanly (no NaN propagation)', async () => {
for (let i = 0; i < 10; i++) pushModification(fakeMod(i));
await expect(undoModification(stubPage, -1)).rejects.toThrow(
'No modification at index -1.',
);
});
});
+171
View File
@@ -0,0 +1,171 @@
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import type { Page } from 'playwright';
import { withCdpSession, getOrCreateCdpSession } from '../src/cdp-bridge';
// Static-grep tripwire + behavior tests for the CDP session lifecycle
// helpers introduced as part of the D11 EXPAND_SCOPE memory-leak fix.
//
// Direct calls to `page.context().newCDPSession(page)` are the leak class
// the helpers exist to close — every direct call needs a matching
// `session.detach()` and forgetting it leaves the Chromium-side target
// attached until the underlying transport drops. The tripwire fails CI
// if any source file calls `newCDPSession(` outside `cdp-bridge.ts`
// (the file that owns the helpers).
//
// Pattern mirrors browse/test/terminal-agent-pid-identity.test.ts and
// browse/test/server-sanitize-surrogates.test.ts: read source files
// directly, assert an invariant on their contents.
const SRC_DIR = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src');
function readAllSourceFiles(): Array<{ file: string; content: string }> {
const out: Array<{ file: string; content: string }> = [];
for (const entry of fs.readdirSync(SRC_DIR)) {
if (!entry.endsWith('.ts')) continue;
const full = path.join(SRC_DIR, entry);
out.push({ file: entry, content: fs.readFileSync(full, 'utf-8') });
}
return out;
}
describe('CDP session cleanup invariant', () => {
test('1. no source file calls `newCDPSession(` outside cdp-bridge.ts', () => {
const offenders: Array<{ file: string; line: number; text: string }> = [];
for (const { file, content } of readAllSourceFiles()) {
// The helper file is the ONE allowed home for direct newCDPSession calls.
if (file === 'cdp-bridge.ts') continue;
const lines = content.split('\n');
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
if (!/newCDPSession\s*\(/.test(line)) continue;
// Skip comment lines — documentation mentions are fine.
const trimmed = line.trim();
if (trimmed.startsWith('//') || trimmed.startsWith('*')) continue;
offenders.push({ file, line: i + 1, text: trimmed });
}
}
if (offenders.length > 0) {
const formatted = offenders
.map((o) => ` ${o.file}:${o.line} ${o.text}`)
.join('\n');
throw new Error(
`Direct newCDPSession(...) calls found outside cdp-bridge.ts. ` +
`Route through withCdpSession() (one-shot, finally-detach) or ` +
`getOrCreateCdpSession() (cached, close-detach) instead:\n${formatted}`,
);
}
expect(offenders).toEqual([]);
});
test('2. helper file exports the two documented entry points', () => {
// Sanity: the tripwire is meaningless if the helpers themselves are gone.
expect(typeof withCdpSession).toBe('function');
expect(typeof getOrCreateCdpSession).toBe('function');
});
});
describe('withCdpSession finally-detach', () => {
// Fake Page surface for unit-testing the helper without spinning up a real
// browser. The helper only touches page.context().newCDPSession(page) and
// the returned session's .detach(), so this surface is enough.
function makeFakePage(detachSpy: { called: number; rejected?: Error }) {
const session = {
detach: async () => {
detachSpy.called++;
if (detachSpy.rejected) throw detachSpy.rejected;
},
};
return {
context: () => ({
newCDPSession: async (_p: unknown) => session,
}),
} as unknown as Page;
}
test('3. detaches on the success path', async () => {
const detachSpy = { called: 0 };
const page = makeFakePage(detachSpy);
const result = await withCdpSession(page, async (session) => {
expect(session).toBeDefined();
return 42;
});
expect(result).toBe(42);
expect(detachSpy.called).toBe(1);
});
test('4. detaches even when fn throws (the actual leak fix)', async () => {
const detachSpy = { called: 0 };
const page = makeFakePage(detachSpy);
await expect(
withCdpSession(page, async () => {
throw new Error('boom');
}),
).rejects.toThrow('boom');
expect(detachSpy.called).toBe(1);
});
test('5. swallows detach errors so they do not mask fn errors', async () => {
const detachSpy = { called: 0, rejected: new Error('already detached') };
const page = makeFakePage(detachSpy);
await expect(
withCdpSession(page, async () => {
throw new Error('original');
}),
).rejects.toThrow('original');
expect(detachSpy.called).toBe(1);
});
test('6. swallows detach errors on the success path too', async () => {
const detachSpy = { called: 0, rejected: new Error('target closed') };
const page = makeFakePage(detachSpy);
const result = await withCdpSession(page, async () => 'ok');
expect(result).toBe('ok');
expect(detachSpy.called).toBe(1);
});
});
describe('getOrCreateCdpSession close-detach', () => {
function makeFakePage() {
const closeListeners: Array<() => void> = [];
const session = {
detach: async () => {
session._detachCount++;
},
_detachCount: 0,
};
const page = {
context: () => ({
newCDPSession: async (_p: unknown) => session,
}),
once: (event: string, fn: () => void) => {
if (event === 'close') closeListeners.push(fn);
},
_fireClose: () => {
for (const fn of closeListeners) fn();
},
};
return { page: page as unknown as Page, session, fireClose: page._fireClose };
}
test('7. caches the session across calls', async () => {
const { page } = makeFakePage();
const cache = new WeakMap<Page, any>();
const s1 = await getOrCreateCdpSession(page, cache);
const s2 = await getOrCreateCdpSession(page, cache);
expect(s1).toBe(s2);
});
test('8. close hook detaches the session AND clears the cache', async () => {
const { page, session, fireClose } = makeFakePage();
const cache = new WeakMap<Page, any>();
await getOrCreateCdpSession(page, cache);
expect(cache.get(page)).toBeDefined();
fireClose();
// Detach runs synchronously up to the await in the close hook; let it settle.
await new Promise((r) => setTimeout(r, 0));
expect(cache.get(page)).toBeUndefined();
expect(session._detachCount).toBe(1);
});
});
+75
View File
@@ -0,0 +1,75 @@
/**
* Coverage for #1612 macOS/Linux server must survive sandboxed-shell
* harnesses by becoming its own session leader (setsid).
*
* Pre-#1612, Bun.spawn().unref() removed the child from Bun's event loop
* but did NOT call setsid(). When the CLI ran inside Claude Code's
* per-command sandbox, Conductor, or CI step runners, the session leader's
* exit sent SIGHUP to every PID in the session, killing the bun server.
*
* The fix routes macOS/Linux spawn through Node's child_process.spawn with
* detached:true, which calls setsid() so the server becomes its own session
* leader (PPID=1 on Linux, similar reparenting on Darwin).
*
* The actual setsid syscall is hard to assert in a unit test without a
* real spawn testing here is static: the cli.ts source must use the
* Node spawn path on macOS/Linux, with detached:true and .unref(). If a
* future refactor reverts to Bun.spawn().unref() on the macOS/Linux branch
* the regression returns and these tests fail.
*/
import { describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import * as path from "node:path";
const ROOT = path.resolve(import.meta.dir, "..", "..");
const CLI = path.join(ROOT, "browse", "src", "cli.ts");
function read(): string {
return fs.readFileSync(CLI, "utf-8");
}
describe("#1612 macOS/Linux daemonize via Node setsid path", () => {
test("cli.ts imports nodeSpawn from child_process (Node spawn alias)", () => {
const body = read();
// The fix relies on Node's child_process.spawn (which calls setsid on
// detached:true), aliased to avoid name collision with Bun.spawn. Match
// either `nodeSpawn` or `spawn as nodeSpawn` to be flexible to the
// exact import style.
expect(body).toMatch(/(spawn as nodeSpawn|nodeSpawn\s*[,}])/);
expect(body).toMatch(/from\s+['"]child_process['"]/);
});
test("non-Windows branch uses nodeSpawn(...).unref() with detached:true", () => {
const body = read();
// Find the non-Windows branch and assert it uses the Node spawn alias
// with detached:true. Match the pattern `nodeSpawn(...) ... detached:true`.
expect(body).toMatch(/nodeSpawn\([\s\S]{0,500}detached:\s*true/);
expect(body).toMatch(/nodeSpawn\([\s\S]{0,500}\.unref\(\)/);
});
test("non-Windows branch comment documents setsid/SIGHUP root cause", () => {
const body = read();
// The comment block must mention setsid() so a future refactor sees the
// why before changing the spawn call.
expect(body).toMatch(/setsid/);
expect(body).toMatch(/SIGHUP/);
});
test("the spawn call on macOS/Linux is nodeSpawn, not Bun.spawn", () => {
const body = read();
// Strip line comments before regex matching, so the "Bun.spawn().unref()"
// mentions inside the explanatory comment don't trigger false positives.
const codeOnly = body
.split("\n")
.filter((line) => !line.trim().startsWith("//"))
.join("\n");
// Find the non-Windows branch. The `} else {` block following the
// Windows branch. We then require its first ~400 chars contain a
// nodeSpawn() call and NOT a Bun.spawn() call (excluding the comment).
const nonWindowsStart = codeOnly.indexOf("nodeSpawn('bun'");
expect(nonWindowsStart).toBeGreaterThan(-1);
const slice = codeOnly.slice(nonWindowsStart, nonWindowsStart + 400);
expect(slice).toMatch(/nodeSpawn\(/);
expect(slice).not.toMatch(/Bun\.spawn\(/);
});
});
+81
View File
@@ -0,0 +1,81 @@
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
// v1.44 outer supervisor — static-grep invariants.
//
// Pre-v1.44 `$B connect` was fire-and-forget: spawn server detached, CLI
// exits, server runs unsupervised. If the server crashed, the user had to
// re-run `$B connect`. The opt-in supervisor (--supervise or
// BROWSE_SUPERVISE=1) keeps the CLI attached and respawns the server on
// unexpected exit, with the same crash-loop guard shape as the v1.44
// terminal-agent watchdog.
//
// Live respawn tests belong in the e2e tier (real Bun.spawn cycles take
// 3-8s each). These tripwires defend the load-bearing invariants:
// opt-in by default, signal handlers wired, crash-loop guard, env knobs.
const CLI_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'cli.ts');
describe('CLI outer supervisor (v1.44+)', () => {
test('1. supervisor is opt-in via --supervise flag or BROWSE_SUPERVISE env', () => {
const src = fs.readFileSync(CLI_TS, 'utf-8');
expect(src).toContain("commandArgs.includes('--supervise')");
expect(src).toContain("process.env.BROWSE_SUPERVISE === '1'");
// Default path MUST still exit 0 promptly. The legacy contract is
// that every caller of `$B connect` (Claude Code Bash tool, scripts,
// CI) gets a prompt return.
expect(src).toMatch(/if \(!superviseRequested\) \{\s*process\.exit\(0\);\s*\}/);
});
test('2. SIGINT and SIGTERM trigger clean teardown', () => {
const src = fs.readFileSync(CLI_TS, 'utf-8');
// Both signals must hit the teardown path or the user's Ctrl-C leaves
// an orphaned server (worse than no supervisor).
expect(src).toMatch(/process\.on\('SIGINT'.*teardownAndExit/);
expect(src).toMatch(/process\.on\('SIGTERM'.*teardownAndExit/);
// Teardown must signal the supervised server before exiting itself.
expect(src).toContain("safeKill(state.pid, 'SIGTERM')");
});
test('3. crash-loop guard with 5-in-5min rolling window', () => {
const src = fs.readFileSync(CLI_TS, 'utf-8');
expect(src).toContain('SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000');
expect(src).toContain('SUPERVISOR_GUARD_MAX = 5');
// Window pruning: a long-lived daemon with sporadic crashes must NOT
// hit the guard (otherwise we punish the user for the supervisor doing
// its job).
expect(src).toMatch(/respawns\.shift\(\)/);
});
test('4. exponential backoff schedule, env-overridable', () => {
const src = fs.readFileSync(CLI_TS, 'utf-8');
expect(src).toContain('GSTACK_SUPERVISOR_BACKOFF');
// Default schedule must include short waits at first (rapid recovery
// from transient crashes) and cap at a sensible long wait.
expect(src).toContain('1000,2000,4000,8000,30000');
});
test('5. tick interval is env-overridable for tests', () => {
const src = fs.readFileSync(CLI_TS, 'utf-8');
expect(src).toContain('GSTACK_SUPERVISOR_TICK_MS');
});
test('6. respawned server gets a fresh terminal-agent too', () => {
const src = fs.readFileSync(CLI_TS, 'utf-8');
// After server respawn, the terminal-agent state is stale (old PID
// record points to a dead agent that exited with its parent). The
// supervisor must re-call spawnTerminalAgent or the PTY path stays
// broken even though the server is back up.
const block = sliceBetween(src, 'Supervisor mode:', '// ─── Headed Disconnect');
expect(block).toContain('spawnTerminalAgent({');
});
});
function sliceBetween(source: string, start: string, end: string): string {
const i = source.indexOf(start);
if (i === -1) throw new Error(`marker not found: ${start}`);
const j = source.indexOf(end, i + start.length);
if (j === -1) throw new Error(`end marker not found: ${end}`);
return source.slice(i, j);
}
+156 -1
View File
@@ -9,7 +9,7 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { startTestServer } from './test-server';
import { BrowserManager } from '../src/browser-manager';
import { resolveServerScript } from '../src/cli';
import { handleReadCommand as _handleReadCommand } from '../src/read-commands';
import { handleReadCommand as _handleReadCommand, parseOutArgs, hasOutArg, resultToString } from '../src/read-commands';
import { handleWriteCommand as _handleWriteCommand } from '../src/write-commands';
import { handleMetaCommand } from '../src/meta-commands';
import { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetworkEntry, addDialogEntry, CircularBuffer } from '../src/buffers';
@@ -23,6 +23,65 @@ const handleReadCommand = (cmd: string, args: string[], b: BrowserManager) =>
const handleWriteCommand = (cmd: string, args: string[], b: BrowserManager) =>
_handleWriteCommand(cmd, args, b.getActiveSession(), b);
// ─── Pure arg-parser + result-conversion unit tests (no browser) ───
describe('parseOutArgs / hasOutArg', () => {
test('--out <path> splits the flag from the positional', () => {
expect(parseOutArgs(['expr', '--out', '/tmp/x'])).toEqual({ outPath: '/tmp/x', raw: false, rest: ['expr'] });
});
test('--out=<path> form is equivalent', () => {
expect(parseOutArgs(['expr', '--out=/tmp/x'])).toEqual({ outPath: '/tmp/x', raw: false, rest: ['expr'] });
});
test('flag ordering does not matter', () => {
expect(parseOutArgs(['--out', '/tmp/x', 'expr'])).toEqual({ outPath: '/tmp/x', raw: false, rest: ['expr'] });
});
test('--raw and --raw=true|false', () => {
expect(parseOutArgs(['e', '--out', '/tmp/x', '--raw']).raw).toBe(true);
expect(parseOutArgs(['e', '--out', '/tmp/x', '--raw=true']).raw).toBe(true);
expect(parseOutArgs(['e', '--out', '/tmp/x', '--raw=false']).raw).toBe(false);
});
test('repeated --out throws', () => {
expect(() => parseOutArgs(['e', '--out', '/a', '--out', '/b'])).toThrow(/more than once/);
});
test('--out with a missing value throws', () => {
expect(() => parseOutArgs(['e', '--out'])).toThrow(/requires a file path/);
expect(() => parseOutArgs(['e', '--out', '--raw'])).toThrow(/requires a file path/);
expect(() => parseOutArgs(['e', '--out='])).toThrow(/requires a file path/);
});
test('bad --raw value throws', () => {
expect(() => parseOutArgs(['e', '--out', '/a', '--raw=maybe'])).toThrow(/--raw must be true or false/);
});
test('hasOutArg matches --out and --out= exactly, not lookalikes', () => {
expect(hasOutArg(['a', '--out', 'b'])).toBe(true);
expect(hasOutArg(['a', '--out=b'])).toBe(true);
expect(hasOutArg(['a'])).toBe(false);
expect(hasOutArg(['a', '--output', 'b'])).toBe(false);
expect(hasOutArg(['a', '--outx'])).toBe(false);
});
});
describe('resultToString — byte-for-byte with pre-refactor behavior', () => {
test('null becomes "null" (typeof null === object → JSON.stringify)', () => {
expect(resultToString(null)).toBe('null');
});
test('undefined becomes empty string', () => {
expect(resultToString(undefined)).toBe('');
});
test('objects are pretty-printed JSON', () => {
expect(resultToString({ a: 1 })).toBe(JSON.stringify({ a: 1 }, null, 2));
});
test('primitives use String()', () => {
expect(resultToString(42)).toBe('42');
expect(resultToString(true)).toBe('true');
});
});
let testServer: ReturnType<typeof startTestServer>;
let bm: BrowserManager;
let baseUrl: string;
@@ -225,6 +284,102 @@ describe('Inspection', () => {
expect(result).toBe('3');
});
// ─── js/eval --out (render-to-file) ───────────────────────────
test('js (no --out) returns a multi-MB string without truncation', async () => {
// Handler-level guarantee: the result is not sliced/capped before return.
// (Full HTTP egress path is exercised elsewhere; this pins the handler.)
const result = await handleReadCommand('js', ["'x'.repeat(3 * 1024 * 1024)"], bm);
expect(result.length).toBe(3 * 1024 * 1024);
});
test('js --out writes the result to disk and returns a short status, not the payload', async () => {
const out = `/tmp/browse-out-large-${Date.now()}.txt`;
try {
const result = await handleReadCommand('js', ["'y'.repeat(2 * 1024 * 1024)", '--out', out], bm);
expect(result).toContain('JS result written:');
expect(result).toContain(out);
expect(result).toContain(`(${2 * 1024 * 1024} bytes)`);
expect(result.length).toBeLessThan(200); // status, not the 2MB payload
expect(fs.statSync(out).size).toBe(2 * 1024 * 1024);
} finally {
fs.rmSync(out, { force: true });
}
});
test('js --out decodes a base64 PNG data URL to real bytes', async () => {
// 1x1 transparent PNG.
const b64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
const out = `/tmp/browse-out-png-${Date.now()}.png`;
try {
const result = await handleReadCommand('js', [`'data:image/png;base64,' + '${b64}'`, '--out', out], bm);
const buf = fs.readFileSync(out);
// PNG magic bytes: 89 50 4E 47
expect([buf[0], buf[1], buf[2], buf[3]]).toEqual([0x89, 0x50, 0x4e, 0x47]);
const expectedLen = Buffer.from(b64, 'base64').length;
expect(buf.length).toBe(expectedLen);
expect(result).toContain(`(${expectedLen} bytes)`);
} finally {
fs.rmSync(out, { force: true });
}
});
test('js --out --raw writes the literal data-URL string (no decode)', async () => {
const dataUrl = 'data:text/plain;base64,aGVsbG8=';
const out = `/tmp/browse-out-raw-${Date.now()}.txt`;
try {
await handleReadCommand('js', [`'${dataUrl}'`, '--out', out, '--raw'], bm);
expect(fs.readFileSync(out, 'utf-8')).toBe(dataUrl);
} finally {
fs.rmSync(out, { force: true });
}
});
test('js --out throws on a malformed base64 data URL instead of writing corrupt bytes', async () => {
const out = `/tmp/browse-out-bad-${Date.now()}.png`;
try {
await expect(
handleReadCommand('js', ["'data:image/png;base64,!!!not-base64!!!'", '--out', out], bm)
).rejects.toThrow(/malformed base64/);
expect(fs.existsSync(out)).toBe(false);
} finally {
fs.rmSync(out, { force: true });
}
});
test('js --out rejects a path outside the safe directories', async () => {
await expect(
handleReadCommand('js', ['1 + 1', '--out', '/etc/browse-should-not-write.txt'], bm)
).rejects.toThrow();
});
test('js --out creates a missing parent directory', async () => {
// validateOutputPath resolves the parent's realpath, so it permits one level
// of missing dir under a safe root (/tmp). mkdir then materializes it.
const root = `/tmp/browse-out-nested-${Date.now()}`;
const out = `${root}/result.txt`;
try {
await handleReadCommand('js', ["'nested'", '--out', out], bm);
expect(fs.readFileSync(out, 'utf-8')).toBe('nested');
} finally {
fs.rmSync(root, { recursive: true, force: true });
}
});
test('eval --out writes the file result to disk (parity with js)', async () => {
const script = `/tmp/browse-eval-out-src-${Date.now()}.js`;
const out = `/tmp/browse-eval-out-${Date.now()}.txt`;
fs.writeFileSync(script, "'from eval'");
try {
const result = await handleReadCommand('eval', [script, '--out', out], bm);
expect(result).toContain('Eval result written:');
expect(fs.readFileSync(out, 'utf-8')).toBe('from eval');
} finally {
fs.rmSync(script, { force: true });
fs.rmSync(out, { force: true });
}
});
test('css returns computed property', async () => {
const result = await handleReadCommand('css', ['h1', 'color'], bm);
// Navy color
+11
View File
@@ -47,4 +47,15 @@ describe('locateBinary', () => {
expect(typeof locateBinary).toBe('function');
expect(locateBinary.length).toBe(0);
});
test('source-checkout fallback resolves <git-root>/browse/dist/browse[.exe]', () => {
// The windows-setup-e2e.yml workflow builds binaries directly under
// browse/dist/ (no .claude/skills/gstack/ install layout). find-browse
// must resolve those — otherwise every fresh build that hasn't run
// ./setup yet looks broken. Static pin so a future refactor that
// drops the source-checkout branch trips this test.
const src = require('fs').readFileSync(require('path').join(__dirname, '../src/find-browse.ts'), 'utf-8');
expect(src).toContain('Source-checkout fallback');
expect(src).toContain("join(root, 'browse', 'dist', 'browse')");
});
});
+42
View File
@@ -1,6 +1,7 @@
import { describe, test, expect } from 'bun:test';
import * as net from 'net';
import * as path from 'path';
import { __testInternals__ } from '../src/server';
const polyfillPath = path.resolve(import.meta.dir, '../src/bun-polyfill.cjs');
@@ -28,6 +29,47 @@ function getFreePort(): Promise<number> {
}
describe('findPort / isPortAvailable', () => {
test('explicit BROWSE_PORT diagnostic distinguishes bind denial from occupied port', () => {
const blocked = __testInternals__.formatExplicitPortUnavailableError(34567, {
available: false,
code: 'EPERM',
message: 'operation not permitted',
}).message;
expect(blocked).toContain('Cannot bind BROWSE_PORT=34567');
expect(blocked).toContain('localhost port binding is blocked');
expect(blocked).toContain('not that the port is occupied');
const occupied = __testInternals__.formatExplicitPortUnavailableError(34567, {
available: false,
code: 'EADDRINUSE',
message: 'address already in use',
}).message;
expect(occupied).toBe('[browse] Port 34567 (from BROWSE_PORT env) is in use');
});
test('random port diagnostic calls out sandbox-style bind denial', () => {
const message = __testInternals__.formatRandomPortUnavailableError([
{ port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
{ port: 12002, result: { available: false, code: 'EPERM', message: 'operation not permitted' } },
]).message;
expect(message).toContain('Cannot bind localhost ports after 2 attempts');
expect(message).toContain('Last error: 12002 (EPERM: operation not permitted)');
expect(message).toContain('not that every sampled port is occupied');
expect(message).toContain('set BROWSE_PORT to an approved port');
});
test('random port diagnostic preserves old busy-port meaning when all attempts are occupied', () => {
const message = __testInternals__.formatRandomPortUnavailableError([
{ port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
{ port: 12002, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
]).message;
expect(message).toContain('No available port after 5 attempts');
expect(message).toContain('every sampled port was already in use');
});
test('isPortAvailable returns true for a free port', async () => {
// Use the same isPortAvailable logic from server.ts

Some files were not shown because too many files have changed in this diff Show More