mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-17 23:30:09 +02:00
Merge origin/main into gbrowser-anti-detection
Brings the branch up to date with main (v1.40.0.2 -> v1.58.1.0). Conflict resolutions: - VERSION: take main's 1.58.1.0 (branch re-bumps at /ship time). - CHANGELOG.md: keep main's full history; slot the branch's unique v1.40.0.2 entry into descending-order position (no content lost). - browse/src/browser-manager.ts: keep main's GSTACK_CHROMIUM_NO_SANDBOX override and onDisconnect(exitCode) signature; branch's buildGStackLaunchArgs / STEALTH_IGNORE_DEFAULT_ARGS wiring preserved. - browse/test/browser-manager-unit.test.ts: keep main's override + exit-code propagation tests alongside the branch's Cmd+Q cause-resolver tests. - browse/src/stealth.ts: blend the two stealth designs. Layer C (buildStealthScript) is the always-on consistency-first default; main's GSTACK_STEALTH=extended (EXTENDED_STEALTH_SCRIPT) remains an opt-in layer applied on top. Both public APIs and both test suites (stealth-layer-c + stealth-extended) preserved; the two applyStealth wiring assertions updated to reflect the Layer C default. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -37,3 +37,9 @@ bin/* text eol=lf
|
||||
*.gif binary
|
||||
*.ico binary
|
||||
*.pdf binary
|
||||
|
||||
# The committed diagram-render bundle is hash-pinned (BUILD_INFO sha256);
|
||||
# a CRLF rewrite on Windows checkout would break the drift test and change
|
||||
# the content-addressed staged filename.
|
||||
lib/diagram-render/dist/*.html text eol=lf
|
||||
lib/diagram-render/dist/*.json text eol=lf
|
||||
|
||||
@@ -162,6 +162,12 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
# The comment upsert below calls the REST `/issues/{n}/comments` endpoints
|
||||
# (gh api ... issues/comments). With GITHUB_TOKEN those are gated by the
|
||||
# `issues` permission, not `pull-requests` — without it the GET returns 401
|
||||
# on every PR that produces eval artifacts (PRs with no artifacts exit
|
||||
# early and never hit it, which is why this stayed hidden). See #1802 CI fix.
|
||||
issues: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
|
||||
@@ -4,6 +4,8 @@ on:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'make-pdf/**'
|
||||
- 'lib/diagram-render/**'
|
||||
- 'test/diagram-render-drift.test.ts'
|
||||
- 'browse/src/meta-commands.ts'
|
||||
- 'browse/src/write-commands.ts'
|
||||
- 'browse/src/commands.ts'
|
||||
@@ -51,6 +53,15 @@ jobs:
|
||||
if: matrix.os == 'ubicloud-standard-8'
|
||||
run: sudo apt-get update && sudo apt-get install -y poppler-utils
|
||||
|
||||
# Install a color-emoji font BEFORE Chromium launches so the emoji render
|
||||
# gate has a fallback font. macOS ships Apple Color Emoji already.
|
||||
- name: Install color-emoji font (Ubuntu)
|
||||
if: matrix.os == 'ubicloud-standard-8'
|
||||
run: |
|
||||
sudo apt-get install -y fonts-noto-color-emoji
|
||||
fc-cache -f || true
|
||||
fc-match -f '%{family[0]}\t%{color}\n' ':lang=und-zsye:charset=1F600' || true
|
||||
|
||||
- name: Install Playwright Chromium
|
||||
run: bunx playwright install chromium
|
||||
|
||||
@@ -72,9 +83,9 @@ jobs:
|
||||
which pdftotext && pdftotext -v 2>&1 | head -1 || true
|
||||
|
||||
- name: Run make-pdf unit tests
|
||||
run: bun test make-pdf/test/*.test.ts
|
||||
run: bun test make-pdf/test/*.test.ts test/diagram-render-drift.test.ts
|
||||
|
||||
- name: Run combined-features copy-paste gate (P0)
|
||||
- name: Run E2E gates (combined-features copy-paste + emoji render)
|
||||
env:
|
||||
BROWSE_BIN: ${{ github.workspace }}/browse/dist/browse
|
||||
run: bun test make-pdf/test/e2e/combined-gate.test.ts
|
||||
run: bun test make-pdf/test/e2e/
|
||||
|
||||
@@ -1,7 +1,25 @@
|
||||
name: PR Title Sync
|
||||
|
||||
# WHY pull_request_target (not pull_request): the default GITHUB_TOKEN is
|
||||
# READ-ONLY on fork PRs under `pull_request`, so the title-sync backstop could
|
||||
# never `gh pr edit` a fork/agent PR. `pull_request_target` runs in the base-repo
|
||||
# context with a write token, which fixes fork coverage.
|
||||
#
|
||||
# WHY this is SAFE (pull_request_target is the most dangerous trigger):
|
||||
# - We check out the BASE repo (no `ref:`), so the only code we execute is
|
||||
# trusted base-repo infra (bin/gstack-pr-title-rewrite.sh). We NEVER check
|
||||
# out or run PR-head/fork code.
|
||||
# - Every attacker-controlled PR field (title, head repo, head sha) arrives via
|
||||
# `env:` and is referenced as a shell-quoted "$VAR". We NEVER inline a
|
||||
# `${{ github.event.pull_request.* }}` expression inside the run: script
|
||||
# (that would execute a crafted title as shell).
|
||||
# - The PR-head VERSION is read as DATA via the API (raw media type), from the
|
||||
# head repo at the head sha — never by checking out the head.
|
||||
# test/pr-title-sync-workflow-safety.test.ts is the static tripwire for all of
|
||||
# the above and fails CI if any of it regresses.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, edited]
|
||||
paths:
|
||||
- 'VERSION'
|
||||
@@ -19,25 +37,62 @@ jobs:
|
||||
pull-requests: write
|
||||
if: github.actor != 'github-actions[bot]'
|
||||
steps:
|
||||
- name: Checkout PR head
|
||||
# Base repo only — trusted infra (the rewrite helper). No PR-head checkout.
|
||||
- name: Checkout base repo (trusted)
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Rewrite PR title to match VERSION
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
PR_NUM: ${{ github.event.pull_request.number }}
|
||||
# Attacker-controlled on fork PRs — env-only, never inlined into run:.
|
||||
OLD_TITLE: ${{ github.event.pull_request.title }}
|
||||
BASE_REPO: ${{ github.repository }}
|
||||
HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
|
||||
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
chmod +x ./bin/gstack-pr-title-rewrite.sh
|
||||
VERSION=$(cat VERSION | tr -d '[:space:]')
|
||||
NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE")
|
||||
if [ "$NEW_TITLE" = "$OLD_TITLE" ]; then
|
||||
echo "Title already correct; no change."
|
||||
|
||||
if [ "$HEAD_REPO" = "$BASE_REPO" ]; then IS_FORK=0; else IS_FORK=1; fi
|
||||
|
||||
# Read the PR-head VERSION as data (raw bytes), from the head repo at
|
||||
# the head sha. Guard the assignment itself: under `set -e` a bare
|
||||
# `VERSION=$(...)` would abort the step before any later [ -z ] check.
|
||||
if ! VERSION=$(gh api -H "Accept: application/vnd.github.raw" \
|
||||
"repos/$HEAD_REPO/contents/VERSION?ref=$HEAD_SHA" 2>/dev/null | tr -d '[:space:]'); then
|
||||
VERSION=""
|
||||
fi
|
||||
|
||||
if [ -z "$VERSION" ]; then
|
||||
# Same-repo read failure should never happen — fail loudly so we
|
||||
# notice. A fork miss (public-contents quirk, private fork) is a
|
||||
# convenience gap, not a gate — warn and skip so the check stays green.
|
||||
if [ "$IS_FORK" = "0" ]; then
|
||||
echo "::error::Could not read VERSION from same-repo PR head ($HEAD_SHA)."
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::Could not read VERSION from fork $HEAD_REPO ($HEAD_SHA); skipping title sync."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# The helper rejects a malformed VERSION (exit 2). Same policy: loud for
|
||||
# same-repo, soft for forks. Never echo the raw (attacker-controlled)
|
||||
# title — Actions still parses ::workflow-command:: from stdout.
|
||||
if ! NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE"); then
|
||||
if [ "$IS_FORK" = "0" ]; then
|
||||
echo "::error::Could not compute title for VERSION '$VERSION' on PR #$PR_NUM."
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::Could not compute title for fork PR #$PR_NUM; skipping."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$NEW_TITLE" = "$OLD_TITLE" ]; then
|
||||
echo "PR #$PR_NUM title already correct; no change."
|
||||
exit 0
|
||||
fi
|
||||
echo "Rewriting: $OLD_TITLE -> $NEW_TITLE"
|
||||
gh pr edit "$PR_NUM" --title "$NEW_TITLE"
|
||||
echo "PR #$PR_NUM title synced to VERSION."
|
||||
|
||||
@@ -116,6 +116,7 @@ jobs:
|
||||
test/setup-windows-fallback.test.ts \
|
||||
test/build-script-shell-compat.test.ts \
|
||||
test/docs-config-keys.test.ts \
|
||||
test/brain-sync-windows-paths.test.ts \
|
||||
make-pdf/test/browseClient.test.ts \
|
||||
make-pdf/test/pdftotext.test.ts
|
||||
shell: bash
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
name: Windows Setup E2E
|
||||
|
||||
# End-to-end fresh-install gate for Windows. Runs `./setup` on a clean
|
||||
# windows-latest checkout and asserts the build completes, binaries
|
||||
# resolve via find-browse, and the gstack-paths state root resolves
|
||||
# cleanly. Catches Bun shell-parser regressions in package.json's build
|
||||
# chain (#1538, #1537, #1530, #1457, #1561) before they reach users.
|
||||
#
|
||||
# Separate from windows-free-tests.yml because that one runs a curated
|
||||
# unit-test subset; this one exercises the install path itself.
|
||||
#
|
||||
# Runner: GitHub-hosted free windows-latest. ~3-5 min total.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'package.json'
|
||||
- 'scripts/build.sh'
|
||||
- 'scripts/write-version-files.sh'
|
||||
- 'setup'
|
||||
- 'browse/src/cli.ts'
|
||||
- 'browse/src/find-browse.ts'
|
||||
- 'bin/gstack-paths'
|
||||
- '.github/workflows/windows-setup-e2e.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: windows-setup-e2e-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
windows-setup:
|
||||
runs-on: windows-latest
|
||||
timeout-minutes: 15
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: oven-sh/setup-bun@v1
|
||||
with:
|
||||
bun-version: latest
|
||||
|
||||
- name: Configure git identity
|
||||
run: |
|
||||
git config --global user.email "windows-setup-e2e@gstack.test"
|
||||
git config --global user.name "Windows Setup E2E"
|
||||
git config --global init.defaultBranch main
|
||||
shell: bash
|
||||
|
||||
- name: Install dependencies
|
||||
run: bun install --frozen-lockfile
|
||||
shell: bash
|
||||
|
||||
- name: Run bun run build (the previously-broken path)
|
||||
# This is the regression gate. Bun's Windows shell parser rejected
|
||||
# multiple constructs the old inline build chain used; the wave
|
||||
# moved the build to scripts/build.sh. If this step fails on
|
||||
# Windows, the build chain regressed.
|
||||
run: bun run build
|
||||
shell: bash
|
||||
env:
|
||||
GSTACK_SKIP_PLAYWRIGHT: '1'
|
||||
|
||||
- name: Verify binaries exist (with .exe extension on Windows)
|
||||
run: |
|
||||
set -e
|
||||
test -f browse/dist/browse.exe || test -f browse/dist/browse || (echo "MISSING: browse" && exit 1)
|
||||
test -f browse/dist/find-browse.exe || test -f browse/dist/find-browse || (echo "MISSING: find-browse" && exit 1)
|
||||
test -f design/dist/design.exe || test -f design/dist/design || (echo "MISSING: design" && exit 1)
|
||||
test -f bin/gstack-global-discover.exe || test -f bin/gstack-global-discover || (echo "MISSING: gstack-global-discover" && exit 1)
|
||||
echo "All binaries present"
|
||||
shell: bash
|
||||
|
||||
- name: Verify find-browse resolves to the .exe variant
|
||||
run: |
|
||||
set -e
|
||||
OUT=$(bun browse/src/find-browse.ts 2>&1) || true
|
||||
echo "find-browse output: $OUT"
|
||||
# On Windows, find-browse should successfully resolve to a binary,
|
||||
# whether or not it has the .exe extension on disk. Empty output
|
||||
# or "not found" means the .exe extension resolver regressed.
|
||||
echo "$OUT" | grep -qE '(browse\.exe|browse)$' || (echo "find-browse failed to resolve binary on Windows" && exit 1)
|
||||
shell: bash
|
||||
|
||||
- name: Verify gstack-paths state root resolves
|
||||
run: |
|
||||
set -e
|
||||
eval "$(bash bin/gstack-paths)"
|
||||
test -n "$GSTACK_STATE_ROOT" || (echo "GSTACK_STATE_ROOT empty" && exit 1)
|
||||
test -n "$PLAN_ROOT" || (echo "PLAN_ROOT empty" && exit 1)
|
||||
test -n "$TMP_ROOT" || (echo "TMP_ROOT empty" && exit 1)
|
||||
echo "GSTACK_STATE_ROOT=$GSTACK_STATE_ROOT"
|
||||
echo "PLAN_ROOT=$PLAN_ROOT"
|
||||
echo "TMP_ROOT=$TMP_ROOT"
|
||||
shell: bash
|
||||
+8
-1
@@ -4,9 +4,13 @@ dist/
|
||||
browse/dist/
|
||||
design/dist/
|
||||
make-pdf/dist/
|
||||
bin/gstack-global-discover
|
||||
# diagram-render ships its built bundle (offline-at-install premise, eng-review D2)
|
||||
!lib/diagram-render/dist/
|
||||
!lib/diagram-render/dist/**
|
||||
bin/gstack-global-discover*
|
||||
.gstack/
|
||||
.claude/skills/
|
||||
.claude/gstack-rendered/
|
||||
.claude/scheduled_tasks.lock
|
||||
.claude/*.lock
|
||||
.agents/
|
||||
@@ -37,3 +41,6 @@ supabase/.temp/
|
||||
|
||||
# Throughput analysis — local-only, regenerate via scripts/garry-output-comparison.ts
|
||||
docs/throughput-*.json
|
||||
|
||||
# gbrain local source-staging dir (capability checks, source clones) — runtime artifact
|
||||
.sources/
|
||||
|
||||
@@ -21,6 +21,7 @@ Invoke them by name (e.g., `/office-hours`).
|
||||
| `/plan-tune` | Self-tune AskUserQuestion sensitivity per question. |
|
||||
| `/autoplan` | One command runs CEO → design → eng → DX review. |
|
||||
| `/design-consultation` | Build a complete design system from scratch. |
|
||||
| `/spec` | Turn vague intent into a precise, executable spec in five phases. Files a GitHub issue, optionally spawns a Claude Code agent in a fresh worktree, and lets `/ship` close the source issue on merge. |
|
||||
|
||||
### Implementation + review
|
||||
|
||||
@@ -75,6 +76,25 @@ Invoke them by name (e.g., `/office-hours`).
|
||||
| `/setup-browser-cookies` | Import cookies from your real browser for authenticated testing. |
|
||||
| `/pair-agent` | Pair a remote AI agent (OpenClaw, Codex, etc.) with your browser. |
|
||||
|
||||
### iOS QA — drive real iPhones over USB or Tailscale (v1.43.0.0+)
|
||||
|
||||
| Skill | What it does |
|
||||
|-------|-------------|
|
||||
| `/ios-qa` | Live-device iOS QA via USB CoreDevice tunnel + embedded StateServer. Optionally exposes the device over Tailscale so remote agents can drive it. |
|
||||
| `/ios-fix` | Autonomous iOS bug fixer with regression snapshot capture. |
|
||||
| `/ios-design-review` | Designer's-eye QA on a real iPhone — 10-dimension Apple HIG rubric. |
|
||||
| `/ios-clean` | Convenience: strip DebugBridge + #if DEBUG wiring before a Release build. |
|
||||
| `/ios-sync` | Regenerate the iOS debug bridge against the latest upstream templates. |
|
||||
|
||||
Companion CLIs (run on the Mac that's plugged into the device):
|
||||
|
||||
| Command | What it does |
|
||||
|---------|-------------|
|
||||
| `gstack-ios-qa-daemon` | Mac-side broker. Loopback by default; `--tailnet` adds a Tailscale-facing listener with capability tiers and audit logging. |
|
||||
| `gstack-ios-qa-mint` | Owner-grant CLI for the tailnet allowlist (`grant`/`revoke`/`list`). |
|
||||
|
||||
End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md).
|
||||
|
||||
### Safety + scoping
|
||||
|
||||
| Skill | What it does |
|
||||
@@ -84,6 +104,7 @@ Invoke them by name (e.g., `/office-hours`).
|
||||
| `/guard` | Activate both careful + freeze at once. |
|
||||
| `/unfreeze` | Remove directory edit restrictions. |
|
||||
| `/make-pdf` | Turn any markdown file into a publication-quality PDF. |
|
||||
| `/diagram` | English in, diagram out: mermaid source + editable .excalidraw + SVG/PNG, offline. |
|
||||
|
||||
## Build commands
|
||||
|
||||
|
||||
+3
-2
@@ -212,8 +212,8 @@ from `snapshot`, or `@c` refs from `snapshot -C`. Full table:
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `js <expr>` | Run inline JavaScript expression in page context, return as string |
|
||||
| `eval <file>` | Run JS from a file (path under /tmp or cwd; same sandbox as `js`) |
|
||||
| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in page context, return as string. With `--out <file>` the result is written to disk instead of returned (a `data:*;base64,...` result is decoded to raw bytes unless `--raw`). `--out` makes the invocation a WRITE (needs `write` scope, never allowed over the tunnel). |
|
||||
| `eval <file> [--out <file>] [--raw]` | Run JS from a file (path under /tmp or cwd; same sandbox as `js`). `--out`/`--raw` behave as for `js`. |
|
||||
| `css <sel> <prop>` | Computed CSS value |
|
||||
| `attrs <sel\|@ref>` | Element attributes as JSON |
|
||||
| `is <prop> <sel\|@ref>` | State check: visible, hidden, enabled, disabled, checked, editable, focused |
|
||||
@@ -317,6 +317,7 @@ from `snapshot`, or `@c` refs from `snapshot -C`. Full table:
|
||||
| `disconnect` | Close headed Chrome, return to headless |
|
||||
| `focus [@ref]` | Bring headed Chrome to foreground (macOS); `@ref` also scrolls into view |
|
||||
| `state save\|load <name>` | Save or load browser state (cookies + URLs) |
|
||||
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. Use `--json` for programmatic consumers; text mode renders sorted top-10 tabs with "and N more" tail. |
|
||||
|
||||
### Handoff
|
||||
|
||||
|
||||
+2255
File diff suppressed because it is too large
Load Diff
@@ -27,25 +27,31 @@ bun run slop:diff # slop findings in files changed on this branch only
|
||||
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
|
||||
use Codex's own auth from `~/.codex/` config — no `OPENAI_API_KEY` env var needed.
|
||||
|
||||
**Where the keys live on this machine.** Conductor workspaces don't inherit the
|
||||
user's interactive shell env, so `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` aren't
|
||||
in the default process env. Before running any paid eval / E2E, source them from
|
||||
`~/.zshrc` (that's where Garry keeps them):
|
||||
**Env keys in Conductor workspaces.** The `GSTACK_*` env-shim (v1.39.2.0+,
|
||||
`lib/conductor-env-shim.ts`) promotes `GSTACK_ANTHROPIC_API_KEY` /
|
||||
`GSTACK_OPENAI_API_KEY` to their canonical names inside gstack's TS binaries.
|
||||
Tests run through gstack entrypoints inherit this promotion automatically.
|
||||
Don't echo the key value to stdout, logs, or shell history. The historical
|
||||
"never pass `env:` to `runAgentSdkTest`" rule is retired: the failure was
|
||||
partial-env replacement (the SDK's `Options.env` REPLACES the child's entire
|
||||
environment, so an object without the key broke auth). The runner now always
|
||||
passes a COMPLETE hermetic env with per-test `env:` merged last, so per-test
|
||||
overrides are safe; ambient `process.env.ANTHROPIC_API_KEY` mutation also
|
||||
still works (the env builder reads process.env at call time).
|
||||
|
||||
```bash
|
||||
bash -c '
|
||||
eval "$(grep -E "^export (ANTHROPIC_API_KEY|OPENAI_API_KEY)=" ~/.zshrc)"
|
||||
export ANTHROPIC_API_KEY OPENAI_API_KEY
|
||||
EVALS=1 EVALS_TIER=periodic bun test test/skill-e2e-<whatever>.test.ts
|
||||
'
|
||||
```
|
||||
**Hermetic local E2E (default).** Every E2E runner (claude -p, PTY, Agent
|
||||
SDK, codex, gemini) spawns children through `test/helpers/hermetic-env.ts`:
|
||||
allowlist-scrubbed env (operator `CONDUCTOR_*`, `CLAUDE_*`, `GSTACK_*`,
|
||||
`MCP_*`, `GBRAIN_*`, and credentials like `GH_TOKEN` never reach children),
|
||||
a fresh seeded `CLAUDE_CONFIG_DIR` (no operator `~/.claude` CLAUDE.md /
|
||||
MCP servers / skills), a temp `GSTACK_HOME`, and `--strict-mcp-config`.
|
||||
Local eval signal matches CI. Debug against real operator state with
|
||||
`EVALS_HERMETIC=0` (restores the legacy env AND drops the strict-MCP flag).
|
||||
Per-test `env:` overrides merge last, so deliberate contamination
|
||||
(`CONDUCTOR_WORKSPACE_PATH`, per-test `GSTACK_HOME`) keeps working. Wiring
|
||||
is pinned by `test/hermetic-wiring.test.ts` (static tripwire) and two
|
||||
gate-tier canaries in `test/skill-e2e-hermetic-canary.test.ts`.
|
||||
|
||||
Do not echo the key value anywhere (stdout, logs, shell history). The grep+eval
|
||||
pattern keeps it in process env only. When passing to a test's Agent SDK, do NOT
|
||||
pass `env: {...}` to `runAgentSdkTest` — the SDK's auth pipeline doesn't pick up
|
||||
the key the same way when env is supplied as an object (confirmed failure mode).
|
||||
Instead, mutate `process.env.ANTHROPIC_API_KEY` ambiently before the call and
|
||||
restore in `finally`.
|
||||
E2E tests stream progress in real-time (tool-by-tool via `--output-format stream-json
|
||||
--verbose`). Results are persisted to `~/.gstack-dev/evals/` with auto-comparison
|
||||
against the previous run.
|
||||
@@ -120,6 +126,7 @@ gstack/
|
||||
├── land-and-deploy/ # /land-and-deploy skill (merge → deploy → canary verify)
|
||||
├── office-hours/ # /office-hours skill (YC Office Hours — startup diagnostic + builder brainstorm)
|
||||
├── investigate/ # /investigate skill (systematic root-cause debugging)
|
||||
├── spec/ # /spec skill (five-phase spec → GitHub issue, optional agent spawn, /ship auto-closes)
|
||||
├── retro/ # Retrospective skill (includes /retro global cross-project mode)
|
||||
├── bin/ # CLI utilities (gstack-repo-mode, gstack-slug, gstack-config, etc.)
|
||||
├── document-release/ # /document-release skill (post-ship doc updates + Diataxis coverage map)
|
||||
@@ -145,7 +152,7 @@ gstack/
|
||||
├── setup # One-time setup: build binary + symlink skills
|
||||
├── SKILL.md # Generated from SKILL.md.tmpl (don't edit directly)
|
||||
├── SKILL.md.tmpl # Template: edit this, run gen:skill-docs
|
||||
├── ETHOS.md # Builder philosophy (Boil the Lake, Search Before Building)
|
||||
├── ETHOS.md # Builder philosophy (Boil the Ocean, Search Before Building)
|
||||
└── package.json # Build scripts for browse
|
||||
```
|
||||
|
||||
@@ -236,6 +243,24 @@ Activity / Refs / Inspector as debug overlays behind the footer's
|
||||
flow, dual-token model, and threat-model boundary — silent failures
|
||||
here usually trace to not understanding the cross-component flow.
|
||||
|
||||
**Embedder terminal-agent ownership** (v1.42.1.0+, identity-based kill v1.44.0.0+).
|
||||
`buildFetchHandler` in `browse/src/server.ts` accepts `ServerConfig.ownsTerminalAgent?:
|
||||
boolean` (default `true`). When `true`, factory shutdown runs the full teardown:
|
||||
identity-based kill via `killAgentByRecord(readAgentRecord(stateDir))` from
|
||||
`browse/src/terminal-agent-control.ts` plus `safeUnlinkQuiet` on
|
||||
`<stateDir>/terminal-port`, `<stateDir>/terminal-internal-token`, and
|
||||
`<stateDir>/terminal-agent-pid` (the per-boot agent record introduced in v1.44).
|
||||
Embedders (e.g. the gbrowser phoenix overlay) that pre-launch their own PTY
|
||||
server must pass `false` so their discovery files survive gstack teardown cycles.
|
||||
The flag is the third caller-owned teardown gate in `ServerConfig` (alongside
|
||||
`xvfb?` and `proxyBridge?`); polarity is inverted (explicit bool vs presence) and
|
||||
documented in the field's JSDoc. CLI `start()` always passes `true` explicitly —
|
||||
the static-grep test in `browse/test/server-embedder-terminal-port.test.ts` fails
|
||||
CI if a refactor drops it. Pre-v1.44 used `pkill -f terminal-agent\.ts` (regex
|
||||
match) which would kill sibling gstack sessions on the same host; the new
|
||||
`browse/test/terminal-agent-pid-identity.test.ts` static-grep tripwire fails CI
|
||||
if any source file re-introduces `pkill ... terminal-agent` or `spawnSync('pkill', ...)`.
|
||||
|
||||
**WebSocket auth uses Sec-WebSocket-Protocol, not cookies.** Browsers
|
||||
can't set `Authorization` on a WebSocket upgrade, but they CAN set
|
||||
`Sec-WebSocket-Protocol` via `new WebSocket(url, [token])`. The agent
|
||||
@@ -284,6 +309,26 @@ response in `server.ts`, read
|
||||
`browse/test/server-sanitize-surrogates.test.ts` pins the wiring with invariant
|
||||
tests, so bypasses fail CI.
|
||||
|
||||
**SSE endpoint helper** (v1.51.0.0+). New SSE endpoints in `server.ts` MUST route
|
||||
through `createSseEndpoint(req, config)` from `browse/src/sse-helpers.ts`. The
|
||||
helper owns the cleanup contract (abort + enqueue-throw + heartbeat-throw, all
|
||||
idempotent) and bakes in `sanitizeLoneSurrogates` on every JSON.stringify, so
|
||||
new subscribers can't accidentally regress either invariant. Inline
|
||||
`ReadableStream` wiring leaked subscribers when the TCP connection died without
|
||||
firing `req.signal.abort` (Chromium MV3 service-worker suspend, intermediate
|
||||
proxy half-close). `/activity/stream`, `/inspector/events`, and `/memory`
|
||||
(SSE-eligible) all route through it. `browse/test/sse-helpers.test.ts` pins the
|
||||
cleanup contract.
|
||||
|
||||
**CDP session lifecycle** (v1.51.0.0+). Direct `page.context().newCDPSession(page)`
|
||||
calls outside `browse/src/cdp-bridge.ts` fail CI via the static-grep tripwire in
|
||||
`browse/test/cdp-session-cleanup.test.ts`. Use `withCdpSession(page, async (s) => {...})`
|
||||
for one-shot CDP work (try/finally detach) or `getOrCreateCdpSession(page, cache)`
|
||||
for cached sessions tied to a page's lifetime (close-detach via `Map<page, session>`).
|
||||
Three sites migrated: cdp-bridge frame events, write-commands archive capture,
|
||||
cdp-inspector. The helpers prevent the per-session leak class where successful-path
|
||||
detach happened but error-path detach was missed.
|
||||
|
||||
**Setup symlink hardening** (v1.38.0.0+). Every link site in `setup` MUST route
|
||||
through the `_link_or_copy SRC DST` helper near the `IS_WINDOWS` detection. On
|
||||
Windows without Developer Mode, plain `ln -snf` produces frozen file copies that
|
||||
@@ -388,6 +433,44 @@ because they're tracked despite `.gitignore` — ignore them. When staging files
|
||||
always use specific filenames (`git add file1 file2`) — never `git add .` or
|
||||
`git add -A`, which will accidentally include the binaries.
|
||||
|
||||
## Redaction guard (PII / secrets / legal content)
|
||||
|
||||
Shared redaction engine catches credentials, PII, and legal/damaging content
|
||||
before it reaches an external sink (codex dispatch, GitHub issue/PR body, pushed
|
||||
commit). It is a **guardrail, not airtight enforcement** — `git push --no-verify`,
|
||||
direct `gh issue create`, and `GSTACK_REDACT_PREPUSH=skip` all bypass it. It
|
||||
catches accidents and carelessness, the 99% case. Do not claim it stops a
|
||||
determined leaker (a CHANGELOG line that makes that claim would not survive a hostile screenshotter).
|
||||
|
||||
- **Engine + taxonomy:** `lib/redact-patterns.ts` (the single source of truth —
|
||||
3 tiers; HIGH = genuinely-secret credentials that block, MEDIUM = PII/legal/
|
||||
internal + high-FP credential shapes that confirm via AskUserQuestion, LOW =
|
||||
FYI) and `lib/redact-engine.ts` (pure `scan()` + `applyRedactions()`).
|
||||
Calibration matters: a gate that cries wolf gets ignored, so context-variable
|
||||
shapes (Stripe `pk_live_`, Google `AIza`, JWT, env `*_KEY=`) sit at MEDIUM.
|
||||
- **CLI:** `bin/gstack-redact` (exit 0 clean / 2 MEDIUM / 3 HIGH; `--json`,
|
||||
`--auto-redact`, `--repo-visibility`, `--from-file`). `bin/gstack-redact-prepush`
|
||||
is the opt-in git hook.
|
||||
- **Skill docs are generated** from `scripts/resolvers/redact-doc.ts`
|
||||
(`{{REDACT_TAXONOMY_TABLE}}`, `{{REDACT_INVOCATION_BLOCK:<sink>}}`) so /spec,
|
||||
/cso, /ship, /document-release, /document-generate never drift from the engine.
|
||||
- **Scan-at-sink:** always scan the EXACT bytes that will be sent — write to a
|
||||
temp file, scan that file, pass the SAME file to `gh`/`git`. Never scan a string
|
||||
then re-render (that reopens a scan-vs-send gap).
|
||||
- **Visibility (no tier promotion):** resolve once per run, order = local config
|
||||
(`gstack-config get redact_repo_visibility`, ~/.gstack so never committed) → gh
|
||||
→ glab → unknown(=public-strict). Public repos get STERNER per-finding
|
||||
confirmation (no batch-acknowledge, no silent-proceed); MEDIUM is never
|
||||
auto-promoted to HIGH.
|
||||
- **Tool-attributed fences:** wrap Codex/Greptile/eval output in ` ```codex-review `
|
||||
/ ` ```greptile ` fences so example credentials those tools quote WARN-degrade
|
||||
instead of blocking. A live-format credential inside the fence still blocks.
|
||||
- **Config keys:** `redact_repo_visibility` (public|private|unknown, local-only
|
||||
override for repos gh/glab can't read), `redact_prepush_hook` (true|false).
|
||||
There is intentionally NO key to disable HIGH blocking.
|
||||
- **Audit:** the /spec semantic pass appends a content-free record (categories +
|
||||
body sha256, no spec text) to `~/.gstack/security/semantic-reviews.jsonl` (0600).
|
||||
|
||||
## Commit style
|
||||
|
||||
**Always bisect commits.** Every commit should be a single logical change. When
|
||||
@@ -708,8 +791,10 @@ When estimating or discussing effort, always show both human-team and CC+gstack
|
||||
| Research / exploration | 1 day | 3 hours | ~3x |
|
||||
|
||||
Completeness is cheap. Don't recommend shortcuts when the complete implementation
|
||||
is a "lake" (achievable) not an "ocean" (multi-quarter migration). See the
|
||||
Completeness Principle in the skill preamble for the full philosophy.
|
||||
is achievable. Boil the ocean — the complete thing is the goal; only genuinely
|
||||
unrelated multi-quarter migrations are separate scope, never an excuse for a
|
||||
shortcut. See the Completeness Principle in the skill preamble for the full
|
||||
philosophy.
|
||||
|
||||
## Search before building
|
||||
|
||||
@@ -758,6 +843,34 @@ them. Report progress at each check (which tests passed, which are running, any
|
||||
failures so far). The user wants to see the run complete, not a promise that
|
||||
you'll check later.
|
||||
|
||||
## Running evals as an agent: always detach (SIGTERM-proof)
|
||||
|
||||
When **you (an agent/harness)** launch a long eval/benchmark run, run it through
|
||||
`bin/gstack-detach` — NEVER as a plain backgrounded Bash task. A plain background
|
||||
task lives in the harness's process group, so a SIGTERM ("polite quit") on a turn
|
||||
boundary, a stopped Monitor, or an interruption kills the run mid-flight (observed:
|
||||
`script "test:gate" was terminated by signal SIGTERM` ~40 min into a run). On macOS
|
||||
the run can also be killed by idle-sleep. `gstack-detach` fixes both: a fresh session
|
||||
(escapes the group SIGTERM) wrapped in `caffeinate -i` (blocks idle-sleep).
|
||||
|
||||
- Use the `eval:bg*` scripts (`eval:bg`, `eval:bg:all`, `eval:bg:gate`,
|
||||
`eval:bg:periodic`) — they wrap the eval command in `gstack-detach` with the
|
||||
machine-wide `gstack-evals` lock (concurrent worktrees serialize instead of
|
||||
saturating the shared model API), a per-tier watchdog, and a **run-scoped** log
|
||||
under `~/.gstack-dev/eval-runs/` (no shared-`/tmp` collision). Each prints its
|
||||
log path. Or call `gstack-detach [--lock NAME] [--timeout SECS] [--label LBL] --
|
||||
<cmd>` directly for any long agent job. Export `ANTHROPIC_API_KEY` first (never
|
||||
pass keys in argv).
|
||||
- Then **poll the printed logfile** with a death-aware watcher: break on the
|
||||
guaranteed `### gstack-detach EXIT=<code> ###` sentinel (success AND failure are
|
||||
both marked, so silence is never mistaken for success). The detached run survives
|
||||
even if your watcher gets reaped, so re-checking the log always works.
|
||||
- Why the lock: a shared dev box with several Conductor worktrees will rate-limit
|
||||
the model API if two eval suites run at once (15-way concurrency each), which
|
||||
makes E2E tests time out en masse. The lock makes the second run WAIT, not collide.
|
||||
- Humans running `bun run test:evals` foreground in their own terminal don't need
|
||||
this — Ctrl-C is intended there. Detachment is for agent-launched runs only.
|
||||
|
||||
## E2E test fixtures: extract, don't copy
|
||||
|
||||
**NEVER copy a full SKILL.md file into an E2E test fixture.** SKILL.md files are
|
||||
@@ -813,6 +926,12 @@ The active skill lives at `~/.claude/skills/gstack/`. After making changes:
|
||||
2. Fetch and reset in the skill directory: `cd ~/.claude/skills/gstack && git fetch origin && git reset --hard origin/main`
|
||||
3. Rebuild: `cd ~/.claude/skills/gstack && bun run build`
|
||||
|
||||
**If you use gbrain:** the `git reset --hard` in step 2 reverts the brain-aware
|
||||
(`GBRAIN_CONTEXT_LOAD` / `GBRAIN_SAVE_RESULTS`) blocks that `gstack-config
|
||||
gbrain-refresh` renders into the install (those generated blocks differ from
|
||||
`main` by design). After deploying, re-run `gstack-config gbrain-refresh` to
|
||||
restore them across all your projects' Claude sessions. It's idempotent.
|
||||
|
||||
Or copy the binaries directly:
|
||||
- `cp browse/dist/browse ~/.claude/skills/gstack/browse/dist/browse`
|
||||
- `cp design/dist/design ~/.claude/skills/gstack/design/dist/design`
|
||||
@@ -835,6 +954,31 @@ Key routing rules:
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
|
||||
## Cross-session decision memory
|
||||
|
||||
Durable decisions and their rationale are captured in an append-only, event-sourced
|
||||
store at `~/.gstack/projects/<slug>/decisions.jsonl` so neither you nor the user
|
||||
re-litigates a settled call or loses the "why" across sessions. This is the reliable,
|
||||
file-only path: it works with gbrain OFF. (gbrain semantic recall is an optional
|
||||
enhancement layered on top, never a dependency.)
|
||||
|
||||
- **Resurface** active decisions before re-deciding: `bin/gstack-decision-search`
|
||||
(`--recent N`, `--scope repo|branch|issue`, `--query KW`, `--all`, `--json`).
|
||||
Add `--semantic` (with `--query`) to append related hits from gbrain memory when
|
||||
it's up; it degrades silently to the reliable file results when gbrain is off.
|
||||
Session start already surfaces scope-relevant active decisions via Context Recovery.
|
||||
If a decision is listed, treat it as settled with its rationale; if you're about to
|
||||
reverse it, say so explicitly.
|
||||
- **Capture** a DURABLE decision when you or the user make one:
|
||||
`bin/gstack-decision-log '{"decision":"...","rationale":"...","scope":"repo|branch|issue","source":"user|skill|agent","confidence":1-10}'`.
|
||||
Reverse a prior call with `--supersede <id>`; expunge an accidental secret with
|
||||
`--redact <id>`; rewrite the log to the active set with `--compact`. Non-interactive
|
||||
(never prompts), injection-sanitized, and HIGH-secret-blocking on write.
|
||||
- **Durable means:** architecture choice, scope cut, tool/vendor choice, or a reversal
|
||||
of a prior call. NOT a turn-level edit, a phrasing tweak, or anything trivially
|
||||
re-derivable. Capture is curated at the source — log durable decisions only, or the
|
||||
store becomes noise.
|
||||
|
||||
## GBrain Search Guidance (configured by /sync-gbrain)
|
||||
<!-- gstack-gbrain-search-guidance:start -->
|
||||
|
||||
@@ -870,4 +1014,10 @@ file globs. Run `/sync-gbrain` after meaningful code changes; for ongoing
|
||||
auto-sync across all worktrees, run `gbrain autopilot --install` once per
|
||||
machine — gbrain's daemon handles incremental refresh on a schedule.
|
||||
|
||||
Safety: don't run `/sync-gbrain` while `gbrain autopilot` is active — the
|
||||
orchestrator refuses destructive source ops when it detects a running autopilot
|
||||
to avoid racing it (#1734). Prefer registering user repos with `gbrain sources
|
||||
add --path <dir>` (no `--url`): URL-managed sources can auto-reclone, and the
|
||||
sync code walk for them requires an explicit `--allow-reclone` opt-in.
|
||||
|
||||
<!-- gstack-gbrain-search-guidance:end -->
|
||||
|
||||
+61
-2
@@ -106,6 +106,22 @@ bun run build
|
||||
bin/dev-teardown
|
||||
```
|
||||
|
||||
### Brain-aware blocks in a dev workspace (gbrain installed)
|
||||
|
||||
If gbrain is installed and usable (`bin/gstack-gbrain-detect --is-ok` exits 0),
|
||||
`bin/dev-setup` keeps your tracked `SKILL.md` files canonical and renders the
|
||||
brain-aware variant (the `GBRAIN_CONTEXT_LOAD` / `GBRAIN_SAVE_RESULTS` blocks)
|
||||
into `.claude/gstack-rendered/` (gitignored, per-workspace). It then repoints the
|
||||
workspace's `SKILL.md` symlinks at that render, so your Claude sessions get the
|
||||
full gbrain experience while `git status` stays clean. Under the hood, dev-setup
|
||||
passes `GSTACK_SKIP_GBRAIN_REGEN=1` inline to the nested `./setup` (so it never
|
||||
dirties tracked source) and runs `gen:skill-docs:user --out-dir .claude/gstack-rendered`,
|
||||
which rewrites only the section-base paths to point at the render. `bin/dev-teardown`
|
||||
removes the render. To make the blocks live across your *other* projects' Claude
|
||||
sessions, run `gstack-config gbrain-refresh`, which renders them into the global
|
||||
install (`~/.claude/skills/gstack`), guarded so it never touches a symlinked or
|
||||
non-gstack directory.
|
||||
|
||||
## Testing & evals
|
||||
|
||||
### Setup
|
||||
@@ -160,6 +176,18 @@ EVALS=1 bun test test/skill-e2e-*.test.ts
|
||||
- Saves full NDJSON transcripts and failure JSON for debugging
|
||||
- Tests live in `test/skill-e2e-*.test.ts` (split by category), runner logic in `test/helpers/session-runner.ts`
|
||||
|
||||
**Hermetic by default.** Every E2E runner (claude -p, the real-PTY plan-mode
|
||||
runner, the Agent SDK runner, plus the codex and gemini runners) spawns its child
|
||||
through `test/helpers/hermetic-env.ts`: an allowlist-scrubbed environment, a fresh
|
||||
seeded `CLAUDE_CONFIG_DIR`, a temp `GSTACK_HOME`, and `--strict-mcp-config`. Your
|
||||
operator `~/.claude` config, MCP servers (gbrain, Conductor), skills, `~/.gstack`
|
||||
decision logs, and `CONDUCTOR_*` env never leak into the child, so local eval
|
||||
signal matches CI instead of disagreeing for reasons unrelated to the code under
|
||||
test. Set `EVALS_HERMETIC=0` to debug against your real operator state (this also
|
||||
drops `--strict-mcp-config`). The wiring is pinned by `test/hermetic-wiring.test.ts`
|
||||
(a free static tripwire) and two gate-tier isolation canaries in
|
||||
`test/skill-e2e-hermetic-canary.test.ts`.
|
||||
|
||||
### E2E observability
|
||||
|
||||
When E2E tests run, they produce machine-readable artifacts in `~/.gstack-dev/`:
|
||||
@@ -182,6 +210,25 @@ bun run eval:compare # compare two runs — shows per-test deltas + Take
|
||||
bun run eval:summary # aggregate stats + per-test efficiency averages across runs
|
||||
```
|
||||
|
||||
**Detached runs for agents and long suites.** When an agent (or you, for a run
|
||||
you don't want to babysit) launches a long eval, use the `eval:bg*` scripts. They
|
||||
wrap the eval command in `bin/gstack-detach`: a fresh session that escapes a
|
||||
turn-boundary SIGTERM, a `caffeinate` wrapper that blocks idle-sleep, a machine-wide
|
||||
`gstack-evals` lock so concurrent worktrees serialize instead of saturating the
|
||||
model API, a run-scoped log under `~/.gstack-dev/eval-runs/`, a per-tier watchdog,
|
||||
and a guaranteed `### gstack-detach EXIT=<code> ###` sentinel so a poller never
|
||||
mistakes silence for success.
|
||||
|
||||
```bash
|
||||
bun run eval:bg # detached test:evals (diff-based)
|
||||
bun run eval:bg:all # detached test:evals:all
|
||||
bun run eval:bg:gate # detached gate-tier suite
|
||||
bun run eval:bg:periodic # detached periodic-tier suite
|
||||
```
|
||||
|
||||
Each prints its log path. Humans running `bun run test:evals` foreground in their
|
||||
own terminal don't need this — Ctrl-C is intended there.
|
||||
|
||||
**Eval comparison commentary:** `eval:compare` generates natural-language Takeaway sections interpreting what changed between runs — flagging regressions, noting improvements, calling out efficiency gains (fewer turns, faster, cheaper), and producing an overall summary. This is driven by `generateCommentary()` in `eval-store.ts`.
|
||||
|
||||
Artifacts are never cleaned up — they accumulate in `~/.gstack-dev/` for post-mortem debugging and trend analysis.
|
||||
@@ -232,6 +279,14 @@ For template authoring best practices (natural language over bash-isms, dynamic
|
||||
|
||||
To add a browse command, add it to `browse/src/commands.ts`. To add a snapshot flag, add it to `SNAPSHOT_FLAGS` in `browse/src/snapshot.ts`. Then rebuild.
|
||||
|
||||
**Don't bundle puppeteer/Chromium in a skill.** `browse` is the one shared
|
||||
Chromium per box, including offline local-render workloads. A skill that needs to
|
||||
rasterize its own HTML/JSON (diagrams, cards, og-images) should route through
|
||||
`browse` — `screenshot --selector` for visual output, `load-html` + `js --out` for
|
||||
bytes a render function returns — instead of `npm i puppeteer` and downloading a
|
||||
second Chromium that drifts out of version sync. One install to pin, one daemon to
|
||||
manage.
|
||||
|
||||
## Jargon list (V1 writing style)
|
||||
|
||||
gstack's Writing Style section (injected into every tier-≥2 skill's preamble)
|
||||
@@ -326,13 +381,17 @@ If you're using [Conductor](https://conductor.build) to run multiple Claude Code
|
||||
|
||||
| Hook | Script | What it does |
|
||||
|------|--------|-------------|
|
||||
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills |
|
||||
| `archive` | `bin/dev-teardown` | Removes skill symlinks, cleans up `.claude/` directory |
|
||||
| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills, runs `./setup` non-interactively, and (if gbrain is installed) renders brain-aware blocks into `.claude/gstack-rendered/` without dirtying tracked source |
|
||||
| `archive` | `bin/dev-teardown` | Removes skill symlinks and the `.claude/gstack-rendered/` render, then cleans up the `.claude/` directory |
|
||||
|
||||
When Conductor creates a new workspace, `bin/dev-setup` runs automatically. It detects the main worktree (via `git worktree list`), copies your `.env` so API keys carry over, and sets up dev mode — no manual steps needed.
|
||||
|
||||
`bin/dev-setup` runs `./setup` fully non-interactively (it passes `--plan-tune-hooks=prompt` and closes stdin), so a forwarded Conductor TTY can never hang on a hidden setup prompt. It also never installs the plan-tune Claude Code hooks, which means a throwaway workspace can't rewrite your global `~/.claude/settings.json` to point at an ephemeral worktree path. To install the plan-tune hooks deliberately, run `./setup --plan-tune-hooks` outside dev-setup (or `gstack-config set plan_tune_hooks yes`).
|
||||
|
||||
**First-time setup:** Put your `ANTHROPIC_API_KEY` in `.env` in the main repo (see `.env.example`). Every Conductor workspace inherits it automatically.
|
||||
|
||||
**`GSTACK_*` env prefix (Conductor-injected keys).** Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from every workspace's process env. The `.env` copy path doesn't restore them either — the strip happens after env inheritance. Users who want paid evals, `/sync-gbrain` embeddings, or `claude-agent-sdk` calls to work in a Conductor workspace must set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config; Conductor passes those through untouched. On the gstack side, TS entry points import `lib/conductor-env-shim.ts` as a side effect, which promotes `GSTACK_FOO_API_KEY` to `FOO_API_KEY` when the canonical name is empty. If you add a new TS entry point that hits a paid API, add `import "../lib/conductor-env-shim";` to the top of the file. Today the shim is imported from `bin/gstack-gbrain-sync.ts`, `bin/gstack-model-benchmark`, `scripts/preflight-agent-sdk.ts`, and `test/helpers/e2e-helpers.ts`.
|
||||
|
||||
## Things to know
|
||||
|
||||
- **SKILL.md files are generated.** Edit the `.tmpl` template, not the `.md`. Run `bun run gen:skill-docs` to regenerate.
|
||||
|
||||
@@ -31,16 +31,21 @@ The last 10% of completeness that teams used to skip? It costs seconds now.
|
||||
|
||||
---
|
||||
|
||||
## 1. Boil the Lake
|
||||
## 1. Boil the Ocean
|
||||
|
||||
AI-assisted coding makes the marginal cost of completeness near-zero. When
|
||||
the complete implementation costs minutes more than the shortcut — do the
|
||||
"Don't boil the ocean" was the right advice when engineering time was the
|
||||
bottleneck. That era is over. AI-assisted coding makes the marginal cost of
|
||||
completeness near-zero, so the old caution has quietly turned into an excuse.
|
||||
When the complete implementation costs minutes more than the shortcut — do the
|
||||
complete thing. Every time.
|
||||
|
||||
**Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module,
|
||||
full feature implementation, all edge cases, complete error paths. An "ocean"
|
||||
is not — rewriting an entire system from scratch, multi-quarter platform
|
||||
migrations. Boil lakes. Flag oceans as out of scope.
|
||||
**Ocean, lakes first:** The ocean is the destination — 100% test coverage for a
|
||||
module, full feature implementation, all edge cases, complete error paths. You
|
||||
get there one lake at a time: each lake is a boilable unit, not the ceiling.
|
||||
"That's boiling the ocean" is no longer a reason to ship a shortcut — boiling
|
||||
the ocean is the goal. The only thing still out of scope is genuinely unrelated
|
||||
work: a multi-quarter platform migration that has nothing to do with the task at
|
||||
hand. Flag that as separate scope. Boil everything else.
|
||||
|
||||
**Completeness is cheap.** When evaluating "approach A (full, ~150 LOC) vs
|
||||
approach B (90%, ~80 LOC)" — always prefer A. The 70-line delta costs
|
||||
@@ -144,7 +149,7 @@ think it's better, state what context you might be missing, and ask. Never act.
|
||||
|
||||
## How They Work Together
|
||||
|
||||
Boil the Lake says: **do the complete thing.**
|
||||
Boil the Ocean says: **do the complete thing.**
|
||||
Search Before Building says: **know what exists before you decide what to build.**
|
||||
|
||||
Together: search first, then build the complete version of the right thing.
|
||||
|
||||
@@ -204,7 +204,10 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
|
||||
| `/browse` | **QA Engineer** | Give the agent eyes. Real Chromium browser, real clicks, real screenshots. ~100ms per command. `/open-gstack-browser` launches GStack Browser with sidebar, anti-bot stealth, and auto model routing. |
|
||||
| `/setup-browser-cookies` | **Session Manager** | Import cookies from your real browser (Chrome, Arc, Brave, Edge) into the headless session. Test authenticated pages. |
|
||||
| `/autoplan` | **Review Pipeline** | One command, fully reviewed plan. Runs CEO → design → eng review automatically with encoded decision principles. Surfaces only taste decisions for your approval. |
|
||||
| `/spec` | **Spec Author** | Turn vague intent into a precise, executable spec in five phases (why, scope, technical with mandatory code-reading, draft, file). Codex quality gate before file (blocks below 7/10), fail-closed secret redaction, dedupe against existing issues, archive to `$GSTACK_STATE_ROOT/projects/$SLUG/specs/` for team-corpus recall. `--execute` spawns `claude -p` in a fresh worktree; `/ship` auto-closes the source issue on merge. Plan-mode aware. |
|
||||
| `/learn` | **Memory** | Manage what gstack learned across sessions. Review, search, prune, and export project-specific patterns, pitfalls, and preferences. Learnings compound across sessions so gstack gets smarter on your codebase over time. |
|
||||
| `/make-pdf` | **Publisher** | Markdown in, publication-quality document out. Mermaid and excalidraw fences render as vector diagrams, fully offline. Images scale to the page and never truncate; wide diagrams get their own landscape page. `--to html` emits one self-contained file, `--to docx` a Word doc. |
|
||||
| `/diagram` | **Diagram Maker** | English in, editable diagram out. Emits a triplet: mermaid source, `.excalidraw` you can open and edit on excalidraw.com (hand-drawn style), and rendered SVG/PNG. Zero network. Embed the source in markdown and `/make-pdf` renders it. |
|
||||
|
||||
### Which review should I use?
|
||||
|
||||
@@ -229,6 +232,8 @@ Each skill feeds into the next. `/office-hours` writes a design doc that `/plan-
|
||||
| `/setup-gbrain` | **GBrain Onboarding** — from zero to running gbrain in under 5 minutes. PGLite local, Supabase existing URL, or auto-provision a new Supabase project via Management API. MCP registration for Claude Code + per-repo trust triad (read-write/read-only/deny). [Full guide](USING_GBRAIN_WITH_GSTACK.md). |
|
||||
| `/sync-gbrain` | **Keep Brain Current** — re-index this repo's code into gbrain via `gbrain sources add` + `gbrain sync --strategy code`, refresh the `## GBrain Search Guidance` block in CLAUDE.md, and auto-remove guidance when the capability check fails. `--incremental` (default), `--full`, `--dry-run`. Idempotent; safe to re-run. |
|
||||
| `/gstack-upgrade` | **Self-Updater** — upgrade gstack to latest. Detects global vs vendored install, syncs both, shows what changed. |
|
||||
| `/ios-qa` | **iOS Live-Device QA (v1.43.0.0+)** — drive a real iPhone over USB CoreDevice via an embedded `StateServer` in the app. Read Swift source, codegen typed `@Observable` accessors, run the agent loop. Optional `--tailnet` flag exposes the device to OpenClaw or any HTTP-capable agent on your Tailscale tailnet so remote agents can run iOS QA without ever touching the hardware. Capability-tier allowlist (observe/interact/mutate/restore), per-device session lock, audit log. |
|
||||
| `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync` | iOS bug-fix loop, designer's-eye HIG audit, debug-bridge cleanup, and accessor resync. See `docs/skills.md`. End-to-end walkthrough: [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
|
||||
|
||||
### New binaries (v0.19)
|
||||
|
||||
@@ -238,6 +243,8 @@ Beyond the slash-command skills, gstack ships standalone CLIs for workflows that
|
||||
|---------|-------------|
|
||||
| `gstack-model-benchmark` | **Cross-model benchmark** — run the same prompt through Claude, GPT (via Codex CLI), and Gemini; compare latency, tokens, cost, and (optionally) LLM-judge quality score. Auth detected per provider, unavailable providers skip cleanly. Output as table, JSON, or markdown. `--dry-run` validates flags + auth without spending API calls. |
|
||||
| `gstack-taste-update` | **Design taste learning** — writes approvals and rejections from `/design-shotgun` into a persistent per-project taste profile. Decays 5%/week. Feeds back into future variant generation so the system learns what you actually pick. |
|
||||
| `gstack-ios-qa-daemon` | **iOS QA daemon** — Mac-side broker between an agent and a connected iPhone over USB CoreDevice. Loopback by default; `--tailnet` opens a Tailscale-facing listener with identity-gated capability tiers. Single-instance via flock on `~/.gstack/ios-qa-daemon.pid`. See [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). |
|
||||
| `gstack-ios-qa-mint` | **iOS allowlist manager** — owner-grant CLI for the tailnet allowlist. `grant`/`revoke`/`list` against `~/.gstack/ios-qa-allowlist.json` (mode 0600). Remote agents never auto-allowlist; this is the explicit-intent path. |
|
||||
|
||||
### Continuous checkpoint mode (opt-in, local by default)
|
||||
|
||||
@@ -388,13 +395,14 @@ I open sourced how I build software. You can fork it and make it your own.
|
||||
/setup-gbrain
|
||||
```
|
||||
|
||||
Three paths, pick one:
|
||||
Four paths, pick one:
|
||||
|
||||
- **Supabase, existing URL** — your cloud agent already provisioned a brain; paste the Session Pooler URL, and this laptop now uses the same data.
|
||||
- **Supabase, auto-provision** — paste a Supabase Personal Access Token; the skill creates a new project, polls to healthy, fetches the pooler URL, hands it to `gbrain init`. ~90 seconds end-to-end.
|
||||
- **PGLite local** — zero accounts, zero network, ~30 seconds. Isolated brain on this Mac only. Great for try-first; migrate to Supabase later with `/setup-gbrain --switch`.
|
||||
- **Remote gbrain MCP** — your brain runs on another machine (Tailscale, ngrok, internal LAN) or a teammate's server; paste an MCP URL and bearer token. Optionally pair with a local PGLite for symbol-aware code search in split-engine mode. Best for cross-machine memory without standing up a local DB.
|
||||
|
||||
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put_page`, etc. show up as first-class typed tools — not bash shell-outs.
|
||||
After init, the skill offers to register gbrain as an MCP server for Claude Code (`claude mcp add gbrain -- gbrain serve`) so `gbrain search`, `gbrain put`, etc. show up as first-class typed tools — not bash shell-outs.
|
||||
|
||||
**Keeping the brain current.** Run `/sync-gbrain` from any repo to re-index its code into gbrain (incremental by default, `--full` for a full reindex, `--dry-run` to preview). The skill registers the cwd as a federated source via `gbrain sources add`, runs `gbrain sync --strategy code`, and writes a `## GBrain Search Guidance` block to your project's CLAUDE.md so the agent prefers `gbrain search`/`code-def`/`code-refs` over Grep. The block is removed automatically if the capability check fails — no stale guidance pointing at tools that aren't installed.
|
||||
|
||||
@@ -412,6 +420,8 @@ The skill asks once per repo. The decision is sticky across worktrees and branch
|
||||
gstack-brain-init
|
||||
```
|
||||
|
||||
**Running gstack in Conductor?** Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from every workspace's process env, so paid evals and gbrain embeddings won't work out of the box. Set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config instead — gstack's TS entry points promote them to canonical names at runtime. Full details and the contributor checklist for adding the import to new entry points: [Conductor + GSTACK_* env vars](USING_GBRAIN_WITH_GSTACK.md#conductor--gstack_-env-vars).
|
||||
|
||||
**Full monty — every scenario, every flag, every bin helper, every troubleshooting step:** [USING_GBRAIN_WITH_GSTACK.md](USING_GBRAIN_WITH_GSTACK.md)
|
||||
|
||||
Other references: [docs/gbrain-sync.md](docs/gbrain-sync.md) (sync-specific guide) • [docs/gbrain-sync-errors.md](docs/gbrain-sync-errors.md) (error index)
|
||||
@@ -421,7 +431,8 @@ Other references: [docs/gbrain-sync.md](docs/gbrain-sync.md) (sync-specific guid
|
||||
| Doc | What it covers |
|
||||
|-----|---------------|
|
||||
| [Skill Deep Dives](docs/skills.md) | Philosophy, examples, and workflow for every skill (includes Greptile integration) |
|
||||
| [Builder Ethos](ETHOS.md) | Builder philosophy: Boil the Lake, Search Before Building, three layers of knowledge |
|
||||
| [Diagrams & Document Formats](docs/howto-diagrams-and-formats.md) | Mermaid/excalidraw fences in PDFs, image sizing and safety defaults, `--to html\|docx`, `/diagram` triplets |
|
||||
| [Builder Ethos](ETHOS.md) | Builder philosophy: Boil the Ocean, Search Before Building, three layers of knowledge |
|
||||
| [Using GBrain with GStack](USING_GBRAIN_WITH_GSTACK.md) | Every path, flag, bin helper, and troubleshooting step for `/setup-gbrain` |
|
||||
| [GBrain Sync](docs/gbrain-sync.md) | Cross-machine memory setup, privacy modes, troubleshooting |
|
||||
| [Architecture](ARCHITECTURE.md) | Design decisions and system internals |
|
||||
|
||||
@@ -2,11 +2,7 @@
|
||||
name: gstack
|
||||
preamble-tier: 1
|
||||
version: 1.1.0
|
||||
description: |
|
||||
Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
|
||||
elements, verify state, diff before/after, take annotated screenshots, test responsive
|
||||
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
|
||||
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots. (gstack)
|
||||
description: Fast headless browser for QA testing and site dogfooding. (gstack)
|
||||
allowed-tools:
|
||||
- Bash
|
||||
- Read
|
||||
@@ -21,6 +17,14 @@ triggers:
|
||||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Navigate pages, interact with
|
||||
elements, verify state, diff before/after, take annotated screenshots, test responsive
|
||||
layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
|
||||
test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
@@ -41,6 +45,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
|
||||
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
|
||||
REPO_MODE=${REPO_MODE:-unknown}
|
||||
echo "REPO_MODE: $REPO_MODE"
|
||||
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
|
||||
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
|
||||
echo "SESSION_KIND: $_SESSION_KIND"
|
||||
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
|
||||
# variant flaky), so skills render decisions as prose instead of calling the
|
||||
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
|
||||
# still BLOCKs rather than rendering prose to nobody.
|
||||
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
|
||||
echo "CONDUCTOR_SESSION: true"
|
||||
fi
|
||||
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
|
||||
echo "LAKE_INTRO: $_LAKE_SEEN"
|
||||
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
|
||||
@@ -56,7 +70,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
||||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
@@ -98,6 +112,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
||||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
@@ -107,7 +134,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
|
||||
|
||||
## Skill Invocation During Plan Mode
|
||||
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
|
||||
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
|
||||
|
||||
@@ -142,7 +169,7 @@ touch ~/.gstack/.writing-style-prompted
|
||||
|
||||
Skip if `WRITING_STYLE_PENDING` is `no`.
|
||||
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
|
||||
```bash
|
||||
open https://garryslist.org/posts/boil-the-ocean
|
||||
@@ -153,7 +180,7 @@ Only run `open` if yes. Always run `touch`.
|
||||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
@@ -229,6 +256,7 @@ Key routing rules:
|
||||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
@@ -473,9 +501,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
|
||||
|
||||
## Plan Status Footer
|
||||
|
||||
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
|
||||
|
||||
PLAN MODE EXCEPTION — always allowed (it's the plan file).
|
||||
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
|
||||
|
||||
If `PROACTIVE` is `false`: do NOT proactively invoke or suggest other gstack skills during
|
||||
this session. Only run skills the user explicitly invokes. This preference persists across
|
||||
@@ -488,6 +514,7 @@ quality gates that produce better results than answering inline.
|
||||
|
||||
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
|
||||
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
|
||||
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
|
||||
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
|
||||
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
|
||||
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`
|
||||
@@ -897,10 +924,10 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
|
||||
| `cookies` | All cookies as JSON |
|
||||
| `css <sel> <prop>` | Computed CSS value |
|
||||
| `dialog [--clear]` | Dialog messages |
|
||||
| `eval <file>` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. |
|
||||
| `eval <file> [--out <file>] [--raw]` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
|
||||
| `inspect [selector] [--all] [--history]` | Deep CSS inspection via CDP — full rule cascade, box model, computed styles |
|
||||
| `is <prop> <sel|@ref>` | State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected. |
|
||||
| `js <expr>` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. |
|
||||
| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
|
||||
| `network [--clear]` | Network requests |
|
||||
| `perf` | Page load timings |
|
||||
| `storage | storage set <key> <value>` | Read both localStorage and sessionStorage as JSON. With "set <key> <value>", write to localStorage only (sessionStorage is read-only via this command — set it with `js sessionStorage.setItem(...)`). |
|
||||
@@ -946,6 +973,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
|
||||
| `disconnect` | Disconnect headed browser, return to headless mode |
|
||||
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
|
||||
| `handoff [message]` | Open visible Chrome at current page for user takeover |
|
||||
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
|
||||
| `restart` | Restart server |
|
||||
| `resume` | Re-snapshot after user takeover, return control to AI |
|
||||
| `state save|load <name>` | Save/load browser state (cookies + URLs) |
|
||||
|
||||
@@ -32,6 +32,7 @@ quality gates that produce better results than answering inline.
|
||||
|
||||
**Routing rules — when you see these patterns, INVOKE the skill via the Skill tool:**
|
||||
- User describes a new idea, asks "is this worth building", brainstorms, pitches a concept → invoke `/office-hours`
|
||||
- User asks to spec something out, file an issue, write up a ticket, "turn this into a GitHub issue", "backlog item" → invoke `/spec`
|
||||
- User asks about strategy, scope, ambition, "think bigger", "what should we build" → invoke `/plan-ceo-review`
|
||||
- User asks to review architecture, lock in the plan, "does this design make sense" → invoke `/plan-eng-review`
|
||||
- User asks about design system, brand, visual identity, "how should this look" → invoke `/design-consultation`
|
||||
|
||||
@@ -1,5 +1,359 @@
|
||||
# TODOS
|
||||
|
||||
## NEXT PRIORITY
|
||||
|
||||
### P1: #1882 — portable skill-install prefix (non-`gstack` install dirs break silently)
|
||||
|
||||
**What:** Every generated SKILL.md hardcodes the literal `~/.claude/skills/gstack/...`
|
||||
for its `bin/`/asset calls (the per-invocation telemetry/config preamble plus ~9
|
||||
resolvers). `setup` wires the top-level skill symlinks for any directory name, so
|
||||
installing at `~/.claude/skills/<other>` leaves every internal `bin` reference
|
||||
pointing at a non-existent `~/.claude/skills/gstack/` path — failing **silently, at
|
||||
skill-invocation time**. Make the emitted references portable: resolve the install
|
||||
root at runtime (the preamble already defines `GSTACK_ROOT`/`GSTACK_BIN` in
|
||||
`scripts/resolvers/preamble/generate-preamble-bash.ts` but the literals don't use
|
||||
them) and emit `$GSTACK_BIN`-relative paths instead of the hardcoded prefix.
|
||||
|
||||
**Why:** Filed as #1882. Split out of the June 2026 fix wave (decision A) once
|
||||
implementation showed it is a host-config/design change, not a fix-wave patch. The
|
||||
urgent half — the guard/freeze/careful frontmatter hooks broken on CC 2.1.162 — was
|
||||
already fixed in that wave (#1871) with a literal `$HOME`-anchored path, because
|
||||
frontmatter hooks run before any runtime variable exists and cannot use `$GSTACK_BIN`.
|
||||
So #1882 is now purely the body-preamble portability work.
|
||||
|
||||
**Pros:** Unblocks installs at any directory name; removes a whole class of silent
|
||||
invocation-time failures.
|
||||
**Cons:** Touches the most load-bearing bash in the repo (every skill's preamble);
|
||||
a silent mistake breaks all 52 skills. High blast radius — needs its own focused PR.
|
||||
|
||||
**Context / where to start:**
|
||||
- Rewire `ctx.paths.binDir` (and browse/design dir paths) + the ~9 resolvers that
|
||||
emit the literal (`testing.ts`, `review.ts`, `design.ts`, `browse.ts`,
|
||||
`redact-doc.ts`, `tasks-section.ts`, `preamble/generate-*.ts`) to use the
|
||||
preamble-defined `$GSTACK_ROOT`/`$GSTACK_BIN`.
|
||||
- Ensure `GSTACK_ROOT`/`GSTACK_BIN` are defined before first use in EVERY skill's
|
||||
preamble (verify the telemetry preamble's first bin call is after the definition).
|
||||
- **Test conflict (verified):** `test/gen-skill-docs.test.ts:1942` and the sibling
|
||||
ship assertion currently *assert* that generated Claude output `.toContain('~/.claude/skills/gstack')`
|
||||
as a guardrail that Codex-host paths don't leak. These must be rewritten to match
|
||||
the new portable scheme.
|
||||
- Regenerate all 52 SKILL.md (`bun run scripts/gen-skill-docs.ts --host all`); never
|
||||
hand-edit generated files. Keep the history bisectable: land the resolver/host-config change as one commit, then the
|
||||
52-file regen as a separate commit.
|
||||
- Smoke-test a skill invocation from a non-`gstack` install dir to prove the fix.
|
||||
- Sibling of #349 (the `$CLAUDE_CONFIG_DIR` / `~/.claude` path issue).
|
||||
|
||||
## Test infrastructure
|
||||
|
||||
### ✅ DONE (v1.53.1.0): Rebaseline parity-suite (v1.44.1 → v1.53.0.0)
|
||||
|
||||
**What:** `test/parity-suite.test.ts` checked every skill's SKILL.md size against
|
||||
the frozen `test/fixtures/parity-baseline-v1.44.1.json`. Five planning skills had
|
||||
crept past the 1.05x ceiling: `plan-ceo-review` (1.052), `plan-eng-review` (1.062),
|
||||
`plan-design-review` (1.068), `investigate` (1.053), `office-hours` (1.065) — growth
|
||||
from the brain-aware-planning releases (v1.49–v1.52) plus the v1.53 redaction guard.
|
||||
|
||||
**Resolved:** Captured a fresh baseline at HEAD via
|
||||
`bun run scripts/capture-baseline.ts --tag v1.53.0.0` and re-pointed the test at
|
||||
`test/fixtures/parity-baseline-v1.53.0.0.json`. The per-skill 1.05 ratio is kept, so
|
||||
future bloat is still caught — only the stale anchor moved. Mirrors the earlier
|
||||
`skill-size-budget` rebase (v1.44.1 → v1.47.0.0). Historical v1.44.1 / v1.46.0.0 /
|
||||
v1.47.0.0 baselines retained in `test/fixtures/` for the v1→v2 audit trail. The
|
||||
captured skill bytes match `origin/main` exactly (the rebasing branch left every
|
||||
SKILL.md untouched). `bun test` is green again.
|
||||
|
||||
## Token-reduction follow-ups (Phase B, filed via /plan-eng-review on the plan-ceo-review carve)
|
||||
|
||||
### P3: Carve the always-loaded `{{PREAMBLE}}` reference blocks into an on-demand doc
|
||||
|
||||
**What:** The per-skill section carves (`/ship` v1.54, `/plan-ceo-review` v1.56) yield
|
||||
real but bounded wins (-42% to -59% on the carved skill) because the shared
|
||||
`{{PREAMBLE}}` (~40-50KB on every tier-3/4 skill) is the dominant always-loaded cost
|
||||
and stays inline. Move the rarely-needed preamble REFERENCE blocks (the AskUserQuestion
|
||||
split-rules and the CJK / lone-surrogate escaping reference) into an on-demand
|
||||
section-style doc the agent reads only when it hits those edge cases, leaving the hot
|
||||
path (voice, completeness principle, recommendation format) inline.
|
||||
|
||||
**Why:** Highest-ROI remaining token target. One preamble carve helps EVERY tier-≥2
|
||||
skill at once, not one skill per PR. The eng-review on the plan-ceo carve flagged that
|
||||
per-skill carves stay modest precisely because the preamble dominates the always-loaded
|
||||
surface.
|
||||
|
||||
**Pros:** A single change reduces always-loaded cost across the whole skill pack.
|
||||
**Cons:** The preamble is load-bearing and shared; a botched carve regresses every skill.
|
||||
Needs the same union-parity + per-push freshness guards the section carves use, applied
|
||||
corpus-wide.
|
||||
|
||||
**Context:** Builds on the v2 section pipeline (`scripts/resolvers/sections.ts`,
|
||||
`{{SECTION:id}}` / `{{SECTION_INDEX}}`). The preamble source is
|
||||
`scripts/resolvers/preamble.ts`. Measure which sub-blocks are cold (escaping reference,
|
||||
split-rules) vs hot (voice, recommendation format) before cutting. Validate on one skill,
|
||||
then roll corpus-wide.
|
||||
|
||||
**Effort estimate:** L (human team) → M (CC+gstack)
|
||||
**Priority:** P3
|
||||
**Depends on / blocked by:** The section pipeline (shipped v1.54). No hard blocker.
|
||||
|
||||
## gbrowser memory follow-ups (filed via /plan-eng-review + /codex on the v1.49 leak-fix PR)
|
||||
|
||||
These four items came out of the memory-leak investigation that shipped
|
||||
the `$B memory` diagnostic + the four leak fixes. They were
|
||||
deliberately deferred from that PR (already 14 commits / ~12 files);
|
||||
each stands alone and any one could ship independently.
|
||||
|
||||
### P2: MV3 extension service worker memory profile
|
||||
|
||||
**What:** The `/memory` endpoint snapshot enumerates pages but does
|
||||
not enumerate the gstack baked-in extension's service-worker target.
|
||||
A long-running MV3 service worker can leak through retained DOM
|
||||
snapshots, message ports that never close, alarms that re-arm, and
|
||||
caches that grow without bound. The diagnostic should call
|
||||
`Target.getTargets` with a filter for `service_worker` and include
|
||||
each one in `tabs[]` (or a sibling `serviceWorkers[]` array) with the
|
||||
same `Performance.getMetrics` data.
|
||||
|
||||
**Why:** Codex's outside-voice review on the eng-review surfaced this
|
||||
class of leak (the extension is part of the gbrowser process tree but
|
||||
invisible to today's snapshot). Until we surface it, a SW leak shows
|
||||
up only in the parent process RSS with no per-target attribution.
|
||||
|
||||
**Pros:** Closes the per-target attribution gap for the
|
||||
single most likely future leak source (our own extension).
|
||||
**Cons:** Extension SW lifecycle is asymmetric vs page lifecycle;
|
||||
auto-attach + filter is one more piece of CDP plumbing.
|
||||
|
||||
**Context:** Codex finding #4 on the eng-review outside voice. Not
|
||||
in scope of the v1.49 PR; deliberately deferred to keep the PR to
|
||||
the four highest-confidence leak fixes.
|
||||
|
||||
**Priority:** P2. **Effort:** M.
|
||||
|
||||
---
|
||||
|
||||
### P2: Native + GPU memory breakdown in `$B memory`
|
||||
|
||||
**What:** `$B memory` shows Bun RSS + per-tab JS heap + Chromium
|
||||
process tree (PIDs + types + CPU time) but the per-process RSS is
|
||||
absent — `SystemInfo.getProcessInfo` doesn't expose RSS and the eng
|
||||
review (D2 USE_CDP) explicitly chose CDP over shelling to `ps`. The
|
||||
honest next step is to surface what CDP DOES give for the other
|
||||
memory categories: `Memory.getDOMCounters` per target (node + listener
|
||||
counts), `SystemInfo.getInfo` for GPU memory, `Memory.getAllTimeSamplingProfile`
|
||||
for a sampled native estimate.
|
||||
|
||||
**Why:** Codex's outside-voice review flagged that
|
||||
`Performance.getMetrics` misses native memory, GPU memory, video
|
||||
buffers, Skia, network cache, extension process RSS, and
|
||||
browser-process RSS — all the categories where a 160 GB leak would
|
||||
actually live. A diagnostic that misses the categories where the
|
||||
leak class lives undersells itself.
|
||||
|
||||
**Pros:** Per-process category breakdown closes the gap between
|
||||
"Activity Monitor says 160 GB" and what the diagnostic shows.
|
||||
**Cons:** Each CDP method has its own quirks; this is a real
|
||||
implementation pass, not a one-line addition.
|
||||
|
||||
**Context:** Codex finding #5 on the eng-review outside voice. Not
|
||||
in scope of the v1.49 PR; deliberately deferred.
|
||||
|
||||
**Priority:** P2. **Effort:** M.
|
||||
|
||||
---
|
||||
|
||||
### P3: Single-context CDP listener for Network.loadingFinished
|
||||
|
||||
**What:** `wirePageEvents` attaches a `page.on('requestfinished')`
|
||||
listener PER PAGE. The D10 fix removed the body-materialization leak
|
||||
inside that listener but kept the per-page listener architecture
|
||||
(7 listeners attached per tab — close, framenavigated, dialog,
|
||||
console, request, response, requestfinished). The stretch goal from
|
||||
D10 was to replace the per-page `requestfinished` listener with a
|
||||
single context-level CDP listener via
|
||||
`Target.setAutoAttach({autoAttach: true, waitForDebuggerOnStart: false,
|
||||
flatten: true})` and a browser-wide `Network.loadingFinished` event
|
||||
handler.
|
||||
|
||||
**Why:** Going from N to 1 listener for the request-size capture is
|
||||
structurally the right architecture and removes one piece of per-tab
|
||||
memory pressure. The body-materialization fix already addressed the
|
||||
acute leak; this is the architectural cleanup that prevents similar
|
||||
leaks in the same class.
|
||||
|
||||
**Pros:** One listener per browser instead of one per tab.
|
||||
**Cons:** `Target.setAutoAttach` plumbing is more code than the
|
||||
straight per-page listener; the marginal memory win is small on top
|
||||
of the body-fetch fix that already landed.
|
||||
|
||||
**Context:** D10 stretch goal on the eng-review. The minimal-risk
|
||||
fix shipped in v1.49 (replaces `await res.body()` with
|
||||
`await req.sizes()`, preserving the per-page listener); this is the
|
||||
architectural follow-up.
|
||||
|
||||
**Priority:** P3. **Effort:** M-L.
|
||||
|
||||
---
|
||||
|
||||
### P3: Real-Chromium peak-RSS reproducer (periodic tier)
|
||||
|
||||
**What:** The gate-tier reproducer
|
||||
(`browse/test/memory-leak-reproducer.test.ts`) pins the invariant
|
||||
that `res.body()` is never called during a burst of
|
||||
`requestfinished` events. It uses a fake page; it does NOT spin up a
|
||||
real Chromium nor measure peak Bun RSS during a real concurrent fetch
|
||||
burst. A periodic-tier follow-up should: spin up a real headless
|
||||
Chromium, navigate to a fixture page that concurrently fetches 500
|
||||
mixed responses (small JSON, 100 KB images, 10 MB chunked,
|
||||
gzip-compressed 2 MB), sample `process.memoryUsage().heapUsed` every
|
||||
100 ms during the burst, assert `peak_heap < 200 MB above baseline`
|
||||
AND `post-gc_heap < 30 MB above baseline`. Also include a single-tab
|
||||
WebGL canvas variant that grows to >4 GB and asserts the per-tab RSS
|
||||
toast fires.
|
||||
|
||||
**Why:** Codex flagged that the leak's real failure mode is transient
|
||||
amplification under a concurrent burst, not a retained leak — a steady-state
|
||||
heap test misses it. The fake-page gate-tier test catches the
|
||||
listener-architecture regression; the periodic real-browser test
|
||||
catches the actual peak-RSS class.
|
||||
|
||||
**Pros:** Closes the "did we actually demonstrate the OOM is fixed"
|
||||
question with hard numbers. Feeds the ANGLE_B_NUMBERS CHANGELOG
|
||||
release-summary table.
|
||||
**Cons:** Periodic tier costs minutes of CI time and money per run;
|
||||
real-browser memory tests are inherently flaky.
|
||||
|
||||
**Context:** Codex outside-voice finding on the eng-review; D7
|
||||
ANGLE_B_NUMBERS CHANGELOG framing needs this reproducer's numbers
|
||||
before /ship time.
|
||||
|
||||
**Priority:** P3. **Effort:** M.
|
||||
|
||||
---
|
||||
|
||||
## design daemon: follow-ups (filed v1.45.0.0 via /ship review army)
|
||||
|
||||
### ✅ DONE (v1.45.0.0): Tighten daemon test coverage
|
||||
|
||||
**Resolved in commit `6b037c55` (same PR):** All 5 test gaps filled before
|
||||
landing. Per-file totals after: serve 16, daemon 34, daemon-discovery 23,
|
||||
feedback-roundtrip-daemon 4 = 77 (+10 from initial ship). Specifically:
|
||||
- Idle-shutdown actually fires (spawn-based, daemon process observed exiting,
|
||||
state file removed).
|
||||
- Bare GET polling doesn't reset idle (hammers `/api/progress` in background,
|
||||
daemon still idles out).
|
||||
- Idle-with-active-boards extends, then force-shuts after MAX_EXTENSIONS
|
||||
(with `DESIGN_DAEMON_EXTENSION_MS=1500` + `MAX_EXTENSIONS=2`).
|
||||
- Concurrent `ensureDaemon()` race converges on one daemon (lock wins).
|
||||
- Stale-lock reclaim (reclaim succeeds for a dead PID, is refused for a live unrelated PID).
|
||||
- Malformed-JSON + non-object + array-body + missing-html negatives for
|
||||
`POST /api/boards` and `POST /boards/<id>/api/reload`.
|
||||
|
||||
### P3: Minor maintainability nits from /ship review
|
||||
|
||||
- `design/src/cli.ts` and `design/src/serve.ts` both have a small `openBrowser`
|
||||
helper with identical darwin/linux/else branches. Extract a shared
|
||||
`design/src/open-browser.ts`.
|
||||
- `design/src/daemon-client.ts:320` (`AbortSignal.timeout(2000)`) and `:357`
|
||||
(`delay(50)`) use bare numeric literals while sibling timeouts are named
|
||||
constants. Promote to `SHUTDOWN_POST_TIMEOUT_MS` and `ALIVE_POLL_INTERVAL_MS`.
|
||||
- `design/src/daemon-state.ts:21` `serverPath` field is written
|
||||
(`daemon.ts:541`) but never read by production code. Either remove or
|
||||
document the forensic intent.
|
||||
|
||||
### P3: Daemon scope deferred from v1.45.0.0 plan
|
||||
|
||||
Originally listed in the plan's "TODOs surfaced for later" section:
|
||||
|
||||
- Per-daemon scoped auth tokens (only relevant once a tunnel/share use case appears).
|
||||
- Optional persistent board history on disk in
|
||||
`~/.gstack/projects/$SLUG/designs/history/` so submitted boards survive
|
||||
daemon restarts.
|
||||
- Windows spawn branch lifted from browse (V1 daemon is macOS + Linux;
|
||||
Windows users fall back to legacy `--no-daemon` per-process server).
|
||||
- `$D board list` / `$D board stop <id>` per-board ops CLI (V1 has only
|
||||
`$D daemon status` / `stop`).
|
||||
- Cross-worktree daemon attach (conductor sibling worktrees of the same
|
||||
repo currently each spawn their own daemon — matches browse; revisit
|
||||
if it causes friction).
|
||||
|
||||
---
|
||||
|
||||
## browse server: terminal-agent teardown follow-ups (filed v1.41 via /plan-eng-review)
|
||||
|
||||
### ✅ DONE (v1.44.0.0): Identity-based terminal-agent kill (replace pkill regex with PID)
|
||||
|
||||
**Resolved:** Bundled into the v1.44.0.0 long-lived-sidebar PR as Commit 0.
|
||||
`browse/src/terminal-agent-control.ts` is the new home for `readAgentRecord`,
|
||||
`writeAgentRecord`, `clearAgentRecord`, and `killAgentByRecord`. The agent
|
||||
writes `<stateDir>/terminal-agent-pid` (JSON `{pid, gen, startedAt}`) at boot
|
||||
and clears it on SIGTERM/SIGINT. `cli.ts` and `server.ts` both route through
|
||||
`killAgentByRecord` instead of `pkill -f terminal-agent\.ts`. The new
|
||||
`browse/test/terminal-agent-pid-identity.test.ts` is the static-grep tripwire
|
||||
that fails CI if `pkill ... terminal-agent` or `spawnSync('pkill', ...)`
|
||||
reappears in any source file.
|
||||
|
||||
---
|
||||
|
||||
### P3: shutdown() reads module-level `config`, not `cfg.config` (composition gap)
|
||||
|
||||
**What:** `browse/src/server.ts:shutdown()` reads `path.dirname(config.stateFile)`
|
||||
where `config` is the module-level value resolved at import time, not the
|
||||
`cfg.config` passed into `buildFetchHandler`. Same gap applies to
|
||||
`cleanSingletonLocks(resolveChromiumProfile())` at server.ts:1298 — should
|
||||
read `cfg.chromiumProfile`.
|
||||
|
||||
**Why:** Embedders today happen to share state-dir resolution with the CLI
|
||||
(both go through `resolveConfig()` against the same env), so this doesn't
|
||||
bite. But if an embedder ever passes a divergent `cfg.config` (e.g., a test
|
||||
harness pointing at a temp dir), shutdown will operate on the wrong paths.
|
||||
The `ownsTerminalAgent` flag exposes the problem without fixing it.
|
||||
|
||||
**Pros:** Closes the embedder-composition story properly. Pairs with
|
||||
`cfg.chromiumProfile` to give a single coherent "this factory teardown
|
||||
respects cfg" contract.
|
||||
|
||||
**Cons:** Pre-existing — not a regression. Two call sites today (1285 for
|
||||
terminal files, 1298 for chromium locks). Threading `cfg.config` and
|
||||
`cfg.chromiumProfile` into the right closures is straightforward but
|
||||
broader than the v1.41 fix.
|
||||
|
||||
**Context:** Flagged by both Codex and Claude subagent in the /plan-eng-review
|
||||
dual voices. Documented as out-of-scope in the v1.41 plan; same shape as the
|
||||
`chromiumProfile` PR-body note to the gbrowser team.
|
||||
|
||||
**Depends on:** None.
|
||||
|
||||
---
|
||||
|
||||
### P3: Ownership-object refactor if a 4th caller-owned teardown gate appears
|
||||
|
||||
**What:** Today `ServerConfig` has three caller-owned teardown gates:
|
||||
`xvfb?` (presence ⇒ don't close), `proxyBridge?` (same), and now
|
||||
`ownsTerminalAgent` (explicit boolean). If a 4th gate appears, collapse to
|
||||
`cfg.callerOwns?: Set<'terminalAgent' | 'xvfb' | 'proxyBridge' | ...>` or
|
||||
similar.
|
||||
|
||||
**Why:** Three independent flags is below the refactor threshold — each
|
||||
field has clear, distinct semantics and the JSDoc voice is consistent. A
|
||||
fourth tips the cost balance: the per-field surface gets noisy, and
|
||||
"what does this factory own?" becomes a question you have to ask of three
|
||||
or four scattered fields instead of one explicit set.
|
||||
|
||||
**Pros:** Single source of truth for "what gstack tears down". Trivial
|
||||
extension surface for future caller-owned resources. Easier to assert in
|
||||
tests ("the set should contain X, not Y").
|
||||
|
||||
**Cons:** Premature today. The polarity-inversion note in the
|
||||
`ownsTerminalAgent` JSDoc only hurts a little — it's one anomaly, not a
|
||||
pattern. Refactoring now to an ownership object would touch every embedder.
|
||||
|
||||
**Context:** Recommended by Claude subagent during /plan-ceo-review dual
|
||||
voice (autoplan). Trigger: a 4th caller-owned teardown gate in this same
|
||||
`ServerConfig` shape.
|
||||
|
||||
**Depends on:** A 4th gate to motivate the refactor.
|
||||
|
||||
---
|
||||
|
||||
## /sync-gbrain memory stage perf follow-up
|
||||
|
||||
### P2: Investigate `gbrain import` perf on large staging dirs
|
||||
@@ -457,7 +811,24 @@ reads it yet.
|
||||
|
||||
**Effort:** L (human: ~1 week / CC: ~4h)
|
||||
**Priority:** P0
|
||||
**Depends on:** 2+ weeks of v1 dogfood, profile diversity check passing.
|
||||
**Depends on:** **90+ days of v1 dogfood stable across 3+ skills** (per
|
||||
`docs/designs/PLAN_TUNING_V0.md` §"Deferred to v2" E1 acceptance criteria).
|
||||
Distinct from the lighter-weight diversity-display gate
|
||||
(`sample_size >= 20 AND skills_covered >= 3 AND question_ids_covered >= 8
|
||||
AND days_span >= 7`) used in /plan-tune to render the inferred column —
|
||||
display is a UI affordance, promotion to E1 needs a much higher bar
|
||||
because behavioral adaptation is consequential and hard to revert. Prior
|
||||
versions of this card cited "2+ weeks" which conflicted with V0 — V0 wins.
|
||||
|
||||
**Substrate risk (Codex outside-voice, Phase A review 2026-05-26):** Generated
|
||||
skill prose is agent-compliance-based. Tests can verify templates contain the
|
||||
right reads of `~/.gstack/developer-profile.json` and the right decision
|
||||
points, but tests cannot prove agents obey them at runtime. E1 ships
|
||||
adaptations as **advisory annotations on AskUserQuestion recommendations**
|
||||
("Recommended via your profile: <choice>") until there's a hard runtime
|
||||
execution path. Do NOT gate any AUTO_DECIDE on inferred profile alone in v1
|
||||
of E1; explicit per-question preferences remain the only AUTO_DECIDE
|
||||
source.
|
||||
|
||||
### E3 — `/plan-tune narrative` + `/plan-tune vibe`
|
||||
|
||||
@@ -1643,6 +2014,49 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
|
||||
**Priority:** P2
|
||||
**Depends on:** CDP patches proving the value of anti-bot stealth first
|
||||
|
||||
## /spec follow-ups (deferred from v1.47.0.0 via /plan-ceo-review SCOPE EXPANSION)
|
||||
|
||||
### P2: `/spec --epic` mode (parent issue + child issues + dependency graph)
|
||||
|
||||
**Priority:** P2
|
||||
|
||||
**What:** Add `--epic` flag that produces an Epic issue (parent) plus N child issues with explicit dependency graph and topological order. Emits multiple `gh issue create` calls with parent linkage in child bodies.
|
||||
|
||||
**Why:** Multi-week initiatives often span 3-5 specs that share context but ship sequentially. `/spec --epic` would let users author the full initiative in one session and file all linked issues atomically. The Epic template already exists in `spec/SKILL.md.tmpl` (carried over from PR #1698); only the flag routing and the multi-issue `gh` orchestration are missing.
|
||||
|
||||
**Pros:**
|
||||
- Closes the multi-issue workflow gap that `/spec` v1 doesn't cover.
|
||||
- Parent + child linkage means project boards show the full initiative at-a-glance.
|
||||
- Composes cleanly with existing `--execute` (spawn an agent on the parent epic; agent files children as it works).
|
||||
|
||||
**Cons:**
|
||||
- More gh API surface (one create per child, parent-link edit pass).
|
||||
- Dependency-graph rendering in markdown is fiddly across GitHub vs GitLab renderers.
|
||||
|
||||
**Context:** Considered in `/plan-ceo-review` SCOPE EXPANSION (D5), deferred 2026-05-25 in favor of shipping the 5 critical-path expansions (--execute, --dedupe, archive, quality gate, --audit). Re-evaluate once v1.47 ships and we see how often users hit "this should be 3 issues" in real /spec sessions.
|
||||
|
||||
**Depends on:** v1.47.0.0 `/spec` lands first; need real usage data to calibrate the multi-issue surface.
|
||||
|
||||
### P3: `/spec --dedupe` semantic matching (LLM-based) for v1.1
|
||||
|
||||
**Priority:** P3
|
||||
|
||||
**What:** Upgrade `--dedupe`'s string match against `gh issue list --search` to LLM-based semantic similarity. Today's v1 picks string overlap on title keywords; semantic match would catch "the sidebar terminal flakes on reload" matching an existing issue titled "PTY reconnect fails after extension restart" where keyword overlap is zero.
|
||||
|
||||
**Why:** String match has high precision but low recall — it misses near-duplicates with different vocabulary. LLM semantic match catches more dupes but costs ~$0.01-0.05 per spec dispatch and adds 5-10s latency.
|
||||
|
||||
**Pros:**
|
||||
- Catches dupes string match misses.
|
||||
- One more reason `/spec` is more useful than freehand authoring.
|
||||
|
||||
**Cons:**
|
||||
- Paid + slower. Most v1 users probably don't hit enough false negatives to justify the cost.
|
||||
- Adds another LLM-judged decision to a skill that already has the quality gate.
|
||||
|
||||
**Context:** Considered in `/plan-ceo-review` build-time decisions; chose string match for v1 to keep the dedupe path free + fast. Revisit if v1 produces a meaningful false-negative rate in real use.
|
||||
|
||||
**Depends on:** v1.47.0.0 ships; gather real false-negative data from the v1 string matcher.
|
||||
|
||||
## Completed
|
||||
|
||||
### Slim preamble + real-PTY plan-mode E2E harness (v1.13.1.0)
|
||||
@@ -1750,3 +2164,254 @@ Shipped in v0.6.5. TemplateContext in gen-skill-docs.ts bakes skill name into pr
|
||||
### Auto-upgrade mode + smart update check
|
||||
- Config CLI (`bin/gstack-config`), auto-upgrade via `~/.gstack/config.yaml`, 12h cache TTL, exponential snooze backoff (24h→48h→1wk), "never ask again" option, vendored copy sync on upgrade
|
||||
**Completed:** v0.3.8
|
||||
|
||||
---
|
||||
|
||||
## Brain-aware planning follow-ups (filed v1.48.0.0 via /plan-ceo-review + /plan-eng-review)
|
||||
|
||||
These are the deferred cherry-picks (E2/E3/E4) from the v1.48 brain-aware
|
||||
planning plan at `~/.claude/plans/hm-interesting-well-why-dapper-eagle.md`.
|
||||
The foundation (Phase 0 entity model + Phase 0.5 cache + Phase 1 preflight
|
||||
\+ Phase 1.5 trust policy + Phase 2 write-back scaffolding) ships in
|
||||
v1.48.0.0. These follow-ups extend it.
|
||||
|
||||
### P2: /gstack-reflect nightly synthesis skill (E2)
|
||||
|
||||
**What:** Scheduled skill that reads weekly `gstack/skill-run` + takes +
|
||||
`get_recent_salience` and synthesizes a `gstack/insight` page surfaced at
|
||||
next skill preflight.
|
||||
|
||||
**Why:** Cross-time pattern detection is the compounding move. "You ran 4
|
||||
plan-ceo on infra this week, 0 on product — is product work getting
|
||||
starved?" surfaces patterns the user wouldn't notice.
|
||||
|
||||
**Pros:** Brain compounds across TIME, not just across skills. Patterns
|
||||
become actionable.
|
||||
|
||||
**Cons:** "You're starving product work" is high-judgment territory; needs
|
||||
opt-out per project, careful insight templates.
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 cherry-pick (D4) — wait 4-6 weeks for
|
||||
real `gstack/skill-run` data to accumulate before designing the reflection
|
||||
layer against real patterns instead of imagined ones.
|
||||
|
||||
**Effort:** L (human ~1-2 days, CC ~4-6h)
|
||||
|
||||
**Depends on:** Phase 0 (gstack/skill-run page type from v1.48.0.0) +
|
||||
~6 weeks of accumulated data
|
||||
|
||||
### P3: Cross-machine brain-cache sync (E3)
|
||||
|
||||
**What:** Push compressed digests through the gstack-brain-sync git pipeline
|
||||
so the brain-cache survives moving between Macs / Conductor workspaces.
|
||||
|
||||
**Why:** Eliminates the cold-miss tax on every new machine (~1-2s once per
|
||||
machine per day).
|
||||
|
||||
**Pros:** Instant warm cache on new machines.
|
||||
|
||||
**Cons:** Cache poisoning risk if not designed carefully (hash invariants,
|
||||
endpoint-binding, conflict resolution).
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 cherry-pick (D5) — single-machine
|
||||
cache is fine for V1; correctness risk needs its own design pass.
|
||||
|
||||
**Effort:** M (human ~4h, CC ~30min)
|
||||
|
||||
**Depends on:** Brain-cache layer from v1.48.0.0
|
||||
|
||||
### P3: /gstack-onboarding dedicated skill (E4)
|
||||
|
||||
**What:** Guided 5-minute setup skill for new gstack installs: walks user
|
||||
through reading CLAUDE.md + README + recent commits to build `gstack/product`
|
||||
and active goals with explicit AUQs.
|
||||
|
||||
**Why:** Better UX than the inline bootstrap (which only fires when a
|
||||
planning skill is invoked).
|
||||
|
||||
**Pros:** Cleaner cold-start, explicit ceremony.
|
||||
|
||||
**Cons:** Inline bootstrap (in scope for v1.48) already covers the
|
||||
cold-start path adequately.
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 cherry-pick (D6) — observe inline
|
||||
bootstrap performance first; add dedicated skill if friction is real.
|
||||
|
||||
**Effort:** S (human ~2h, CC ~15min)
|
||||
|
||||
**Depends on:** Inline bootstrap subcommand from v1.48.0.0
|
||||
|
||||
### P2: Upstream gbrain takes_add + takes_resolve MCP ops
|
||||
|
||||
**What:** Add `mcp__gbrain__takes_add` and `mcp__gbrain__takes_resolve`
|
||||
ops in `~/git/gbrain/src/core/operations.ts`. Extract the markdown-fence
|
||||
mirror logic from `commands/takes.ts:570` into a reusable
|
||||
`engine.resolveTake()` helper.
|
||||
|
||||
**Why:** Unlocks Phase 2 calibration write-back without the fence-block
|
||||
fallback. ~150 LOC. Already on gbrain's v0.31.x roadmap.
|
||||
|
||||
**Pros:** Clean Phase 2 path, removes the "fall back to put_page" smell.
|
||||
|
||||
**Cons:** Lives in upstream gbrain repo, not helsinki — separate PR.
|
||||
|
||||
**Context:** Phase 2 write-back is already wired in v1.48.0.0 behind the
|
||||
BRAIN_CALIBRATION_WRITEBACK feature flag (default off). Flag flips to
|
||||
true once upstream gbrain ships these ops. ~50 LOC follow-up in
|
||||
helsinki to swap the fallback for the preferred op.
|
||||
|
||||
**Effort:** S (human ~1d, CC ~1h) in gbrain repo; trivial wire-up in
|
||||
helsinki.
|
||||
|
||||
**Depends on:** None (parallel-track from v1.48.0.0)
|
||||
|
||||
### P3: Background-refresh hook supervision
|
||||
|
||||
**What:** Codex outside-voice raised that "background refresh at skill END"
|
||||
is hand-wavy. Add proper process supervision: PID file, timeout, failure
|
||||
log, cross-platform spawn.
|
||||
|
||||
**Why:** The current implementation backgrounds the refresh with `&`, which works but
|
||||
leaves no observability when a refresh fails.
|
||||
|
||||
**Context:** Deferred from v1.48.0.0 codex tension T3. Stays low priority
|
||||
until users report stale digests where a background refresh silently
|
||||
failed.
|
||||
|
||||
**Effort:** S (human ~2h, CC ~20min)
|
||||
|
||||
### P2: Re-verify calibration takes when gbrain v0.42+ lands
|
||||
|
||||
**What:** When upstream gbrain ships `takes_add` MCP op and we flip
|
||||
`BRAIN_CALIBRATION_WRITEBACK` from FALSE to TRUE, re-run the manual
|
||||
probe in `docs/gbrain-write-surfaces.md` against `/office-hours` and
|
||||
confirm `gbrain takes_list` surfaces a `kind=bet` entry with the
|
||||
expected weight (0.9 for office-hours, per
|
||||
`scripts/brain-cache-spec.ts:151-157`).
|
||||
|
||||
**Why:** Today the calibration take path falls back to writing inside a
|
||||
`gbrain put` fence block because `takes_add` isn't available yet. Once
|
||||
v0.42+ ships, the agent will call `takes_add` directly — we should
|
||||
confirm the new path actually persists a queryable take.
|
||||
|
||||
**Context:** v1.50.0.0 plan §"NOT in scope". The fence-block fallback
|
||||
test (`test/takes-fence-fallback.test.ts`) covers wiring for both paths;
|
||||
this TODO is about live verification of the preferred path when it
|
||||
becomes available.
|
||||
|
||||
**Effort:** XS (human ~15min, CC ~5min)
|
||||
|
||||
**Depends on:** Upstream gbrain v0.42+ release shipping `takes_add` MCP
|
||||
op (separate TODO above).
|
||||
|
||||
### P2: Extend brain-writeback E2E to the other 4 planning skills
|
||||
|
||||
**What:** `test/skill-e2e-office-hours-brain-writeback.test.ts` covers
|
||||
the brain-writeback path for `/office-hours` only. Adding parallel
|
||||
tests for `/plan-ceo-review`, `/plan-eng-review`, `/plan-design-review`,
|
||||
and `/plan-devex-review` would bring per-skill agent-obedience coverage
|
||||
to parity with the resolver unit test
|
||||
(`test/resolvers-gbrain-save-results.test.ts`, which covers wiring for
|
||||
all 5).
|
||||
|
||||
**Why:** The resolver test proves the right instructions get emitted;
|
||||
the E2E proves the agent actually obeys. Today we only have that
|
||||
end-to-end signal for one of five planning skills.
|
||||
|
||||
**Context:** v1.50.0.0 plan §"NOT in scope". Extract `makeFakeGbrain`
|
||||
into `test/helpers/fake-gbrain.ts` when the second consumer arrives
|
||||
(YAGNI for one consumer today).
|
||||
|
||||
**Effort:** S (human ~1d, CC ~1h). Periodic-tier (~$2-4 total for 4
|
||||
runs).
|
||||
|
||||
**Depends on:** None.
|
||||
|
||||
### P2: Real-session carve canary (E3, deferred from carve-guard plan)
|
||||
|
||||
**What:** Wire a real-session section-Read-miss canary on top of the
|
||||
carved skills. When a real user session drives a carved skill and the
|
||||
agent does NOT Read a section the skeleton's STOP directive pointed it
|
||||
at, log it (salted, content-free) to
|
||||
`~/.gstack/analytics/section-reads.jsonl` and surface drift via
|
||||
`bun run eval:summary`. Non-blocking alert, never a merge gate
|
||||
(real-session data is non-deterministic).
|
||||
|
||||
**Why:** The static (E2) + behavioral (T2) guards prove carves are
|
||||
structurally sound and that a real agent Reads sections in a controlled
|
||||
eval. They do NOT see production drift — a prompt-context change that
|
||||
makes live agents start skipping a section. The canary is the only
|
||||
mechanism that catches that, from real usage.
|
||||
|
||||
**Context:** Deferred from the carve-guard-hardening plan (D5→T2, codex
|
||||
outside-voice #7). `test/helpers/transcript-section-logger.ts` exists but
|
||||
is built for deterministic test transcripts + ship action fingerprints,
|
||||
NOT real-session drift — it needs rework before it can back this. Ship
|
||||
the deterministic guards first; add this once they've proven useful. The
|
||||
carved-skill set + each skill's `requiredReads` are already declared in
|
||||
`test/helpers/carve-guards.ts`, so the canary reads its expectations
|
||||
from there.
|
||||
|
||||
**Effort:** M (human ~2d, CC ~4h).
|
||||
|
||||
**Depends on:** `transcript-section-logger.ts` real-session-drift rework.
|
||||
|
||||
### P2: Harden behavioral section-loading test hermeticity
|
||||
|
||||
**What:** `captureSectionReads` in `test/helpers/auq-sdk-capture.ts` accepts ANY
|
||||
Read whose path matches `sections/<file>.md`. The skeleton's STOP-Read directive
|
||||
points at the gstack-root install path (`scripts/resolvers/sections.ts` builds it
|
||||
from `ctx.paths.skillRoot`), not the planted fixture copy. So a run can satisfy
|
||||
the section-read assertion by reading the GLOBAL install's section instead of the
|
||||
hermetic fixture.
|
||||
|
||||
**Why:** A behavioral test that passes by reading the global install doesn't prove
|
||||
THIS branch's carved section loads. If the fixture's section were broken but the
|
||||
global install's weren't, the test would still pass.
|
||||
|
||||
**Context:** Codex outside-voice finding on the carve-guard ship (v1.57.0.0).
|
||||
Pre-existing in `auq-sdk-capture.ts` — affects `skill-e2e-ship-section-loading`,
|
||||
`skill-e2e-plan-ceo-review-section-loading`, and the new
|
||||
`carve-section-loading.test.ts`. Fix: match the fixture's ABSOLUTE sections path
|
||||
(the `planDir` copy), not a bare `sections/<file>.md` regex; or rewrite the STOP
|
||||
path to the fixture during the run.
|
||||
|
||||
**Effort:** S (human ~3h, CC ~30min). **Depends on:** None.
|
||||
|
||||
### P3: Content-hash diagram render cache for make-pdf
|
||||
|
||||
**What:** Cache rendered diagram SVG/PNG in `~/.gstack/cache/diagram-render/`,
|
||||
keyed on `sha256(fence source + bundle version + render options)`, so repeat
|
||||
`make-pdf` runs skip the browse render tab for unchanged diagrams.
|
||||
|
||||
**Why:** Every run currently re-renders every fence (~150-300ms each). Docs with
|
||||
10+ diagrams pay seconds per iteration during write-preview loops. Codex
|
||||
outside-voice flagged the missing cache story during the eng review of the
|
||||
diagram engine plan (2026-06-11, D7).
|
||||
|
||||
**Context:** The diagram-render bundle ships a `BUILD_INFO.json` with a content
|
||||
hash (see `lib/diagram-render/`) — use that as the bundle-version cache key
|
||||
component so bundle bumps invalidate cleanly. Invalidation surface is the main
|
||||
risk: stale renders after a mermaid theme change must not survive. Only worth
|
||||
building once users hit multi-diagram docs; wedge perf is fine without it.
|
||||
|
||||
**Effort:** S (human ~1d, CC ~30min). **Depends on:** diagram engine wedge
|
||||
shipping (lib/diagram-render bundle versioning).
|
||||
|
||||
### P3: Dedupe the make-pdf e2e gate-test harness
|
||||
|
||||
**What:** Five e2e files (`combined-gate`, `emoji-gate`, `diagram-gate`,
|
||||
`landscape-gate`, `format-gate`) each hand-roll the same prerequisite probe
|
||||
(binary/browse/poppler checks with CI hard-fail vs local skip), mkdtemp/rm
|
||||
lifecycle, and child-timeout constants. Extract a shared
|
||||
`make-pdf/test/e2e/helpers.ts` (prerequisites(), withWorkDir(), runGenerate()).
|
||||
|
||||
**Why:** Review-army maintainability finding on v1.58.0.0 — the boilerplate
|
||||
diverges a little more with each new gate (diagram-gate now captures stderr
|
||||
via Bun.spawnSync while the others use execFileSync), and a future fix to the
|
||||
CI-hard-fail contract has to land five times.
|
||||
|
||||
**Context:** Deferred at ship time (D8.2) because it's test-only churn across
|
||||
five green files at the tail of a release. Zero user-facing value; pure DRY.
|
||||
|
||||
**Effort:** S (human ~3h, CC ~20min). **Depends on:** None.
|
||||
|
||||
@@ -16,7 +16,16 @@ This is the full monty: every scenario, every flag, every helper bin, every trou
|
||||
|
||||
That's it. The skill detects your current state, asks three questions at most, and walks you through install, init, MCP registration for Claude Code, and per-repo trust policy. On a clean Mac with nothing installed it finishes in under five minutes. On a Mac where something's already set up it takes seconds (it detects the existing state and skips done work).
|
||||
|
||||
## The three paths
|
||||
## What you get after setup
|
||||
|
||||
Once `/setup-gbrain` finishes, your coding agent has two retrieval surfaces it didn't have before:
|
||||
|
||||
- **Semantic code search across this repo.** `gbrain search "browser security canary"` returns ranked file regions, not exact-match grep hits. `gbrain code-def`, `code-refs`, `code-callers`, `code-callees` walk the call graph by symbol — useful when you don't know which file holds the implementation but you know what it does. The agent prefers these over Grep when the question is semantic; CLAUDE.md gets a `## GBrain Search Guidance` block that teaches it the routing rules.
|
||||
- **Cross-session memory.** Plans, retros, decisions, and learnings from past sessions live in `~/.gstack/` and (if you opted in to artifacts sync) get pushed to a private git repo that gbrain indexes. `gbrain search "what did we decide about auth?"` actually finds the prior CEO plan instead of you re-describing context every session.
|
||||
|
||||
If you also enabled remote MCP (Path 4 below), brain queries route to a shared brain server that other machines can write to — your laptop, your desktop, and a teammate's machine all see the same memory.
|
||||
|
||||
## The four paths
|
||||
|
||||
You pick one when the skill asks "Where should your brain live?"
|
||||
|
||||
@@ -48,10 +57,25 @@ Best for: you'd rather click through supabase.com yourself than paste a PAT.
|
||||
|
||||
Best for: try-it-first, no account, no cloud, no sharing. Or a dedicated "this Mac's brain" that stays isolated from any cloud agent.
|
||||
|
||||
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls. Done in 30 seconds.
|
||||
**What happens:** `gbrain init --pglite`. Brain lives at `~/.gbrain/brain.pglite`. No network calls for the init itself. Done in 30 seconds.
|
||||
|
||||
**Embedding model.** When `VOYAGE_API_KEY` is set, gstack inits PGLite with `voyage-code-3` (1024-dim) — Voyage's code-specialized embedding model, which beats their general-purpose `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. Without `VOYAGE_API_KEY`, gbrain auto-selects a provider (OpenAI 1536-dim when `OPENAI_API_KEY` is present; otherwise it falls back through its provider chain). Either way, embedding generation calls out to the chosen provider's API during sync — set the key for the provider you want before running `/sync-gbrain`.
|
||||
|
||||
This is the best first choice if you just want to see what gbrain feels like before committing to cloud. You can always migrate later with `/setup-gbrain --switch`.
|
||||
|
||||
### Path 4: Remote gbrain MCP (split-engine)
|
||||
|
||||
Best for: your brain runs on another machine you control (Tailscale, ngrok, internal LAN) or a teammate's server. You want the cross-machine memory benefit without standing up a local database, and you still want symbol-aware code search on this Mac.
|
||||
|
||||
**What happens:** You paste an MCP URL (e.g. `https://wintermute.tail554574.ts.net:3131/mcp`) and a bearer token. The skill verifies the URL over the wire, registers gbrain as an HTTP MCP in `~/.claude.json` at user scope, and offers to also stand up a tiny local PGLite for code search (~30 seconds, ~120 MB disk).
|
||||
|
||||
If you accept the local PGLite, you end up in **split-engine mode**:
|
||||
|
||||
- **Brain/context queries** (`mcp__gbrain__search`, `mcp__gbrain__query`, `mcp__gbrain__get_page`) route to the remote MCP. Plans, retros, learnings, cross-machine memory — all on the shared server.
|
||||
- **Code queries** (`gbrain code-def`, `code-refs`, `code-callers`, `code-callees`, `gbrain search` for code) route to the local PGLite via the `.gbrain-source` pin in each worktree. Indexed locally, fast, never leaves the machine.
|
||||
|
||||
The two engines are independent. Wiping the local PGLite doesn't touch the remote brain; rotating the remote MCP bearer doesn't affect local code search. This is also the right configuration if your remote brain admin can't (or shouldn't) index every developer's checkout — local code stays local.
|
||||
|
||||
## MCP registration for Claude Code
|
||||
|
||||
By default the skill asks "Give Claude Code a typed tool surface for gbrain?" If you say yes, it runs:
|
||||
@@ -60,7 +84,7 @@ By default the skill asks "Give Claude Code a typed tool surface for gbrain?" If
|
||||
claude mcp add gbrain -- gbrain serve
|
||||
```
|
||||
|
||||
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put_page`, `gbrain get_page`, etc. show up as first-class tools in every session, not bash shell-outs.
|
||||
That registers gbrain's stdio MCP server with Claude Code. Now `gbrain search`, `gbrain put`, `gbrain get`, etc. show up as first-class tools in every session, not bash shell-outs.
|
||||
|
||||
**If `claude` is not on PATH**, the skill skips MCP registration gracefully with a manual-register hint. The CLI resolver still works from any skill that shells out to `gbrain` — MCP is an upgrade, not a prerequisite.
|
||||
|
||||
@@ -95,6 +119,35 @@ SSH and HTTPS remote variants collapse to the same key: `https://github.com/foo/
|
||||
|
||||
Storage: `~/.gstack/gbrain-repo-policy.json`, mode 0600, schema-versioned so future migrations stay deterministic.
|
||||
|
||||
## Keeping the brain current with `/sync-gbrain`
|
||||
|
||||
`/setup-gbrain` is one-time onboarding. `/sync-gbrain` is the verb you run every time you want gbrain to see fresh changes in this repo's code.
|
||||
|
||||
```bash
|
||||
/sync-gbrain # incremental: mtime fast-path, ~seconds on a clean tree
|
||||
/sync-gbrain --full # full reindex (~25-35 minutes on a big Mac)
|
||||
/sync-gbrain --code-only # only the code stage; skip memory + brain-sync
|
||||
/sync-gbrain --dry-run # preview what would sync; no writes
|
||||
```
|
||||
|
||||
The skill runs three stages — code, memory, brain-sync — independently. A failure in one doesn't block the others. State persists to `~/.gstack/.gbrain-sync-state.json` so re-running picks up cleanly.
|
||||
|
||||
**What it does on a fresh worktree:**
|
||||
|
||||
1. **Pre-flight.** Checks `gbrain_local_status` (the local engine's health). If the engine is `broken-db` or `broken-config`, the skill STOPs with a remediation menu — it refuses to silently degrade. If the local engine is missing and you're in remote-MCP mode (Path 4), the code stage SKIPs cleanly and only brain-sync runs.
|
||||
2. **Code stage.** Registers the cwd as a federated source via `gbrain sources add`, writes a `.gbrain-source` pin file in the repo root (kubectl-style context — every worktree gets its own pin, so Conductor sibling worktrees don't collide), runs `gbrain sync --strategy code`.
|
||||
3. **Memory stage.** Stages your `~/.gstack/` transcripts + curated memory. In local-stdio MCP mode, ingests into the local engine. In remote-http MCP mode, persists staged markdown to `~/.gstack/transcripts/run-<pid>-<ts>/` for the remote brain admin's pull pipeline. The ingest timeout is 30 minutes by default; raise it for a big brain with `GSTACK_INGEST_TIMEOUT_MS` (accepts 1 minute to 24 hours). On timeout the gbrain import checkpoint is preserved, so the next `/sync-gbrain` resumes instead of starting over.
|
||||
4. **Brain-sync stage.** Pushes curated artifacts (plans, designs, retros) to your private artifacts repo if you have one configured.
|
||||
5. **CLAUDE.md guidance.** Capability-checks the round-trip (write a page → search → find it). If green, writes the `## GBrain Search Guidance` block to your project's CLAUDE.md. If red, REMOVES the block — the agent should never be told to use a tool that isn't installed.
|
||||
|
||||
**The watermark.** Sync state advances by commit hash. If gbrain hits a file it can't index (a file over the 5 MB per-file hard limit, or a file that vanished mid-sync), the watermark stays put and subsequent syncs retry. To acknowledge an unfixable failure and move past it:
|
||||
|
||||
```bash
|
||||
gbrain sync --source <source-id> --skip-failed
|
||||
```
|
||||
|
||||
Re-runnable, idempotent, safe to run from multiple terminals on the same machine (locked at `~/.gstack/.sync-gbrain.lock`).
|
||||
|
||||
## Switching engines later
|
||||
|
||||
Picked PGLite and now want to join a team brain? One command:
|
||||
@@ -173,8 +226,8 @@ Gbrain itself ships with these that gstack wraps:
|
||||
| `gbrain migrate --to supabase --url ...` | Move a PGLite brain to Supabase (lossless, preserves source as backup) |
|
||||
| `gbrain migrate --to pglite` | Reverse migration |
|
||||
| `gbrain search "query"` | Search the brain |
|
||||
| `gbrain put_page --title "..." --tags "a,b" <<<"content"` | Write a page |
|
||||
| `gbrain get_page "<slug>"` | Fetch a page |
|
||||
| `gbrain put "<slug>" --content "<markdown-with-frontmatter>"` | Write a page (title/tags go in YAML frontmatter inside `--content`) |
|
||||
| `gbrain get "<slug>"` | Fetch a page |
|
||||
| `gbrain serve` | Start the MCP stdio server (used by `claude mcp add`) |
|
||||
|
||||
### Config files + state
|
||||
@@ -200,6 +253,26 @@ Gbrain itself ships with these that gstack wraps:
|
||||
| `SUPABASE_API_BASE` | `gstack-gbrain-supabase-provision` | Override the Management API host. Used by tests to point at a mock server. |
|
||||
| `GBRAIN_INSTALL_DIR` | `gstack-gbrain-install` | Override default install path (`~/gbrain`) |
|
||||
| `GSTACK_HOME` | every bin helper | Override `~/.gstack` state dir. Heavy test use. |
|
||||
| `VOYAGE_API_KEY` | `gbrain embed` subprocess; gstack PGLite init | When set, gstack inits PGLite with `voyage-code-3` (1024-dim), Voyage's code-specialized embedding model. Beats `voyage-4-large` and OpenAI `text-embedding-3-large` head-to-head on this codebase's symbol queries. See CHANGELOG v1.43.1.0 for the A/B numbers. |
|
||||
| `OPENAI_API_KEY` | `gbrain embed` subprocess | Used for embeddings during `gbrain sync` / `/sync-gbrain` when `VOYAGE_API_KEY` is not set (gbrain's auto-selected fallback, `text-embedding-3-large` 1536-dim). Without either key, pages are imported structurally (symbol tables, chunks) but semantic search degrades — you'll see `[gbrain] embedding failed for code file ...` in the sync log. |
|
||||
| `ANTHROPIC_API_KEY` | `claude-agent-sdk`, paid evals | Required for `bun run test:evals` and any direct `query()` call against Claude. |
|
||||
| `GSTACK_OPENAI_API_KEY` | `lib/conductor-env-shim.ts` | Conductor-injected fallback. Promoted to `OPENAI_API_KEY` when the canonical name is empty. |
|
||||
| `GSTACK_ANTHROPIC_API_KEY` | `lib/conductor-env-shim.ts` | Same pattern as above for Anthropic. |
|
||||
|
||||
## Conductor + GSTACK_* env vars
|
||||
|
||||
If you run gstack inside a [Conductor](https://conductor.build) workspace, **Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from the workspace env.** Setting them in `~/.zshrc` or `.env` won't help — the strip happens after env inheritance. To get a usable API key into a workspace, set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config instead. Conductor passes those through untouched.
|
||||
|
||||
`lib/conductor-env-shim.ts` bridges the gap on the gstack side: when imported as a side effect (`import "../lib/conductor-env-shim";`), it promotes `GSTACK_FOO_API_KEY` to `FOO_API_KEY` for any subprocess that doesn't see the canonical name. The shim is already wired into:
|
||||
|
||||
- `bin/gstack-gbrain-sync.ts` — so `/sync-gbrain` picks up OpenAI for embeddings
|
||||
- `bin/gstack-model-benchmark` — so `--judge` runs work without manual env mapping
|
||||
- `scripts/preflight-agent-sdk.ts` — so paid-eval auth probes work
|
||||
- `test/helpers/e2e-helpers.ts` — so `bun run test:evals` finds Anthropic
|
||||
|
||||
If you add a new TS entry point that hits a paid API or needs gbrain embeddings, add the same one-line import at the top. See [CONTRIBUTING.md "Conductor workspaces"](CONTRIBUTING.md#conductor-workspaces) for the contributor checklist.
|
||||
|
||||
`bin/gstack-codex-probe` is bash and doesn't read these directly — it relies on `~/.codex/` auth managed by the Codex CLI.
|
||||
|
||||
## Security model
|
||||
|
||||
@@ -267,6 +340,26 @@ You edited `~/.gstack/gbrain-repo-policy.json` by hand with legacy `allow` value
|
||||
|
||||
`/health` treats that as yellow, not red. Check `gbrain doctor --json | jq .checks` to see which sub-checks are warning. Typical causes: resolver MECE overlap (skill names clashing) or DB connection not yet configured.
|
||||
|
||||
### `/sync-gbrain` reports `OK` but `gbrain search` returns nothing semantic
|
||||
|
||||
Embeddings probably failed during import. Symbol queries (`code-def`, `code-refs`) still work because they don't need embeddings, but `gbrain search "<terms>"` falls back to a degraded BM25 path. Look in the sync output for lines like:
|
||||
|
||||
```
|
||||
[gbrain] embedding failed for code file <name>: OpenAI embedding requires OPENAI_API_KEY
|
||||
```
|
||||
|
||||
The fix is to put a provider API key in the process env before re-running. `VOYAGE_API_KEY` is preferred for code (gstack defaults PGLite to `voyage-code-3` when set); otherwise `OPENAI_API_KEY` falls back to `text-embedding-3-large`. On a bare Mac shell, source the key from `~/.zshrc` before calling. In Conductor, the `lib/conductor-env-shim.ts` shim promotes `GSTACK_ANTHROPIC_API_KEY` / `GSTACK_OPENAI_API_KEY` to their canonical names automatically; for `VOYAGE_API_KEY`, set it directly in your Conductor workspace env. Re-run `/sync-gbrain --code-only` to backfill embeddings on already-imported pages.
|
||||
|
||||
### `gbrain sync` blocked at a commit hash — `FILE_TOO_LARGE`
|
||||
|
||||
A file in your tree exceeds gbrain's 5 MB hard limit (`MAX_FILE_SIZE` in `gbrain/src/core/import-file.ts`). Common culprits: response replay caches, captured screenshots, large JSON fixtures. Gbrain doesn't honor `.gitignore`-style exclude lists for code sync; the only knob is acknowledging the failure:
|
||||
|
||||
```bash
|
||||
gbrain sync --source <source-id> --skip-failed
|
||||
```
|
||||
|
||||
The watermark then advances past the offending commit. The same file will fail again if it changes; re-run the command with `--skip-failed` when that happens.
|
||||
|
||||
### Switching PGLite → Supabase hangs
|
||||
|
||||
Another gstack session in a sibling Conductor workspace may be holding a lock on your local PGLite file via its preamble's `gstack-brain-sync` call. Close other workspaces, re-run `/setup-gbrain --switch`. The timeout is bounded at 180s so you'll never actually wait forever.
|
||||
@@ -286,7 +379,7 @@ Another gstack session in a sibling Conductor workspace may be holding a lock on
|
||||
## Related skills + next steps
|
||||
|
||||
- `/health` — includes a GBrain dimension (doctor status, sync queue depth, last-push age) in its 0-10 composite score. The dimension is omitted when gbrain isn't installed; running `/health` on a non-gbrain machine doesn't penalize that choice.
|
||||
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. To bump gbrain, update `PINNED_COMMIT` in `bin/gstack-gbrain-install` and re-run `/setup-gbrain`.
|
||||
- `/gstack-upgrade` — keeps gstack itself up to date. Does NOT upgrade gbrain independently. gbrain installs at the latest HEAD by default; to refresh it, `git pull` in your gbrain clone (default `~/gbrain`) and re-run `/setup-gbrain`. Pin a specific commit with `gstack-gbrain-install --pinned-commit <sha>` if you need reproducibility. Installs below the minimum tested version are refused.
|
||||
- `/retro` — weekly retrospective pulls learnings and plans from your gbrain when memory sync is on, letting the retro reference cross-machine history.
|
||||
|
||||
Run `/setup-gbrain` and see what sticks.
|
||||
|
||||
+133
-122
@@ -2,16 +2,7 @@
|
||||
name: autoplan
|
||||
preamble-tier: 3
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk
|
||||
and runs them sequentially with auto-decisions using 6 decision principles. Surfaces
|
||||
taste decisions (close approaches, borderline scope, codex disagreements) at a final
|
||||
approval gate. One command, fully reviewed plan out.
|
||||
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
|
||||
automatically", or "make the decisions for me".
|
||||
Proactively suggest when the user has a plan file and wants to run the full review
|
||||
gauntlet without answering 15-30 intermediate questions. (gstack)
|
||||
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
|
||||
description: Auto-review pipeline — reads the full CEO, design, eng, and DX review skills from disk and runs them sequentially with auto-decisions using 6 decision principles. (gstack)
|
||||
benefits-from: [office-hours]
|
||||
triggers:
|
||||
- run all reviews
|
||||
@@ -30,6 +21,19 @@ allowed-tools:
|
||||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Surfaces
|
||||
taste decisions (close approaches, borderline scope, codex disagreements) at a final
|
||||
approval gate. One command, fully reviewed plan out.
|
||||
Use when asked to "auto review", "autoplan", "run all reviews", "review this plan
|
||||
automatically", or "make the decisions for me".
|
||||
Proactively suggest when the user has a plan file and wants to run the full review
|
||||
gauntlet without answering 15-30 intermediate questions.
|
||||
|
||||
Voice triggers (speech-to-text aliases): "auto plan", "automatic review".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
@@ -50,6 +54,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
|
||||
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
|
||||
REPO_MODE=${REPO_MODE:-unknown}
|
||||
echo "REPO_MODE: $REPO_MODE"
|
||||
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
|
||||
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
|
||||
echo "SESSION_KIND: $_SESSION_KIND"
|
||||
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
|
||||
# variant flaky), so skills render decisions as prose instead of calling the
|
||||
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
|
||||
# still BLOCKs rather than rendering prose to nobody.
|
||||
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
|
||||
echo "CONDUCTOR_SESSION: true"
|
||||
fi
|
||||
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
|
||||
echo "LAKE_INTRO: $_LAKE_SEEN"
|
||||
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
|
||||
@@ -65,7 +79,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
||||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
@@ -107,6 +121,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
||||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
@@ -116,7 +143,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
|
||||
|
||||
## Skill Invocation During Plan Mode
|
||||
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
|
||||
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
|
||||
|
||||
@@ -151,7 +178,7 @@ touch ~/.gstack/.writing-style-prompted
|
||||
|
||||
Skip if `WRITING_STYLE_PENDING` is `no`.
|
||||
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
|
||||
```bash
|
||||
open https://garryslist.org/posts/boil-the-ocean
|
||||
@@ -162,7 +189,7 @@ Only run `open` if yes. Always run `touch`.
|
||||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
@@ -238,6 +265,7 @@ Key routing rules:
|
||||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
@@ -285,13 +313,39 @@ AI orchestrator (e.g., OpenClaw). In spawned sessions:
|
||||
|
||||
"AskUserQuestion" can resolve to two tools at runtime: the **host MCP variant** (e.g. `mcp__conductor__AskUserQuestion` — appears in your tool list when the host registers it) or the **native** Claude Code tool.
|
||||
|
||||
**Rule:** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
|
||||
**Conductor rule (read before the MCP rule):** if `CONDUCTOR_SESSION: true` was echoed by the preamble, do NOT call AskUserQuestion at all — neither native nor any `mcp__*__AskUserQuestion` variant. Render EVERY decision brief as the **prose form** below and STOP. This is proactive, not a reaction to a failure: Conductor disables native AUQ and its MCP variant is flaky (it returns `[Tool result missing due to internal error]`), so prose is the reliable path. **Auto-decide preferences still apply first:** if a `[plan-tune auto-decide] <id> → <option>` result has already surfaced for a question, proceed with that option (no prose). Because in Conductor you go straight to prose without ever calling the tool, this auto-decide-first ordering is enforced HERE, not only by the PreToolUse hook. When you render a Conductor prose brief, also capture it with `bin/gstack-question-log` (the PostToolUse capture hook never fires on a prose path, so `/plan-tune` history/learning depends on this call).
|
||||
|
||||
**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report `BLOCKED — AskUserQuestion unavailable`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only `/plan-tune` AUTO_DECIDE opt-ins authorize auto-picking).
|
||||
**Rule (non-Conductor):** if any `mcp__*__AskUserQuestion` variant is in your tool list, prefer it. Hosts may disable native AUQ via `--disallowedTools AskUserQuestion` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies.
|
||||
|
||||
If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below.
|
||||
|
||||
### When AskUserQuestion is unavailable or a call fails
|
||||
|
||||
Tell three outcomes apart:
|
||||
|
||||
1. **Auto-decide denial (NOT a failure).** The result contains `[plan-tune auto-decide] <id> → <option>` — the preference hook working as designed. Proceed with that option. Do NOT retry, do NOT fall back to prose.
|
||||
2. **Genuine failure** — no variant in your tool list, OR the variant is present but the call returns an error / missing result (MCP transport error, empty result, host bug — e.g. Conductor's MCP AskUserQuestion is flaky and returns `[Tool result missing due to internal error]`).
|
||||
- If it was present and **errored** (not absent), retry the SAME call **once** — but only if no answer could have surfaced (a missing-result error can arrive after the user already saw the question; retrying would double-prompt, so if it may have reached them, treat as pending, don't retry).
|
||||
- Then branch on `SESSION_KIND` (echoed by the preamble; empty/absent ⇒ `interactive`):
|
||||
- `spawned` → defer to the **Spawned session** block: auto-choose the recommended option. Never prose, never BLOCKED.
|
||||
- `headless` → `BLOCKED — AskUserQuestion unavailable`; stop and wait (no human can answer).
|
||||
- `interactive` → **prose fallback** (below).
|
||||
|
||||
**Prose fallback — render the decision brief as a markdown message, not a tool call.** Same information as the tool format below, different structure (paragraphs, not ✅/❌ bullets). It MUST surface this triad:
|
||||
|
||||
1. **A clear ELI10 of the issue itself** — plain English on what's being decided and why it matters (the question, not per-choice), naming the stakes. Lead with it.
|
||||
2. **Completeness scores per choice** — explicit `Completeness: X/10` on EACH choice (10 complete, 7 happy-path, 3 shortcut); use the kind-note when options differ in kind not coverage, but never silently drop the score.
|
||||
3. **The recommendation and why** — a `Recommendation: <choice> because <reason>` line plus the `(recommended)` marker on that choice.
|
||||
|
||||
Layout: a `D<N>` title + a one-line note to reply with a letter (in Conductor this is the normal path; elsewhere it means AskUserQuestion was unavailable or errored); the issue ELI10; the Recommendation line; then ONE paragraph per choice carrying its `(recommended)` marker, its `Completeness: X/10`, and 2-4 sentences of reasoning — never a bare bullet list; a closing `Net:` line. Split chains / 5+ options: one prose block per per-option call, in sequence. Then STOP and wait — the user's typed answer is the decision. In plan mode this satisfies end-of-turn like a tool call.
|
||||
|
||||
**Continuation — mapping a typed reply back to a brief.** Each brief carries a stable label (`D<N>`, or `D<N>.k` in a split chain). The user references it (e.g. "3.2: B"). A bare letter maps to the single most-recent UNANSWERED brief; if more than one is open (a split chain), do NOT guess — ask which `D<N>.k` it answers. Never apply a bare letter ambiguously across a chain.
|
||||
|
||||
**One-way / destructive confirmations in prose.** When the decision is a one-way door (irreversible or destructive — delete, force-push, drop, overwrite), prose is a WEAKER gate than the tool, so make it stronger: require an explicit typed confirmation (the exact option letter or word), state plainly what is irreversible, and NEVER proceed on a vague, partial, or ambiguous reply — re-ask instead. Treat silence or "ok"/"sure" without the explicit choice as not-yet-confirmed.
|
||||
|
||||
### Format
|
||||
|
||||
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose.
|
||||
Every AskUserQuestion is a decision brief and must be sent as tool_use, not prose — unless the documented failure fallback above applies (interactive session + the call is unavailable/erroring), in which case the prose fallback is the correct output.
|
||||
|
||||
```
|
||||
D<N> — <one-line question title>
|
||||
@@ -324,25 +378,41 @@ Effort both-scales: when an option involves effort, label both human-team and CC
|
||||
|
||||
Net line closes the tradeoff. Per-skill instructions may add stricter rules.
|
||||
|
||||
12. **Non-ASCII characters — write directly, never \u-escape.** When any
|
||||
string field (question, option label, option description) contains
|
||||
Chinese (繁體/簡體), Japanese, Korean, or other non-ASCII text, emit
|
||||
the literal UTF-8 characters in the JSON string. **Never escape them
|
||||
as `\uXXXX`.** Claude Code's tool parameter pipe is UTF-8 native
|
||||
and passes characters through unchanged. Manually escaping requires
|
||||
recalling each codepoint from training, which is unreliable for long
|
||||
CJK strings — the model regularly emits the wrong codepoint (e.g.
|
||||
writes `\u3103` thinking it is 管 U+7BA1, but `\u3103` is
|
||||
actually a different codepoint, so the user sees `管理工具` rendered as `3用箱`).
|
||||
The trigger is long, multi-line questions with hundreds of CJK
|
||||
characters: that is exactly when reflexive escaping kicks in and
|
||||
exactly when miscoding is most damaging. Long ≠ escape. Keep
|
||||
characters literal.
|
||||
### Handling 5+ options — split, never drop
|
||||
|
||||
Wrong: `"question": "請選擇\uXXXX\uXXXX\uXXXX\uXXXX"`
|
||||
Right: `"question": "請選擇管理工具"`
|
||||
AskUserQuestion caps every call at **4 options**. With 5+ real options, NEVER
|
||||
drop, merge, or silently defer one to fit. Pick a compliant shape:
|
||||
|
||||
Only JSON-mandatory escapes remain allowed: `\n`, `\t`, `\"`, `\\`.
|
||||
- **Batch into ≤4-groups** — for coherent alternatives (e.g. version bumps,
|
||||
layout variants). One call, 5th surfaced only if first 4 don't fit.
|
||||
- **Split per-option** — for independent scope items (e.g. "ship E1..E6?").
|
||||
Fire N sequential calls, one per option. Default to this when unsure.
|
||||
|
||||
Per-option call shape: `D<N>.k` header (e.g. D3.1..D3.5), ELI10 per option,
|
||||
Recommendation, kind-note (no completeness score — Include/Defer/Cut/Hold are
|
||||
decision actions), and 4 buckets:
|
||||
**A) Include**, **B) Defer**, **C) Cut**, **D) Hold** (stop chain, discuss).
|
||||
|
||||
After the chain, fire `D<N>.final` to validate the assembled set (re-prompt on
|
||||
dependency conflicts) and confirm shipping it. Use `D<N>.revise-<k>` to
|
||||
revise one option without re-running the chain.
|
||||
|
||||
For N>6, fire a `D<N>.0` meta-AskUserQuestion first (proceed / narrow / batch).
|
||||
|
||||
question_ids for split chains: `<skill>-split-<option-slug>` (kebab-case ASCII,
|
||||
≤64 chars, `-2`/`-3` suffix on collision). The runtime checker
|
||||
(`bin/gstack-question-preference`) refuses `never-ask` on any `*-split-*` id,
|
||||
so split chains are never AUTO_DECIDE-eligible — the user's option set is sacred.
|
||||
|
||||
**Full rule + worked examples + Hold/dependency semantics:** see
|
||||
`docs/askuserquestion-split.md` in the gstack repo. Read on demand when N>4.
|
||||
|
||||
**Non-ASCII characters — write directly, never \u-escape.** When any string
|
||||
field contains Chinese (繁體/簡體), Japanese, Korean, or other non-ASCII text,
|
||||
emit the literal UTF-8 characters; never escape them as `\uXXXX` (the pipe is
|
||||
UTF-8 native, and manual escaping miscodes long CJK strings). Only `\n`,
|
||||
`\t`, `\"`, `\\` remain allowed. Full rationale + worked example: see
|
||||
`docs/askuserquestion-cjk.md`. Read on demand when a question contains CJK.
|
||||
|
||||
### Self-check before emitting
|
||||
|
||||
@@ -355,8 +425,11 @@ Before calling AskUserQuestion, verify:
|
||||
- [ ] (recommended) label on one option (even for neutral-posture)
|
||||
- [ ] Dual-scale effort labels on effort-bearing options (human / CC)
|
||||
- [ ] Net line closes the decision
|
||||
- [ ] You are calling the tool, not writing prose
|
||||
- [ ] You are calling the tool, not writing prose — unless `CONDUCTOR_SESSION: true` (then prose is the DEFAULT, not the tool) OR the documented failure fallback applies (then: prose with the mandatory triad — issue ELI10, per-choice Completeness, Recommendation + `(recommended)` — and a "reply with a letter" instruction, then STOP)
|
||||
- [ ] Non-ASCII characters (CJK / accents) written directly, NOT \u-escaped
|
||||
- [ ] If you had 5+ options, you split (or batched into ≤4-groups) — did NOT drop any
|
||||
- [ ] If you split, you checked dependencies between options before firing the chain
|
||||
- [ ] If a per-option Hold fires, you stopped the chain immediately (didn't queue)
|
||||
|
||||
|
||||
## Artifacts Sync (skill start)
|
||||
@@ -539,12 +612,19 @@ if [ -d "$_PROJ" ]; then
|
||||
fi
|
||||
_LATEST_CP=$(find "$_PROJ/checkpoints" -name "*.md" -type f 2>/dev/null | xargs ls -t 2>/dev/null | head -1)
|
||||
[ -n "$_LATEST_CP" ] && echo "LATEST_CHECKPOINT: $_LATEST_CP"
|
||||
if [ -f "$_PROJ/decisions.active.json" ]; then
|
||||
echo "--- ACTIVE DECISIONS (recent, scope-relevant) ---"
|
||||
~/.claude/skills/gstack/bin/gstack-decision-search --recent 5 2>/dev/null
|
||||
echo "--- END DECISIONS ---"
|
||||
fi
|
||||
echo "--- END ARTIFACTS ---"
|
||||
fi
|
||||
```
|
||||
|
||||
If artifacts are listed, read the newest useful one. If `LAST_SESSION` or `LATEST_CHECKPOINT` appears, give a 2-sentence welcome back summary. If `RECENT_PATTERN` clearly implies a next skill, suggest it once.
|
||||
|
||||
**Cross-session decisions.** If `ACTIVE DECISIONS` are listed, treat them as prior settled calls with their rationale — do not silently re-litigate them; if you're about to reverse one, say so explicitly. Reach for `~/.claude/skills/gstack/bin/gstack-decision-search` whenever a question touches a past decision ("what did we decide / why / did we try"). When you or the user make a DURABLE decision (architecture, scope, tool/vendor choice, or a reversal) — NOT a turn-level or trivial choice — log it with `~/.claude/skills/gstack/bin/gstack-decision-log` (`--supersede <id>` for a reversal). Reliable and local; gbrain not required.
|
||||
|
||||
## Writing Style (skip entirely if `EXPLAIN_LEVEL: terse` appears in the preamble echo OR the user's current message explicitly requests terse / no-explanations output)
|
||||
|
||||
Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format is structure; this is prose quality.
|
||||
@@ -556,89 +636,12 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
|
||||
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
|
||||
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
|
||||
|
||||
Jargon list, gloss on first use if the term appears:
|
||||
- idempotent
|
||||
- idempotency
|
||||
- race condition
|
||||
- deadlock
|
||||
- cyclomatic complexity
|
||||
- N+1
|
||||
- N+1 query
|
||||
- backpressure
|
||||
- memoization
|
||||
- eventual consistency
|
||||
- CAP theorem
|
||||
- CORS
|
||||
- CSRF
|
||||
- XSS
|
||||
- SQL injection
|
||||
- prompt injection
|
||||
- DDoS
|
||||
- rate limit
|
||||
- throttle
|
||||
- circuit breaker
|
||||
- load balancer
|
||||
- reverse proxy
|
||||
- SSR
|
||||
- CSR
|
||||
- hydration
|
||||
- tree-shaking
|
||||
- bundle splitting
|
||||
- code splitting
|
||||
- hot reload
|
||||
- tombstone
|
||||
- soft delete
|
||||
- cascade delete
|
||||
- foreign key
|
||||
- composite index
|
||||
- covering index
|
||||
- OLTP
|
||||
- OLAP
|
||||
- sharding
|
||||
- replication lag
|
||||
- quorum
|
||||
- two-phase commit
|
||||
- saga
|
||||
- outbox pattern
|
||||
- inbox pattern
|
||||
- optimistic locking
|
||||
- pessimistic locking
|
||||
- thundering herd
|
||||
- cache stampede
|
||||
- bloom filter
|
||||
- consistent hashing
|
||||
- virtual DOM
|
||||
- reconciliation
|
||||
- closure
|
||||
- hoisting
|
||||
- tail call
|
||||
- GIL
|
||||
- zero-copy
|
||||
- mmap
|
||||
- cold start
|
||||
- warm start
|
||||
- green-blue deploy
|
||||
- canary deploy
|
||||
- feature flag
|
||||
- kill switch
|
||||
- dead letter queue
|
||||
- fan-out
|
||||
- fan-in
|
||||
- debounce
|
||||
- throttle (UI)
|
||||
- hydration mismatch
|
||||
- memory leak
|
||||
- GC pause
|
||||
- heap fragmentation
|
||||
- stack overflow
|
||||
- null pointer
|
||||
- dangling pointer
|
||||
- buffer overflow
|
||||
Curated jargon list lives at `~/.claude/skills/gstack/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases.
|
||||
|
||||
|
||||
## Completeness Principle — Boil the Lake
|
||||
## Completeness Principle — Boil the Ocean
|
||||
|
||||
AI makes completeness cheap. Recommend complete lakes (tests, edge cases, error paths); flag oceans (rewrites, multi-quarter migrations).
|
||||
AI makes completeness cheap, so the complete thing is the goal. Recommend full coverage (tests, edge cases, error paths) — boil the ocean one lake at a time. The only thing out of scope is genuinely unrelated work (rewrites, multi-quarter migrations); flag that as separate scope, never as an excuse for a shortcut.
|
||||
|
||||
When options differ in coverage, include `Completeness: X/10` (10 = all edge cases, 7 = happy path, 3 = shortcut). When options differ in kind, write: `Note: options differ in kind, not coverage — no completeness score.` Do not fabricate scores.
|
||||
|
||||
@@ -681,7 +684,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
|
||||
|
||||
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `~/.claude/skills/gstack/bin/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
|
||||
|
||||
After answer, log best-effort:
|
||||
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question (the leading line or trailing line is fine; wrapped in HTML-style angle brackets, the marker doesn't render visibly to the user, and the hook strips it). Without the marker the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
|
||||
|
||||
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
|
||||
|
||||
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
|
||||
```bash
|
||||
~/.claude/skills/gstack/bin/gstack-question-log '{"skill":"autoplan","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
|
||||
```
|
||||
@@ -766,9 +773,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
|
||||
|
||||
## Plan Status Footer
|
||||
|
||||
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
|
||||
|
||||
PLAN MODE EXCEPTION — always allowed (it's the plan file).
|
||||
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
|
||||
|
||||
## Step 0: Detect platform and base branch
|
||||
|
||||
@@ -840,7 +845,7 @@ Read the `/office-hours` skill file at `~/.claude/skills/gstack/office-hours/SKI
|
||||
Follow its instructions from top to bottom, **skipping these sections** (already handled by the parent skill):
|
||||
- Preamble (run first)
|
||||
- AskUserQuestion Format
|
||||
- Completeness Principle — Boil the Lake
|
||||
- Completeness Principle — Boil the Ocean
|
||||
- Search Before Building
|
||||
- Contributor Mode
|
||||
- Completion Status Protocol
|
||||
@@ -1046,7 +1051,7 @@ Read each file using the Read tool:
|
||||
(they are already handled by /autoplan):**
|
||||
- Preamble (run first)
|
||||
- AskUserQuestion Format
|
||||
- Completeness Principle — Boil the Lake
|
||||
- Completeness Principle — Boil the Ocean
|
||||
- Search Before Building
|
||||
- Completion Status Protocol
|
||||
- Telemetry (run last)
|
||||
@@ -1073,11 +1078,17 @@ workflow.
|
||||
|
||||
```bash
|
||||
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || echo off)
|
||||
_CODEX_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || echo enabled)
|
||||
source ~/.claude/skills/gstack/bin/gstack-codex-probe
|
||||
|
||||
# Master switch first: codex_reviews=disabled turns off ALL Codex work globally,
|
||||
# including autoplan's own dual-voice orchestration. Honor it before probing.
|
||||
if [ "$_CODEX_CFG" = "disabled" ]; then
|
||||
echo "[codex disabled by config — Claude-only voices] Re-enable: gstack-config set codex_reviews enabled"
|
||||
_CODEX_AVAILABLE=false
|
||||
# Check Codex binary. If missing, tag the degradation matrix and continue
|
||||
# with Claude subagent only (autoplan's existing degradation fallback).
|
||||
if ! command -v codex >/dev/null 2>&1; then
|
||||
elif ! command -v codex >/dev/null 2>&1; then
|
||||
_gstack_codex_log_event "codex_cli_missing"
|
||||
echo "[codex-unavailable: binary not found] — proceeding with Claude subagent only"
|
||||
_CODEX_AVAILABLE=false
|
||||
|
||||
@@ -216,7 +216,7 @@ Read each file using the Read tool:
|
||||
(they are already handled by /autoplan):**
|
||||
- Preamble (run first)
|
||||
- AskUserQuestion Format
|
||||
- Completeness Principle — Boil the Lake
|
||||
- Completeness Principle — Boil the Ocean
|
||||
- Search Before Building
|
||||
- Completion Status Protocol
|
||||
- Telemetry (run last)
|
||||
@@ -243,11 +243,17 @@ workflow.
|
||||
|
||||
```bash
|
||||
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || echo off)
|
||||
_CODEX_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || echo enabled)
|
||||
source ~/.claude/skills/gstack/bin/gstack-codex-probe
|
||||
|
||||
# Master switch first: codex_reviews=disabled turns off ALL Codex work globally,
|
||||
# including autoplan's own dual-voice orchestration. Honor it before probing.
|
||||
if [ "$_CODEX_CFG" = "disabled" ]; then
|
||||
echo "[codex disabled by config — Claude-only voices] Re-enable: gstack-config set codex_reviews enabled"
|
||||
_CODEX_AVAILABLE=false
|
||||
# Check Codex binary. If missing, tag the degradation matrix and continue
|
||||
# with Claude subagent only (autoplan's existing degradation fallback).
|
||||
if ! command -v codex >/dev/null 2>&1; then
|
||||
elif ! command -v codex >/dev/null 2>&1; then
|
||||
_gstack_codex_log_event "codex_cli_missing"
|
||||
echo "[codex-unavailable: binary not found] — proceeding with Claude subagent only"
|
||||
_CODEX_AVAILABLE=false
|
||||
|
||||
+42
-15
@@ -2,14 +2,7 @@
|
||||
name: benchmark-models
|
||||
preamble-tier: 1
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Cross-model benchmark for gstack skills. Runs the same prompt through Claude,
|
||||
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
|
||||
and optionally quality via LLM judge. Answers "which model is actually best
|
||||
for this skill?" with data instead of vibes. Separate from /benchmark, which
|
||||
measures web page performance. Use when: "benchmark models", "compare models",
|
||||
"which model is best for X", "cross-model comparison", "model shootout". (gstack)
|
||||
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
|
||||
description: Cross-model benchmark for gstack skills. (gstack)
|
||||
triggers:
|
||||
- cross model benchmark
|
||||
- compare claude gpt gemini
|
||||
@@ -23,6 +16,18 @@ allowed-tools:
|
||||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Runs the same prompt through Claude,
|
||||
GPT (via Codex CLI), and Gemini side-by-side — compares latency, tokens, cost,
|
||||
and optionally quality via LLM judge. Answers "which model is actually best
|
||||
for this skill?" with data instead of vibes. Separate from /benchmark, which
|
||||
measures web page performance. Use when: "benchmark models", "compare models",
|
||||
"which model is best for X", "cross-model comparison", "model shootout".
|
||||
|
||||
Voice triggers (speech-to-text aliases): "compare models", "model shootout", "which model is best".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
@@ -43,6 +48,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
|
||||
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
|
||||
REPO_MODE=${REPO_MODE:-unknown}
|
||||
echo "REPO_MODE: $REPO_MODE"
|
||||
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
|
||||
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
|
||||
echo "SESSION_KIND: $_SESSION_KIND"
|
||||
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
|
||||
# variant flaky), so skills render decisions as prose instead of calling the
|
||||
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
|
||||
# still BLOCKs rather than rendering prose to nobody.
|
||||
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
|
||||
echo "CONDUCTOR_SESSION: true"
|
||||
fi
|
||||
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
|
||||
echo "LAKE_INTRO: $_LAKE_SEEN"
|
||||
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
|
||||
@@ -58,7 +73,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
||||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"benchmark-models","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
@@ -100,6 +115,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
||||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
@@ -109,7 +137,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
|
||||
|
||||
## Skill Invocation During Plan Mode
|
||||
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
|
||||
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
|
||||
|
||||
@@ -144,7 +172,7 @@ touch ~/.gstack/.writing-style-prompted
|
||||
|
||||
Skip if `WRITING_STYLE_PENDING` is `no`.
|
||||
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
|
||||
```bash
|
||||
open https://garryslist.org/posts/boil-the-ocean
|
||||
@@ -155,7 +183,7 @@ Only run `open` if yes. Always run `touch`.
|
||||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
@@ -231,6 +259,7 @@ Key routing rules:
|
||||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
@@ -475,9 +504,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
|
||||
|
||||
## Plan Status Footer
|
||||
|
||||
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
|
||||
|
||||
PLAN MODE EXCEPTION — always allowed (it's the plan file).
|
||||
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
|
||||
|
||||
# /benchmark-models — Cross-Model Skill Benchmark
|
||||
|
||||
|
||||
+41
-14
@@ -2,13 +2,7 @@
|
||||
name: benchmark
|
||||
preamble-tier: 1
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Performance regression detection using the browse daemon. Establishes
|
||||
baselines for page load times, Core Web Vitals, and resource sizes.
|
||||
Compares before/after on every PR. Tracks performance trends over time.
|
||||
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
|
||||
"bundle size", "load time". (gstack)
|
||||
Voice triggers (speech-to-text aliases): "speed test", "check performance".
|
||||
description: Performance regression detection using the browse daemon. (gstack)
|
||||
triggers:
|
||||
- performance benchmark
|
||||
- check page speed
|
||||
@@ -23,6 +17,17 @@ allowed-tools:
|
||||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Establishes
|
||||
baselines for page load times, Core Web Vitals, and resource sizes.
|
||||
Compares before/after on every PR. Tracks performance trends over time.
|
||||
Use when: "performance", "benchmark", "page speed", "lighthouse", "web vitals",
|
||||
"bundle size", "load time".
|
||||
|
||||
Voice triggers (speech-to-text aliases): "speed test", "check performance".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
@@ -43,6 +48,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
|
||||
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
|
||||
REPO_MODE=${REPO_MODE:-unknown}
|
||||
echo "REPO_MODE: $REPO_MODE"
|
||||
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
|
||||
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
|
||||
echo "SESSION_KIND: $_SESSION_KIND"
|
||||
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
|
||||
# variant flaky), so skills render decisions as prose instead of calling the
|
||||
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
|
||||
# still BLOCKs rather than rendering prose to nobody.
|
||||
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
|
||||
echo "CONDUCTOR_SESSION: true"
|
||||
fi
|
||||
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
|
||||
echo "LAKE_INTRO: $_LAKE_SEEN"
|
||||
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
|
||||
@@ -58,7 +73,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
||||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
@@ -100,6 +115,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
||||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
@@ -109,7 +137,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
|
||||
|
||||
## Skill Invocation During Plan Mode
|
||||
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
|
||||
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
|
||||
|
||||
@@ -144,7 +172,7 @@ touch ~/.gstack/.writing-style-prompted
|
||||
|
||||
Skip if `WRITING_STYLE_PENDING` is `no`.
|
||||
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
|
||||
```bash
|
||||
open https://garryslist.org/posts/boil-the-ocean
|
||||
@@ -155,7 +183,7 @@ Only run `open` if yes. Always run `touch`.
|
||||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
@@ -231,6 +259,7 @@ Key routing rules:
|
||||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
@@ -475,9 +504,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
|
||||
|
||||
## Plan Status Footer
|
||||
|
||||
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
|
||||
|
||||
PLAN MODE EXCEPTION — always allowed (it's the plan file).
|
||||
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
|
||||
|
||||
## SETUP (run this check BEFORE any browse command)
|
||||
|
||||
|
||||
+61
-2
@@ -56,8 +56,64 @@ if [ ! -e "$AGENTS_LINK" ]; then
|
||||
ln -s "$REPO_ROOT" "$AGENTS_LINK"
|
||||
fi
|
||||
|
||||
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent
|
||||
"$GSTACK_LINK/setup"
|
||||
# 6. Run setup via the symlink so it detects .claude/skills/ as its parent.
|
||||
#
|
||||
# Workspace/dev setup MUST be non-interactive: Conductor runs this under a
|
||||
# forwarded pty, so any `read` in setup (skill-prefix prompt, plan-tune hook
|
||||
# consent) would hang the workspace forever. Detaching stdin makes every setup
|
||||
# prompt take its smart non-interactive default (flat skill names, etc.).
|
||||
#
|
||||
# `--plan-tune-hooks=prompt` is load-bearing, not redundant: stdin alone only
|
||||
# suppresses the *prompt* branch. A saved `plan_tune_hooks: yes` or an exported
|
||||
# GSTACK_PLAN_TUNE_HOOKS=yes would still resolve to "install" and rewrite the
|
||||
# user's global ~/.claude/settings.json to point at THIS ephemeral worktree —
|
||||
# which breaks once the workspace is deleted. The flag has highest precedence,
|
||||
# so it pins resolution to "prompt", and closed stdin then makes prompt-mode a
|
||||
# no-op skip (no install, no decline marker). A dev workspace must never mutate
|
||||
# global settings.json. To install the hooks, run `./setup --plan-tune-hooks`
|
||||
# directly (outside dev-setup). Saved prefix/other config preferences still apply.
|
||||
#
|
||||
# GSTACK_SKIP_GBRAIN_REGEN=1 is passed INLINE (not exported) so it scopes to
|
||||
# exactly this nested setup call and can't leak into any other setup path. It
|
||||
# tells setup NOT to regenerate the gbrain :user variant into the tracked
|
||||
# worktree (that would dirty checked-in source). We render it into an untracked
|
||||
# per-workspace dir below instead.
|
||||
GSTACK_SKIP_GBRAIN_REGEN=1 "$GSTACK_LINK/setup" --plan-tune-hooks=prompt </dev/null
|
||||
|
||||
# 7. Brain-aware (gbrain) blocks — render into an untracked workspace dir.
|
||||
#
|
||||
# The worktree's SKILL.md files stay canonical (the guard above). If gbrain is
|
||||
# installed, render the :user variant (with GBRAIN_CONTEXT_LOAD +
|
||||
# GBRAIN_SAVE_RESULTS) into .claude/gstack-rendered (gitignored, per-workspace)
|
||||
# and repoint the workspace's SKILL.md symlinks at it. gen-skill-docs --out-dir
|
||||
# also rewrites the section-base path so section reads resolve to the render, not
|
||||
# the global install. Result: this workspace gets the full gbrain experience
|
||||
# while git stays clean. Other projects pick up blocks via `gstack-config
|
||||
# gbrain-refresh` (printed below).
|
||||
GBRAIN_DETECT="$REPO_ROOT/bin/gstack-gbrain-detect"
|
||||
RENDER_DIR="$REPO_ROOT/.claude/gstack-rendered"
|
||||
if [ -x "$GBRAIN_DETECT" ] && "$GBRAIN_DETECT" --is-ok 2>/dev/null; then
|
||||
echo ""
|
||||
echo "gbrain detected — rendering brain-aware skills into .claude/gstack-rendered (workspace-only, untracked)..."
|
||||
rm -rf "$RENDER_DIR"
|
||||
if ( cd "$REPO_ROOT" && bun run gen:skill-docs:user --host claude --out-dir "$RENDER_DIR" >/dev/null 2>&1 ); then
|
||||
# Repoint each project-local SKILL.md symlink whose worktree target has a
|
||||
# rendered counterpart. The skill DIRECTORY name (basename of the symlink
|
||||
# target's dir) maps to RENDER_DIR/<dir>/SKILL.md, which is robust to
|
||||
# frontmatter renames and the gstack- prefix on the link name.
|
||||
repointed=0
|
||||
for skill_link in "$REPO_ROOT"/.claude/skills/*/SKILL.md; do
|
||||
[ -L "$skill_link" ] || continue
|
||||
target="$(readlink "$skill_link")"
|
||||
skilldir="$(basename "$(dirname "$target")")"
|
||||
rendered="$RENDER_DIR/$skilldir/SKILL.md"
|
||||
if [ -f "$rendered" ]; then ln -snf "$rendered" "$skill_link"; repointed=$((repointed + 1)); fi
|
||||
done
|
||||
echo " $repointed workspace skills now serve brain-aware blocks (worktree stays canonical)."
|
||||
else
|
||||
echo " warning: brain-aware render failed — workspace uses canonical skills."
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Dev mode active. Skills resolve from this working tree."
|
||||
@@ -65,4 +121,7 @@ echo " .claude/skills/gstack → $REPO_ROOT"
|
||||
echo " .agents/skills/gstack → $REPO_ROOT"
|
||||
echo "Edit any SKILL.md and test immediately — no copy/deploy needed."
|
||||
echo ""
|
||||
echo "To make brain-aware blocks live across your OTHER projects too, run:"
|
||||
echo " gstack-config gbrain-refresh"
|
||||
echo ""
|
||||
echo "To tear down: bin/dev-teardown"
|
||||
|
||||
+8
-1
@@ -24,9 +24,16 @@ if [ -d "$CLAUDE_SKILLS" ]; then
|
||||
fi
|
||||
|
||||
rmdir "$CLAUDE_SKILLS" 2>/dev/null || true
|
||||
rmdir "$REPO_ROOT/.claude" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# ─── Clean up the untracked brain-aware render (bin/dev-setup step 7) ──
|
||||
RENDER_DIR="$REPO_ROOT/.claude/gstack-rendered"
|
||||
if [ -d "$RENDER_DIR" ]; then
|
||||
rm -rf "$RENDER_DIR"
|
||||
removed+=("claude/gstack-rendered")
|
||||
fi
|
||||
rmdir "$REPO_ROOT/.claude" 2>/dev/null || true
|
||||
|
||||
# ─── Clean up .agents/skills/ ────────────────────────────────
|
||||
AGENTS_SKILLS="$REPO_ROOT/.agents/skills"
|
||||
if [ -d "$AGENTS_SKILLS" ]; then
|
||||
|
||||
@@ -227,8 +227,18 @@ projects/*/ceo-plans/*.md
|
||||
projects/*/ceo-plans/*/*.md
|
||||
projects/*/designs/*.md
|
||||
projects/*/designs/*/*.md
|
||||
# Project-root design / test-plan artifacts written by /office-hours,
|
||||
# /plan-eng-review, and /autoplan. The skills emit
|
||||
# `{user}-{branch}-design-{datetime}.md`,
|
||||
# `{user}-{branch}-test-plan-{datetime}.md`, and
|
||||
# `{user}-{branch}-eng-review-test-plan-{datetime}.md` at the project
|
||||
# root (not under designs/), so the existing `designs/*.md` patterns
|
||||
# miss them. Without these the cross-machine pull on machine B gets
|
||||
# the referencing CEO plan but not the underlying design / test plan
|
||||
# (#1452).
|
||||
projects/*/*-design-*.md
|
||||
projects/*/*-test-plan-*.md
|
||||
projects/*/*-eng-review-test-plan-*.md
|
||||
projects/*/timeline.jsonl
|
||||
retros/*.md
|
||||
developer-profile.json
|
||||
@@ -256,6 +266,7 @@ cat > "$GSTACK_HOME/.brain-privacy-map.json" <<'EOF'
|
||||
{"pattern": "projects/*/designs/*/*.md", "class": "artifact"},
|
||||
{"pattern": "projects/*/*-design-*.md", "class": "artifact"},
|
||||
{"pattern": "projects/*/*-test-plan-*.md", "class": "artifact"},
|
||||
{"pattern": "projects/*/*-eng-review-test-plan-*.md", "class": "artifact"},
|
||||
{"pattern": "retros/*.md", "class": "artifact"},
|
||||
{"pattern": "builder-journey.md", "class": "artifact"},
|
||||
{"pattern": "projects/*/timeline.jsonl", "class": "behavioral"},
|
||||
|
||||
@@ -49,6 +49,19 @@ strip_git() {
|
||||
echo "${1%.git}"
|
||||
}
|
||||
|
||||
# valid_owner_repo <owner_repo>
# Succeeds (0) when the argument looks like "owner/repo": non-empty, contains
# at least one "/", does not start or end with "/", and has no empty path
# segment ("//"). Fails (1) otherwise.
valid_owner_repo() {
  local candidate="$1"
  # Patterns are tried in order: the malformed shapes are rejected first, then
  # anything that still contains a slash is accepted.
  case "$candidate" in
    ""|/*|*/|*//*) return 1 ;;
    */*) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
# Parse to (host, owner_repo) regardless of input shape.
|
||||
parse_url() {
|
||||
local u="$1"
|
||||
@@ -82,7 +95,7 @@ parse_url() {
|
||||
exit 3
|
||||
;;
|
||||
esac
|
||||
if [ -z "$host" ] || [ -z "$owner_repo" ] || [ "$owner_repo" = "$u" ]; then
|
||||
if [ -z "$host" ] || ! valid_owner_repo "$owner_repo"; then
|
||||
echo "gstack-artifacts-url: failed to parse host/owner from: $u" >&2
|
||||
exit 3
|
||||
fi
|
||||
|
||||
Executable
+965
@@ -0,0 +1,965 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-brain-cache — three-tier cache for brain-aware planning skills.
|
||||
*
|
||||
* Subcommands:
|
||||
* get <entity-name> [--project <slug>] — return digest content; refresh if stale
|
||||
* refresh [--full] [--entity X] [--project <slug>] — force refresh one or all
|
||||
* invalidate <entity-name> [--project <slug>] — mark stale; next get triggers cold
|
||||
* digest <entity-slug> — compress a brain page slug to digest
|
||||
* meta [--project <slug>] — print _meta.json
|
||||
*
|
||||
* (Later commits add: bootstrap [T2b], list [T18], purge [T18], retention sweep [T18].)
|
||||
*
|
||||
* Cache layout:
|
||||
* ~/.gstack/brain-cache/ ← cross-project (user-profile only)
|
||||
* ~/.gstack/projects/<slug>/brain-cache/ ← per-project (everything else)
|
||||
*
|
||||
* Atomic writes via .tmp + rename. Stale-but-usable fallback when brain
|
||||
* unreachable. Concurrent-refresh dedup is a follow-up commit (T15).
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, statSync, unlinkSync, readdirSync, openSync, closeSync } from 'fs';
|
||||
import { join, dirname } from 'path';
|
||||
import { homedir, hostname } from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
import { execGbrainJson, spawnGbrain } from '../lib/gbrain-exec';
|
||||
import {
|
||||
BRAIN_CACHE_ENTITIES,
|
||||
CACHE_REFRESH_LOCK_TIMEOUT_MS,
|
||||
GSTACK_SCHEMA_PACK_NAME,
|
||||
GSTACK_SCHEMA_PACK_VERSION,
|
||||
SALIENCE_DEFAULT_ALLOWLIST,
|
||||
type BrainCacheEntity,
|
||||
} from '../scripts/brain-cache-spec';
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Paths + meta
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
const GSTACK_HOME = process.env.GSTACK_HOME || join(homedir(), '.gstack');
|
||||
|
||||
/** Shape of the `_meta.json` bookkeeping file stored next to cached digests. */
interface CacheMeta {
  /**
   * Schema pack version the cache was built against.
   * A mismatch with the current pack version triggers a full rebuild.
   */
  schema_version: string;
  /** SHA8 hash of the brain MCP endpoint URL, or 'local' for on-disk engines. */
  endpoint_hash: string;
  /** Epoch-ms of the last refresh, keyed by entity name. A missing key means never refreshed. */
  last_refresh: { [entityName: string]: number };
  /**
   * Epoch-ms of the last refresh attempt (recorded even when the attempt failed),
   * keyed by entity name. Kept for stale-but-usable diagnostics.
   */
  last_attempt?: { [entityName: string]: number };
}
|
||||
|
||||
/**
 * Resolves the directory that holds an entity's cache file.
 *
 * Cross-project entities live in `<GSTACK_HOME>/brain-cache`; every other
 * entity lives in `<GSTACK_HOME>/projects/<projectSlug>/brain-cache`.
 *
 * @param entity - Cache entity descriptor; its `scope` and `file` are read.
 * @param projectSlug - Project slug; required unless the entity is cross-project.
 * @returns The directory path. The directory is not created here.
 * @throws Error if a non-cross-project entity is requested without a slug.
 */
export function entityDir(entity: BrainCacheEntity, projectSlug: string | null): string {
  if (entity.scope !== 'cross-project') {
    if (!projectSlug) {
      throw new Error(`Per-project entity needs a project slug: ${entity.file}`);
    }
    return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache');
  }
  return join(GSTACK_HOME, 'brain-cache');
}
|
||||
|
||||
/**
 * Resolves the full path of the cache file for a named entity.
 *
 * @param entityName - Key into `BRAIN_CACHE_ENTITIES`.
 * @param projectSlug - Project slug, forwarded to `entityDir`.
 * @returns The entity's cache directory joined with its file name.
 * @throws Error if `entityName` is not a known entity.
 */
export function entityPath(entityName: string, projectSlug: string | null): string {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (spec) {
    const cacheDir = entityDir(spec, projectSlug);
    return join(cacheDir, spec.file);
  }
  throw new Error(`Unknown brain cache entity: ${entityName}`);
}
|
||||
|
||||
/**
 * Resolves the path of the `_meta.json` file for a cache scope.
 *
 * @param scope - 'cross-project' selects the shared cache; anything else is per-project.
 * @param projectSlug - Project slug; required for the per-project scope.
 * @returns Path to `_meta.json` inside the matching `brain-cache` directory.
 * @throws Error if the per-project scope is requested without a slug.
 */
export function metaPath(scope: 'cross-project' | 'per-project', projectSlug: string | null): string {
  if (scope !== 'cross-project') {
    if (!projectSlug) throw new Error('Per-project meta needs a project slug');
    return join(GSTACK_HOME, 'projects', projectSlug, 'brain-cache', '_meta.json');
  }
  return join(GSTACK_HOME, 'brain-cache', '_meta.json');
}
|
||||
|
||||
/**
 * Loads `_meta.json` for a scope, tolerating a missing, corrupt, or mis-shaped file.
 *
 * @param scope - Which cache's meta file to read.
 * @param projectSlug - Project slug, forwarded to `metaPath` (which throws if
 *   the per-project scope has no slug).
 * @returns The parsed meta with `last_refresh` / `last_attempt` guaranteed to be
 *   plain objects, or a fresh meta stamped with the current schema version and
 *   endpoint hash when the file is absent, unparseable, or not a JSON object.
 */
function loadMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null): CacheMeta {
  // Brand-new meta: nothing refreshed yet, identity fields match the current environment.
  const fresh = (): CacheMeta => ({
    schema_version: GSTACK_SCHEMA_PACK_VERSION,
    endpoint_hash: detectEndpointHash(),
    last_refresh: {},
    last_attempt: {},
  });
  // True only for non-null, non-array objects — the only shape safe to index and assign into.
  const isPlainObject = (value: unknown): boolean =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const path = metaPath(scope, projectSlug);
  if (!existsSync(path)) {
    return fresh();
  }
  try {
    const parsed = JSON.parse(readFileSync(path, 'utf-8')) as unknown;
    // #1879: a valid JSON file can still be the wrong shape. JSON.parse can return
    // null/array/string/number, and a partial object can omit last_refresh — consumers
    // dereference meta.last_refresh unguarded and would crash with a TypeError.
    if (!isPlainObject(parsed)) {
      return fresh();
    }
    const meta = parsed as CacheMeta;
    // Normalize ONLY the dereferenced maps. Do NOT default schema_version /
    // endpoint_hash — leaving them absent makes schemaVersionMismatch() /
    // endpointSwitched() correctly force a rebuild (missing identity = mismatch =
    // safe). Defaulting them to current values would suppress invalidation and
    // trust a stale file of unknown provenance.
    //
    // A map that is present but not a plain object (string, number, array) is
    // reset as well: indexing it yields nothing useful, and writing a key into
    // a primitive throws in strict-mode module code.
    meta.last_refresh = isPlainObject(meta.last_refresh) ? meta.last_refresh : {};
    meta.last_attempt = isPlainObject(meta.last_attempt) ? meta.last_attempt : {};
    return meta;
  } catch {
    // Corrupt _meta — start fresh (entries will refresh on next access).
    return fresh();
  }
}
|
||||
|
||||
/**
 * Persists a scope's meta as pretty-printed JSON (2-space indent).
 *
 * @param scope - Which cache's meta file to write.
 * @param projectSlug - Project slug, forwarded to `metaPath`.
 * @param meta - Meta object to serialize.
 */
function saveMeta(scope: 'cross-project' | 'per-project', projectSlug: string | null, meta: CacheMeta): void {
  const target = metaPath(scope, projectSlug);
  // Ensure the parent directory exists before handing off to the atomic writer.
  mkdirSync(dirname(target), { recursive: true });
  const serialized = JSON.stringify(meta, null, 2);
  atomicWrite(target, serialized);
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Endpoint hash detection
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
import { createHash } from 'crypto';
|
||||
|
||||
/**
 * Short content fingerprint: the first 8 hex characters of the SHA-256 digest.
 *
 * @param input - Text to hash.
 * @returns An 8-character lowercase hex string.
 */
function sha8(input: string): string {
  const hasher = createHash('sha256');
  hasher.update(input);
  const hex = hasher.digest('hex');
  return hex.substring(0, 8);
}
|
||||
|
||||
/**
 * Computes a stable identity for the active brain endpoint so the cache can
 * tell when the user has switched brains (different endpoint → different cache).
 *
 * Reads `~/.claude.json` and looks for `mcpServers.gbrain.url`, falling back to
 * `mcpServers.gbrain.transport.url`. A non-empty string URL is returned as its
 * `sha8` hash.
 *
 * @returns The URL's 8-character hash, or the literal 'local' when the config
 *   file is missing, unreadable, unparseable, or has no gbrain URL.
 */
export function detectEndpointHash(): string {
  const configFile = join(homedir(), '.claude.json');
  if (existsSync(configFile)) {
    try {
      const config = JSON.parse(readFileSync(configFile, 'utf-8'));
      const server = config?.mcpServers?.gbrain;
      const endpoint = server?.url || server?.transport?.url;
      const hasEndpoint = typeof endpoint === 'string' && endpoint.length > 0;
      if (hasEndpoint) {
        return sha8(endpoint);
      }
    } catch {
      // Unreadable or malformed config: treat as a local engine below.
    }
  }
  // Local engine — there is no endpoint URL, so use a fixed literal identity.
  return 'local';
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Atomic write (tmp + rename)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Writes `content` to `path` via a temp file and rename, so readers never
 * observe a partially written file.
 *
 * The parent directory is created if needed. The temp file sits next to the
 * target (`<path>.tmp.<pid>.<epoch-ms>`) so the rename stays on one filesystem.
 * If the write or the rename fails, the temp file is removed before the error
 * is rethrown, so failed writes do not accumulate `.tmp.*` litter.
 *
 * @param path - Destination file path.
 * @param content - UTF-8 text to write.
 * @throws Whatever `mkdirSync`, `writeFileSync`, or `renameSync` throws.
 */
function atomicWrite(path: string, content: string): void {
  mkdirSync(dirname(path), { recursive: true });
  const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
  try {
    writeFileSync(tmp, content, 'utf-8');
    renameSync(tmp, path);
  } catch (err) {
    // Best-effort cleanup; the temp file may never have been created.
    try {
      unlinkSync(tmp);
    } catch {
      /* nothing to clean up */
    }
    throw err;
  }
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Staleness + refresh logic
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Reports whether an entity's cached digest must be refreshed.
 *
 * @param entityName - Key into `BRAIN_CACHE_ENTITIES`.
 * @param meta - Meta whose `last_refresh` map holds epoch-ms timestamps.
 * @returns `true` when the entity is unknown, has no usable refresh timestamp,
 *   or its age exceeds the entity's `ttl_ms`; `false` otherwise.
 */
function isStale(entityName: string, meta: CacheMeta): boolean {
  const entity = BRAIN_CACHE_ENTITIES[entityName];
  if (!entity) return true;
  // Read as unknown: the value originates from a JSON file and may not be a number.
  const last: unknown = meta.last_refresh?.[entityName];
  // Missing, zero, non-numeric, or non-finite timestamps are all treated as
  // "never refreshed". Without this, a string or Infinity makes the age
  // comparison below false and the entry would look fresh forever.
  if (typeof last !== 'number' || !Number.isFinite(last) || last === 0) return true;
  return Date.now() - last > entity.ttl_ms;
}
|
||||
|
||||
/**
 * Checks whether the entity's cache file is present on disk.
 *
 * @param entityName - Key into `BRAIN_CACHE_ENTITIES`.
 * @param projectSlug - Project slug, forwarded to `entityPath`.
 * @returns `true` if the resolved cache file exists.
 */
function hasFile(entityName: string, projectSlug: string | null): boolean {
  const cacheFile = entityPath(entityName, projectSlug);
  return existsSync(cacheFile);
}
|
||||
|
||||
/**
 * Tells whether the cache was built against a different schema pack version.
 *
 * @param meta - Meta carrying the recorded `schema_version`.
 * @returns `true` when the recorded version is not the current pack version.
 */
function schemaVersionMismatch(meta: CacheMeta): boolean {
  const recorded = meta.schema_version;
  return recorded !== GSTACK_SCHEMA_PACK_VERSION;
}
|
||||
|
||||
/**
 * Tells whether the cache was built against a different brain endpoint.
 *
 * @param meta - Meta carrying the recorded `endpoint_hash`.
 * @returns `true` when the recorded hash is not the currently detected one.
 */
function endpointSwitched(meta: CacheMeta): boolean {
  const current = detectEndpointHash();
  return meta.endpoint_hash !== current;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: get
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Outcome of `cmdGet`: where the digest lives and how it was obtained. */
interface GetResult {
  /** Path to the digest file. */
  path: string;
  /**
   * Cache state:
   * - `warm`           — fresh and valid.
   * - `cold-refreshed` — was stale or absent and got refreshed inline.
   * - `stale-fallback` — refresh failed, an older digest file is being used.
   * - `missing`        — no usable digest and no successful refresh.
   */
  state:
    | 'warm'
    | 'cold-refreshed'
    | 'stale-fallback'
    | 'missing';
  /** Optional message for diagnostics. */
  message?: string;
}
|
||||
|
||||
/**
 * Resolves the digest for `entityName`, refreshing it inline when required.
 *
 * Flow:
 *  1. A schema-version mismatch or an endpoint switch rebuilds the whole
 *     scope; the result is `warm` if this entity came back fresh, otherwise
 *     `missing`.
 *  2. A present, non-stale digest is returned as `warm`.
 *  3. Otherwise a single-entity refresh is attempted (`cold-refreshed`).
 *  4. If that fails, an existing file is served as `stale-fallback`; with no
 *     file at all the state is `missing`.
 *
 * @throws Error when `entityName` is not a key of BRAIN_CACHE_ENTITIES.
 */
export function cmdGet(entityName: string, projectSlug: string | null): GetResult {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) throw new Error(`Unknown entity: ${entityName}`);

  const digestPath = (): string => entityPath(entityName, projectSlug);
  const isWarm = (m: CacheMeta): boolean =>
    hasFile(entityName, projectSlug) && !isStale(entityName, m);

  const meta = loadMeta(spec.scope, projectSlug);

  // Schema-version mismatch or endpoint switch → full rebuild of the scope (D4 A4).
  if (schemaVersionMismatch(meta) || endpointSwitched(meta)) {
    rebuildAllForScope(spec.scope, projectSlug);
    // The rebuild rewrote meta; re-read it before judging freshness.
    const rebuiltMeta = loadMeta(spec.scope, projectSlug);
    if (isWarm(rebuiltMeta)) {
      return { path: digestPath(), state: 'warm' };
    }
    // The rebuild may have failed for this entity specifically.
    return { path: digestPath(), state: 'missing', message: 'rebuild after schema/endpoint change' };
  }

  if (isWarm(meta)) {
    return { path: digestPath(), state: 'warm' };
  }

  // Stale or absent: attempt a cold refresh of just this entity.
  if (refreshEntity(entityName, projectSlug)) {
    return { path: digestPath(), state: 'cold-refreshed' };
  }

  // Refresh failed: serve the old file when there is one, else report missing.
  return hasFile(entityName, projectSlug)
    ? { path: digestPath(), state: 'stale-fallback', message: 'brain unreachable; using stale cache' }
    : { path: digestPath(), state: 'missing', message: 'brain unreachable; no cache available' };
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: refresh
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Lockfile dedup (T15 / D3)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Builds the refresh-lock file path. With a project slug the lock lives in
 * `<GSTACK_HOME>/projects/<slug>/brain-cache/.refresh.lock`; without one it
 * lives in `<GSTACK_HOME>/brain-cache/.refresh.lock`.
 *
 * The lock is keyed by the project that triggers the refresh, including when
 * the entity being refreshed is cross-project.
 */
function lockPath(projectSlug: string | null): string {
  const segments = projectSlug
    ? ['projects', projectSlug, 'brain-cache']
    : ['brain-cache'];
  return join(GSTACK_HOME, ...segments, '.refresh.lock');
}
|
||||
|
||||
/** Token returned by `tryAcquireLock` and consumed by `releaseLock`. */
interface LockHandle {
  /** File descriptor slot; `tryAcquireLock` always sets it to -1. */
  fd: number;
  /** Absolute path of the lock file that was written. */
  path: string;
}
|
||||
|
||||
/**
 * Tries to take the refresh lock for `projectSlug`.
 *
 * An existing lock is honoured (returns null) only when it is younger than
 * CACHE_REFRESH_LOCK_TIMEOUT_MS and its owner is not known to be dead (the
 * liveness probe is only attempted for locks written on this host). Locks
 * that are too old, owned by a dead local pid, or unparsable are taken over.
 *
 * The lock payload is written to a temp file and renamed into place so other
 * processes never read a partial payload. NOTE(review): rename is NOT an
 * exclusive create, so two processes that pass the staleness check at the
 * same moment can each rename their payload in; the read-back below only
 * narrows that window. If the temp write or the rename fails, the temp file
 * is removed so failed attempts do not accumulate `.tmp.*` files.
 *
 * @returns A handle (`fd` is always -1) on success, or null when the lock is
 *          held by someone else or could not be written/verified.
 */
function tryAcquireLock(projectSlug: string | null): LockHandle | null {
  const path = lockPath(projectSlug);
  mkdirSync(dirname(path), { recursive: true });
  const self = hostname();

  // If a lock exists, decide whether it is still valid.
  if (existsSync(path)) {
    try {
      const lock = JSON.parse(readFileSync(path, 'utf-8')) as { pid: number; host: string; ts: number };
      const age = Date.now() - lock.ts;
      const processGone = lock.host === self && lock.pid > 0 && !isPidAlive(lock.pid);
      if (age <= CACHE_REFRESH_LOCK_TIMEOUT_MS && !processGone) {
        return null; // someone else holds a fresh lock
      }
      // Stale: fall through and take over.
    } catch {
      // Unreadable or corrupt lock file → take over.
    }
  }

  // Publish our payload atomically (tmp + rename).
  const payload = JSON.stringify({ pid: process.pid, host: self, ts: Date.now() });
  const tmp = `${path}.tmp.${process.pid}.${Date.now()}`;
  try {
    writeFileSync(tmp, payload);
    renameSync(tmp, path);
  } catch {
    try {
      unlinkSync(tmp);
    } catch {
      /* tmp was never created or is already gone */
    }
    return null;
  }

  // Another process may have renamed its own payload over ours; confirm ownership.
  try {
    const lock = JSON.parse(readFileSync(path, 'utf-8')) as { pid: number; host: string };
    if (lock.pid !== process.pid || lock.host !== self) {
      return null;
    }
  } catch {
    return null;
  }
  return { fd: -1, path };
}
|
||||
|
||||
/**
 * Releases a lock previously returned by `tryAcquireLock`.
 *
 * The lock file is removed only when it still names this process (same pid
 * and host). If our lock was taken over as stale while we were running, the
 * file now belongs to another process and deleting it would let a third
 * refresh start concurrently, so it is left alone. All failures (file already
 * gone, unreadable, unparsable) are ignored: release is best effort.
 */
function releaseLock(handle: LockHandle): void {
  try {
    const lock = JSON.parse(readFileSync(handle.path, 'utf-8')) as { pid?: unknown; host?: unknown } | null;
    if (lock?.pid !== process.pid || lock?.host !== hostname()) return;
    unlinkSync(handle.path);
  } catch {
    /* best effort */
  }
}
|
||||
|
||||
/**
 * Probes whether a process with the given pid exists, using signal 0 (which
 * performs the existence/permission check without delivering a signal).
 *
 * Non-integer or non-positive pids return false up front: `process.kill`
 * interprets 0 and negative values as process-group targets, which would
 * answer a different question than "is this one process alive".
 *
 * @returns true when the probe succeeds or fails with EPERM (the process
 *          exists but belongs to someone else); false otherwise.
 */
function isPidAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err: unknown) {
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;
    return code === 'EPERM'; // exists but we don't own it
  }
}
|
||||
|
||||
/**
 * Runs `fn` while holding the refresh lock for `projectSlug`.
 *
 * When the lock cannot be acquired the callback is not invoked and the
 * literal `'dedup'` is returned, leaving it to the caller to retry or fall
 * back to a stale digest. The lock is released whether `fn` returns or throws.
 */
export function withRefreshLock<T>(projectSlug: string | null, fn: () => T): T | 'dedup' {
  const lock = tryAcquireLock(projectSlug);
  if (lock === null) {
    return 'dedup';
  }
  let outcome: T;
  try {
    outcome = fn();
  } finally {
    releaseLock(lock);
  }
  return outcome;
}
|
||||
|
||||
/**
 * Refreshes a single entity's digest from the brain.
 *
 * The attempt time is always recorded in meta. On a failed fetch only that
 * attempt is persisted and false is returned. On success the digest is cut
 * down to the entity's byte budget if needed, written atomically, and meta is
 * updated with the refresh time plus the current schema version and endpoint
 * hash.
 *
 * @returns true when a digest was written, false for unknown entities or
 *          when the fetch yielded nothing.
 */
export function refreshEntity(entityName: string, projectSlug: string | null): boolean {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) return false;
  const { scope, budget_bytes: budget } = spec;

  // Record the attempt before talking to the brain.
  const meta = loadMeta(scope, projectSlug);
  if (!meta.last_attempt) {
    meta.last_attempt = {};
  }
  meta.last_attempt[entityName] = Date.now();

  // Per-entity fetch; returns null when nothing could be fetched.
  const raw = fetchAndCompressEntity(entityName, projectSlug);
  if (raw === null) {
    saveMeta(scope, projectSlug, meta);
    return false;
  }

  // Per-entity budget; the per-skill budget is enforced elsewhere at injection time.
  const overBudget = Buffer.byteLength(raw, 'utf-8') > budget;
  const digest = overBudget ? truncateToBudget(raw, budget) : raw;

  atomicWrite(entityPath(entityName, projectSlug), digest);

  meta.last_refresh[entityName] = Date.now();
  // Keep the schema/endpoint identity in step with what was just written.
  meta.schema_version = GSTACK_SCHEMA_PACK_VERSION;
  meta.endpoint_hash = detectEndpointHash();
  saveMeta(scope, projectSlug, meta);
  return true;
}
|
||||
|
||||
/**
 * Refreshes every entity that matches the call's scope and tallies results.
 * A call with a project slug handles only per-project entities; a call
 * without one handles only cross-project entities.
 */
export function refreshAll(projectSlug: string | null): { success: number; failed: number } {
  // The scope that does NOT belong to this kind of call.
  const skippedScope = projectSlug ? 'cross-project' : 'per-project';
  const tally = { success: 0, failed: 0 };
  for (const [name, entity] of Object.entries(BRAIN_CACHE_ENTITIES)) {
    if (entity.scope === skippedScope) continue;
    if (refreshEntity(name, projectSlug)) {
      tally.success += 1;
    } else {
      tally.failed += 1;
    }
  }
  return tally;
}
|
||||
|
||||
/**
 * Rebuilds one scope from scratch: deletes the scope's digest files, writes a
 * blank meta stamped with the current schema version and endpoint hash, then
 * refreshes every entity in the scope. Individual refresh failures are not
 * reported to the caller.
 */
function rebuildAllForScope(scope: 'cross-project' | 'per-project', projectSlug: string | null): void {
  const namesInScope = Object.entries(BRAIN_CACHE_ENTITIES)
    .filter(([, entity]) => entity.scope === scope)
    .map(([name]) => name);

  // Remove the digest files; the directory itself is kept.
  for (const name of namesInScope) {
    const digestFile = entityPath(name, projectSlug);
    if (!existsSync(digestFile)) continue;
    try {
      unlinkSync(digestFile);
    } catch {
      /* best effort */
    }
  }

  // Start from an empty meta so no old timestamps survive.
  const blankMeta: CacheMeta = {
    schema_version: GSTACK_SCHEMA_PACK_VERSION,
    endpoint_hash: detectEndpointHash(),
    last_refresh: {},
    last_attempt: {},
  };
  saveMeta(scope, projectSlug, blankMeta);

  for (const name of namesInScope) {
    refreshEntity(name, projectSlug);
  }
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: invalidate
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Marks an entity's digest as needing refresh by dropping its `last_refresh`
 * entry from meta. The digest file itself is left on disk.
 *
 * @throws Error when `entityName` is not a key of BRAIN_CACHE_ENTITIES.
 */
export function cmdInvalidate(entityName: string, projectSlug: string | null): void {
  const spec = BRAIN_CACHE_ENTITIES[entityName];
  if (!spec) {
    throw new Error(`Unknown entity: ${entityName}`);
  }
  const { scope } = spec;
  const meta = loadMeta(scope, projectSlug);
  delete meta.last_refresh[entityName];
  saveMeta(scope, projectSlug, meta);
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Fetch + compress per-entity
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Dispatches to the fetcher for `entityName` and returns its digest markdown,
 * or null when the fetcher yields nothing or the entity name is not handled.
 *
 * `developer-persona`, `brand` and `competitive-intel` are read from
 * `gstack/<entity>/<projectSlug>`. They return null when no project slug is
 * given, matching `fetchProduct`, instead of querying a page literally named
 * `.../null`.
 */
function fetchAndCompressEntity(entityName: string, projectSlug: string | null): string | null {
  switch (entityName) {
    case 'user-profile':
      return fetchUserProfile();
    case 'product':
      return fetchProduct(projectSlug);
    case 'goals':
      return fetchGoals(projectSlug);
    case 'developer-persona':
    case 'brand':
    case 'competitive-intel':
      // Project-keyed single pages: without a slug there is no page to ask for.
      if (!projectSlug) return null;
      return fetchSimplePage(`gstack/${entityName}/${projectSlug}`);
    case 'recent-decisions':
      return fetchRecentDecisions(projectSlug);
    case 'salience':
      return fetchSalience(projectSlug);
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Fetches one page through `spawnGbrain(['get', slug, '--json'])` (10 s
 * timeout) and converts it into digest form with `compressPage`.
 *
 * @returns The digest, or null when the command exits non-zero, its output
 *          cannot be parsed or compressed, or the page has no body.
 */
function fetchSimplePage(slug: string): string | null {
  const proc = spawnGbrain(['get', slug, '--json'], { timeout: 10_000 });
  if (proc.status !== 0) {
    return null;
  }
  try {
    const parsed = JSON.parse(proc.stdout) as { body?: string; title?: string };
    const body = parsed?.body;
    // The title falls back to the slug when the page has none.
    return body ? compressPage(slug, parsed.title || slug, body) : null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Fetches the `gstack/user-profile/<slug>` page. The slug is the first
 * non-empty value of GSTACK_USER_SLUG, then USER, then the literal 'unknown'.
 */
function fetchUserProfile(): string | null {
  const candidates = [process.env.GSTACK_USER_SLUG, process.env.USER];
  const userSlug = candidates.find(Boolean) ?? 'unknown';
  return fetchSimplePage(`gstack/user-profile/${userSlug}`);
}
|
||||
|
||||
/** Fetches `gstack/product/<projectSlug>`; returns null when no project slug is given. */
function fetchProduct(projectSlug: string | null): string | null {
  return projectSlug ? fetchSimplePage(`gstack/product/${projectSlug}`) : null;
}
|
||||
|
||||
/**
 * Builds the "Active goals" digest for a project from the
 * `gstack/goal/<project>/*` pages.
 *
 * The listing is not project-scoped, so it is fetched with a wide limit and
 * narrowed afterwards; limiting to 10 before filtering would let other
 * projects' goals push this project's goals out of the window. Results are
 * requested newest-first with `--sort updated_desc` (the same flag
 * `fetchRecentDecisions` passes to `list-pages`) and at most 10 goals are
 * rendered.
 *
 * @returns The digest markdown, a "no active goals" digest when the project
 *          has none, or null when there is no project slug or the listing
 *          returned no `pages` field.
 */
function fetchGoals(projectSlug: string | null): string | null {
  if (!projectSlug) return null;
  const maxGoals = 10;
  const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string; body?: string }> }>([
    'list-pages',
    '--type', 'gstack/goal',
    '--limit', '200',
    '--sort', 'updated_desc',
    '--json',
  ]);
  if (!result?.pages) return null;
  const prefix = `gstack/goal/${projectSlug}/`;
  const goals = result.pages.filter((p) => p.slug?.startsWith(prefix)).slice(0, maxGoals);
  if (goals.length === 0) {
    // An empty digest is valid: header plus a "no active goals" line.
    return `# Active goals (project: ${projectSlug})\n\n_No active goals recorded yet._\n`;
  }
  const lines = goals.map((g) => `- [[${g.slug}]] — ${g.title || '(untitled)'}`);
  return `# Active goals (project: ${projectSlug})\n\n${lines.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Builds the "Recent decisions" digest: up to the 5 most recently updated
 * `gstack/skill-run` pages belonging to this project, one line each.
 *
 * The listing covers every project, so it is fetched with a wide limit and
 * filtered to slugs containing `/<projectSlug>` before taking the first 5;
 * without that filter other projects' runs would appear in this digest.
 * NOTE(review): the filter is the same containment test `cmdList` applies to
 * skill-run pages — confirm it matches the actual skill-run slug layout.
 *
 * @returns null without a project slug; otherwise the digest. When the
 *          listing has no `pages` field or nothing matches, a "no prior skill
 *          runs" digest is returned rather than null.
 */
function fetchRecentDecisions(projectSlug: string | null): string | null {
  if (!projectSlug) return null;
  const maxDecisions = 5;
  const result = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
    'list-pages',
    '--type', 'gstack/skill-run',
    '--limit', '200',
    '--sort', 'updated_desc',
    '--json',
  ]);
  const header = `# Recent decisions (project: ${projectSlug})`;
  const recent = (result?.pages ?? [])
    .filter((p) => p.slug?.includes(`/${projectSlug}`))
    .slice(0, maxDecisions);
  if (recent.length === 0) {
    return `${header}\n\n_No prior skill runs recorded._\n`;
  }
  const lines = recent.map((p) => `- ${p.title || p.slug}`);
  return `${header}\n\n${lines.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Resolves the salience slug-prefix allowlist.
 *
 * Order of precedence:
 *  1. A non-empty GSTACK_SALIENCE_ALLOWLIST env var (comma-separated).
 *  2. The output of `~/.claude/skills/gstack/bin/gstack-config get
 *     salience_allowlist` (2 s timeout), when it yields at least one entry.
 *  3. SALIENCE_DEFAULT_ALLOWLIST, on any failure or empty result of step 2.
 *
 * Entries are trimmed and empty entries are dropped.
 */
export function getSalienceAllowlist(): ReadonlyArray<string> {
  const splitCsv = (csv: string): string[] =>
    csv
      .split(',')
      .map((part) => part.trim())
      .filter(Boolean);

  // Env var wins outright (tests and headless callers).
  const fromEnv = process.env.GSTACK_SALIENCE_ALLOWLIST;
  if (typeof fromEnv === 'string' && fromEnv.length > 0) {
    return splitCsv(fromEnv);
  }

  try {
    const configBin = join(homedir(), '.claude', 'skills', 'gstack', 'bin', 'gstack-config');
    if (!existsSync(configBin)) return SALIENCE_DEFAULT_ALLOWLIST;

    const proc = spawnSync(configBin, ['get', 'salience_allowlist'], { timeout: 2000, encoding: 'utf-8' });
    if (proc.status !== 0 || !proc.stdout) return SALIENCE_DEFAULT_ALLOWLIST;

    const entries = splitCsv(proc.stdout);
    return entries.length > 0 ? entries : SALIENCE_DEFAULT_ALLOWLIST;
  } catch {
    return SALIENCE_DEFAULT_ALLOWLIST;
  }
}
|
||||
|
||||
/**
 * D9 salience privacy gate: a slug passes only when it begins with at least
 * one prefix from `allowlist`. An empty allowlist therefore rejects every
 * slug. Callers use this to keep non-allowlisted pages out of the digest.
 */
export function isSalienceSlugAllowed(slug: string, allowlist: ReadonlyArray<string>): boolean {
  return allowlist.some((prefix) => slug.startsWith(prefix));
}
|
||||
|
||||
/**
 * Builds the "Recent salience" digest from `get-recent-salience` (14 days,
 * limit 10).
 *
 * Entries whose slug is missing or not covered by the salience allowlist are
 * dropped before anything is rendered, so they never reach the digest text;
 * only their count is mentioned. `projectSlug` is accepted for signature
 * parity with the other fetchers and is not used here.
 */
function fetchSalience(projectSlug: string | null): string | null {
  const response = execGbrainJson<{ pages?: Array<{ slug: string; title?: string; emotional_weight?: number }> }>([
    'get-recent-salience',
    '--days', '14',
    '--limit', '10',
    '--json',
  ]);
  const pages = response?.pages;
  if (!pages) {
    return `# Recent salience\n\n_No salient pages in last 14d._\n`;
  }

  // D9 privacy gate: filter BEFORE rendering.
  const allowlist = getSalienceAllowlist();
  const visible = pages.filter((page) => page.slug && isSalienceSlugAllowed(page.slug, allowlist));
  const stripped = pages.length - visible.length;
  const heading = `# Recent salience (last 14d)`;

  if (visible.length === 0) {
    const note =
      stripped > 0
        ? `\n_All ${stripped} salient entries stripped by allowlist gate (no work-flow content in window)._\n`
        : `\n_No salient pages in last 14d._\n`;
    return `${heading}\n${note}`;
  }

  const bullets = visible.map((page) => {
    const weight = page.emotional_weight?.toFixed(2) ?? 'n/a';
    return `- [[${page.slug}]] — ${page.title || ''} (weight: ${weight})`;
  });
  const footer = stripped > 0 ? `\n\n_${stripped} private entries stripped by allowlist gate._` : '';
  return `${heading}\n\n${bullets.join('\n')}${footer}\n`;
}
|
||||
|
||||
/**
 * Turns a brain page into digest form: a `# <title>` line, a `slug: <slug>`
 * line, a blank line, then the page body with any leading YAML frontmatter
 * removed and surrounding whitespace trimmed.
 *
 * Frontmatter is only recognised at the very start of the body (after
 * leading whitespace) as a `---` line, optional content, and a closing `---`
 * line. `---` horizontal rules further down are body content and are kept.
 * No section-level trimming happens here; byte-budget enforcement is the
 * caller's job.
 */
function compressPage(slug: string, title: string, body: string): string {
  // No `m` flag: `^` must match only the start of the document, otherwise the
  // text between two horizontal rules would be deleted as if it were frontmatter.
  const frontmatter = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/;
  const trimmed = body.trimStart().replace(frontmatter, '').trim();
  return `# ${title}\nslug: ${slug}\n\n${trimmed}\n`;
}
|
||||
|
||||
/** Returns the longest prefix of `buf` that is at most `maxBytes` long and ends on a UTF-8 character boundary. */
function utf8Prefix(buf: Buffer, maxBytes: number): string {
  let end = Math.max(0, Math.min(maxBytes, buf.byteLength));
  // Bytes of the form 10xxxxxx are continuation bytes; never cut in front of one.
  while (end > 0 && ((buf[end] ?? 0) & 0xc0) === 0x80) end--;
  return buf.subarray(0, end).toString('utf-8');
}

/**
 * Truncates a digest so that the RESULT, including the "truncated" footer,
 * fits in `budgetBytes` UTF-8 bytes.
 *
 * - Content already within budget is returned unchanged.
 * - Space for the footer is reserved before cutting, so the output does not
 *   overshoot the budget.
 * - The cut lands on a UTF-8 character boundary, so no replacement
 *   characters are introduced by splitting a multi-byte character.
 * - When the last newline falls in the final 20% of the kept bytes, the cut
 *   moves back to it so the digest ends on a whole line.
 * - If the budget cannot even hold the footer, the content is hard-cut to the
 *   budget with no footer.
 */
function truncateToBudget(content: string, budgetBytes: number): string {
  const buf = Buffer.from(content, 'utf-8');
  if (buf.byteLength <= budgetBytes) return content;

  const footer = `\n\n_(digest truncated to ${budgetBytes}-byte budget)_\n`;
  const room = budgetBytes - Buffer.byteLength(footer, 'utf-8');
  if (room <= 0) return utf8Prefix(buf, budgetBytes);

  const head = utf8Prefix(buf, room);
  const lastNewline = head.lastIndexOf('\n');
  // Compare in bytes; string indices and byte offsets differ for non-ASCII text.
  const bytesToNewline = lastNewline >= 0 ? Buffer.byteLength(head.slice(0, lastNewline), 'utf-8') : -1;
  const cleanCut = bytesToNewline > room * 0.8 ? head.slice(0, lastNewline) : head;
  return `${cleanCut}${footer}`;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: digest
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Public preview: returns what the digest for a brain page slug would look
 * like, without touching the cache. Null when the page cannot be fetched.
 */
export function cmdDigest(slug: string): string | null {
  const preview = fetchSimplePage(slug);
  return preview;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: meta
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Loads the per-project cache meta when a project slug is given, otherwise the cross-project meta. */
export function cmdMeta(projectSlug: string | null): CacheMeta {
  return projectSlug
    ? loadMeta('per-project', projectSlug)
    : loadMeta('cross-project', null);
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: bootstrap (T2b)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Draft entity pages produced by the `bootstrap` subcommand.
 *
 * Bootstrap synthesizes draft content for a fresh project and emits it as
 * JSON; it does not write to the brain. The caller (skill template) is
 * expected to AUQ-confirm each entity ("Synthesized X — looks right?") before
 * any write, which keeps the CLI free of prompt logic and avoids silently
 * auto-extracted garbage (D10 T4 fix).
 *
 * Every field is optional; a field is absent when nothing was synthesized
 * for that entity.
 */
export interface BootstrapDraft {
  /** Draft product page. */
  product?: {
    slug: string;
    title: string;
    body: string;
  };
  /** Draft goal pages. */
  goals?: Array<{
    slug: string;
    title: string;
    body: string;
  }>;
  /** Draft developer-persona page. */
  developer_persona?: {
    slug: string;
    title: string;
    body: string;
  };
  /** Draft brand page. */
  brand?: {
    slug: string;
    title: string;
    body: string;
  };
  /** Draft competitive-intel page. */
  competitive_intel?: {
    slug: string;
    title: string;
    body: string;
  };
}
|
||||
|
||||
/**
 * Synthesizes draft pages for `projectSlug` from local files only.
 *
 * - Product: built from CLAUDE.md / README.md in the repo root
 *   (GSTACK_REPO_ROOT, else the current working directory). Missing files are
 *   treated as empty.
 * - Goals: up to 3 hints taken from the project's learnings.jsonl under
 *   GSTACK_HOME, with slugs `gstack/goal/<project>/bootstrap-<n>`.
 *
 * Nothing is written anywhere; the draft is returned to the caller.
 */
export function cmdBootstrap(projectSlug: string): BootstrapDraft {
  const repoRoot = process.env.GSTACK_REPO_ROOT || process.cwd();
  const readOptional = (fileName: string): string => {
    try {
      return readFileSync(join(repoRoot, fileName), 'utf-8');
    } catch {
      return ''; // missing is fine
    }
  };

  const draft: BootstrapDraft = {};

  // Product: headline + lead paragraph from CLAUDE.md, falling back to README.
  const claudeMd = readOptional('CLAUDE.md');
  const readmeMd = readOptional('README.md');
  const productBody = synthesizeProductLead(claudeMd, readmeMd, projectSlug);
  if (productBody) {
    draft.product = {
      slug: `gstack/product/${projectSlug}`,
      title: projectSlug,
      body: productBody,
    };
  }

  // Goals: hints derived from the project's learnings file.
  const learningsFile = join(GSTACK_HOME, 'projects', projectSlug, 'learnings.jsonl');
  const hints = synthesizeGoalsHints(learningsFile, repoRoot);
  if (hints.length > 0) {
    draft.goals = hints.slice(0, 3).map(({ title, body }, position) => ({
      slug: `gstack/goal/${projectSlug}/bootstrap-${position + 1}`,
      title,
      body,
    }));
  }

  return draft;
}
|
||||
|
||||
/**
 * Builds a draft product page body from project docs.
 *
 * The source is CLAUDE.md when it has any non-whitespace content, otherwise
 * the README. The page heading is the first `# ` H1 of the source (falling
 * back to `slug`), and the "What" section is the first non-heading paragraph
 * that comes AFTER that H1, capped at 500 characters. When the source has no
 * H1 the first paragraph of the whole document is used. The remaining
 * sections are fixed placeholders for the user to fill in.
 *
 * @returns The markdown body, or null when both inputs are empty or
 *          whitespace-only.
 */
function synthesizeProductLead(claudeMd: string, readmeMd: string, slug: string): string | null {
  const source = claudeMd.trim() ? claudeMd : readmeMd;
  if (!source.trim()) return null;

  // `[ \t]+` (not `\s+`) so the heading text must sit on the same line as the `#`.
  const h1Match = /^#[ \t]+(.+)$/m.exec(source);
  const heading = h1Match?.[1]?.trim() || slug;

  // Look for the lead paragraph only after the H1, so text that precedes the
  // title is not mistaken for the description.
  const afterHeading = h1Match ? source.slice(h1Match.index + h1Match[0].length) : source;
  // A paragraph is a run of consecutive lines whose first visible character is not `#`.
  const paraMatch = afterHeading.match(/(?:^|\n)[ \t]*([^#\s][^\n]*(?:\n[ \t]*[^#\s][^\n]*)*)/);
  const lead = paraMatch?.[1]?.trim() || '(no description found in CLAUDE.md or README)';

  return [
    `# ${heading}`,
    '',
    '## What',
    lead.slice(0, 500),
    '',
    '## Stage',
    '(fill in current stage, e.g., v1.x shipped, in development, paused)',
    '',
    '## Team',
    '(fill in team composition + size)',
    '',
    '## Active goals',
    '(populated by /office-hours over time)',
    '',
    '## Recent decisions',
    '(populated by /plan-ceo-review over time)',
    '',
  ].join('\n');
}
|
||||
|
||||
/**
 * Derives goal hints from the last 10 non-empty lines of a learnings JSONL
 * file.
 *
 * A line becomes a hint only when it parses as a JSON object whose `insight`
 * is a non-empty string and whose `type` is `'pattern'` or `'architecture'`.
 * Lines with a non-string insight (arrays, numbers, objects) are skipped, so
 * every hint's title and body are real strings. Malformed lines and an
 * unreadable or missing file are ignored.
 *
 * @param learningsPath Path to the learnings.jsonl file.
 * @param repoRoot      Currently unused by this function.
 * @returns Hints in file order; the title is the first 80 characters of the insight.
 */
function synthesizeGoalsHints(learningsPath: string, repoRoot: string): Array<{ title: string; body: string }> {
  const hints: Array<{ title: string; body: string }> = [];
  if (!existsSync(learningsPath)) return hints;

  let lines: string[];
  try {
    lines = readFileSync(learningsPath, 'utf-8').split('\n').filter(Boolean);
  } catch {
    return hints; // unreadable file, skip
  }

  for (const line of lines.slice(-10)) {
    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      continue; // skip malformed line
    }
    if (typeof entry !== 'object' || entry === null) continue;
    const { insight, type } = entry as { insight?: unknown; type?: unknown };
    if (typeof insight !== 'string' || insight.length === 0) continue;
    if (type !== 'pattern' && type !== 'architecture') continue;
    hints.push({
      title: insight.slice(0, 80),
      body: `Source: learnings.jsonl\nType: ${type}\n\n${insight}\n`,
    });
  }
  return hints;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: list (T18)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Lists gstack-owned pages in the brain, one `list-pages` query (limit 200)
 * per gstack page type, in a fixed type order.
 *
 * With a project slug, a page is kept only when its slug contains
 * `/<projectSlug>`; user-profile pages are always kept. Types whose query
 * returns no `pages` field are skipped.
 */
export function cmdList(projectSlug: string | null): Array<{ type: string; slug: string; title?: string }> {
  const pageTypes = [
    'gstack/user-profile',
    'gstack/product',
    'gstack/goal',
    'gstack/developer-persona',
    'gstack/brand',
    'gstack/competitive-intel',
    'gstack/skill-run',
    'gstack/take',
  ];

  // Keep everything when no project is given; user profiles are not project-keyed.
  const belongsToProject = (slug: string | undefined, type: string): boolean =>
    !projectSlug || type === 'gstack/user-profile' || Boolean(slug?.includes(`/${projectSlug}`));

  const collected: Array<{ type: string; slug: string; title?: string }> = [];
  for (const type of pageTypes) {
    const listing = execGbrainJson<{ pages?: Array<{ slug: string; title?: string }> }>([
      'list-pages',
      '--type', type,
      '--limit', '200',
      '--json',
    ]);
    const pages = listing?.pages;
    if (!pages) continue;
    for (const page of pages) {
      if (belongsToProject(page.slug, type)) {
        collected.push({ type, slug: page.slug, title: page.title });
      }
    }
  }
  return collected;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Subcommand: purge (T18)
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Deletes one gstack-owned page from the brain via `delete-page` (10 s
 * timeout). This is the raw operation; confirming with the user is the
 * caller's responsibility.
 *
 * Slugs that do not start with `gstack/` are refused without running the
 * command. Cached digests are NOT invalidated here.
 *
 * @returns `{ deleted: true }` on exit status 0; otherwise `deleted: false`
 *          with the command's trimmed stderr, or `exit <status>` when stderr
 *          is empty.
 */
export function cmdPurge(slug: string): { deleted: boolean; error?: string } {
  const isGstackPage = slug.startsWith('gstack/');
  if (!isGstackPage) {
    return { deleted: false, error: 'refusing to purge non-gstack page' };
  }
  const proc = spawnGbrain(['delete-page', slug], { timeout: 10_000 });
  if (proc.status === 0) {
    // Digests that referenced this page are left in place; derived digests
    // may need an explicit invalidate.
    return { deleted: true };
  }
  const reason = proc.stderr?.trim() || `exit ${proc.status}`;
  return { deleted: false, error: reason };
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// CLI dispatch
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Minimal argv parser for the CLI.
 *
 * `argv[2]` is the subcommand (empty string when absent). Everything after it
 * is either a positional argument or a `--flag`:
 * - `--key value` stores `value` under `key` (an explicit empty string is
 *   kept as the value).
 * - `--key=value` stores `value` under `key`.
 * - `--key` followed by another `--flag`, or by nothing, stores `true`.
 *
 * @param argv Full process argv (runtime, script, subcommand, ...rest).
 */
function parseArgs(argv: string[]): { cmd: string; positional: string[]; flags: Record<string, string | boolean> } {
  const cmd = argv[2] || '';
  const rest = argv.slice(3);
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  for (let i = 0; i < rest.length; i++) {
    const arg = rest[i];
    if (arg === undefined) continue;
    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }
    // `--key=value` form; `eq > 2` requires a non-empty key before the `=`.
    const eq = arg.indexOf('=');
    if (eq > 2) {
      flags[arg.slice(2, eq)] = arg.slice(eq + 1);
      continue;
    }
    const key = arg.slice(2);
    const next = rest[i + 1];
    if (next !== undefined && !next.startsWith('--')) {
      flags[key] = next;
      i++; // the value has been consumed
    } else {
      flags[key] = true;
    }
  }
  return { cmd, positional, flags };
}
|
||||
|
||||
/** Extracts the `--project` value; a bare `--project` (boolean) or a missing flag yields null. */
function projectSlugFromFlag(flags: Record<string, string | boolean>): string | null {
  const { project } = flags;
  if (typeof project !== 'string') return null;
  return project;
}
|
||||
|
||||
/** Writes the CLI usage summary to stderr. */
function printUsage(): void {
  const usageLines = [
    'Usage: gstack-brain-cache <subcommand>',
    '',
    'Subcommands:',
    'get <entity-name> [--project <slug>]',
    'refresh [--full] [--entity X] [--project <slug>]',
    'invalidate <entity-name> [--project <slug>]',
    'digest <entity-slug>',
    'meta [--project <slug>]',
    'bootstrap --project <slug> — emit synthesized entity drafts (JSON)',
    'list [--project <slug>] — list gstack-owned pages in brain',
    'purge <slug> — delete a gstack-owned brain page (refuses non-gstack/ slugs)',
    '', // trailing newline
  ];
  process.stderr.write(usageLines.join('\n'));
}
|
||||
|
||||
/**
 * CLI dispatcher. Parses `process.argv`, runs the requested subcommand and
 * resolves to the process exit code.
 *
 * Exit codes used below:
 * - 0: success (also for help output).
 * - 1: usage error, unknown subcommand, failed refresh/purge, or a thrown error.
 * - 2: `get` found no usable digest; `digest` could not fetch the page.
 * - 3: `refresh` skipped because another refresh holds the lock.
 *
 * Errors thrown by a subcommand are reported on stderr as `error: <message>`.
 */
async function main(): Promise<number> {
  const { cmd, positional, flags } = parseArgs(process.argv);
  const projectSlug = projectSlugFromFlag(flags);

  try {
    switch (cmd) {
      case 'get': {
        const entityName = positional[0];
        if (!entityName) { printUsage(); return 1; }
        const result = cmdGet(entityName, projectSlug);
        if (result.state === 'missing') {
          process.stderr.write(`(${result.state}: ${result.message ?? 'no cache'})\n`);
          return 2;
        }
        // Non-warm states are noted on stderr; the digest still goes to stdout.
        if (result.state !== 'warm') {
          process.stderr.write(`(${result.state}${result.message ? ': ' + result.message : ''})\n`);
        }
        process.stdout.write(readFileSync(result.path, 'utf-8'));
        return 0;
      }
      case 'refresh': {
        // D3: dedup concurrent refreshes via lockfile. Skipped (dedup) when
        // another process is already mid-refresh on the same project.
        const entityFlag = flags.entity;
        if (entityFlag !== undefined && entityFlag !== false) {
          // A bare `--entity` parses as boolean true; refuse it instead of
          // trying to refresh an entity literally named "true".
          if (typeof entityFlag !== 'string' || entityFlag.length === 0) {
            process.stderr.write('refresh --entity requires an entity name\n');
            printUsage();
            return 1;
          }
          const entityName = entityFlag;
          const result = withRefreshLock(projectSlug, () => refreshEntity(entityName, projectSlug));
          if (result === 'dedup') {
            process.stderr.write(`(dedup: another refresh in flight)\n`);
            return 3;
          }
          process.stdout.write(result ? `refreshed ${entityName}\n` : `failed to refresh ${entityName}\n`);
          return result ? 0 : 1;
        }
        const allResult = withRefreshLock(projectSlug, () => refreshAll(projectSlug));
        if (allResult === 'dedup') {
          process.stderr.write(`(dedup: another refresh in flight)\n`);
          return 3;
        }
        process.stdout.write(`refreshed=${allResult.success} failed=${allResult.failed}\n`);
        return allResult.failed > 0 ? 1 : 0;
      }
      case 'invalidate': {
        const entityName = positional[0];
        if (!entityName) { printUsage(); return 1; }
        cmdInvalidate(entityName, projectSlug);
        process.stdout.write(`invalidated ${entityName}\n`);
        return 0;
      }
      case 'digest': {
        const slug = positional[0];
        if (!slug) { printUsage(); return 1; }
        const content = cmdDigest(slug);
        if (content === null) {
          process.stderr.write('brain unreachable or page not found\n');
          return 2;
        }
        process.stdout.write(content);
        return 0;
      }
      case 'meta': {
        const meta = cmdMeta(projectSlug);
        process.stdout.write(JSON.stringify(meta, null, 2) + '\n');
        return 0;
      }
      case 'bootstrap': {
        if (!projectSlug) {
          process.stderr.write('bootstrap requires --project <slug>\n');
          return 1;
        }
        const draft = cmdBootstrap(projectSlug);
        process.stdout.write(JSON.stringify(draft, null, 2) + '\n');
        return 0;
      }
      case 'list': {
        const pages = cmdList(projectSlug);
        if (flags.json) {
          process.stdout.write(JSON.stringify(pages, null, 2) + '\n');
        } else {
          // Tab-separated: type, slug, title.
          for (const p of pages) {
            process.stdout.write(`${p.type}\t${p.slug}\t${p.title ?? ''}\n`);
          }
        }
        return 0;
      }
      case 'purge': {
        const slug = positional[0];
        if (!slug) { printUsage(); return 1; }
        const result = cmdPurge(slug);
        if (result.deleted) {
          process.stdout.write(`deleted ${slug}\n`);
          return 0;
        }
        process.stderr.write(`failed: ${result.error}\n`);
        return 1;
      }
      case '':
      case 'help':
      case '--help':
      case '-h':
        printUsage();
        return 0;
      default:
        process.stderr.write(`unknown subcommand: ${cmd}\n`);
        printUsage();
        return 1;
    }
  } catch (err) {
    process.stderr.write(`error: ${err instanceof Error ? err.message : String(err)}\n`);
    return 1;
  }
}
|
||||
|
||||
// Only run main when invoked as a script (not when imported by tests)
|
||||
// Script entry point: run the CLI and use the number it resolves to as the
// process exit status. The guard keeps importing this module from running it.
if (import.meta.main) {
  void (async () => {
    const exitCode = await main();
    process.exit(exitCode);
  })();
}
|
||||
@@ -192,7 +192,10 @@ function resolveSkillFile(args: CliArgs): string | null {
|
||||
|
||||
function gbrainAvailable(): boolean {
|
||||
try {
|
||||
execFileSync("command", ["-v", "gbrain"], { stdio: "ignore" });
|
||||
execFileSync("gbrain", ["--version"], {
|
||||
stdio: "ignore",
|
||||
timeout: MCP_TIMEOUT_MS,
|
||||
});
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
|
||||
+57
-12
@@ -136,7 +136,11 @@ def load_privacy_map(path):
|
||||
|
||||
allowlist_globs = load_lines(allowlist_path)
|
||||
privacy_map = load_privacy_map(privacy_path)
|
||||
skip_lines = set(load_lines(skip_path))
|
||||
# Normalize skip entries to the POSIX form queued paths use, so a backslash
|
||||
# entry in .brain-skip.txt still matches on Windows. The drain is the safety
|
||||
# boundary that actually stages files, so it must normalize identically to
|
||||
# discover_new — otherwise an explicitly-skipped file gets committed.
|
||||
skip_lines = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
|
||||
|
||||
# Read queue; collect unique file paths.
|
||||
queue_paths = set()
|
||||
@@ -253,6 +257,8 @@ subcmd_once() {
|
||||
|
||||
# Stage with git add -f (forces past .gitignore=*) explicit paths only.
|
||||
while IFS= read -r p; do
|
||||
p="${p%$'\r'}" # Windows: compute_paths_to_stage's python print() emits CRLF;
|
||||
# a trailing CR makes the pathspec match nothing (silent no-stage).
|
||||
[ -z "$p" ] && continue
|
||||
git -C "$GSTACK_HOME" add -f -- "$p" 2>/dev/null || true
|
||||
done < "$paths_file"
|
||||
@@ -376,10 +382,13 @@ subcmd_discover_new() {
|
||||
exit 0
|
||||
fi
|
||||
# Walk allowlist globs; enqueue any file where mtime+size differs from cursor.
|
||||
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" "$SCRIPT_DIR/gstack-brain-enqueue" <<'PYEOF' 2>/dev/null || true
|
||||
import sys, os, json, glob, fnmatch, subprocess, hashlib
|
||||
python3 - "$GSTACK_HOME" "$ALLOWLIST" "$DISCOVER_CURSOR" <<'PYEOF' 2>/dev/null || true
|
||||
import sys, os, json, fnmatch
|
||||
from datetime import datetime, timezone
|
||||
|
||||
gstack_home, allowlist_path, cursor_path, enqueue_bin = sys.argv[1:5]
|
||||
gstack_home, allowlist_path, cursor_path = sys.argv[1:4]
|
||||
queue_path = os.path.join(gstack_home, ".brain-queue.jsonl")
|
||||
skip_path = os.path.join(gstack_home, ".brain-skip.txt")
|
||||
|
||||
def load_lines(path):
|
||||
try:
|
||||
@@ -403,8 +412,12 @@ def save_cursor(path, data):
|
||||
pass
|
||||
|
||||
allowlist = load_lines(allowlist_path)
|
||||
# Normalize skip entries to the same POSIX form as `rel` below, so a
|
||||
# backslash entry in .brain-skip.txt still matches a normalized path on Windows.
|
||||
skip = {s.replace(os.sep, "/") for s in load_lines(skip_path)}
|
||||
cursor = load_cursor(cursor_path)
|
||||
new_cursor = dict(cursor)
|
||||
to_enqueue = []
|
||||
|
||||
# Walk all files under gstack_home, match against allowlist.
|
||||
for root, dirs, files in os.walk(gstack_home):
|
||||
@@ -413,22 +426,54 @@ for root, dirs, files in os.walk(gstack_home):
|
||||
continue
|
||||
for name in files:
|
||||
full = os.path.join(root, name)
|
||||
rel = os.path.relpath(full, gstack_home)
|
||||
# Repo paths are POSIX-relative. os.path.relpath yields backslash
|
||||
# separators on Windows, which never match the forward-slash allowlist
|
||||
# globs (e.g. "projects/*/learnings.jsonl"), so discovery silently
|
||||
# enqueued nothing under projects/ on Windows. Normalize to "/".
|
||||
rel = os.path.relpath(full, gstack_home).replace(os.sep, "/")
|
||||
if rel.startswith(".brain-"):
|
||||
continue
|
||||
matched = any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist)
|
||||
if not matched:
|
||||
if not any(fnmatch.fnmatchcase(rel, pat) for pat in allowlist):
|
||||
continue
|
||||
if rel in skip:
|
||||
continue
|
||||
try:
|
||||
st = os.stat(full)
|
||||
key = f"{int(st.st_mtime)}:{st.st_size}"
|
||||
except OSError:
|
||||
continue
|
||||
prev = cursor.get(rel)
|
||||
if prev != key:
|
||||
# Enqueue via the shim (respects sync mode + skip list).
|
||||
subprocess.run([enqueue_bin, rel], check=False)
|
||||
new_cursor[rel] = key
|
||||
if cursor.get(rel) != key:
|
||||
to_enqueue.append((rel, key))
|
||||
|
||||
# Append to the queue directly. The previous implementation shelled out to
|
||||
# gstack-brain-enqueue once per file, but Windows Python cannot exec a
|
||||
# bash-shebang script (the spawn fails with a fork error), so discovery
|
||||
# enqueued nothing on Windows even after the path-match fix above.
|
||||
# Writing the queue line here is platform-agnostic; the drain step
|
||||
# (compute_paths_to_stage) still re-applies the skip-list + privacy filters.
|
||||
if to_enqueue:
|
||||
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
try:
|
||||
# One atomic append per record (O_APPEND, each line < PIPE_BUF), matching
|
||||
# gstack-brain-enqueue's concurrency contract so a writer-shim append
|
||||
# running in parallel can't interleave mid-record. Buffered text writes
|
||||
# don't guarantee that. Compact separators match the shim's JSON shape.
|
||||
fd = os.open(queue_path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o644)
|
||||
try:
|
||||
for rel, key in to_enqueue:
|
||||
rec = json.dumps({"file": rel, "ts": ts}, separators=(",", ":"))
|
||||
os.write(fd, (rec + "\n").encode("utf-8"))
|
||||
finally:
|
||||
os.close(fd)
|
||||
except OSError:
|
||||
# Queue write failed (disk full, AV file lock). Leave the cursor
|
||||
# unadvanced so these files are retried on the next discover instead of
|
||||
# being silently recorded as synced (which loses the change until the
|
||||
# file next changes).
|
||||
to_enqueue = []
|
||||
# Advance the cursor only for records actually written.
|
||||
for rel, key in to_enqueue:
|
||||
new_cursor[rel] = key
|
||||
|
||||
save_cursor(cursor_path, new_cursor)
|
||||
PYEOF
|
||||
|
||||
Executable
+223
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env bash
|
||||
# gstack-codex-session-import — backfill question-log.jsonl from Codex sessions.
|
||||
#
|
||||
# Codex has no AskUserQuestion tool (per docs/spikes/codex-session-format.md).
|
||||
# gstack skills running on Codex emit Decision Briefs as plain agent_message
|
||||
# text, and the user's response shows up in the next user_message. This
|
||||
# importer reconstructs those question/answer pairs from the structured
|
||||
# JSONL session files at ~/.codex/sessions/<date>/.
|
||||
#
|
||||
# Usage:
|
||||
# gstack-codex-session-import # latest session under ~/.codex/sessions/
|
||||
# gstack-codex-session-import <path/to.jsonl> # explicit session file
|
||||
# gstack-codex-session-import --since <iso> # all sessions newer than <iso>
|
||||
#
|
||||
# Recovery strategy (two-tier per D5/T4 spike):
|
||||
# 1. Marker-first: extract <gstack-qid:foo-bar> from agent_message → stable id.
|
||||
# 2. Pattern fallback: detect D<N> header + numbered options → hash id
|
||||
# (source=codex-import-pattern, never used as preference key per D18).
|
||||
#
|
||||
# Writes via bin/gstack-question-log so source tagging, dedup, and async
|
||||
# derive all apply uniformly.
|
||||
set -euo pipefail
|
||||
# Directory containing this script; sibling gstack binaries are resolved from it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# State directory: GSTACK_STATE_ROOT wins, then GSTACK_HOME, then ~/.gstack.
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
# Root of the Codex session tree; overridable through the environment.
CODEX_SESSIONS_ROOT="${CODEX_SESSIONS_ROOT:-$HOME/.codex/sessions}"

# Selection mode: latest (default) | explicit (<path>) | since (--since <iso>).
MODE="latest"
EXPLICIT_PATH=""
SINCE_ISO=""

# Only the first argument selects the mode; --since takes its value from $2.
# A missing --since value is reported later, when the mode is resolved.
if [ $# -gt 0 ]; then
  case "$1" in
    --since)
      MODE="since"
      SINCE_ISO="${2:-}"
      ;;
    --help|-h)
      # Print the header comment block as help text: skip the shebang, stop
      # before the "set -euo pipefail" line, and strip the leading "# ".
      # awk is used instead of sed because the \? BRE operator is GNU-only.
      awk 'NR == 1 { next } /^set -euo/ { exit } { sub(/^# ?/, ""); print }' "$0"
      exit 0
      ;;
    -*)
      echo "unknown flag: $1" >&2
      exit 1
      ;;
    *)
      MODE="explicit"
      EXPLICIT_PATH="$1"
      ;;
  esac
fi
|
||||
|
||||
# Resolve the list of session files to process for the selected MODE.
SESSION_FILES=()
case "$MODE" in
  explicit)
    if [ ! -f "$EXPLICIT_PATH" ]; then
      echo "gstack-codex-session-import: file not found: $EXPLICIT_PATH" >&2
      exit 1
    fi
    SESSION_FILES=("$EXPLICIT_PATH")
    ;;
  latest)
    if [ ! -d "$CODEX_SESSIONS_ROOT" ]; then
      echo "NO_SESSIONS: $CODEX_SESSIONS_ROOT does not exist"
      exit 0
    fi
    # Pick the most recently modified rollout with a -nt scan. The previous
    # `find | xargs ls -t | head -1` split paths on whitespace, mis-sorted when
    # xargs batched the arguments, and, on GNU xargs, ran a bare `ls -t`
    # (listing the current directory) when find produced no output.
    LATEST=""
    while IFS= read -r CANDIDATE; do
      if [ -z "$LATEST" ] || [ "$CANDIDATE" -nt "$LATEST" ]; then
        LATEST="$CANDIDATE"
      fi
    done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -print 2>/dev/null)
    if [ -z "$LATEST" ]; then
      echo "NO_SESSIONS: no rollout-*.jsonl files under $CODEX_SESSIONS_ROOT"
      exit 0
    fi
    SESSION_FILES=("$LATEST")
    ;;
  since)
    if [ -z "$SINCE_ISO" ]; then
      echo "--since requires an ISO 8601 timestamp" >&2
      exit 1
    fi
    # A missing sessions root falls through to the "nothing to import" exit.
    if [ -d "$CODEX_SESSIONS_ROOT" ]; then
      # Validate the timestamp with a cheap depth-0 probe so a typo is reported
      # instead of silently matching nothing.
      if ! find "$CODEX_SESSIONS_ROOT" -maxdepth 0 -newermt "$SINCE_ISO" >/dev/null 2>&1; then
        echo "gstack-codex-session-import: cannot interpret --since value: $SINCE_ISO" >&2
        exit 1
      fi
      # -newermt compares each file's mtime against the timestamp itself.
      # The previous `-newer <(date ...)` compared against the mtime of a
      # process-substitution file descriptor, not the requested time.
      while IFS= read -r f; do
        if [ -n "$f" ]; then
          SESSION_FILES+=("$f")
        fi
      done < <(find "$CODEX_SESSIONS_ROOT" -type f -name "rollout-*.jsonl" -newermt "$SINCE_ISO" -print 2>/dev/null)
    fi
    ;;
esac

if [ ${#SESSION_FILES[@]} -eq 0 ]; then
  echo "NO_SESSIONS: nothing to import"
  exit 0
fi
|
||||
|
||||
# Parse + extract via bun. Each agent_message that carries a gstack-qid marker
# or a D<N> header is paired with the next user_message and handed to
# gstack-question-log as one JSON argument. Every event is tagged with a
# source so downstream consumers (/plan-tune stats, dream cycle) can
# distinguish backfilled events from live captures.
#
# The bun script prints "<imported> 0" on stdout. Its stderr is deliberately
# NOT captured: folding warnings into COUNT_LINE fed non-numeric text into the
# arithmetic below, which aborted the whole import under `set -e`, and it also
# hid the error text of a failing bun run.
IMPORTED=0

for SESSION_FILE in "${SESSION_FILES[@]}"; do
  COUNT_LINE=$(SESSION_FILE_PATH="$SESSION_FILE" QLOG_BIN="$SCRIPT_DIR/gstack-question-log" bun -e '
    const fs = require("fs");
    const path = require("path");
    const { spawnSync } = require("child_process");
    const crypto = require("crypto");

    const sessionPath = process.env.SESSION_FILE_PATH;
    const qlogBin = process.env.QLOG_BIN;
    const lines = fs.readFileSync(sessionPath, "utf-8").trim().split("\n").filter(Boolean);

    // Split the file into the session_meta payload and the remaining events.
    // Lines that are not valid JSON are skipped.
    let meta = null;
    const stream = [];
    for (const ln of lines) {
      try {
        const e = JSON.parse(ln);
        if (e.type === "session_meta") meta = e.payload;
        else stream.push(e);
      } catch {}
    }
    if (!meta) {
      console.error("WARN: no session_meta in " + sessionPath);
      console.log("0 0");
      process.exit(0);
    }

    const cwd = meta.cwd || "";
    const sessionId = (meta.id || path.basename(sessionPath)).slice(0, 64);

    // Walk for agent_message → next user_message pairs.
    const briefs = [];
    for (let i = 0; i < stream.length; i++) {
      const e = stream[i];
      if (e.type !== "event_msg" || e.payload?.type !== "agent_message") continue;
      const text = String(e.payload?.message || "");
      if (!text) continue;
      // Detect D-numbered brief or marker. Markers are sufficient on their own.
      const markerMatch = text.match(/<gstack-qid:([a-z0-9-]{1,64})>/i);
      const dMatch = text.match(/^D\d+[\.\d]*\s*[—\-]\s*(.+?)$/m);
      if (!markerMatch && !dMatch) continue;

      // Find the next user_message in the stream.
      let answer = null;
      for (let j = i + 1; j < stream.length; j++) {
        const e2 = stream[j];
        if (e2.type === "event_msg" && e2.payload?.type === "user_message") {
          answer = String(e2.payload?.message || "").trim();
          break;
        }
      }
      // Briefs with no (or an empty) reply are not imported.
      if (!answer) continue;

      // Extract options A) ... B) ... from the brief.
      const optMatches = [...text.matchAll(/^([A-Z])\)\s+(.+?)(?:\s+\(recommended\))?$/gm)];
      const options = optMatches.map((m) => m[2].trim());

      // Identify the recommended option; only set when exactly one option
      // line carries the (recommended) label.
      let recommended;
      const recLabel = [...text.matchAll(/^([A-Z])\)\s+(.+?)\s+\(recommended\)$/gm)];
      if (recLabel.length === 1) recommended = recLabel[0][2].trim();

      // Identify which option the user picked from their answer.
      // A leading capital letter is treated as an option label; otherwise the
      // answer is searched for the first 12 characters of each option text.
      let userChoice = "__unknown__";
      const letterMatch = answer.match(/^\s*([A-Z])\b/);
      if (letterMatch) {
        const idx = letterMatch[1].charCodeAt(0) - 65;
        if (idx >= 0 && idx < options.length) userChoice = options[idx];
        else userChoice = letterMatch[1];
      } else if (options.length > 0) {
        const lower = answer.toLowerCase();
        const m = options.find((o) => lower.includes(o.toLowerCase().slice(0, 12)));
        if (m) userChoice = m;
      }
      if (userChoice === "__unknown__") {
        // No option recognized: keep a truncated copy of the raw answer.
        userChoice = answer.slice(0, 64);
      }

      const summary = (dMatch?.[1] || text.split("\n")[0]).slice(0, 200);

      // Marker ids are used verbatim; pattern matches get a content hash id.
      let questionId, source;
      if (markerMatch) {
        questionId = markerMatch[1];
        source = "codex-import-marker";
      } else {
        const sortedOpts = [...options].sort().join("|");
        const h = crypto.createHash("sha1").update("codex::" + summary + "::" + sortedOpts).digest("hex").slice(0, 10);
        questionId = "hook-" + h;
        source = "codex-import-pattern";
      }

      briefs.push({
        skill: "codex",
        question_id: questionId,
        question_summary: summary,
        options_count: options.length || 1,
        user_choice: userChoice.slice(0, 64),
        ...(recommended ? { recommended: recommended.slice(0, 64) } : {}),
        source,
        session_id: sessionId,
        // Reuse the event timestamp when present; otherwise the field is
        // undefined and JSON.stringify drops it.
        ts: e.timestamp || undefined,
      });
    }

    let imported = 0;
    for (const b of briefs) {
      const res = spawnSync(qlogBin, [JSON.stringify(b)], {
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
        // Run from the originating cwd so gstack-slug buckets events into the
        // right project. Falls back to the importer cwd if the session cwd
        // no longer exists.
        cwd: cwd && fs.existsSync(cwd) ? cwd : undefined,
        timeout: 5000,
      });
      if (res.status === 0) imported++;
    }
    console.log(imported + " 0");
  ')

  # The count is the first field of the last stdout line; anything that is not
  # a plain non-negative integer is treated as zero so the sum cannot fail.
  IMP=$(printf '%s\n' "$COUNT_LINE" | tail -n 1 | awk "{print \$1}")
  case "$IMP" in
    ''|*[!0-9]*) IMP=0 ;;
  esac
  IMPORTED=$((IMPORTED + IMP))
done

echo "IMPORTED: $IMPORTED events from ${#SESSION_FILES[@]} session(s)"
|
||||
+267
-16
@@ -8,11 +8,13 @@
|
||||
# gstack-config defaults — show just the defaults table
|
||||
#
|
||||
# Env overrides (for testing):
|
||||
# GSTACK_STATE_ROOT — override ~/.gstack state directory (highest priority,
|
||||
# matches D16 cathedral isolation convention)
|
||||
# GSTACK_HOME — override ~/.gstack state directory (aligns with writer scripts)
|
||||
# GSTACK_STATE_DIR — legacy alias for GSTACK_HOME (kept for backwards compat)
|
||||
set -euo pipefail
|
||||
|
||||
STATE_DIR="${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}"
|
||||
STATE_DIR="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}}"
|
||||
CONFIG_FILE="$STATE_DIR/config.yaml"
|
||||
|
||||
# Annotated header for new config files. Written once on first `set`.
|
||||
@@ -73,8 +75,27 @@ CONFIG_HEADER='# gstack configuration — edit freely, changes take effect on ne
|
||||
# # Set to true once the privacy gate has asked the user.
|
||||
# # Flip back to false to be re-prompted.
|
||||
#
|
||||
# ─── Plan-tune hooks ─────────────────────────────────────────────────
|
||||
# plan_tune_hooks: prompt # Controls whether ./setup installs the plan-tune
|
||||
# # Claude Code hooks (PostToolUse capture +
|
||||
# # PreToolUse preference enforcement).
|
||||
# # prompt — ask on a real TTY, skip otherwise (default)
|
||||
# # yes — install non-interactively
|
||||
# # no — skip non-interactively
|
||||
# # Override per-run: ./setup --plan-tune-hooks /
|
||||
# # --no-plan-tune-hooks, or env GSTACK_PLAN_TUNE_HOOKS.
|
||||
#
|
||||
# ─── Advanced ────────────────────────────────────────────────────────
|
||||
# codex_reviews: enabled # disabled = skip Codex adversarial reviews in /ship
|
||||
# codex_reviews: enabled # Master switch for Codex cross-model review. enabled =
|
||||
# # Codex runs as a standard step in /review, /ship,
|
||||
# # /document-release, plan reviews, and /autoplan (auto
|
||||
# # falls back to a Claude subagent if Codex is missing or
|
||||
# # not authenticated). disabled = skip all Codex passes.
|
||||
# # Asymmetry on disabled: diff-review (/review, /ship) still
|
||||
# # runs the free Claude adversarial subagent; plan-review and
|
||||
# # /document-release skip the outside-voice step entirely.
|
||||
# # An invalid value is REJECTED (existing value preserved) so
|
||||
# # a typo cannot silently turn paid Codex calls on or off.
|
||||
# gstack_contributor: false # true = file field reports when gstack misbehaves
|
||||
# skip_eng_review: false # true = skip eng review gate in /ship (not recommended)
|
||||
#
|
||||
@@ -100,6 +121,7 @@ lookup_default() {
|
||||
skill_prefix) echo "false" ;;
|
||||
checkpoint_mode) echo "explicit" ;;
|
||||
checkpoint_push) echo "false" ;;
|
||||
explain_level) echo "default" ;;
|
||||
codex_reviews) echo "enabled" ;;
|
||||
gstack_contributor) echo "false" ;;
|
||||
skip_eng_review) echo "false" ;;
|
||||
@@ -107,19 +129,145 @@ lookup_default() {
|
||||
cross_project_learnings) echo "" ;; # intentionally empty → unset triggers first-time prompt
|
||||
artifacts_sync_mode) echo "off" ;;
|
||||
artifacts_sync_mode_prompted) echo "false" ;;
|
||||
plan_tune_hooks) echo "prompt" ;; # prompt | yes | no — controls ./setup plan-tune hook install
|
||||
|
||||
redact_repo_visibility) echo "" ;; # empty → fall through to gh/glab detection
|
||||
redact_prepush_hook) echo "false" ;;
|
||||
# Brain-aware planning (v1.48 / T5+T10+T16). Defaults documented inline:
|
||||
# brain_trust_policy@<hash> — unset on fresh install; setup-gbrain
|
||||
# writes 'personal' for local engines,
|
||||
# asks the user for remote-ambiguous.
|
||||
# salience_allowlist — empty falls through to
|
||||
# SALIENCE_DEFAULT_ALLOWLIST (D9).
|
||||
# user_slug_at_<hash> — empty triggers resolve-user-slug
|
||||
# fallback chain (D4 A3) on first call.
|
||||
brain_trust_policy*) echo "unset" ;;
|
||||
salience_allowlist) echo "" ;;
|
||||
user_slug_at_*) echo "" ;;
|
||||
*) echo "" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Brain-integration helpers (T5+T10+T16)
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Compute sha8 of a string. Used for endpoint hashing.
|
||||
sha8_of() {
|
||||
printf '%s' "$1" | shasum -a 256 | cut -c1-8
|
||||
}
|
||||
|
||||
# Print an identifier for the active brain endpoint.
# Reads the gbrain MCP server URL from ~/.claude.json (either .url or
# .transport.url) and prints its sha8. Prints the literal 'local' when the
# file is absent, jq is not installed, or no URL is configured.
endpoint_hash() {
  _claude_json="$HOME/.claude.json"

  # Without both the config file and jq there is nothing to inspect.
  if [ ! -f "$_claude_json" ] || ! command -v jq >/dev/null 2>&1; then
    printf '%s' "local"
    return 0
  fi

  _url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null)
  case "$_url" in
    ''|null)
      printf '%s' "local"
      ;;
    *)
      sha8_of "$_url"
      return 0
      ;;
  esac
}
|
||||
|
||||
# Print the endpoint hash to use for namespaced config keys, escalating from
# sha8 to sha16 when the config shows the longer form is already in use.
#
# Behavior as implemented:
#   - 'local' (no MCP URL configured) is returned unchanged.
#   - Otherwise the config file is scanned for brain_trust_policy@<hash> and
#     user_slug_at_<hash> keys starting with the active sha8. If one exists
#     AND a key namespaced by the first 16 hex characters of the current
#     URL's digest is also present, the sha16 is printed.
#   - In every other case the sha8 is printed.
#
# The key patterns mirror how the keys are written: brain_trust_policy uses an
# '@' separator, user_slug_at uses '_'. A single "(a|b)@" pattern can never
# match the user_slug_at_<hash> form.
endpoint_hash_with_collision_check() {
  _active=$(endpoint_hash)
  if [ "$_active" = "local" ]; then
    printf '%s' "$_active"
    return 0
  fi
  # Is any key already recorded under this sha8 prefix?
  _matching=$(grep -E "^(brain_trust_policy@|user_slug_at_)${_active}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
  _claude_json="$HOME/.claude.json"
  if [ -n "$_matching" ] && [ -f "$_claude_json" ] && command -v jq >/dev/null 2>&1; then
    # `|| true` keeps a jq failure from aborting the script under `set -e`;
    # an empty result simply falls back to the sha8 below.
    _url=$(jq -r '.mcpServers.gbrain.url // .mcpServers.gbrain.transport.url // empty' "$_claude_json" 2>/dev/null || true)
    if [ -n "$_url" ] && [ "$_url" != "null" ]; then
      _sha16=$(printf '%s' "$_url" | shasum -a 256 | cut -c1-16)
      # A stored sha16-namespaced key means this endpoint was escalated before.
      _stored16=$(grep -E "^(brain_trust_policy@|user_slug_at_)${_sha16}" "$CONFIG_FILE" 2>/dev/null | head -1 || true)
      if [ -n "$_stored16" ]; then
        printf '%s' "$_sha16"
        return 0
      fi
    fi
  fi
  printf '%s' "$_active"
}
|
||||
|
||||
# Resolve the user-slug for the active endpoint and print it (no newline).
# A slug already stored under user_slug_at_<endpoint-hash> is returned as-is.
# Otherwise the D4 A3 chain is tried in order, first non-empty result wins:
#   1. gbrain whoami --json → .client_name, else .token_name (needs gbrain + jq)
#   2. $USER
#   3. email-<sha8(git config user.email)>
#   4. anonymous-<sha8(hostname)>
# Sources 1 and 2 are lowercased, spaces become '-', and anything outside
# [alnum-] is dropped. The result is appended straight to the config file
# (not via `gstack-config set`, to avoid recursion) unless the key exists.
resolve_user_slug() {
  _hash=$(endpoint_hash_with_collision_check)
  _slug_key="user_slug_at_${_hash}"

  # Fast path: reuse the last stored value for this endpoint.
  _stored=$(grep -E "^${_slug_key}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
  case "$_stored" in
    '') ;;
    *)
      printf '%s' "$_stored"
      return 0
      ;;
  esac

  _slug=""

  # Layer 1: gbrain whoami
  if command -v gbrain >/dev/null 2>&1; then
    _whoami=$(gbrain whoami --json 2>/dev/null || true)
    if [ -n "$_whoami" ] && command -v jq >/dev/null 2>&1; then
      _client_name=$(printf '%s' "$_whoami" | jq -r '.client_name // .token_name // empty' 2>/dev/null || true)
      case "$_client_name" in
        ''|null) ;;
        *)
          _slug=$(printf '%s' "$_client_name" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
          ;;
      esac
    fi
  fi

  # Layer 2: $USER
  if [ -z "$_slug" ]; then
    if [ -n "${USER:-}" ]; then
      _slug=$(printf '%s' "$USER" | tr '[:upper:] ' '[:lower:]-' | tr -dc '[:alnum:]-')
    fi
  fi

  # Layer 3: sha8 of git email
  if [ -z "$_slug" ]; then
    _email=$(git config user.email 2>/dev/null || true)
    case "$_email" in
      '') ;;
      *) _slug="email-$(sha8_of "$_email")" ;;
    esac
  fi

  # Layer 4: anonymous-<sha8(hostname)>
  if [ -z "$_slug" ]; then
    _slug="anonymous-$(sha8_of "$(hostname 2>/dev/null || echo unknown)")"
  fi

  # Persist with a direct file write, creating the annotated config first
  # when it does not exist yet.
  mkdir -p "$STATE_DIR"
  if [ ! -f "$CONFIG_FILE" ]; then
    printf '%s' "$CONFIG_HEADER" > "$CONFIG_FILE"
  fi
  if ! grep -qE "^${_slug_key}:" "$CONFIG_FILE" 2>/dev/null; then
    printf '%s\n' "${_slug_key}: ${_slug}" >> "$CONFIG_FILE"
  fi

  printf '%s' "$_slug"
}
|
||||
|
||||
case "${1:-}" in
|
||||
get)
|
||||
KEY="${2:?Usage: gstack-config get <key>}"
|
||||
# Validate key (alphanumeric + underscore only)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
|
||||
echo "Error: key must contain only alphanumeric characters and underscores" >&2
|
||||
# Validate key (alphanumeric + underscore + optional @<hash> suffix for
|
||||
# endpoint-namespaced keys introduced by the brain-aware planning layer)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
|
||||
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
|
||||
exit 1
|
||||
fi
|
||||
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
# Use literal match for keys containing @ (sha hashes), regex otherwise
|
||||
VALUE=$(grep -F "${KEY}:" "$CONFIG_FILE" 2>/dev/null | grep -E "^${KEY%@*}(@[a-f0-9]+)?:" | grep -F "${KEY}:" | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
if [ -z "$VALUE" ]; then
|
||||
VALUE=$(lookup_default "$KEY")
|
||||
fi
|
||||
@@ -128,11 +276,17 @@ case "${1:-}" in
|
||||
set)
|
||||
KEY="${2:?Usage: gstack-config set <key> <value>}"
|
||||
VALUE="${3:?Usage: gstack-config set <key> <value>}"
|
||||
# Validate key (alphanumeric + underscore only)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+$'; then
|
||||
echo "Error: key must contain only alphanumeric characters and underscores" >&2
|
||||
# Validate key (alphanumeric + underscore + optional @<hash> suffix)
|
||||
if ! printf '%s' "$KEY" | grep -qE '^[a-zA-Z0-9_]+(@[a-f0-9]+)?$'; then
|
||||
echo "Error: key must contain only alphanumeric characters, underscores, and an optional @<hex-hash> suffix" >&2
|
||||
exit 1
|
||||
fi
|
||||
# Validate brain_trust_policy value domain (D4 / D11)
|
||||
if printf '%s' "$KEY" | grep -qE '^brain_trust_policy(@|$)' && \
|
||||
[ "$VALUE" != "personal" ] && [ "$VALUE" != "shared" ] && [ "$VALUE" != "unset" ]; then
|
||||
echo "Warning: brain_trust_policy '$VALUE' not recognized. Valid values: personal, shared, unset. Using unset." >&2
|
||||
VALUE="unset"
|
||||
fi
|
||||
# V1: whitelist values for keys with closed value domains. Unknown values warn + default.
|
||||
if [ "$KEY" = "explain_level" ] && [ "$VALUE" != "default" ] && [ "$VALUE" != "terse" ]; then
|
||||
echo "Warning: explain_level '$VALUE' not recognized. Valid values: default, terse. Using default." >&2
|
||||
@@ -142,6 +296,28 @@ case "${1:-}" in
|
||||
echo "Warning: artifacts_sync_mode '$VALUE' not recognized. Valid values: off, artifacts-only, full. Using off." >&2
|
||||
VALUE="off"
|
||||
fi
|
||||
# redact_repo_visibility: a LOCAL override for repos gh/glab can't read (e.g.
|
||||
# self-hosted GitLab). It lives in ~/.gstack/config.yaml (never committed), so
|
||||
# it can't be used to weaken the gate repo-wide for other contributors.
|
||||
if [ "$KEY" = "redact_repo_visibility" ] && [ "$VALUE" != "public" ] && [ "$VALUE" != "private" ] && [ "$VALUE" != "unknown" ]; then
|
||||
echo "Warning: redact_repo_visibility '$VALUE' not recognized. Valid values: public, private, unknown. Using unknown." >&2
|
||||
VALUE="unknown"
|
||||
fi
|
||||
if [ "$KEY" = "redact_prepush_hook" ] && [ "$VALUE" != "true" ] && [ "$VALUE" != "false" ]; then
|
||||
echo "Warning: redact_prepush_hook '$VALUE' not recognized. Valid values: true, false. Using false." >&2
|
||||
VALUE="false"
|
||||
fi
|
||||
if [ "$KEY" = "plan_tune_hooks" ] && [ "$VALUE" != "prompt" ] && [ "$VALUE" != "yes" ] && [ "$VALUE" != "no" ]; then
|
||||
echo "Warning: plan_tune_hooks '$VALUE' not recognized. Valid values: prompt, yes, no. Using prompt." >&2
|
||||
VALUE="prompt"
|
||||
fi
|
||||
# codex_reviews controls PAID Codex calls. Unlike the warn-and-default keys above,
|
||||
# an invalid value is REJECTED and the existing setting is left unchanged — a typo
|
||||
# must never silently flip the switch and turn paid Codex calls on or off.
|
||||
if [ "$KEY" = "codex_reviews" ] && [ "$VALUE" != "enabled" ] && [ "$VALUE" != "disabled" ]; then
|
||||
echo "Error: codex_reviews '$VALUE' not recognized. Valid values: enabled, disabled. Existing value left unchanged." >&2
|
||||
exit 1
|
||||
fi
|
||||
mkdir -p "$STATE_DIR"
|
||||
# Write annotated header on first creation
|
||||
if [ ! -f "$CONFIG_FILE" ]; then
|
||||
@@ -169,9 +345,9 @@ case "${1:-}" in
|
||||
echo ""
|
||||
echo "# ─── Active values (including defaults for unset keys) ───"
|
||||
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
|
||||
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
|
||||
gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted; do
|
||||
skill_prefix checkpoint_mode checkpoint_push explain_level \
|
||||
codex_reviews gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
|
||||
VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true)
|
||||
SOURCE="default"
|
||||
if [ -n "$VALUE" ]; then
|
||||
@@ -185,14 +361,89 @@ case "${1:-}" in
|
||||
defaults)
|
||||
echo "# gstack-config defaults"
|
||||
for KEY in proactive routing_declined telemetry auto_upgrade update_check \
|
||||
skill_prefix checkpoint_mode checkpoint_push codex_reviews \
|
||||
gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted; do
|
||||
skill_prefix checkpoint_mode checkpoint_push explain_level \
|
||||
codex_reviews gstack_contributor skip_eng_review workspace_root \
|
||||
artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do
|
||||
printf ' %-24s %s\n' "$KEY:" "$(lookup_default "$KEY")"
|
||||
done
|
||||
;;
|
||||
endpoint-hash)
|
||||
# Brain integration helper (T10): print active brain endpoint sha8
|
||||
endpoint_hash_with_collision_check
|
||||
;;
|
||||
resolve-user-slug)
|
||||
# Brain integration helper (T16 / D4 A3): resolve + persist user-slug
|
||||
resolve_user_slug
|
||||
;;
|
||||
gbrain-refresh)
  # Brain integration helper: re-detect gbrain installation state and
  # persist it to $STATE_DIR/gbrain-detection.json. gen-skill-docs reads this
  # file (when invoked with --respect-detection) to decide whether to
  # render GBRAIN_CONTEXT_LOAD and GBRAIN_SAVE_RESULTS blocks in
  # generated SKILL.md files.
  #
  # Run this after installing or uninstalling gbrain so your locally
  # generated SKILL.md files match your installation state.
  SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
  DETECT_BIN="$SCRIPT_DIR/gstack-gbrain-detect"
  DETECTION_FILE="$STATE_DIR/gbrain-detection.json"
  mkdir -p "$STATE_DIR"
  if [ ! -x "$DETECT_BIN" ]; then
    echo "gstack-gbrain-detect not found at $DETECT_BIN" >&2
    exit 1
  fi
  # Write to a temp file first and rename, so readers never see a partial
  # file. If the detector itself fails, record a "no-cli" result instead.
  if ! "$DETECT_BIN" > "$DETECTION_FILE.tmp" 2>/dev/null; then
    printf '{"gbrain_on_path":false,"gbrain_local_status":"no-cli"}\n' > "$DETECTION_FILE.tmp"
  fi
  mv "$DETECTION_FILE.tmp" "$DETECTION_FILE"

  # Summarize for the user. Use python (already required elsewhere) to
  # parse the JSON portably; fall back to grep if python is unavailable.
  PYTHON_CMD=$(command -v python3 || command -v python || true)
  if [ -n "$PYTHON_CMD" ]; then
    # The file path is passed as argv[1] rather than spliced into the Python
    # source, so a path containing a quote cannot break (or alter) the program.
    STATUS=$("$PYTHON_CMD" -c 'import json,sys; d=json.load(open(sys.argv[1])); print(d.get("gbrain_local_status","unknown"))' "$DETECTION_FILE" 2>/dev/null || echo unknown)
    VERSION=$("$PYTHON_CMD" -c 'import json,sys; d=json.load(open(sys.argv[1])); print(d.get("gbrain_version") or "unknown")' "$DETECTION_FILE" 2>/dev/null || echo unknown)
  else
    # grep exits 1 when the key is absent (the fallback JSON above has no
    # gbrain_version). `|| true` keeps that from aborting the script if it
    # runs under `set -e` / `pipefail`; the defaults below then apply.
    STATUS=$(grep -o '"gbrain_local_status":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/' || true)
    VERSION=$(grep -o '"gbrain_version":[[:space:]]*"[^"]*"' "$DETECTION_FILE" | sed 's/.*"\([^"]*\)"$/\1/' || true)
    STATUS="${STATUS:-unknown}"
    VERSION="${VERSION:-unknown}"
  fi

  case "$STATUS" in
    ok)
      echo "Detected gbrain v$VERSION."
      # Render brain-aware blocks INTO the global install so EVERY project's
      # Claude sessions get them (other projects read SKILL.md + sections from
      # ~/.claude/skills/gstack via absolute paths baked at gen time). Guards
      # (never mutate an arbitrary directory): the target must exist, not be a
      # symlink (a symlinked install points at a dev worktree — rendering there
      # would dirty tracked source), and look like a real gstack clone.
      INSTALL_DIR="$HOME/.claude/skills/gstack"
      if [ ! -d "$INSTALL_DIR" ]; then
        echo "No global install at $INSTALL_DIR — nothing to render. (Dev workspaces get blocks via bin/dev-setup.)"
      elif [ -L "$INSTALL_DIR" ]; then
        echo "Skip: $INSTALL_DIR is a symlink (likely a dev worktree). Rendering there would dirty tracked source — run bin/dev-setup in that worktree instead."
      elif [ ! -f "$INSTALL_DIR/VERSION" ] || [ ! -f "$INSTALL_DIR/package.json" ]; then
        echo "Skip: $INSTALL_DIR doesn't look like a gstack clone (missing VERSION/package.json) — refusing to modify it."
      elif ! command -v bun >/dev/null 2>&1; then
        echo "Skip: bun not on PATH — can't render. Install bun, then re-run 'gstack-config gbrain-refresh'."
      elif ( cd "$INSTALL_DIR" && bun run gen:skill-docs:user --host claude >/dev/null 2>&1 ); then
        echo "Rendered brain-aware blocks into $INSTALL_DIR — now live across all your projects' Claude sessions."
        echo "Note: this dirties the install's git tree (generated blocks differ from main, by design)."
        echo " A 'git reset --hard origin/main' there reverts them; re-run 'gstack-config gbrain-refresh' to restore."
      else
        echo "Warning: render failed. Run 'cd $INSTALL_DIR && bun run gen:skill-docs:user --host claude' manually to see the error."
      fi
      ;;
    *)
      echo "gbrain not detected (local-status: $STATUS) → brain-aware blocks will be suppressed in planning-skill SKILL.md files."
      echo "Install gbrain (see /setup-gbrain) and re-run 'gstack-config gbrain-refresh' once it's configured."
      ;;
  esac
  ;;
|
||||
*)
|
||||
echo "Usage: gstack-config {get|set|list|defaults} [key] [value]"
|
||||
echo "Usage: gstack-config {get|set|list|defaults|endpoint-hash|resolve-user-slug|gbrain-refresh} [key] [value]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
Executable
+89
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-decision-log — append a durable decision (or supersede/redact/compact it).
|
||||
*
|
||||
* Usage:
|
||||
* gstack-decision-log '{"decision":"...","rationale":"...","scope":"repo","source":"user"}'
|
||||
* gstack-decision-log --supersede <decision-id>
|
||||
* gstack-decision-log --redact <decision-id>
|
||||
* gstack-decision-log --compact
|
||||
*
|
||||
* Event-sourced (lib/gstack-decision): every call appends an event and refreshes the
|
||||
* bounded active snapshot. NON-INTERACTIVE — never prompts (agents/skills call this;
|
||||
* a prompt would hang them). Validation + injection + HIGH-secret rejection happen in
|
||||
* validateDecide; a rejected decision exits 1 with a message, nothing persisted.
|
||||
*/
|
||||
|
||||
import { mkdirSync } from "fs";
|
||||
import { dirname } from "path";
|
||||
import { spawnSync } from "child_process";
|
||||
import {
|
||||
decisionPaths,
|
||||
validateDecide,
|
||||
makeRefEvent,
|
||||
appendEvent,
|
||||
rebuildSnapshot,
|
||||
compact,
|
||||
type DecisionEvent,
|
||||
} from "../lib/gstack-decision";
|
||||
import { resolveSlug, gitBranch, flagValue } from "../lib/bin-context";
|
||||
|
||||
// Directory containing this script; sibling helper binaries are resolved against it.
const HERE = import.meta.dir;

// CLI arguments without the runtime and script path.
const args = process.argv.slice(2);
// Project slug (resolved via the sibling gstack-slug helper) selects the decision store.
const slug = resolveSlug(`${HERE}/gstack-slug`);
const paths = decisionPaths(slug);
// Make sure the directory that holds the event log exists before any mode runs.
mkdirSync(dirname(paths.log), { recursive: true });
|
||||
|
||||
/**
 * Hand this project's decisions log to the sibling `gstack-brain-enqueue`
 * helper for cross-machine sync.
 *
 * Best-effort: the helper runs synchronously via `spawnSync`, its stdio is
 * discarded, and its exit status is never inspected. (The helper is described
 * as a no-op when artifacts_sync is off — not verifiable from this file.)
 */
function enqueue(): void {
  const helperBin = `${HERE}/gstack-brain-enqueue`;
  const relativeLogPath = `projects/${slug}/decisions.jsonl`;
  spawnSync(helperBin, [relativeLogPath], { stdio: "ignore" });
}
|
||||
|
||||
// Mode 1: --compact. Checked first, so it wins over every other argument.
if (args.includes("--compact")) {
  const r = compact(paths);
  if (r.skipped) {
    // A skipped compaction is reported but still exits 0, and nothing is enqueued.
    console.log("compact skipped: a concurrent write/compact is in progress; log left intact — re-run");
    process.exit(0);
  }
  console.log(`compacted: ${r.activeCount} active, ${r.archivedCount} archived, ${r.expungedCount} expunged`);
  enqueue();
  process.exit(0);
}

// Mode 2: --supersede <id> / --redact <id>. Appends a reference event that
// points at an existing decision, then refreshes the active snapshot.
const supersedeId = flagValue(args, "--supersede");
const redactId = flagValue(args, "--redact");
if (supersedeId || redactId) {
  // When both flags are present, --supersede takes precedence and --redact is ignored.
  const kind = supersedeId ? "supersede" : "redact";
  const targetId = (supersedeId || redactId) as string;
  appendEvent(paths, makeRefEvent(kind, targetId, { source: "agent" }));
  rebuildSnapshot(paths);
  enqueue();
  console.log(`${kind}: ${targetId}`);
  process.exit(0);
}

// Mode 3: record a new decision. The first argument that does not start with
// "--" is taken as the JSON payload.
const jsonArg = args.find((a) => !a.startsWith("--"));
if (!jsonArg) {
  process.stderr.write(
    "gstack-decision-log: provide a JSON decision, or --supersede/--redact <id>, or --compact\n",
  );
  process.exit(1);
}
let obj: Partial<DecisionEvent>;
try {
  obj = JSON.parse(jsonArg);
} catch {
  process.stderr.write("gstack-decision-log: invalid JSON\n");
  process.exit(1);
}
// A branch-scoped decision without an explicit branch is stamped with the current git branch.
if (obj.scope === "branch" && !obj.branch) obj.branch = gitBranch();
// Nothing is persisted unless validation succeeds; a rejection exits 1 with the reason.
const res = validateDecide(obj);
if (!res.ok) {
  process.stderr.write(`gstack-decision-log: ${res.error}\n`);
  process.exit(1);
}
appendEvent(paths, res.event);
rebuildSnapshot(paths);
enqueue();
// Print the new event's id on stdout so callers can reference it later.
console.log(res.event.id);
|
||||
Executable
+108
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-decision-search — read active decisions (the curated "what did we decide" view).
|
||||
*
|
||||
* Usage:
|
||||
* gstack-decision-search [--query KW] [--scope repo|branch|issue]
|
||||
* [--branch B] [--issue I] [--recent N] [--all] [--json]
|
||||
* [--semantic]
|
||||
*
|
||||
* Reads the BOUNDED active snapshot (decisions.active.json) — O(active), not a full
|
||||
* history scan — and rebuilds it from the event log if missing. Scope-filtered to the
|
||||
* current branch/issue context (recency != relevance). NON-INTERACTIVE. `--all` shows
|
||||
* superseded decisions too (from the full log). Exit 0 silently when there are none.
|
||||
*
|
||||
* `--semantic` (with `--query`) appends an OPTIONAL "related from memory" block from
|
||||
* gbrain semantic recall. It is a pure enhancement: when gbrain is off/unconfigured/
|
||||
* empty it degrades silently to the reliable file results above. The reliable path
|
||||
* never loads gbrain code (the semantic module is imported lazily only here).
|
||||
*/
|
||||
|
||||
import { existsSync } from "fs";
|
||||
import {
|
||||
decisionPaths,
|
||||
readSnapshot,
|
||||
rebuildSnapshot,
|
||||
readEvents,
|
||||
filterByScope,
|
||||
datamark,
|
||||
type ActiveDecision,
|
||||
} from "../lib/gstack-decision";
|
||||
import { resolveSlug, gitBranch, flagValue } from "../lib/bin-context";
|
||||
|
||||
// Directory containing this script; the gstack-slug helper is resolved against it.
const HERE = import.meta.dir;
const args = process.argv.slice(2);

const slug = resolveSlug(`${HERE}/gstack-slug`);
const paths = decisionPaths(slug);
// queryRaw keeps the user's original casing for semantic recall; `query` is
// the lowercased form used for case-insensitive substring matching below.
const queryRaw = flagValue(args, "--query");
const query = queryRaw?.toLowerCase();
const scope = flagValue(args, "--scope");
// Without --branch, results are scoped to the current git branch.
const branch = flagValue(args, "--branch") ?? gitBranch();
const issue = flagValue(args, "--issue");
const recentRaw = flagValue(args, "--recent");
// A non-numeric --recent parses to NaN, which the `recent && recent > 0` guard below ignores.
const recent = recentRaw ? parseInt(recentRaw, 10) : undefined;
const showAll = args.includes("--all");
const asJson = args.includes("--json");
const semantic = args.includes("--semantic");

let rows: ActiveDecision[];
if (showAll) {
  // --all includes SUPERSEDED decisions (history), but NEVER redacted ones — a redact
  // is an expunge, so it must remove the text from every read path, not just active.
  const events = readEvents(paths);
  // Ids targeted by any redact event (the target id is carried in `supersedes`).
  const redacted = new Set(
    events.filter((e) => e.kind === "redact" && e.supersedes).map((e) => e.supersedes as string),
  );
  rows = events.filter((e): e is ActiveDecision => e.kind === "decide" && !redacted.has(e.id));
} else {
  rows = readSnapshot(paths);
  // Rebuild when the snapshot read yields no rows and an event log exists (don't
  // write a snapshot into a nonexistent store on an empty read — just return nothing).
  // NOTE(review): this also re-runs the rebuild when the snapshot exists but is
  // legitimately empty — confirm that is intended.
  if (!rows.length && existsSync(paths.log)) rows = rebuildSnapshot(paths);
}

// Narrow to the branch/issue context first, then apply the optional explicit filters.
rows = filterByScope(rows, { branch, issue });
if (scope) rows = rows.filter((d) => d.scope === scope);
if (query) {
  // Case-insensitive substring match over the string-valued free-text fields.
  rows = rows.filter((d) =>
    [d.decision, d.rationale, d.alternatives_considered]
      .filter((s): s is string => typeof s === "string")
      .some((s) => s.toLowerCase().includes(query)),
  );
}
rows.sort((a, b) => (a.date < b.date ? 1 : a.date > b.date ? -1 : 0)); // newest first
// --recent N truncates AFTER sorting, so it keeps the N newest matches.
if (recent && recent > 0) rows = rows.slice(0, recent);

if (asJson) {
  // --json stays reliable-only (semantic recall is a human-facing supplement).
  console.log(JSON.stringify(rows));
  process.exit(0);
}

for (const d of rows) {
  // Datamark all stored free-text (decision, rationale, branch/issue) — it lands in
  // agent context via Context Recovery, so treat it as DATA, not instructions.
  const branchTag = d.branch ? `:${datamark(d.branch)}` : "";
  const issueTag = d.issue ? `:${datamark(d.issue)}` : "";
  // Repo-scoped decisions carry no tag; other scopes show "[scope:branch:issue]".
  const scopeTag = d.scope === "repo" ? "" : ` [${d.scope}${branchTag}${issueTag}]`;
  // Only the YYYY-MM-DD prefix of the stored date is printed.
  console.log(`- ${datamark(d.decision ?? "")}${scopeTag} (${d.source}, ${d.date.slice(0, 10)})`);
  if (d.rationale) console.log(`  why: ${datamark(d.rationale)}`);
}

// OPTIONAL gbrain enhancement. Lazy import so the reliable path above never loads
// gbrain code. Degrades silently: null (gbrain off) or [] (nothing found) leaves the
// reliable results above as the answer.
if (semantic && queryRaw) {
  const { semanticRecall } = await import("../lib/gstack-decision-semantic");
  const hits = semanticRecall(queryRaw);
  if (hits && hits.length) {
    console.log("\nRelated from memory (gbrain semantic recall):");
    for (const h of hits) {
      // gbrain hits are EXTERNAL corpus content — datamark slug + snippet too so they
      // can't spoof role markers / fences when printed into agent context.
      // Snippets longer than 100 characters are cut and suffixed with an ellipsis.
      const snip = datamark(h.snippet.length > 100 ? `${h.snippet.slice(0, 100)}…` : h.snippet);
      console.log(`  [${h.score.toFixed(2)}] ${datamark(h.slug)}: ${snip}`);
    }
  }
}
|
||||
Executable
+167
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
"""gstack-detach — run a long agent job (evals, benchmarks, syncs) robustly.
|
||||
|
||||
Agent-launched long jobs on a shared dev box keep dying to environmental
|
||||
killers. This tool bakes in the fixes so gstack (and every gstack user) runs
|
||||
them properly:
|
||||
|
||||
* SIGTERM-proof: fork + setsid puts the job in its OWN session, so the
|
||||
harness's "polite quit" SIGTERM to the launching process group can't reach
|
||||
it (observed: `script "test:gate" was terminated by signal SIGTERM`).
|
||||
* No idle-sleep death (macOS): wraps the command in `caffeinate -i`.
|
||||
* No cross-worktree API saturation: `--lock NAME` takes a machine-wide
|
||||
advisory lock so concurrent Conductor worktrees SERIALIZE their eval runs
|
||||
instead of saturating the shared model API (which mass-times-out E2E suites).
|
||||
* No shared-/tmp collision: a run-scoped log path by default
|
||||
(~/.gstack-dev/eval-runs/<label>-<slug>-<branch>-<ts>-<pid>.log), so
|
||||
concurrent runs never clobber or contaminate each other's logs.
|
||||
* No silent hang: `--timeout SECS` watchdog kills a stalled run, and a
|
||||
`### gstack-detach EXIT=<code> ###` sentinel is ALWAYS appended on a
|
||||
terminal path so a poller can tell finished-vs-died (silence != success).
|
||||
|
||||
Usage:
|
||||
gstack-detach [--log PATH] [--lock NAME] [--timeout SECS] [--label LBL] -- CMD [ARGS...]
|
||||
|
||||
Prints `gstack-detach LOG <path>` and returns immediately. Poll the log; break
|
||||
on `### gstack-detach EXIT=` (both success and failure are marked).
|
||||
|
||||
Secrets are inherited from the environment ONLY — never pass an API key in argv.
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
def _now():
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
|
||||
def _git(*args):
|
||||
try:
|
||||
return subprocess.check_output(["git", *args], stderr=subprocess.DEVNULL, text=True).strip()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def run_scoped_log(label):
    """Build the default per-run log path under ``~/.gstack-dev/eval-runs``.

    The directory is created if needed. The filename combines ``label``, the
    basename of the git toplevel ("unknown" outside a repo), the current
    branch with "/" replaced by "-" ("nobranch" when git reports none), a UTC
    timestamp and this process's pid.
    """
    runs_dir = os.path.expanduser("~/.gstack-dev/eval-runs")
    os.makedirs(runs_dir, exist_ok=True)

    toplevel = _git("rev-parse", "--show-toplevel")
    if toplevel:
        slug = os.path.basename(toplevel)
    else:
        slug = "unknown"

    branch = _git("branch", "--show-current")
    if not branch:
        branch = "nobranch"
    branch = branch.replace("/", "-")

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    filename = f"{label}-{slug}-{branch}-{stamp}-{os.getpid()}.log"
    return os.path.join(runs_dir, filename)
|
||||
|
||||
|
||||
def log_line(path, msg):
    """Append ``msg`` plus a newline to the file at ``path``.

    The file is opened unbuffered in binary append mode for each call, and the
    text is UTF-8 encoded with unencodable characters replaced.
    """
    with open(path, "ab", buffering=0) as sink:
        line = msg + "\n"
        sink.write(line.encode("utf-8", "replace"))
|
||||
|
||||
|
||||
def acquire_lock(name, log):
    """Take the machine-wide advisory lock ``name`` via ``fcntl.flock``.

    Blocks until the lock is free so concurrent worktrees serialize rather
    than saturate the API. While waiting, a WAITING marker is appended to
    ``log``; once held, an ACQUIRED marker is appended and the lock file is
    rewritten with this process's ``<pid> <timestamp>``.

    Returns the open file object holding the lock. The caller must keep it
    open for as long as the lock is needed; closing it releases the lock.
    """
    import fcntl

    d = os.path.expanduser("~/.gstack/locks")
    os.makedirs(d, exist_ok=True)
    # Open with "a+" rather than "w": "w" truncates on open, i.e. BEFORE the
    # lock is held, which would wipe the current holder's "<pid> <timestamp>"
    # record every time another run merely starts waiting.
    fd = open(os.path.join(d, f"{name}.lock"), "a+")
    try:
        # Non-blocking attempt first, only so the wait can be announced in the log.
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        log_line(log, f"### gstack-detach WAITING for lock '{name}' (another run holds it) ### {_now()}")
        fcntl.flock(fd, fcntl.LOCK_EX)  # block until released
    # The lock is ours now: replace the previous holder's record with our own.
    fd.seek(0)
    fd.truncate()
    fd.write(f"{os.getpid()} {_now()}\n")
    fd.flush()
    log_line(log, f"### gstack-detach LOCK '{name}' ACQUIRED ### {_now()}")
    return fd
|
||||
|
||||
|
||||
def _stop_process_group(proc, grace=5.0):
    """SIGTERM ``proc``'s process group, then SIGKILL whatever survives ``grace`` seconds."""
    # child_run starts the command with start_new_session=True, so the child
    # leads its own process group and its pid doubles as the group id. Using
    # proc.pid (instead of os.getpgid) keeps working after the leader exited.
    pgid = proc.pid
    try:
        os.killpg(pgid, signal.SIGTERM)
    except OSError:
        pass
    deadline = time.monotonic() + grace
    while time.monotonic() < deadline:
        proc.poll()  # reap the leader so a zombie does not keep the group looking alive
        try:
            os.killpg(pgid, 0)  # signal 0 only probes whether the group still exists
        except OSError:
            return  # nothing left to kill — no need to sit out the full grace period
        time.sleep(0.1)
    # Grace period over: kill the WHOLE group, not just the direct child, so
    # descendants that ignored SIGTERM do not outlive the watchdog.
    try:
        os.killpg(pgid, signal.SIGKILL)
    except OSError:
        pass
    try:
        proc.wait(timeout=grace)  # reap the direct child
    except Exception:
        pass


def child_run(args, log):
    """Supervise one detached run: lock, launch, watchdog, EXIT sentinel.

    Reads ``args.lock`` (optional lock name), ``args.cmd`` (argv list),
    ``args.label`` and ``args.timeout`` (seconds; 0 or less disables the
    watchdog). The command's stdout and stderr are appended to ``log``. On
    completion a ``### gstack-detach EXIT=<code> ###`` line is appended, where
    ``<code>`` is the command's return code or ``timeout`` when the watchdog
    fired. The lock, if any, is released last.
    """
    lock_fd = acquire_lock(args.lock, log) if args.lock else None
    cmd = args.cmd
    if shutil.which("caffeinate"):  # macOS: block idle-sleep for the run
        cmd = ["caffeinate", "-i", *cmd]
    log_line(log, f"### gstack-detach START label={args.label} pgid={os.getpgid(0)} ### {_now()}")
    with open(log, "ab", buffering=0) as f:
        # start_new_session: the command runs in its OWN process group so the
        # watchdog can killpg() it without also killing this supervisor (which
        # must survive to write the EXIT sentinel).
        proc = subprocess.Popen(
            cmd, stdout=f, stderr=subprocess.STDOUT, stdin=subprocess.DEVNULL, start_new_session=True
        )
        if args.timeout and args.timeout > 0:
            try:
                code = proc.wait(timeout=args.timeout)
            except subprocess.TimeoutExpired:
                log_line(log, f"### gstack-detach WATCHDOG fired after {args.timeout}s — killing ### {_now()}")
                _stop_process_group(proc)
                code = "timeout"
        else:
            code = proc.wait()
    log_line(log, f"### gstack-detach EXIT={code} ### {_now()}")
    if lock_fd:
        try:
            lock_fd.close()  # closing the file releases the flock
        except Exception:
            pass
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse options, pick the log file, then detach and run.

    The launching process prints ``gstack-detach LOG <path>`` and exits 0
    immediately. The forked child becomes a session leader, redirects its
    stdio to /dev/null and the log, and supervises the command via child_run.
    Exits 2 (before forking) when no command is given.
    """
    ap = argparse.ArgumentParser(add_help=True)
    ap.add_argument("--log")
    ap.add_argument("--lock")
    ap.add_argument("--timeout", type=int, default=0)
    ap.add_argument("--label", default="job")
    # Everything after the known options is captured verbatim as the command.
    ap.add_argument("cmd", nargs=argparse.REMAINDER)
    args = ap.parse_args()

    cmd = args.cmd
    # Drop the conventional "--" separator if it was captured as the first item.
    if cmd and cmd[0] == "--":
        cmd = cmd[1:]
    if not cmd:
        print("gstack-detach: no command given (usage: gstack-detach [opts] -- CMD...)", file=sys.stderr)
        sys.exit(2)
    args.cmd = cmd

    # Explicit --log wins; otherwise derive a run-scoped path from the label.
    log = args.log or run_scoped_log(args.label)
    os.makedirs(os.path.dirname(log) or ".", exist_ok=True)
    # Create the log file up front so it exists by the time the path is printed.
    open(log, "ab").close()

    # Detach: fork so the launching shell returns immediately, then setsid in the
    # child to escape the harness's process group / controlling terminal.
    if os.fork() > 0:
        # flush BEFORE os._exit — os._exit skips stdio buffer flush, which would
        # otherwise drop this line and leave the caller without the log path.
        print(f"gstack-detach LOG {log}", flush=True)
        os._exit(0)
    os.setsid()
    # Child only from here on: stdin comes from /dev/null, stdout/stderr go to the log.
    devnull = os.open(os.devnull, os.O_RDWR)
    os.dup2(devnull, 0)
    lf = os.open(log, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o644)
    os.dup2(lf, 1)
    os.dup2(lf, 2)
    try:
        child_run(args, log)
    except Exception as e:  # never leave the log without a terminal marker
        log_line(log, f"### gstack-detach ERROR {e!r} ### {_now()}")
        log_line(log, f"### gstack-detach EXIT=error ### {_now()}")
    # The supervisor always exits 0; the run's outcome is reported only via the EXIT sentinel.
    os._exit(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -17,6 +17,9 @@
|
||||
# --check-mismatch detect meaningful gaps between declared and observed.
|
||||
# --migrate migrate builder-profile.jsonl → developer-profile.json.
|
||||
# Idempotent; archives the source file on success.
|
||||
# --log-session append a session entry (from /office-hours) to
|
||||
# sessions[] and update aggregates. Required fields:
|
||||
# date, mode. Silent skip on invalid input.
|
||||
#
|
||||
# Profile file: ~/.gstack/developer-profile.json (unified schema — see
|
||||
# docs/designs/PLAN_TUNING_V0.md). Event file: ~/.gstack/projects/{SLUG}/
|
||||
@@ -25,7 +28,8 @@ set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
|
||||
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"
|
||||
LEGACY_FILE="$GSTACK_HOME/builder-profile.jsonl"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
|
||||
@@ -154,6 +158,65 @@ ensure_profile() {
|
||||
EOF
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------
# Record session: append a session entry from /office-hours to sessions[]
# and update aggregates (signals_accumulated, resources_shown, topics).
# Fix for #1671: the writer side of the v1.0.0.0 migration. Reader and
# writer now share the same file.
# Silent skip on invalid input (matches gstack-timeline-log:22-26 pattern).
#
# $1: JSON object for the session; `date` and `mode` are required. Returns 0
# without writing anything when $1 is empty, is not parseable JSON, or lacks
# a required field.
# -----------------------------------------------------------------------
do_log_session() {
  local INPUT="${1:-}"
  if [ -z "$INPUT" ]; then
    return 0
  fi

  # Validate: input must be parseable JSON with required fields (date, mode).
  if ! printf '%s' "$INPUT" | bun -e "
    const j = JSON.parse(await Bun.stdin.text());
    if (!j.date || !j.mode) process.exit(1);
  " 2>/dev/null; then
    return 0
  fi

  ensure_profile

  # Write the updated profile to a temp file next to the real one, then rename
  # over it, so readers never observe a half-written profile.
  # The X's must be the LAST characters of the template: BSD/macOS mktemp only
  # randomizes trailing X's, so a ".XXXXXX.tmp" template would yield one fixed
  # literal filename there and concurrent runs would collide on it.
  local TMPOUT
  TMPOUT=$(mktemp "$GSTACK_HOME/developer-profile.json.tmp.XXXXXX")
  # Clean up the temp file if anything below aborts the script.
  trap 'rm -f "$TMPOUT"' EXIT

  # Values reach the bun script through the environment rather than being
  # interpolated into its source.
  PROFILE_FILE_PATH="$PROFILE_FILE" RECORD_INPUT="$INPUT" TMPOUT_PATH="$TMPOUT" bun -e "
    const fs = require('fs');
    const entry = JSON.parse(process.env.RECORD_INPUT);
    if (!entry.ts) entry.ts = new Date().toISOString();

    const profile = JSON.parse(fs.readFileSync(process.env.PROFILE_FILE_PATH, 'utf-8'));
    profile.sessions = profile.sessions || [];
    profile.sessions.push(entry);

    profile.signals_accumulated = profile.signals_accumulated || {};
    for (const s of (entry.signals || [])) {
      profile.signals_accumulated[s] = (profile.signals_accumulated[s] || 0) + 1;
    }

    profile.resources_shown = profile.resources_shown || [];
    const resSet = new Set(profile.resources_shown);
    for (const r of (entry.resources_shown || [])) resSet.add(r);
    profile.resources_shown = Array.from(resSet);

    profile.topics = profile.topics || [];
    const topicSet = new Set(profile.topics);
    for (const t of (entry.topics || [])) topicSet.add(t);
    profile.topics = Array.from(topicSet);

    fs.writeFileSync(process.env.TMPOUT_PATH, JSON.stringify(profile, null, 2));
  "

  mv "$TMPOUT" "$PROFILE_FILE"
  trap - EXIT
  # Best-effort sync notification: runs in the background with errors discarded.
  "$SCRIPT_DIR/gstack-brain-enqueue" "developer-profile.json" 2>/dev/null &
}
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Read: emit legacy KEY: VALUE output for /office-hours compat.
|
||||
# -----------------------------------------------------------------------
|
||||
@@ -168,14 +231,19 @@ do_read() {
|
||||
else if (count >= 4) tier = 'regular';
|
||||
else if (count >= 1) tier = 'welcome_back';
|
||||
|
||||
const last = sessions[count - 1] || {};
|
||||
const prev = sessions[count - 2] || {};
|
||||
// LAST_* / CROSS_PROJECT must reflect real sessions, not resource-tracking
|
||||
// events (the Phase 6 auto-append). Without this filter, a session's
|
||||
// resources entry written immediately after the real session would clobber
|
||||
// LAST_PROJECT/LAST_ASSIGNMENT/LAST_DESIGN_TITLE.
|
||||
const realSessions = sessions.filter(e => e.mode !== 'resources');
|
||||
const last = realSessions[realSessions.length - 1] || {};
|
||||
const prev = realSessions[realSessions.length - 2] || {};
|
||||
const crossProject = prev.project_slug && last.project_slug
|
||||
? prev.project_slug !== last.project_slug
|
||||
: false;
|
||||
|
||||
const designs = sessions.map(e => e.design_doc || '').filter(Boolean);
|
||||
const designTitles = sessions
|
||||
const designs = realSessions.map(e => e.design_doc || '').filter(Boolean);
|
||||
const designTitles = realSessions
|
||||
.map(e => (e.design_doc ? (e.project_slug || 'unknown') : ''))
|
||||
.filter(Boolean);
|
||||
|
||||
@@ -441,6 +509,7 @@ case "$CMD" in
|
||||
--vibe) do_vibe ;;
|
||||
--check-mismatch) do_check_mismatch ;;
|
||||
--migrate) do_migrate ;;
|
||||
--log-session) do_log_session "$@" ;;
|
||||
--help|-h) sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||' ;;
|
||||
*)
|
||||
echo "gstack-developer-profile: unknown subcommand '$CMD'" >&2
|
||||
|
||||
@@ -57,7 +57,7 @@ while IFS= read -r f; do
|
||||
*.md) DOCS=true ;;
|
||||
|
||||
# Config
|
||||
package.json|package-lock.json|yarn.lock|bun.lockb) CONFIG=true ;;
|
||||
package.json|package-lock.json|yarn.lock|bun.lock|bun.lockb) CONFIG=true ;;
|
||||
Gemfile|Gemfile.lock) CONFIG=true ;;
|
||||
*.yml|*.yaml) CONFIG=true ;;
|
||||
.github/*) CONFIG=true ;;
|
||||
@@ -75,7 +75,10 @@ while IFS= read -r f; do
|
||||
|
||||
# Backend: everything else that's code (excluding views/components already matched)
|
||||
*.rb|*.py|*.go|*.rs|*.java|*.php|*.ex|*.exs) BACKEND=true ;;
|
||||
*.ts|*.js) BACKEND=true ;; # Non-component TS/JS is backend
|
||||
# Non-component TS/JS is backend. Include ESM/CJS (.mjs/.cjs) and
|
||||
# explicit-module TS (.mts/.cts) — #1810: these matched no category, so an
|
||||
# ESM/CJS-only PR skipped the backend reviewer entirely.
|
||||
*.ts|*.js|*.mjs|*.cjs|*.mts|*.cts) BACKEND=true ;;
|
||||
esac
|
||||
done <<< "$FILES"
|
||||
|
||||
|
||||
Executable
+181
@@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env bash
|
||||
# gstack-distill-apply — apply a single distillation proposal after user Y.
|
||||
#
|
||||
# Plan-tune cathedral T11. Reads distillation-proposals.json, applies the
|
||||
# Nth proposal to the right surface:
|
||||
#
|
||||
# preference → gstack-question-preference --write
|
||||
# declared-nudge → atomic update to ~/.gstack/developer-profile.json declared
|
||||
# memory-nugget → append to ~/.gstack/free-text-memory.json (local fallback)
|
||||
#
|
||||
# Always confirm before calling this from the skill — the bin assumes the user
|
||||
# already approved (Codex #15 trust boundary). The skill template (/plan-tune
|
||||
# distill review section) handles the confirm UX.
|
||||
#
|
||||
# gbrain integration: when gbrain is configured, the skill template ALSO
|
||||
# invokes mcp__gbrain__put_page / extract_facts / add_tag in the same turn
|
||||
# (those are MCP tools, not CLI-callable). Pass --gbrain-published true to
|
||||
# mark the proposal as mirrored to gbrain. The local file always gets the
|
||||
# write so it's the durable source-of-truth even on machines without gbrain.
|
||||
#
|
||||
# Usage:
|
||||
# gstack-distill-apply --proposal <N> # apply Nth proposal
|
||||
# gstack-distill-apply --proposal <N> --gbrain-published true
|
||||
# gstack-distill-apply --list # show pending proposals
|
||||
set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
|
||||
SLUG="${SLUG:-unknown}"
|
||||
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
|
||||
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
|
||||
MEMORY_FILE="$GSTACK_HOME/free-text-memory.json"
|
||||
PROFILE_FILE="$GSTACK_HOME/developer-profile.json"
|
||||
|
||||
ACTION="apply"
|
||||
PROPOSAL_IDX=""
|
||||
GBRAIN_PUBLISHED="false"
|
||||
|
||||
# Parse command-line flags. --proposal and --gbrain-published each consume the
# following argument as their value; --list switches ACTION from the default
# "apply" to "list". Any unrecognized argument is an error.
while [ $# -gt 0 ]; do
  case "$1" in
    --proposal|--gbrain-published)
      # Guard the value explicitly: under `set -u` a missing "$2" would
      # otherwise abort with a cryptic "unbound variable" message.
      if [ $# -lt 2 ]; then
        echo "$1 requires a value" >&2
        exit 1
      fi
      if [ "$1" = "--proposal" ]; then
        PROPOSAL_IDX="$2"
      else
        GBRAIN_PUBLISHED="$2"
      fi
      shift 2
      ;;
    --list) ACTION="list"; shift ;;
    --help|-h)
      # Print the leading comment header of this script (up to `set -euo`).
      sed -n '1,/^set -euo/p' "$0" | sed 's|^# \?||'
      exit 0
      ;;
    *) echo "unknown arg: $1" >&2; exit 1 ;;
  esac
done
|
||||
|
||||
if [ ! -f "$PROPOSAL_FILE" ]; then
|
||||
echo "NO_PROPOSALS: $PROPOSAL_FILE missing — run gstack-distill-free-text first"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --list: print every proposal in the proposals file with its zero-based index
# (the same index --proposal expects), kind, confidence, rationale, the
# kind-specific fields and any source quotes, then exit 0 without applying
# anything. The file path reaches the bun script through the environment.
if [ "$ACTION" = "list" ]; then
  PROPOSAL_FILE_PATH="$PROPOSAL_FILE" bun -e '
    const fs = require("fs");
    const p = JSON.parse(fs.readFileSync(process.env.PROPOSAL_FILE_PATH, "utf-8"));
    const proposals = p.proposals || [];
    if (proposals.length === 0) { console.log("(no proposals)"); process.exit(0); }
    console.log("GENERATED: " + p.generated_at);
    console.log("SOURCE_EVENTS: " + (p.source_event_count || 0));
    proposals.forEach((pr, i) => {
      console.log("");
      console.log("[" + i + "] " + (pr.kind || "?") + " (confidence: " + (pr.confidence || "?") + ")");
      if (pr.rationale) console.log(" rationale: " + pr.rationale);
      if (pr.kind === "preference") {
        console.log(" question_id: " + pr.question_id);
        console.log(" preference: " + pr.preference);
      } else if (pr.kind === "declared-nudge") {
        console.log(" dimension: " + pr.dimension);
        console.log(" direction: " + pr.direction + " (" + (pr.magnitude || "?") + ")");
      } else if (pr.kind === "memory-nugget") {
        console.log(" nugget: " + pr.nugget);
        console.log(" signal_keys: " + JSON.stringify(pr.applies_to_signal_keys || []));
      }
      if (pr.source_quotes && pr.source_quotes.length) {
        console.log(" quotes:");
        pr.source_quotes.forEach((q) => console.log(" - \"" + q + "\""));
      }
    });
  '
  exit 0
fi
|
||||
|
||||
# Applying needs an explicit proposal index.
if [ -z "$PROPOSAL_IDX" ]; then
  echo "--proposal <N> required" >&2
  exit 1
fi

# Apply via bun. Each kind has its own surface:
#   memory-nugget  -> appended to the local memory file
#   preference     -> delegated to gstack-question-preference --write
#   declared-nudge -> declared.<dimension> adjusted in the profile file
# A proposal whose kind is none of these is rejected with exit 1 and is NOT
# stamped as applied, so it cannot be silently consumed without any effect.
# NOTE: the embedded script is single-quoted; keep apostrophes out of it.
mkdir -p "$PROJECT_DIR"
PROPOSAL_IDX="$PROPOSAL_IDX" \
PROPOSAL_FILE_PATH="$PROPOSAL_FILE" \
MEMORY_FILE_PATH="$MEMORY_FILE" \
PROFILE_FILE_PATH="$PROFILE_FILE" \
PREF_BIN="$SCRIPT_DIR/gstack-question-preference" \
GBRAIN_PUBLISHED="$GBRAIN_PUBLISHED" \
bun -e '
  const fs = require("fs");
  const { spawnSync } = require("child_process");

  // Write-then-rename so a crash mid-write never leaves a half-written file.
  const writeJsonAtomic = (target, value) => {
    const tmp = target + ".tmp";
    fs.writeFileSync(tmp, JSON.stringify(value, null, 2));
    fs.renameSync(tmp, target);
  };

  const idx = parseInt(process.env.PROPOSAL_IDX, 10);
  const p = JSON.parse(fs.readFileSync(process.env.PROPOSAL_FILE_PATH, "utf-8"));
  const proposals = p.proposals || [];
  if (!Number.isInteger(idx) || idx < 0 || idx >= proposals.length) {
    process.stderr.write("invalid --proposal index " + idx + " (have " + proposals.length + ")\n");
    process.exit(1);
  }
  const pr = proposals[idx];

  // Refuse anything this script has no handler for before touching any file.
  const KNOWN_KINDS = ["memory-nugget", "preference", "declared-nudge"];
  if (!pr || !KNOWN_KINDS.includes(pr.kind)) {
    process.stderr.write("unsupported proposal kind: " + JSON.stringify(pr ? pr.kind : pr) + "\n");
    process.exit(1);
  }

  const stamp = new Date().toISOString();

  // Memory-nugget: always write to local file (durable source-of-truth even
  // when gbrain is configured — gbrain is mirror, file is canon for the
  // PreToolUse hook injection path in Layer 8).
  if (pr.kind === "memory-nugget") {
    const memPath = process.env.MEMORY_FILE_PATH;
    let mem = { nuggets: [] };
    // A missing or unreadable memory file starts from an empty list.
    try { mem = JSON.parse(fs.readFileSync(memPath, "utf-8")); } catch {}
    if (!Array.isArray(mem.nuggets)) mem.nuggets = [];
    mem.nuggets.push({
      nugget: pr.nugget,
      applies_to_signal_keys: pr.applies_to_signal_keys || [],
      applied_at: stamp,
      gbrain_published: process.env.GBRAIN_PUBLISHED === "true",
      source_quotes: pr.source_quotes || [],
    });
    writeJsonAtomic(memPath, mem);
    console.log("APPLIED: memory-nugget appended to " + memPath);
  }

  // Preference: route through gstack-question-preference for the user-origin
  // gate + event audit trail. source=plan-tune is the allowed value since
  // the user opt-in came from inside /plan-tune.
  if (pr.kind === "preference") {
    const res = spawnSync(process.env.PREF_BIN, [
      "--write",
      JSON.stringify({
        question_id: pr.question_id,
        preference: pr.preference,
        source: "plan-tune",
        free_text: (pr.source_quotes || []).join(" | ").slice(0, 300),
      }),
    ], { encoding: "utf-8", stdio: ["ignore", "pipe", "pipe"], timeout: 5000 });
    // status is null when the child was killed (e.g. by the timeout), which
    // also counts as failure here.
    if (res.status !== 0) {
      process.stderr.write("preference apply failed: " + (res.stderr || res.stdout) + "\n");
      process.exit(1);
    }
    console.log("APPLIED: preference " + pr.question_id + " → " + pr.preference);
  }

  // Declared-nudge: atomic update to developer-profile.json declared. Magnitude
  // tiers: small=0.05, medium=0.10, large=0.15. Clamp to [0, 1].
  if (pr.kind === "declared-nudge") {
    const mag = { small: 0.05, medium: 0.10, large: 0.15 }[pr.magnitude || "small"] || 0.05;
    // Any direction other than "down" nudges upward.
    const delta = pr.direction === "down" ? -mag : mag;
    const profilePath = process.env.PROFILE_FILE_PATH;
    let profile = {};
    try { profile = JSON.parse(fs.readFileSync(profilePath, "utf-8")); } catch {}
    profile.declared = profile.declared || {};
    // Dimensions with no numeric value yet start from the midpoint 0.5.
    const cur = typeof profile.declared[pr.dimension] === "number" ? profile.declared[pr.dimension] : 0.5;
    const next = Math.max(0, Math.min(1, cur + delta));
    profile.declared[pr.dimension] = +next.toFixed(3);
    profile.declared_at = stamp;
    writeJsonAtomic(profilePath, profile);
    console.log("APPLIED: declared." + pr.dimension + " " + cur + " → " + profile.declared[pr.dimension]);
  }

  // Mark the proposal as applied so /plan-tune list shows it consumed.
  pr.applied_at = stamp;
  pr.gbrain_published = process.env.GBRAIN_PUBLISHED === "true";
  writeJsonAtomic(process.env.PROPOSAL_FILE_PATH, p);
'
|
||||
Executable
+272
@@ -0,0 +1,272 @@
|
||||
#!/usr/bin/env bash
|
||||
# gstack-distill-free-text — Layer 8 "dream cycle" batch distiller.
|
||||
#
|
||||
# Reads auq-other free-text events from this project's question-log.jsonl,
|
||||
# sends them to Claude via the Anthropic SDK, and writes structured proposals
|
||||
# the user can review via /plan-tune distill. Proposals require explicit
|
||||
# user Y before applying — never autonomous (Codex #15 trust boundary).
|
||||
#
|
||||
# Usage:
|
||||
# gstack-distill-free-text # sync, prompts at end
|
||||
# gstack-distill-free-text --background # spawn detached; results
|
||||
# # surface on next /plan-tune
|
||||
# gstack-distill-free-text --dry-run # show prompt, no API call
|
||||
# gstack-distill-free-text --status # show last-run stats
|
||||
#
|
||||
# No rate cap — the natural rate of free-text events (rare; user has to type
|
||||
# "Other" then content) bounds this loop already. Each Haiku call is ~$0.01,
|
||||
# so even a runaway at one-per-minute would be ~$14/day worst case. The
|
||||
# cumulative cost log at $GSTACK_STATE_ROOT/distill-cost.jsonl gives full
|
||||
# auditability via --status when you want it.
|
||||
# Per D6: Anthropic SDK direct call, fail-loud on missing ANTHROPIC_API_KEY.
|
||||
set -euo pipefail

# Locate this script and the directory one level above it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# State root precedence: GSTACK_STATE_ROOT, then GSTACK_HOME, then ~/.gstack.
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"

# The output of gstack-slug is eval-ed (presumably it defines SLUG — confirm
# against that helper). Its failure is tolerated; SLUG then falls back to
# "unknown".
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
SLUG="${SLUG:-unknown}"

# Per-project inputs/outputs, plus the cost log kept at the state root.
PROJECT_DIR="$GSTACK_HOME/projects/$SLUG"
LOG_FILE="$PROJECT_DIR/question-log.jsonl"
PROPOSAL_FILE="$PROJECT_DIR/distillation-proposals.json"
COST_LOG="$GSTACK_HOME/distill-cost.jsonl"
mkdir -p "$PROJECT_DIR"

# Only the first argument is inspected; no argument means a synchronous run.
MODE="sync"
case "${1:-}" in
  --background) MODE="background" ;;
  --dry-run) MODE="dry-run" ;;
  --status) MODE="status" ;;
  --help|-h)
    # Print this file from its first line through the "set -euo" line, with
    # the leading "# " (or bare "#") comment prefix removed. \{0,1\} is the
    # POSIX basic-regex way to say "optional"; \? is not defined by POSIX for
    # basic regexes, so it is avoided here.
    sed -n '1,/^set -euo/p' "$0" | sed 's|^# \{0,1\}||'
    exit 0
    ;;
  '') ;;
  *) echo "unknown arg: $1" >&2; exit 1 ;;
esac
|
||||
|
||||
# --- Status subcommand --------------------------------------------------
|
||||
|
||||
# --status: summarize the cost log for the current slug (run count, runs and
# spend for today by UTC date, total estimated spend, last run), then exit.
# NOTE: the embedded script is single-quoted; keep apostrophes out of it.
if [ "$MODE" = "status" ]; then
  COST_LOG_PATH="$COST_LOG" SLUG_PATH="$SLUG" bun -e '
    const fs = require("fs");
    const slug = process.env.SLUG_PATH;
    const path = process.env.COST_LOG_PATH;
    if (!fs.existsSync(path)) { console.log("no distill runs yet"); process.exit(0); }

    // Parse line by line. A blank or malformed line (for example a torn
    // append) is skipped instead of aborting the whole report.
    const mine = [];
    for (const line of fs.readFileSync(path, "utf-8").split("\n")) {
      if (!line.trim()) continue;
      let entry;
      try { entry = JSON.parse(line); } catch { continue; }
      if (entry && entry.slug === slug) mine.push(entry);
    }
    if (mine.length === 0) { console.log("no distill runs yet for slug=" + slug); process.exit(0); }

    const sumUsd = (rows) => rows.reduce((a, e) => a + (e.cost_usd_est || 0), 0);
    const totalUsd = sumUsd(mine);
    // "Today" is the UTC calendar date, matched against the ts prefix.
    const todayIso = new Date().toISOString().slice(0, 10);
    const today = mine.filter((e) => String(e.ts || "").startsWith(todayIso));
    const todayUsd = sumUsd(today);

    console.log("RUNS: " + mine.length);
    console.log("TODAY: " + today.length + " run(s), $" + todayUsd.toFixed(4));
    console.log("ESTIMATED_TOTAL_USD: $" + totalUsd.toFixed(4));
    const last = mine[mine.length - 1];
    console.log("LAST_RUN: " + (last.ts || "?") + " | " + (last.proposals_count || 0) + " proposals");
  '
  exit 0
fi
|
||||
|
||||
# --- Background mode: detach + invoke self synchronously ---------------
|
||||
|
||||
# --background: relaunch this same script with no arguments (a plain sync run)
# under nohup, with all output discarded, report the child pid and return
# immediately.
if [ "$MODE" = "background" ]; then
  nohup "$0" >/dev/null 2>&1 &
  SPAWNED_PID=$!
  echo "DISTILL_SPAWNED: pid=$SPAWNED_PID"
  exit 0
fi
|
||||
|
||||
# No rate cap. Natural input rate (free-text events are rare) + Haiku price
|
||||
# (~$0.01/run) keep this bounded. Use --status to audit spend.
|
||||
|
||||
# --- Gather unprocessed auq-other events from this project -------------
|
||||
|
||||
# Nothing to do when this project has no question log yet.
if [ ! -f "$LOG_FILE" ]; then
  echo "NO_LOG: no question-log.jsonl in $PROJECT_DIR"
  exit 0
fi

# Collect pending events as a JSON array: entries with source "auq-other", a
# non-empty free_text and no distilled_at stamp. Lines that do not parse (or
# do not hold an object) are ignored. Only the fields the prompt needs are kept.
# NOTE: the embedded script is single-quoted; keep apostrophes out of it.
EVENTS_JSON=$(LOG_FILE_PATH="$LOG_FILE" bun -e '
    const fs = require("fs");
    const raw = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8");
    const pending = raw.trim().split("\n").filter(Boolean).flatMap((line) => {
      try {
        const ev = JSON.parse(line);
        const wanted = ev.source === "auq-other" && !ev.distilled_at && ev.free_text;
        if (!wanted) return [];
        return [{
          ts: ev.ts,
          question_id: ev.question_id,
          question_summary: ev.question_summary,
          free_text: ev.free_text,
          session_id: ev.session_id,
        }];
      } catch {
        return [];
      }
    });
    process.stdout.write(JSON.stringify(pending));
')

# Count the collected events; an empty batch ends the run successfully.
EVENT_COUNT=$(printf '%s' "$EVENTS_JSON" | bun -e 'const a = JSON.parse(await Bun.stdin.text()); console.log(a.length);')
if [ "$EVENT_COUNT" -eq 0 ]; then
  echo "NO_FREE_TEXT: nothing to distill"
  exit 0
fi
|
||||
|
||||
# --- Build distill prompt ---------------------------------------------
|
||||
|
||||
# Heredoc into temp file (avoids $(cat <<'PROMPT'...) which choked the
|
||||
# bash parser on apostrophes elsewhere in the script).
|
||||
DISTILL_PROMPT_FILE=$(mktemp)
|
||||
trap 'rm -f "$DISTILL_PROMPT_FILE"' EXIT
|
||||
cat > "$DISTILL_PROMPT_FILE" <<'PROMPT'
|
||||
You are gstack dream-cycle distiller. Below are free-text responses the
|
||||
user typed into AskUserQuestion prompts (option "Other") across recent gstack
|
||||
sessions. For each response, extract structured signal that should update the
|
||||
user plan-tune profile or preferences.
|
||||
|
||||
Return strict JSON with this shape:
|
||||
{
|
||||
"proposals": [
|
||||
{
|
||||
"kind": "preference" | "declared-nudge" | "memory-nugget",
|
||||
"confidence": 0.0-1.0,
|
||||
"source_quotes": ["<verbatim quote 1>", "<verbatim quote 2>"],
|
||||
"question_id": "<id>",
|
||||
"preference": "never-ask" | "always-ask" | "ask-only-for-one-way",
|
||||
"dimension": "scope_appetite | risk_tolerance | detail_preference | autonomy | architecture_care",
|
||||
"direction": "up | down",
|
||||
"magnitude": "small | medium | large",
|
||||
"rationale": "<one sentence>",
|
||||
"nugget": "<one-line memory>",
|
||||
"applies_to_signal_keys": ["scope-appetite", "..."]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Reject any proposal where confidence < 0.7.
|
||||
- Quote VERBATIM from the user free_text. Never paraphrase a source quote.
|
||||
- A single user response may produce multiple proposals.
|
||||
- If nothing meaningful to extract, return {"proposals": []}.
|
||||
- No commentary outside the JSON.
|
||||
PROMPT
|
||||
DISTILL_PROMPT=$(cat "$DISTILL_PROMPT_FILE")
|
||||
|
||||
# --- Dry-run: emit prompt + events, exit ------------------------------
|
||||
|
||||
# --dry-run: show the prompt and the pretty-printed pending events, then exit
# before the API-key check and the API call.
if [ "$MODE" = "dry-run" ]; then
  printf '%s\n' "=== DISTILL PROMPT ===" "$DISTILL_PROMPT" "" "=== EVENTS ($EVENT_COUNT) ==="
  printf '%s\n' "$EVENTS_JSON" | bun -e 'console.log(JSON.stringify(JSON.parse(await Bun.stdin.text()), null, 2));'
  exit 0
fi
|
||||
|
||||
# --- SDK call: fail-loud on missing key -------------------------------
|
||||
|
||||
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
|
||||
cat <<EOF >&2
|
||||
gstack-distill-free-text: ANTHROPIC_API_KEY not set.
|
||||
|
||||
Dream-cycle distillation needs an API key for the SDK call. Set
|
||||
ANTHROPIC_API_KEY in your environment, or run with --dry-run to see
|
||||
what would be sent without actually calling.
|
||||
|
||||
Note: this is a separate billing/auth surface from your interactive
|
||||
Claude Code session (per Codex correction in D6).
|
||||
EOF
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run the SDK call in bun. On success stdout carries one JSON object:
# {proposals_count, rejected_low_confidence, input_tokens, output_tokens,
# cost_usd_est}. Side effects: replaces the proposals file and stamps
# distilled_at on the source events in question-log.jsonl. Both files are
# written to a temp file and renamed into place so an interrupted run does
# not leave either one half-written.
# NOTE: the embedded script is single-quoted; keep apostrophes out of it.
RESULT=$(EVENTS_JSON="$EVENTS_JSON" DISTILL_PROMPT="$DISTILL_PROMPT" \
  PROPOSAL_FILE_PATH="$PROPOSAL_FILE" LOG_FILE_PATH="$LOG_FILE" \
  ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
  bun --cwd "$ROOT_DIR" -e '
    const fs = require("fs");
    const Anthropic = require("@anthropic-ai/sdk").default;
    const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

    // Write-then-rename replacement of a file.
    const writeAtomic = (target, data) => {
      const tmp = target + ".tmp";
      fs.writeFileSync(tmp, data);
      fs.renameSync(tmp, target);
    };

    const events = JSON.parse(process.env.EVENTS_JSON);
    const prompt = process.env.DISTILL_PROMPT + "\n\nFREE-TEXT RESPONSES (JSON array):\n" + JSON.stringify(events, null, 2);

    // Pricing (Haiku 4.5 — cheap, fast, sufficient for structured extraction).
    // Per token, USD: input $0.001/1k = 1e-6, output $0.005/1k = 5e-6.
    const INPUT_PER_TOKEN = 1e-6;
    const OUTPUT_PER_TOKEN = 5e-6;

    const resp = await client.messages.create({
      model: "claude-haiku-4-5-20251001",
      max_tokens: 4096,
      messages: [{ role: "user", content: prompt }],
    });

    // Concatenate the text blocks of the reply; other block types are ignored.
    const text = resp.content.map((b) => (b.type === "text" ? b.text : "")).join("");

    // Strip optional fenced code blocks the model may wrap JSON in. Trim
    // first so a fence preceded by whitespace or a newline is still removed.
    const stripped = text.trim().replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
    let parsed;
    try { parsed = JSON.parse(stripped); } catch (e) {
      process.stderr.write("DISTILL: model returned non-JSON: " + text.slice(0, 200) + "\n");
      process.exit(1);
    }

    // Valid JSON that is not an object with a proposals array (for example
    // null) is treated as "no proposals" rather than crashing.
    const proposals = parsed !== null && typeof parsed === "object" && Array.isArray(parsed.proposals)
      ? parsed.proposals
      : [];
    // Keep only proposals with confidence >= 0.7 (model is told this rule;
    // double-check in case it slipped). Non-object entries are dropped too.
    const filtered = proposals.filter((p) => p !== null && typeof p === "object" && typeof p.confidence === "number" && p.confidence >= 0.7);

    // Write proposals file (overwrite — only the latest run is reviewable).
    writeAtomic(process.env.PROPOSAL_FILE_PATH, JSON.stringify({
      generated_at: new Date().toISOString(),
      source_event_count: events.length,
      proposals: filtered,
    }, null, 2));

    // Mark source events as distilled_at so they do not re-propose. The log
    // is re-read here and rewritten with distilled_at set on the matching
    // events. Match by ts + question_id. Lines that are blank, unparseable
    // or unmatched are carried over unchanged.
    const logPath = process.env.LOG_FILE_PATH;
    const distilledAt = new Date().toISOString();
    const matchKeys = new Set(events.map((e) => (e.ts || "") + "::" + (e.question_id || "")));
    const out = [];
    for (const ln of fs.readFileSync(logPath, "utf-8").split("\n")) {
      if (!ln.trim()) { out.push(ln); continue; }
      try {
        const e = JSON.parse(ln);
        const key = (e.ts || "") + "::" + (e.question_id || "");
        if (matchKeys.has(key)) {
          e.distilled_at = distilledAt;
          out.push(JSON.stringify(e));
        } else {
          out.push(ln);
        }
      } catch { out.push(ln); }
    }
    writeAtomic(logPath, out.join("\n"));

    // Cost estimate from usage tokens.
    const usage = resp.usage || {};
    const inTok = usage.input_tokens || 0;
    const outTok = usage.output_tokens || 0;
    const cost = inTok * INPUT_PER_TOKEN + outTok * OUTPUT_PER_TOKEN;

    process.stdout.write(JSON.stringify({
      proposals_count: filtered.length,
      rejected_low_confidence: proposals.length - filtered.length,
      input_tokens: inTok,
      output_tokens: outTok,
      cost_usd_est: cost,
    }));
')
|
||||
|
||||
# Append one JSON line to the cost log: ts and slug first, followed by the
# fields of RESULT (its outer braces are removed so the two can be spliced
# into a single object).
TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
RESULT_FIELDS="${RESULT#\{}"
RESULT_FIELDS="${RESULT_FIELDS%\}}"
printf '{"ts":"%s","slug":"%s",%s}\n' "$TS" "$SLUG" "$RESULT_FIELDS" >> "$COST_LOG"

# Human-readable completion summary on stdout.
printf '%s\n' "DISTILL_COMPLETE:" " proposals_file: $PROPOSAL_FILE" " $RESULT"
|
||||
@@ -18,7 +18,8 @@
|
||||
* "gstack_brain_sync_mode": "off"|"artifacts-only"|"full",
|
||||
* "gstack_brain_git": true|false,
|
||||
* "gstack_artifacts_remote": "https://..." | "",
|
||||
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db"
|
||||
* "gbrain_local_status": "ok"|"no-cli"|"missing-config"|"broken-config"|"broken-db",
|
||||
* "gbrain_pooler_mode": "transaction"|"session"|null
|
||||
* }
|
||||
*
|
||||
* Backward compatibility (per plan codex #5): the 9 pre-existing fields stay
|
||||
@@ -42,6 +43,7 @@ import {
|
||||
resolveGbrainBin,
|
||||
readGbrainVersion,
|
||||
} from "../lib/gbrain-local-status";
|
||||
import { isTransactionModePooler } from "../lib/gbrain-exec";
|
||||
|
||||
const STATE_DIR = process.env.GSTACK_HOME || join(userHome(), ".gstack");
|
||||
const SCRIPT_DIR = __dirname;
|
||||
@@ -98,6 +100,17 @@ function detectConfig(): { exists: boolean; engine: "pglite" | "postgres" | null
|
||||
return { exists: true, engine: null };
|
||||
}
|
||||
|
||||
// --- pooler mode detection (#1435) ---
|
||||
//
|
||||
// Reads DATABASE_URL from ~/.gbrain/config.json and checks whether it targets
|
||||
// a PgBouncer transaction-mode pooler (port 6543). Surfaced so /sync-gbrain
|
||||
// and /setup-gbrain can advise users when search may require GBRAIN_PREPARE.
|
||||
/**
 * Classifies the pooler mode of the database URL stored in the gbrain config.
 *
 * @returns `null` when the config cannot be read or has no `database_url`;
 *   `"transaction"` when `isTransactionModePooler` accepts the URL; otherwise
 *   `"session"`. The declared `"unknown"` member is never produced by this
 *   implementation.
 */
function detectPoolerMode(): "transaction" | "session" | "unknown" | null {
  const config = tryReadJSON(GBRAIN_CONFIG) as { database_url?: string } | null;
  const url = config?.database_url;
  if (!url) {
    return null;
  }
  if (isTransactionModePooler(url)) {
    return "transaction";
  }
  return "session";
}
|
||||
|
||||
// --- gbrain doctor health (any nonzero exit or non-"ok"/"warnings" status → false) ---
|
||||
//
|
||||
// Uses --fast to avoid hanging on a dead DB. Per the local-status classifier
|
||||
@@ -215,9 +228,20 @@ function main(): void {
|
||||
gstack_brain_git: detectBrainGit(),
|
||||
gstack_artifacts_remote: detectArtifactsRemote(),
|
||||
gbrain_local_status: localEngineStatus({ noCache }),
|
||||
gbrain_pooler_mode: detectPoolerMode(),
|
||||
};
|
||||
|
||||
process.stdout.write(JSON.stringify(out, null, 2) + "\n");
|
||||
}
|
||||
|
||||
// --is-ok: live engine-status gate. Exits 0 iff gbrain is usable ("ok"), 1
|
||||
// otherwise. Runs detection live (never reads the possibly-stale
|
||||
// gbrain-detection.json), so callers — setup, bin/dev-setup, and
|
||||
// `gstack-config gbrain-refresh` — can decide whether to render the gbrain
|
||||
// :user variant without duplicating the JSON grep. Prints nothing on stdout.
|
||||
// With --is-ok the process ends here: exit code 0 when the engine status is
// "ok", 1 for any other status. GSTACK_DETECT_NO_CACHE=1 is forwarded to the
// status check as noCache.
if (process.argv.includes("--is-ok")) {
  const skipCache = process.env.GSTACK_DETECT_NO_CACHE === "1";
  const usable = localEngineStatus({ noCache: skipCache }) === "ok";
  process.exit(usable ? 0 : 1);
}
|
||||
|
||||
main();
|
||||
|
||||
+110
-11
@@ -19,9 +19,14 @@
|
||||
# - git
|
||||
# - network reachability to https://github.com
|
||||
#
|
||||
# The pinned commit is declared here rather than resolved dynamically so
|
||||
# upgrades are explicit and reviewable. Update PINNED_COMMIT when gstack
|
||||
# verifies compatibility with a new gbrain release.
|
||||
# gbrain installs at the latest default-branch HEAD by default — the hard pin
|
||||
# was removed in #1744 (it had drifted ~23 versions behind). Pass
|
||||
# --pinned-commit <sha> to install a specific commit for reproducibility. A
|
||||
# minimum-version floor (MIN_GBRAIN_VERSION) hard-fails the install when the
|
||||
# resulting gbrain is too old for gstack's sync integration, and a fast
|
||||
# `gbrain doctor` self-test hard-fails a broken install when gbrain is already
|
||||
# configured. This keeps the version gate that the pin used to provide without
|
||||
# freezing users 23 releases behind.
|
||||
#
|
||||
# Env:
|
||||
# GBRAIN_INSTALL_DIR — override default install path (~/gbrain)
|
||||
@@ -33,8 +38,14 @@
|
||||
set -euo pipefail
|
||||
|
||||
# --- defaults ---
|
||||
PINNED_COMMIT="08b3698e90532b7b66c445e6b1d8cdfe71822802" # gbrain v0.18.2
|
||||
PINNED_TAG="v0.18.2"
|
||||
# No version pin by default — install the latest default-branch HEAD (#1744).
|
||||
# --pinned-commit <sha> overrides for reproducibility.
|
||||
PINNED_COMMIT=""
|
||||
PINNED_TAG=""
|
||||
# Minimum gbrain version gstack's integration is known to work with. The
|
||||
# `sources list --json` wrapped-object shape + federated sources landed by 0.20;
|
||||
# older predates the surface gstack drives. Hard-fail below this floor (#1744).
|
||||
MIN_GBRAIN_VERSION="0.20.0"
|
||||
GBRAIN_REPO_URL="https://github.com/garrytan/gbrain.git"
|
||||
DEFAULT_INSTALL_DIR="${GBRAIN_INSTALL_DIR:-$HOME/gbrain}"
|
||||
INSTALL_DIR="$DEFAULT_INSTALL_DIR"
|
||||
@@ -113,7 +124,7 @@ elif [ -n "$DETECTED_CLONE" ]; then
|
||||
else
|
||||
# Fresh clone path.
|
||||
if $DRY_RUN; then
|
||||
log "DRY RUN: would clone $GBRAIN_REPO_URL @ $PINNED_COMMIT → $INSTALL_DIR"
|
||||
log "DRY RUN: would clone $GBRAIN_REPO_URL ${PINNED_COMMIT:+@ $PINNED_COMMIT }→ $INSTALL_DIR (latest HEAD unless --pinned-commit)"
|
||||
exit 0
|
||||
fi
|
||||
if [ -d "$INSTALL_DIR" ]; then
|
||||
@@ -121,8 +132,12 @@ else
|
||||
fi
|
||||
log "cloning $GBRAIN_REPO_URL → $INSTALL_DIR"
|
||||
git clone --quiet "$GBRAIN_REPO_URL" "$INSTALL_DIR"
|
||||
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
|
||||
log "pinned to $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
|
||||
if [ -n "$PINNED_COMMIT" ]; then
|
||||
( cd "$INSTALL_DIR" && git checkout --quiet "$PINNED_COMMIT" )
|
||||
log "checked out pinned commit $PINNED_COMMIT${PINNED_TAG:+ ($PINNED_TAG)}"
|
||||
else
|
||||
log "installed latest gbrain (default-branch HEAD)"
|
||||
fi
|
||||
fi
|
||||
|
||||
if $DRY_RUN; then
|
||||
@@ -131,9 +146,24 @@ if $DRY_RUN; then
|
||||
fi
|
||||
|
||||
# --- install + link ---
|
||||
# On Windows MSYS/Cygwin shells, bun's postinstall scripts (notably gbrain's
|
||||
# native-bindings setup) fail to parse path arguments correctly and abort
|
||||
# `bun install` with a non-zero exit. The package itself installs fine
|
||||
# without scripts, so detect Windows and pass --ignore-scripts there. The
|
||||
# `bun link` step below is unaffected.
|
||||
IS_WINDOWS=0
|
||||
case "$(uname -s)" in
|
||||
MINGW*|MSYS*|CYGWIN*|Windows_NT) IS_WINDOWS=1 ;;
|
||||
esac
|
||||
|
||||
if ! $VALIDATE_ONLY; then
|
||||
log "running bun install in $INSTALL_DIR"
|
||||
( cd "$INSTALL_DIR" && bun install --silent )
|
||||
if [ "$IS_WINDOWS" -eq 1 ]; then
|
||||
log "running bun install --ignore-scripts in $INSTALL_DIR (Windows shell detected)"
|
||||
( cd "$INSTALL_DIR" && bun install --silent --ignore-scripts )
|
||||
else
|
||||
log "running bun install in $INSTALL_DIR"
|
||||
( cd "$INSTALL_DIR" && bun install --silent )
|
||||
fi
|
||||
log "running bun link in $INSTALL_DIR"
|
||||
( cd "$INSTALL_DIR" && bun link --silent )
|
||||
fi
|
||||
@@ -179,5 +209,74 @@ if [ "$actual_norm" != "$expected_norm" ]; then
|
||||
fi
|
||||
|
||||
log "installed gbrain $actual_version from $INSTALL_DIR"
|
||||
|
||||
# --- minimum-version floor (#1744) ---
|
||||
# Unpinning means new installs track gbrain HEAD. Hard-fail if the resulting
|
||||
# version is below the floor gstack's sync integration needs — same exit-3 posture
|
||||
# as the PATH-shadow / version-mismatch failures above. A warning here is exactly
|
||||
# how the data-loss class slipped through, so this gate fails closed.
|
||||
# version_lt A B — succeeds (status 0) only when A sorts strictly before B
# under version ordering (sort -V). Equal strings are never less-than.
version_lt() {
  local lhs="$1" rhs="$2" lowest
  if [ "$lhs" = "$rhs" ]; then
    return 1
  fi
  # The smaller of the two is whichever sort -V emits first.
  lowest="$(printf '%s\n%s\n' "$lhs" "$rhs" | sort -V | head -1)"
  [ "$lowest" = "$lhs" ]
}
|
||||
if version_lt "$actual_norm" "$MIN_GBRAIN_VERSION"; then
|
||||
echo "" >&2
|
||||
echo "gstack-gbrain-install: gbrain $actual_version is below the minimum gstack-tested version ($MIN_GBRAIN_VERSION)." >&2
|
||||
echo " gstack's sync integration needs the v0.20+ source/list surface." >&2
|
||||
echo " Fix: update the gbrain clone at $INSTALL_DIR to a newer release (git pull), then" >&2
|
||||
echo " re-run /setup-gbrain. Or pass --pinned-commit <sha> to install a specific newer commit." >&2
|
||||
echo "" >&2
|
||||
exit 3
|
||||
fi
|
||||
|
||||
# --- functional self-test when gbrain is already configured (#1744) ---
|
||||
# When a brain config exists (re-install / detected clone), run a fast doctor as
|
||||
# a hard gate so a broken gbrain is caught at setup, not at data-loss time.
|
||||
# Pre-init installs skip this (config not written yet); the full
|
||||
# `/sync-gbrain --dry-run` self-test runs from /setup-gbrain after `gbrain init`.
|
||||
_GBRAIN_HOME_CHECK="${GBRAIN_HOME:-$HOME/.gbrain}"
|
||||
if [ -f "$_GBRAIN_HOME_CHECK/config.json" ]; then
|
||||
if ! gbrain doctor --fast >/dev/null 2>&1; then
|
||||
echo "" >&2
|
||||
echo "gstack-gbrain-install: gbrain $actual_version installed but 'gbrain doctor --fast' failed." >&2
|
||||
echo " Refusing to leave a broken gbrain in place. Run 'gbrain doctor' to see what's wrong," >&2
|
||||
echo " fix it, then re-run /setup-gbrain." >&2
|
||||
echo "" >&2
|
||||
exit 3
|
||||
fi
|
||||
log "gbrain doctor --fast passed"
|
||||
fi
|
||||
|
||||
# v1.40.0.0 post-install validation (T6 / codex review #19): --ignore-scripts
|
||||
# may skip artifacts gbrain needs at runtime, especially on Windows
|
||||
# MSYS/MINGW where we DID pass --ignore-scripts. `gbrain --version` above
|
||||
# already confirmed the binary runs; this second probe checks that the
|
||||
# subcommand surface is reachable (`sources` is the entry point the sync
|
||||
# stage hits first). If the probe fails, we warn but don't exit non-zero —
|
||||
# the user may still be able to use other commands.
|
||||
if ! gbrain sources --help >/dev/null 2>&1; then
|
||||
echo "" >&2
|
||||
echo "gstack-gbrain-install: WARNING — gbrain installed but 'gbrain sources --help' did not exit 0." >&2
|
||||
if [ "$IS_WINDOWS" -eq 1 ]; then
|
||||
echo " Windows shells skip bun postinstall scripts; some gbrain features may need native build tools." >&2
|
||||
echo " If /sync-gbrain fails to find subcommands, install gbrain from a non-MSYS shell," >&2
|
||||
echo " or run: cd $INSTALL_DIR && bun install (without --ignore-scripts)" >&2
|
||||
else
|
||||
echo " This may be a transient gbrain CLI issue or a missing native dependency." >&2
|
||||
echo " If /sync-gbrain fails, re-run: cd $INSTALL_DIR && bun install" >&2
|
||||
fi
|
||||
echo "" >&2
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"
|
||||
if [ -n "${VOYAGE_API_KEY:-}" ]; then
|
||||
echo "Next: gbrain init --pglite --embedding-model voyage:voyage-code-3 --embedding-dimensions 1024"
|
||||
echo " (or run /setup-gbrain for the full setup flow)"
|
||||
else
|
||||
echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)"
|
||||
echo ""
|
||||
echo "Tip: set VOYAGE_API_KEY before init to use voyage-code-3 (best embedding"
|
||||
echo "model for code retrieval on Voyage). Without it, gbrain falls back to its"
|
||||
echo "auto-selected provider (OpenAI when OPENAI_API_KEY is set, etc.)."
|
||||
fi
|
||||
|
||||
@@ -27,8 +27,22 @@
|
||||
# restore), D16 (pooler URL paste hygiene with redacted preview).
|
||||
|
||||
# _gstack_gbrain_validate_varname <name> — returns 0 if usable, 2 otherwise.
|
||||
# `local LC_ALL=C` is load-bearing twice over:
|
||||
# 1. In many macOS shells the default locale (e.g. en_US.UTF-8) makes `case`
|
||||
# glob brackets like `[A-Z]` match lowercase letters too. Without the
|
||||
# LC_ALL=C pin, names like `lower-case` pass validation and then trip
|
||||
# `printf -v "$varname"` and `export "$varname"` with "not a valid
|
||||
# identifier" errors the caller can't easily distinguish from other
|
||||
# failures.
|
||||
# 2. `local` is required because this file is documented as a sourced helper
|
||||
# (see header), so a bare `LC_ALL=C` would mutate the caller's locale for
|
||||
# the rest of the process — silently affecting downstream `sort`, `tr`,
|
||||
# and any locale-aware glob in the same shell.
|
||||
# Together they give ASCII-only bracket semantics on both macOS and Linux
|
||||
# (matching the documented `[A-Z_][A-Z0-9_]*` contract) without leaking.
|
||||
_gstack_gbrain_validate_varname() {
|
||||
local name="$1"
|
||||
local LC_ALL=C
|
||||
case "$name" in
|
||||
[A-Z_][A-Z0-9_]*) return 0 ;;
|
||||
*) return 2 ;;
|
||||
|
||||
@@ -339,7 +339,7 @@ cmd_pooler_url() {
|
||||
# Prefer the singular Session Pooler config when Supabase returns an
|
||||
# array (response shape can vary by project state). Fall back to the
|
||||
# first PRIMARY entry if no "session" pool_mode is present.
|
||||
local db_user db_host db_port db_name
|
||||
local db_user db_host db_port db_name pool_mode
|
||||
local first_or_session
|
||||
if printf '%s' "$resp" | jq -e 'type == "array"' >/dev/null 2>&1; then
|
||||
first_or_session=$(printf '%s' "$resp" | jq '[.[] | select(.pool_mode == "session")][0] // .[0]')
|
||||
@@ -351,11 +351,27 @@ cmd_pooler_url() {
|
||||
db_host=$(printf '%s' "$first_or_session" | jq -r '.db_host // empty')
|
||||
db_port=$(printf '%s' "$first_or_session" | jq -r '.db_port // empty')
|
||||
db_name=$(printf '%s' "$first_or_session" | jq -r '.db_name // empty')
|
||||
pool_mode=$(printf '%s' "$first_or_session" | jq -r '.pool_mode // empty')
|
||||
|
||||
if [ -z "$db_user" ] || [ -z "$db_host" ] || [ -z "$db_port" ] || [ -z "$db_name" ]; then
|
||||
die "pooler-url: missing pooler config fields (db_user/db_host/db_port/db_name); re-poll or check project state"
|
||||
fi
|
||||
|
||||
# Issue #1301: New Supabase projects' Management API returns a single
|
||||
# transaction-mode pooler at port 6543, but the shared pooler tenant
|
||||
# for fresh projects only listens on the session port 5432. Trusting
|
||||
# db_port verbatim makes `gbrain init` hang to TCP timeout (transaction
|
||||
# port unreachable) before falling into "tenant not found"-style errors
|
||||
# that look like auth bugs. Rewrite transaction/6543 -> session/5432.
|
||||
# Override with GSTACK_SUPABASE_TRUST_API_PORT=1 if a future API version
|
||||
# starts returning a working transaction port and this rewrite is wrong.
|
||||
if [ "${GSTACK_SUPABASE_TRUST_API_PORT:-0}" != "1" ] \
|
||||
&& [ "$pool_mode" = "transaction" ] && [ "$db_port" = "6543" ]; then
|
||||
echo "pooler-url: API returned transaction pooler (port 6543); shared pooler for new projects listens on session port 5432 — rewriting (set GSTACK_SUPABASE_TRUST_API_PORT=1 to disable)" >&2
|
||||
db_port=5432
|
||||
pool_mode="session"
|
||||
fi
|
||||
|
||||
local url="postgresql://${db_user}:${DB_PASS}@${db_host}:${db_port}/${db_name}"
|
||||
|
||||
if $json_mode; then
|
||||
|
||||
+948
-70
File diff suppressed because it is too large
Load Diff
@@ -273,16 +273,23 @@ function resolveClaudeCodeCwd(
|
||||
return null;
|
||||
}
|
||||
|
||||
function extractCwdFromJsonl(filePath: string): string | null {
|
||||
export function extractCwdFromJsonl(filePath: string): string | null {
|
||||
// Read a capped prefix so huge JSONL files don't blow up memory. 64KB
|
||||
// comfortably fits the largest observed session headers; the old 8KB cap
|
||||
// would sometimes fall inside a single long line and silently drop the
|
||||
// project (JSON.parse failure on the truncated tail).
|
||||
const MAX_BYTES = 64 * 1024;
|
||||
const MAX_LINES = 30;
|
||||
try {
|
||||
// Read only the first 8KB to avoid loading huge JSONL files into memory
|
||||
const fd = openSync(filePath, "r");
|
||||
const buf = Buffer.alloc(8192);
|
||||
const bytesRead = readSync(fd, buf, 0, 8192, 0);
|
||||
const buf = Buffer.alloc(MAX_BYTES);
|
||||
const bytesRead = readSync(fd, buf, 0, MAX_BYTES, 0);
|
||||
closeSync(fd);
|
||||
const text = buf.toString("utf-8", 0, bytesRead);
|
||||
const lines = text.split("\n").slice(0, 15);
|
||||
for (const line of lines) {
|
||||
// Drop the final segment — it may be an incomplete line at the cap boundary.
|
||||
const parts = text.split("\n");
|
||||
const completeLines = parts.length > 1 ? parts.slice(0, -1) : parts;
|
||||
for (const line of completeLines.slice(0, MAX_LINES)) {
|
||||
if (!line.trim()) continue;
|
||||
try {
|
||||
const obj = JSON.parse(line);
|
||||
|
||||
Executable
+39
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
|
||||
# gstack-ios-qa-daemon — Mac-side daemon that brokers tailnet/loopback traffic
|
||||
# to a connected iPhone running the in-app StateServer over the CoreDevice USB
|
||||
# tunnel. Single-instance via flock on ~/.gstack/ios-qa-daemon.pid.
|
||||
#
|
||||
# Usage:
|
||||
# gstack-ios-qa-daemon # loopback-only (local USB)
|
||||
# gstack-ios-qa-daemon --tailnet # additionally open tailnet listener
|
||||
#
|
||||
# Environment:
|
||||
# GSTACK_IOS_DAEMON_PORT — loopback listener port (default 9099)
|
||||
# GSTACK_IOS_TARGET_UDID — target iOS device UDID (optional; otherwise
|
||||
# the first paired connected device is used)
|
||||
# GSTACK_IOS_TARGET_BUNDLE_ID — bundle ID of the iOS app hosting StateServer
|
||||
# (default com.gstack.iosqa.fixture)
|
||||
#
|
||||
# Readiness protocol: prints `READY: port=<n> pid=<pid>` to stdout once both
|
||||
# listeners are bound. Spawners read the daemon's stdout with a ~5s timeout to confirm.
|
||||
#
|
||||
# Exits cleanly when no active loopback clients are connected AND no remote
|
||||
# session tokens are outstanding.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
GSTACK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
ENTRY="$GSTACK_DIR/ios-qa/daemon/src/index.ts"
|
||||
|
||||
if [ ! -f "$ENTRY" ]; then
|
||||
echo "gstack-ios-qa-daemon: missing $ENTRY (gstack install incomplete?)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -v bun >/dev/null 2>&1; then
|
||||
echo "gstack-ios-qa-daemon: bun runtime not on PATH — install from https://bun.sh" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exec bun run "$ENTRY" "$@"
|
||||
Executable
+28
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env bash
|
||||
# gstack-ios-qa-mint — manage the tailnet allowlist for remote iOS QA agents.
|
||||
#
|
||||
# This is the owner-grant path: it writes identities into the local allowlist
|
||||
# so a remote agent on the tailnet can self-service mint a session token via
|
||||
# POST /auth/mint against the daemon.
|
||||
#
|
||||
# Run `gstack-ios-qa-mint --help` for full usage.
|
||||
#
|
||||
# Allowlist file: ~/.gstack/ios-qa-allowlist.json (mode 0600).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
GSTACK_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
ENTRY="$GSTACK_DIR/ios-qa/daemon/src/cli-mint.ts"
|
||||
|
||||
if [ ! -f "$ENTRY" ]; then
|
||||
echo "gstack-ios-qa-mint: missing $ENTRY (gstack install incomplete?)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -v bun >/dev/null 2>&1; then
|
||||
echo "gstack-ios-qa-mint: bun runtime not on PATH — install from https://bun.sh" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exec bun run "$ENTRY" "$@"
|
||||
+10
-3
@@ -53,18 +53,25 @@ for path in paths:
|
||||
continue
|
||||
if line in seen:
|
||||
continue
|
||||
# Prefer ISO ts field for sort; fall back to SHA-256.
|
||||
# Prefer ISO ts field for sort; fall back to SHA-256. The line
|
||||
# content is the final tiebreaker so the order is total: two
|
||||
# entries sharing a ts must resolve identically regardless of
|
||||
# which side they arrive on. Without it, equal-ts entries fall
|
||||
# back to insertion order (base, ours, theirs), and since ours
|
||||
# and theirs are swapped depending on which machine runs the
|
||||
# merge, the two sides produce divergent files that never
|
||||
# converge.
|
||||
sort_key = None
|
||||
try:
|
||||
obj = json.loads(line)
|
||||
ts = obj.get('ts') or obj.get('timestamp')
|
||||
if isinstance(ts, str):
|
||||
sort_key = (0, ts)
|
||||
sort_key = (0, ts, line)
|
||||
except (json.JSONDecodeError, ValueError, TypeError):
|
||||
pass
|
||||
if sort_key is None:
|
||||
h = hashlib.sha256(line.encode('utf-8')).hexdigest()
|
||||
sort_key = (1, h)
|
||||
sort_key = (1, h, line)
|
||||
seen[line] = sort_key
|
||||
except FileNotFoundError:
|
||||
# Absent base / absent ours / absent theirs are all valid.
|
||||
|
||||
@@ -15,6 +15,7 @@ INPUT="$1"
|
||||
|
||||
# Validate and sanitize input
|
||||
VALIDATED=$(printf '%s' "$INPUT" | bun -e "
|
||||
import { hasInjection } from '$SCRIPT_DIR/../lib/jsonl-store.ts';
|
||||
const raw = await Bun.stdin.text();
|
||||
let j;
|
||||
try { j = JSON.parse(raw); } catch { process.stderr.write('gstack-learnings-log: invalid JSON, skipping\n'); process.exit(1); }
|
||||
@@ -47,27 +48,11 @@ if (j.source && !ALLOWED_SOURCES.includes(j.source)) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Content sanitization: strip instruction-like patterns from insight field
|
||||
// These patterns could be used for prompt injection when learnings are loaded into agent context
|
||||
if (j.insight) {
|
||||
const INJECTION_PATTERNS = [
|
||||
/ignore\s+(all\s+)?previous\s+(instructions|context|rules)/i,
|
||||
/you\s+are\s+now\s+/i,
|
||||
/always\s+output\s+no\s+findings/i,
|
||||
/skip\s+(all\s+)?(security|review|checks)/i,
|
||||
/override[:\s]/i,
|
||||
/\bsystem\s*:/i,
|
||||
/\bassistant\s*:/i,
|
||||
/\buser\s*:/i,
|
||||
/do\s+not\s+(report|flag|mention)/i,
|
||||
/approve\s+(all|every|this)/i,
|
||||
];
|
||||
for (const pat of INJECTION_PATTERNS) {
|
||||
if (pat.test(j.insight)) {
|
||||
process.stderr.write('gstack-learnings-log: insight contains suspicious instruction-like content, rejected\n');
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
// Content sanitization: shared injection patterns (lib/jsonl-store.ts, D2A) —
|
||||
// one audited list across learnings + decisions, no drift.
|
||||
if (j.insight && hasInjection(j.insight)) {
|
||||
process.stderr.write('gstack-learnings-log: insight contains suspicious instruction-like content, rejected\n');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Inject timestamp if not present
|
||||
|
||||
+39
-15
@@ -27,35 +27,53 @@ done
|
||||
|
||||
LEARNINGS_FILE="$GSTACK_HOME/projects/$SLUG/learnings.jsonl"
|
||||
|
||||
# Collect all JSONL files to search
|
||||
FILES=()
|
||||
[ -f "$LEARNINGS_FILE" ] && FILES+=("$LEARNINGS_FILE")
|
||||
# Collect cross-project JSONL files separately so the trust gate can distinguish
|
||||
# current-project rows from rows loaded from other projects.
|
||||
CROSS_FILES=()
|
||||
|
||||
if [ "$CROSS_PROJECT" = true ]; then
|
||||
# Add other projects' learnings (max 5, sorted by mtime)
|
||||
for f in $(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null | head -5); do
|
||||
FILES+=("$f")
|
||||
done
|
||||
# Add other projects' learnings (max 5)
|
||||
while IFS= read -r f; do
|
||||
CROSS_FILES+=("$f")
|
||||
[ ${#CROSS_FILES[@]} -ge 5 ] && break
|
||||
done < <(find "$GSTACK_HOME/projects" -name "learnings.jsonl" -not -path "*/$SLUG/*" 2>/dev/null)
|
||||
fi
|
||||
|
||||
if [ ${#FILES[@]} -eq 0 ]; then
|
||||
if [ ! -f "$LEARNINGS_FILE" ] && [ ${#CROSS_FILES[@]} -eq 0 ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
emit_tagged_file() {
|
||||
local tag="$1"
|
||||
local file="$2"
|
||||
local line
|
||||
while IFS= read -r line || [ -n "$line" ]; do
|
||||
[ -n "$line" ] && printf '%s\t%s\n' "$tag" "$line"
|
||||
done < "$file"
|
||||
}
|
||||
|
||||
# Process all files through bun for JSON parsing, decay, dedup, filtering
|
||||
GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" \
|
||||
cat "${FILES[@]}" 2>/dev/null | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_SLUG="$SLUG" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
|
||||
{
|
||||
[ -f "$LEARNINGS_FILE" ] && emit_tagged_file current "$LEARNINGS_FILE"
|
||||
if [ ${#CROSS_FILES[@]} -gt 0 ]; then
|
||||
for f in "${CROSS_FILES[@]}"; do
|
||||
emit_tagged_file cross "$f"
|
||||
done
|
||||
fi
|
||||
} | GSTACK_SEARCH_TYPE="$TYPE" GSTACK_SEARCH_QUERY="$QUERY" GSTACK_SEARCH_LIMIT="$LIMIT" GSTACK_SEARCH_CROSS="$CROSS_PROJECT" bun -e "
|
||||
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
|
||||
const now = Date.now();
|
||||
const type = process.env.GSTACK_SEARCH_TYPE || '';
|
||||
const queryRaw = (process.env.GSTACK_SEARCH_QUERY || '').toLowerCase();
|
||||
const queryTokens = queryRaw.split(/\s+/).filter(Boolean);
|
||||
const limit = parseInt(process.env.GSTACK_SEARCH_LIMIT || '10', 10);
|
||||
const slug = process.env.GSTACK_SEARCH_SLUG || '';
|
||||
|
||||
const entries = [];
|
||||
for (const line of lines) {
|
||||
for (const taggedLine of lines) {
|
||||
try {
|
||||
const tabIndex = taggedLine.indexOf('\t');
|
||||
const sourceTag = tabIndex === -1 ? 'current' : taggedLine.slice(0, tabIndex);
|
||||
const line = tabIndex === -1 ? taggedLine : taggedLine.slice(tabIndex + 1);
|
||||
const e = JSON.parse(line);
|
||||
if (!e.key || !e.type) continue;
|
||||
|
||||
@@ -69,13 +87,19 @@ for (const line of lines) {
|
||||
|
||||
// Determine if this is from the current project or cross-project
|
||||
// Cross-project entries are tagged for display
|
||||
const isCrossProject = !line.includes(slug) && process.env.GSTACK_SEARCH_CROSS === 'true';
|
||||
const isCrossProject = sourceTag === 'cross';
|
||||
e._crossProject = isCrossProject;
|
||||
|
||||
// Trust gate: cross-project learnings only loaded if trusted (user-stated)
|
||||
// Trust gate: cross-project learnings only loaded if trusted (user-stated).
|
||||
// This prevents prompt injection from one project's AI-generated learnings
|
||||
// silently influencing reviews in another project.
|
||||
if (isCrossProject && e.trusted === false) continue;
|
||||
// #1745: this is an ALLOWLIST, not a denylist. The old equals-false check
|
||||
// admitted any row where trusted is missing/undefined (legacy rows written
|
||||
// before the field existed, hand-edited rows, rows from other tools).
|
||||
// Require trusted to be exactly true. NOTE: this whole block is a
|
||||
// double-quoted bun -e string, so bash still does command substitution
|
||||
// inside it. Keep backticks and dollar-paren out of these comments.
|
||||
if (isCrossProject && e.trusted !== true) continue;
|
||||
|
||||
entries.push(e);
|
||||
} catch {}
|
||||
|
||||
+228
-39
@@ -54,7 +54,7 @@ import {
|
||||
rmSync,
|
||||
} from "fs";
|
||||
import { join, basename, dirname } from "path";
|
||||
import { execSync, execFileSync, spawnSync, spawn, type ChildProcess } from "child_process";
|
||||
import { execFileSync, spawnSync, spawn, type ChildProcess } from "child_process";
|
||||
import { homedir } from "os";
|
||||
import { createHash } from "crypto";
|
||||
|
||||
@@ -64,6 +64,8 @@ import {
|
||||
detectEngineTier,
|
||||
withErrorContext,
|
||||
} from "../lib/gstack-memory-helpers";
|
||||
import { execGbrainText, spawnGbrainAsync } from "../lib/gbrain-exec";
|
||||
import { checkOwnedStagingDir, STAGING_MARKER } from "../lib/staging-guard";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -193,7 +195,7 @@ Options:
|
||||
--all-history Walk transcripts older than 90 days too.
|
||||
--sources <list> Comma-separated subset: ${ALL_TYPES.join(",")}
|
||||
--limit <N> Stop after N pages written (smoke testing).
|
||||
--no-write Skip gbrain put_page calls (still updates state file).
|
||||
--no-write Skip gbrain put calls (still updates state file).
|
||||
Used by tests + dry runs without actual ingest.
|
||||
--scan-secrets Opt-in per-file gitleaks scan during prepare. Off by
|
||||
default; gstack-brain-sync already gates the git-push
|
||||
@@ -809,16 +811,14 @@ let _gbrainAvailability: boolean | null = null;
|
||||
function gbrainAvailable(): boolean {
|
||||
if (_gbrainAvailability !== null) return _gbrainAvailability;
|
||||
try {
|
||||
execSync("command -v gbrain", { stdio: "ignore" });
|
||||
// Probe `--help` for the `import` subcommand. gbrain v0.20.0+ ships
|
||||
// `import <dir>` (batch markdown import via path-authoritative slugs).
|
||||
// If absent, we surface a single clean error here rather than failing
|
||||
// the whole stage with a confusing usage message from gbrain itself.
|
||||
const help = execFileSync("gbrain", ["--help"], {
|
||||
encoding: "utf-8",
|
||||
timeout: 5000,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
// `gbrain --help` probes only CLI availability, not DB connectivity, so
|
||||
// it doesn't strictly need DATABASE_URL. But routing through the helper
|
||||
// keeps the invariant test from chasing exceptions per call site.
|
||||
const help = execGbrainText(["--help"], { timeout: 5000 });
|
||||
_gbrainAvailability = /^\s+import\s/m.test(help);
|
||||
} catch {
|
||||
_gbrainAvailability = false;
|
||||
@@ -908,13 +908,23 @@ interface StagingResult {
|
||||
* Filename = `${slug}.md`. mkdir is recursive. Existing files overwrite.
|
||||
* Errors per-file are collected; the whole batch is best-effort.
|
||||
*/
|
||||
/**
 * Maps a prepared page's slug to its path relative to the staging directory.
 *
 * This is deliberately the only place that derives the key: writeStaged()
 * uses it while building the staged-path -> source map, and the resume path
 * (#1802 C4) uses it to rebuild that same map. If the two ever produced
 * different keys, readNewFailures() could no longer map gbrain's failures back
 * to their sources and failed files would be recorded as ingested.
 *
 * @param slug Page slug; may contain `/` segments (e.g. "transcripts/foo").
 * @returns The slug with a `.md` extension appended.
 */
export function stagedRelPath(slug: string): string {
  const markdownExt = ".md";
  return slug + markdownExt;
}
|
||||
|
||||
function writeStaged(prepared: PreparedPage[], stagingDir: string): StagingResult {
|
||||
mkdirSync(stagingDir, { recursive: true });
|
||||
const stagedPathToSource = new Map<string, string>();
|
||||
const errors: Array<{ slug: string; error: string }> = [];
|
||||
let written = 0;
|
||||
for (const p of prepared) {
|
||||
const relPath = `${p.slug}.md`;
|
||||
const relPath = stagedRelPath(p.slug);
|
||||
const absPath = join(stagingDir, relPath);
|
||||
try {
|
||||
mkdirSync(dirname(absPath), { recursive: true });
|
||||
@@ -979,7 +989,7 @@ function parseImportJson(stdout: string): ImportJsonResult | null {
|
||||
* staging-dir-relative filename gbrain saw (e.g. "transcripts/foo.md").
|
||||
* stagedPathToSource maps that back to the original source file.
|
||||
*/
|
||||
function readNewFailures(
|
||||
export function readNewFailures(
|
||||
syncFailuresPath: string,
|
||||
preImportOffset: number,
|
||||
stagedPathToSource: Map<string, string>,
|
||||
@@ -1062,7 +1072,7 @@ async function probeMode(args: CliArgs): Promise<ProbeReport> {
|
||||
}
|
||||
|
||||
// Per ED2: ~25-35 min for ~11.7K transcripts = ~150ms/page synchronous
|
||||
// (gitleaks + render + put_page + embedding). Scale linearly.
|
||||
// (gitleaks + render + put + embedding). Scale linearly.
|
||||
const estimateMinutes = Math.max(1, Math.round((newCount + updatedCount) * 0.15 / 60));
|
||||
|
||||
return {
|
||||
@@ -1199,6 +1209,17 @@ function preparePages(
|
||||
/**
 * Creates a fresh staging directory under GSTACK_HOME, named
 * `.staging-ingest-<pid>-<epoch-ms>`, and stamps it with the ownership marker.
 *
 * The marker (#1802) is what lets cleanupStagingDir() and decideResume() prove
 * the directory was created by this tool before any recursive delete or
 * resume. A directory without a marker would be refused by that guard forever,
 * so a marker write failure is fatal (#1802 C5): the partial directory is
 * removed on a best-effort basis and the original error is rethrown.
 *
 * @returns Absolute path of the newly created staging directory.
 * @throws Whatever error the marker write raised.
 */
function makeStagingDir(): string {
  const stagingPath = join(GSTACK_HOME, `.staging-ingest-${process.pid}-${Date.now()}`);
  mkdirSync(stagingPath, { recursive: true });
  try {
    // Marker body: creating pid, then creation time in epoch ms.
    writeFileSync(join(stagingPath, STAGING_MARKER), `${process.pid}\n${Date.now()}\n`, "utf-8");
    return stagingPath;
  } catch (markerError) {
    try {
      rmSync(stagingPath, { recursive: true, force: true });
    } catch {
      /* best-effort */
    }
    throw markerError;
  }
}
|
||||
|
||||
@@ -1260,8 +1281,21 @@ function isRemoteHttpMcpMode(): boolean {
|
||||
* cleanup failure.
|
||||
*/
|
||||
function cleanupStagingDir(dir: string): void {
|
||||
// #1802 deletion chokepoint: never recurse-delete a path we cannot PROVE we
|
||||
// own. A poisoned resume could otherwise route the repo root here.
|
||||
const verdict = checkOwnedStagingDir(dir, GSTACK_HOME);
|
||||
if (!verdict.ok) {
|
||||
console.error(
|
||||
`[gbrain] staging cleanup REFUSED: "${dir}" is not an owned staging dir ` +
|
||||
`(${verdict.reason}). Skipping rm -rf to prevent data loss (#1802).`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
// #1802 C5: delete the realpath-resolved dir the guard validated, not the
|
||||
// raw input — closes the TOCTOU gap where `dir` is a symlink swapped between
|
||||
// the check above and this rmSync. canonicalPath is always set when ok.
|
||||
rmSync(verdict.canonicalPath ?? dir, { recursive: true, force: true });
|
||||
} catch {
|
||||
// best-effort
|
||||
}
|
||||
@@ -1273,13 +1307,39 @@ function cleanupStagingDir(dir: string): void {
|
||||
* 1. forward the signal to the child (otherwise gbrain orphans, holds the
|
||||
* PGLite write lock, and burns CPU — observed during 2026-05-10 cold-run
|
||||
* testing)
|
||||
* 2. synchronously clean up the staging dir BEFORE process.exit (otherwise
|
||||
* finally blocks in async callers don't run after process.exit from
|
||||
* inside a signal handler, leaking the staging dir on every interrupt)
|
||||
* 2. PRESERVE the staging dir when gbrain has written an import-checkpoint
|
||||
* pointing at it (the next /sync-gbrain run can resume from
|
||||
* processedIndex+1). Otherwise synchronously clean up before
|
||||
* process.exit, since `finally` blocks in ingestPass never run after
|
||||
* process.exit fires from inside a signal handler.
|
||||
*
|
||||
* Resume semantics added for #1611: prior behavior unconditionally cleaned
|
||||
* up the staging dir on SIGTERM, so the gbrain checkpoint always pointed at
|
||||
* a missing dir and the next run had to restage from scratch.
|
||||
*/
|
||||
let _activeImportChild: ChildProcess | null = null;
|
||||
let _activeStagingDir: string | null = null;
|
||||
let _signalHandlersInstalled = false;
|
||||
|
||||
/**
 * Reports whether gbrain's import checkpoint refers to `stagingDir`.
 *
 * Reads `<home>/.gbrain/import-checkpoint.json` and compares its `dir` field
 * to `stagingDir` by exact string equality. A match means the next run can
 * resume against this staging directory.
 *
 * @param stagingDir Staging directory path to look for in the checkpoint.
 * @returns true only when the checkpoint exists, parses as JSON and its `dir`
 *   equals `stagingDir`; false for a missing, unreadable or malformed file.
 */
function stagingDirIsCheckpointed(stagingDir: string): boolean {
  try {
    // Prefer the HOME env var over homedir() so tests can redirect the lookup.
    const homeDir = process.env.HOME || homedir();
    const checkpointFile = join(homeDir, ".gbrain", "import-checkpoint.json");
    // A missing checkpoint makes readFileSync throw, which lands in the catch
    // below and yields false, the same answer as an explicit existence check.
    const checkpoint = JSON.parse(readFileSync(checkpointFile, "utf-8")) as { dir?: string };
    return checkpoint.dir === stagingDir;
  } catch {
    return false;
  }
}
|
||||
|
||||
function installSignalForwarder(): void {
|
||||
if (_signalHandlersInstalled) return;
|
||||
_signalHandlersInstalled = true;
|
||||
@@ -1291,11 +1351,24 @@ function installSignalForwarder(): void {
|
||||
// child may have already exited between the alive-check and the kill
|
||||
}
|
||||
}
|
||||
// Synchronously clean up the active staging dir before exiting. The async
|
||||
// `finally` blocks in ingestPass never run after process.exit fires from
|
||||
// inside this handler, so cleanup has to happen here.
|
||||
if (_activeStagingDir) {
|
||||
cleanupStagingDir(_activeStagingDir);
|
||||
if (stagingDirIsCheckpointed(_activeStagingDir)) {
|
||||
// Preserve for next-run resume. The orchestrator's decideResume()
|
||||
// (in gstack-gbrain-sync.ts) will see the checkpoint + dir and
|
||||
// re-invoke gbrain import against this same staging dir, picking
|
||||
// up from processedIndex+1. See #1611.
|
||||
try {
|
||||
process.stderr.write(
|
||||
`[memory-ingest] ${signal} received — preserving staging dir for resume: ${_activeStagingDir}\n`,
|
||||
);
|
||||
} catch {
|
||||
// best-effort: stderr may be closed already
|
||||
}
|
||||
} else {
|
||||
// No checkpoint pointing here — the import never reached gbrain or
|
||||
// crashed before writing one. Clean up so we don't leak the dir.
|
||||
cleanupStagingDir(_activeStagingDir);
|
||||
}
|
||||
_activeStagingDir = null;
|
||||
}
|
||||
// Re-raise to default action so the parent actually exits. Without this,
|
||||
@@ -1311,17 +1384,39 @@ function installSignalForwarder(): void {
|
||||
* that kill the child on parent SIGTERM/SIGINT. Returns the same shape as
|
||||
* spawnSync's result so the caller doesn't care which mode was used.
|
||||
*/
|
||||
// #1611: `gbrain import` is the long pole on big brains, so its timeout is
// configurable. This is the fallback when GSTACK_INGEST_TIMEOUT_MS is unset,
// empty or invalid: 30 minutes.
const DEFAULT_IMPORT_TIMEOUT_MS = 30 * 60 * 1000;

/**
 * Resolves the `gbrain import` timeout in milliseconds.
 *
 * The value is parsed as a base-10 integer with Number.parseInt and accepted
 * only inside the inclusive range 60000-86400000 ms (1 minute to 24 hours).
 * An unset or empty value silently yields the default; any other rejected
 * value logs a warning to stderr and yields the default.
 *
 * On timeout the child is SIGTERM'd, which preserves gbrain's
 * import-checkpoint.json so the next run can resume instead of restarting.
 *
 * @param raw Raw setting; defaults to `process.env.GSTACK_INGEST_TIMEOUT_MS`.
 * @returns The timeout to apply, in milliseconds.
 */
export function resolveImportTimeoutMs(
  raw: string | undefined = process.env.GSTACK_INGEST_TIMEOUT_MS,
): number {
  if (raw === undefined || raw === "") {
    return DEFAULT_IMPORT_TIMEOUT_MS;
  }
  const parsed = Number.parseInt(raw, 10);
  // NaN fails both comparisons, so unparsable input is rejected here too.
  const withinBounds = parsed >= 60_000 && parsed <= 86_400_000;
  if (withinBounds) {
    return parsed;
  }
  console.error(
    `[memory-ingest] GSTACK_INGEST_TIMEOUT_MS="${raw}" invalid (need 60000–86400000ms); using ${DEFAULT_IMPORT_TIMEOUT_MS}ms`,
  );
  return DEFAULT_IMPORT_TIMEOUT_MS;
}
|
||||
|
||||
function runGbrainImport(
|
||||
stagingDir: string,
|
||||
timeoutMs: number,
|
||||
): Promise<{ status: number | null; stdout: string; stderr: string }> {
|
||||
): Promise<{ status: number | null; stdout: string; stderr: string; timedOut: boolean }> {
|
||||
installSignalForwarder();
|
||||
return new Promise((resolve) => {
|
||||
const child = spawn(
|
||||
"gbrain",
|
||||
["import", stagingDir, "--no-embed", "--json"],
|
||||
{ stdio: ["ignore", "pipe", "pipe"] },
|
||||
);
|
||||
// Seed DATABASE_URL from gbrain's own config so this stage works
|
||||
// inside Next.js / Prisma / Rails projects with their own
|
||||
// .env.local (codex review #7 — defense in depth on top of the
|
||||
// parent gstack-gbrain-sync seeding the bun grandchild's env).
|
||||
const child = spawnGbrainAsync(["import", stagingDir, "--no-embed", "--json"]);
|
||||
_activeImportChild = child;
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
@@ -1347,6 +1442,7 @@ function runGbrainImport(
|
||||
status: timedOut ? null : status,
|
||||
stdout,
|
||||
stderr,
|
||||
timedOut,
|
||||
});
|
||||
});
|
||||
child.on("error", (err) => {
|
||||
@@ -1356,6 +1452,7 @@ function runGbrainImport(
|
||||
status: null,
|
||||
stdout,
|
||||
stderr: stderr + `\n[spawn-error] ${(err as Error).message}`,
|
||||
timedOut,
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1375,7 +1472,7 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
||||
if (args.noWrite) {
|
||||
// --no-write: skip the gbrain import call but still record state for
|
||||
// prepared pages (treat them as ingested for dedup purposes). Matches
|
||||
// the prior contract from --help: "Skip gbrain put_page calls (still
|
||||
// the prior contract from --help: "Skip gbrain put calls (still
|
||||
// updates state file)".
|
||||
const nowIso = new Date().toISOString();
|
||||
for (const p of prep.prepared) {
|
||||
@@ -1445,19 +1542,69 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
||||
// entirely. gstack-brain-sync push will pick the dir up via its allowlist
|
||||
// and the brain admin's pull job will index transcripts into the remote
|
||||
// brain. Local PGLite (if any) stays code-only.
|
||||
//
|
||||
// Resume branch for #1611: when the orchestrator sets
|
||||
// GSTACK_INGEST_RESUME_DIR (because gbrain's import-checkpoint.json points
|
||||
// at an existing dir from a prior SIGTERM'd run), reuse that staging dir
|
||||
// and skip the prepare/writeStaged phase entirely. gbrain's checkpoint
|
||||
// tells it where to resume.
|
||||
const remoteHttpMode = isRemoteHttpMcpMode();
|
||||
const stagingDir = remoteHttpMode
|
||||
? makePersistentTranscriptDir()
|
||||
: makeStagingDir();
|
||||
const resumeDir = process.env.GSTACK_INGEST_RESUME_DIR;
|
||||
// #1802 second entry point: this binary is runnable directly, so it must not
|
||||
// trust GSTACK_INGEST_RESUME_DIR just because it exists — a stale/poisoned env
|
||||
// could make us `gbrain import` (and later clean up) an arbitrary directory.
|
||||
// Prove ownership here too, independently of the orchestrator's decideResume.
|
||||
const resuming = !remoteHttpMode
|
||||
&& typeof resumeDir === "string"
|
||||
&& resumeDir.length > 0
|
||||
&& existsSync(resumeDir)
|
||||
&& checkOwnedStagingDir(resumeDir, GSTACK_HOME).ok;
|
||||
if (!remoteHttpMode && resumeDir && resumeDir.length > 0 && !resuming) {
|
||||
console.error(
|
||||
`[memory-ingest] ignoring GSTACK_INGEST_RESUME_DIR="${resumeDir}" — not a proven staging dir (#1802); staging fresh.`,
|
||||
);
|
||||
}
|
||||
const stagingDir = resuming
|
||||
? resumeDir!
|
||||
: remoteHttpMode
|
||||
? makePersistentTranscriptDir()
|
||||
: makeStagingDir();
|
||||
// Register staging dir with the signal forwarder so SIGTERM/SIGINT can
|
||||
// synchronously clean it up before process.exit (the async finally block
|
||||
// below does NOT run after a signal-handler exit). In remote-http mode we
|
||||
// skip registration — the dir is meant to persist.
|
||||
// either preserve (when gbrain checkpointed it) or synchronously clean up.
|
||||
// The async finally block below does NOT run after a signal-handler exit.
|
||||
// In remote-http mode we skip registration — the dir is meant to persist.
|
||||
if (!remoteHttpMode) {
|
||||
_activeStagingDir = stagingDir;
|
||||
}
|
||||
// #1802 C3: set when the import-timeout branch leaves a resumable checkpoint
|
||||
// pointing at this staging dir, so the finally preserves it for the next run
|
||||
// instead of deleting it (the SIGTERM forwarder's preserve branch only runs
|
||||
// when the PARENT is signalled, which an internal timeout never does).
|
||||
let preserveStaging = false;
|
||||
try {
|
||||
const staging = writeStaged(prep.prepared, stagingDir);
|
||||
let staging: StagingResult;
|
||||
if (resuming) {
|
||||
// Pages are already on disk from the previous run. Skip writeStaged.
|
||||
// The "written" count for the verdict reflects what's on disk now;
|
||||
// gbrain's import will skip already-completed entries via its own
|
||||
// checkpoint (processedIndex+1).
|
||||
if (!args.quiet) {
|
||||
console.error(
|
||||
`[memory-ingest] resuming previous staging dir ${stagingDir} (skipping prepare phase)`,
|
||||
);
|
||||
}
|
||||
// #1802 C4: reconstruct stagedPathToSource from the prepared pages so
|
||||
// readNewFailures() can still map gbrain's per-file failures back to
|
||||
// sources on resume. An empty map made every failed file fall through to
|
||||
// state-recording — i.e. silently marked ingested despite failing.
|
||||
const stagedPathToSource = new Map<string, string>();
|
||||
for (const p of prep.prepared) {
|
||||
stagedPathToSource.set(stagedRelPath(p.slug), p.source_path);
|
||||
}
|
||||
staging = { staging_dir: stagingDir, written: prep.prepared.length, errors: [], stagedPathToSource };
|
||||
} else {
|
||||
staging = writeStaged(prep.prepared, stagingDir);
|
||||
}
|
||||
failed += staging.errors.length;
|
||||
if (!args.quiet && staging.errors.length > 0) {
|
||||
for (const e of staging.errors.slice(0, 5)) {
|
||||
@@ -1543,13 +1690,42 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
||||
// spawn, parent termination orphans the gbrain process (observed
|
||||
// during 2026-05-10 cold-run testing — gbrain kept running 15 min
|
||||
// after the orchestrator timed out).
|
||||
const importResult = await runGbrainImport(stagingDir, 30 * 60 * 1000);
|
||||
const importResult = await runGbrainImport(stagingDir, resolveImportTimeoutMs());
|
||||
|
||||
const stdout = importResult.stdout || "";
|
||||
const stderr = importResult.stderr || "";
|
||||
const importJson = parseImportJson(stdout);
|
||||
|
||||
if (importResult.status !== 0) {
|
||||
// #1611/#1802 C3: on timeout, gbrain may have written
|
||||
// import-checkpoint.json so the next /sync-gbrain can resume. But an
|
||||
// INTERNAL timeout (runGbrainImport kills the child and returns here)
|
||||
// never signals the parent, so the SIGTERM forwarder's preserve branch
|
||||
// doesn't run — and the finally would otherwise delete the staging dir
|
||||
// despite a "checkpoint preserved" message. Mirror the forwarder: preserve
|
||||
// only when gbrain actually checkpointed against this dir; otherwise let
|
||||
// the finally clean up (nothing to resume) and say so honestly.
|
||||
if (importResult.timedOut) {
|
||||
const mins = Math.round(resolveImportTimeoutMs() / 60000);
|
||||
const checkpointed = stagingDirIsCheckpointed(stagingDir);
|
||||
const msg = checkpointed
|
||||
? `gbrain import timed out after ${mins}min; checkpoint preserved — re-run ` +
|
||||
`/sync-gbrain to resume (raise GSTACK_INGEST_TIMEOUT_MS for big brains)`
|
||||
: `gbrain import timed out after ${mins}min before writing a checkpoint; ` +
|
||||
`re-run /sync-gbrain to restage (raise GSTACK_INGEST_TIMEOUT_MS for big brains)`;
|
||||
if (checkpointed) preserveStaging = true;
|
||||
console.error(`[memory-ingest] ${msg}`);
|
||||
return {
|
||||
written: 0,
|
||||
skipped_secret: prep.skippedSecret,
|
||||
skipped_dedup: prep.skippedDedup,
|
||||
skipped_unattributed: prep.skippedUnattributed,
|
||||
failed,
|
||||
duration_ms: Date.now() - t0,
|
||||
partial_pages: prep.partialPages,
|
||||
system_error: msg,
|
||||
};
|
||||
}
|
||||
const tail = (stderr.trim().split("\n").pop() || "").slice(0, 300);
|
||||
const msg = `gbrain import exited ${importResult.status}: ${tail}`;
|
||||
console.error(`[memory-ingest] ERR: ${msg}`);
|
||||
@@ -1645,7 +1821,15 @@ async function ingestPass(args: CliArgs): Promise<BulkResult> {
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
cleanupStagingDir(stagingDir);
|
||||
// #1802 D1: in remote-http mode `stagingDir` is the PERSISTENT transcript
|
||||
// dir (makePersistentTranscriptDir, under ~/.gstack/transcripts/) that
|
||||
// gstack-brain-sync push must pick up — it is NOT a `.staging-ingest-*` dir
|
||||
// and must never be deleted here. The remote-http branch above already
|
||||
// documents this intent ("Skip the ... cleanupStagingDir paths"), but a
|
||||
// `finally` runs on its `return`, so the gate has to live here. Gating on
|
||||
// mode (rather than widening the ownership guard) keeps checkOwnedStagingDir
|
||||
// strict: it only ever sees `.staging-ingest-*` dirs.
|
||||
if (!remoteHttpMode && !preserveStaging) cleanupStagingDir(stagingDir);
|
||||
_activeStagingDir = null;
|
||||
}
|
||||
|
||||
@@ -1745,7 +1929,12 @@ async function main(): Promise<void> {
|
||||
if (result.system_error) process.exit(1);
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exit(1);
|
||||
});
|
||||
// Guard so the module is import-safe for unit tests (e.g. resolveImportTimeoutMs).
|
||||
// The orchestrator runs it as `bun gstack-memory-ingest.ts ...`, where
|
||||
// import.meta.main is true, so the CLI path is unaffected.
|
||||
if (import.meta.main) {
|
||||
main().catch((err) => {
|
||||
console.error(`gstack-memory-ingest fatal: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
* gstack-model-benchmark --prompt "hi" --models claude,gpt,gemini --dry-run
|
||||
*/
|
||||
|
||||
import '../lib/conductor-env-shim';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { runBenchmark, formatTable, formatJson, formatMarkdown, type BenchmarkInput } from '../test/helpers/benchmark-runner';
|
||||
@@ -39,16 +40,40 @@ const ADAPTER_FACTORIES = {
|
||||
|
||||
type OutputFormat = 'table' | 'json' | 'markdown';
|
||||
|
||||
const CLI_ARGS = process.argv.slice(2);
|
||||
const VALUE_FLAGS = new Set(['--models', '--prompt', '--workdir', '--timeout-ms', '--output']);
|
||||
|
||||
function arg(name: string, def?: string): string | undefined {
|
||||
const idx = process.argv.findIndex(a => a === name || a.startsWith(name + '='));
|
||||
const idx = CLI_ARGS.findIndex(a => a === name || a.startsWith(name + '='));
|
||||
if (idx < 0) return def;
|
||||
const eqIdx = process.argv[idx].indexOf('=');
|
||||
if (eqIdx >= 0) return process.argv[idx].slice(eqIdx + 1);
|
||||
return process.argv[idx + 1];
|
||||
const eqIdx = CLI_ARGS[idx].indexOf('=');
|
||||
if (eqIdx >= 0) return CLI_ARGS[idx].slice(eqIdx + 1);
|
||||
return CLI_ARGS[idx + 1];
|
||||
}
|
||||
|
||||
function flag(name: string): boolean {
|
||||
return process.argv.includes(name);
|
||||
return CLI_ARGS.includes(name);
|
||||
}
|
||||
|
||||
/**
 * Extract the positional (non-flag) arguments from a CLI argument list.
 *
 * - A bare `--` ends flag parsing: everything after it is positional verbatim.
 * - Any token starting with `--` is a flag and is dropped.
 * - A flag listed in VALUE_FLAGS that is written without `=` also swallows the
 *   following token as its value, so `--prompt hi` does not leak `hi` into the
 *   positional list.
 *
 * @param args Raw CLI arguments (typically `process.argv.slice(2)`).
 * @returns The positional arguments, in their original order.
 */
function positionalArgs(args: string[]): string[] {
  const collected: string[] = [];
  let cursor = 0;
  while (cursor < args.length) {
    const token = args[cursor];
    cursor += 1;

    if (token === '--') {
      // Explicit terminator: the remainder is positional, even if it looks like flags.
      return collected.concat(args.slice(cursor));
    }

    if (!token.startsWith('--')) {
      collected.push(token);
      continue;
    }

    // `--flag=value` carries its own value; only the space-separated form
    // consumes the next token, and only when there is a next token.
    const takesSeparateValue = !token.includes('=') && VALUE_FLAGS.has(token);
    if (takesSeparateValue && cursor < args.length) {
      cursor += 1;
    }
  }
  return collected;
}
|
||||
|
||||
function parseProviders(s: string | undefined): Array<'claude' | 'gpt' | 'gemini'> {
|
||||
@@ -78,7 +103,7 @@ function resolvePrompt(positional: string | undefined): string {
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const positional = process.argv.slice(2).find(a => !a.startsWith('--'));
|
||||
const positional = positionalArgs(CLI_ARGS)[0];
|
||||
const prompt = resolvePrompt(positional);
|
||||
const providers = parseProviders(arg('--models'));
|
||||
const workdir = arg('--workdir', process.cwd())!;
|
||||
|
||||
+61
-20
@@ -10,7 +10,14 @@
|
||||
//
|
||||
// Usage:
|
||||
// gstack-next-version --base <branch> --bump <major|minor|patch|micro> \
|
||||
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] [--json]
|
||||
// --current-version <X.Y.Z.W> [--workspace-root <path>|null] \
|
||||
// [--version-path <path>] [--json]
|
||||
//
|
||||
// VERSION path resolution (monorepo support):
|
||||
// 1. --version-path <path> CLI flag (highest priority)
|
||||
// 2. .gstack/version-path file at the repo root (single-line relative path,
|
||||
// committed so all collaborators benefit)
|
||||
// 3. "VERSION" at the repo root (default, backward-compatible)
|
||||
//
|
||||
// Exit codes:
|
||||
// 0 — emitted JSON successfully (may include "offline":true or "host":"unknown")
|
||||
@@ -45,6 +52,7 @@ type Output = {
|
||||
version: string;
|
||||
current_version: string;
|
||||
base_version: string;
|
||||
version_path: string;
|
||||
bump: Bump;
|
||||
host: "github" | "gitlab" | "unknown";
|
||||
offline: boolean;
|
||||
@@ -114,6 +122,28 @@ function runCommand(cmd: string, args: string[], timeoutMs = 15000): { ok: boole
|
||||
};
|
||||
}
|
||||
|
||||
// VERSION-path resolution for monorepos. Priority: CLI flag > .gstack/version-path
|
||||
// at repo root > "VERSION". Pure function; takes the repo root as an argument so
|
||||
// tests can drive it with a fixture dir without mocking git.
|
||||
/**
 * Resolve the repo-relative path of the VERSION file (monorepo support).
 *
 * Priority:
 *   1. `override` (the --version-path CLI flag), trimmed. A blank or
 *      whitespace-only override is treated as absent rather than returned as
 *      an empty path.
 *   2. The first non-blank line of `<repoRoot>/.gstack/version-path`, trimmed.
 *   3. The literal default `"VERSION"`.
 *
 * Never throws: a missing or unreadable config file falls through to the default.
 *
 * @param override Value of the --version-path flag, if one was given.
 * @param repoRoot Repository root directory used to locate `.gstack/version-path`.
 * @returns The path to use for VERSION lookups.
 */
function resolveVersionPath(override: string | undefined, repoRoot: string): string {
  const fromFlag = override?.trim();
  if (fromFlag) return fromFlag;

  try {
    // Only the first line counts; trim() also drops a trailing "\r" from CRLF files.
    const configFile = join(repoRoot, ".gstack", "version-path");
    const firstLine = readFileSync(configFile, "utf8").split("\n")[0]?.trim() ?? "";
    if (firstLine) return firstLine;
  } catch {
    // Config file missing or unreadable: fall through to the default.
  }
  return "VERSION";
}
|
||||
|
||||
/**
 * Return the root of the current git working tree as reported by
 * `git rev-parse --show-toplevel`, or the process working directory when the
 * git command does not succeed.
 */
function repoToplevel(): string {
  const { ok, stdout } = runCommand("git", ["rev-parse", "--show-toplevel"]);
  if (!ok) return process.cwd();
  return stdout.trim();
}
|
||||
|
||||
function detectHost(): "github" | "gitlab" | "unknown" {
|
||||
const remote = runCommand("git", ["remote", "get-url", "origin"]);
|
||||
if (remote.ok) {
|
||||
@@ -128,19 +158,19 @@ function detectHost(): "github" | "gitlab" | "unknown" {
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
function readBaseVersion(base: string, warnings: string[]): string {
|
||||
function readBaseVersion(base: string, versionPath: string, warnings: string[]): string {
|
||||
// git fetch is best-effort; we tolerate failure and fall back to whatever
|
||||
// origin/<base> currently points at.
|
||||
runCommand("git", ["fetch", "origin", base, "--quiet"], 10000);
|
||||
const r = runCommand("git", ["show", `origin/${base}:VERSION`]);
|
||||
const r = runCommand("git", ["show", `origin/${base}:${versionPath}`]);
|
||||
if (!r.ok) {
|
||||
warnings.push(`could not read VERSION at origin/${base}; assuming 0.0.0.0`);
|
||||
warnings.push(`could not read ${versionPath} at origin/${base}; assuming 0.0.0.0`);
|
||||
return "0.0.0.0";
|
||||
}
|
||||
return r.stdout.trim();
|
||||
}
|
||||
|
||||
async function fetchGithubClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
async function fetchGithubClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
const list = runCommand("gh", [
|
||||
"pr",
|
||||
"list",
|
||||
@@ -187,14 +217,18 @@ async function fetchGithubClaimed(base: string, excludePR: number | null, warnin
|
||||
const pr = queue.shift();
|
||||
if (!pr) return;
|
||||
// gh passes branch name via argv, not shell — safe.
|
||||
// encodeURI handles spaces in subproject paths (e.g. "Tinas Second Brain/...")
|
||||
// while leaving "/" untouched so the GitHub Contents API gets the path intact.
|
||||
const content = runCommand("gh", [
|
||||
"api",
|
||||
`repos/{owner}/{repo}/contents/VERSION?ref=${encodeURIComponent(pr.headRefName)}`,
|
||||
`repos/{owner}/{repo}/contents/${encodeURI(versionPath)}?ref=${encodeURIComponent(pr.headRefName)}`,
|
||||
"-q",
|
||||
".content",
|
||||
]);
|
||||
if (!content.ok) {
|
||||
warnings.push(`PR #${pr.number}: could not fetch VERSION (fork or private)`);
|
||||
warnings.push(
|
||||
`PR #${pr.number}: could not fetch ${versionPath} (fork, private, or wrong path — try --version-path or .gstack/version-path)`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
let versionStr: string;
|
||||
@@ -215,7 +249,7 @@ async function fetchGithubClaimed(base: string, excludePR: number | null, warnin
|
||||
return { claimed: results, offline: false };
|
||||
}
|
||||
|
||||
async function fetchGitlabClaimed(base: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
async function fetchGitlabClaimed(base: string, versionPath: string, excludePR: number | null, warnings: string[]): Promise<{ claimed: ClaimedPR[]; offline: boolean }> {
|
||||
const list = runCommand("glab", [
|
||||
"mr",
|
||||
"list",
|
||||
@@ -243,12 +277,15 @@ async function fetchGitlabClaimed(base: string, excludePR: number | null, warnin
|
||||
}
|
||||
const results: ClaimedPR[] = [];
|
||||
for (const mr of mrs) {
|
||||
// GitLab files API takes the full path URL-encoded (slashes become %2F).
|
||||
const content = runCommand("glab", [
|
||||
"api",
|
||||
`projects/:id/repository/files/VERSION?ref=${encodeURIComponent(mr.source_branch)}`,
|
||||
`projects/:id/repository/files/${encodeURIComponent(versionPath)}?ref=${encodeURIComponent(mr.source_branch)}`,
|
||||
]);
|
||||
if (!content.ok) {
|
||||
warnings.push(`MR !${mr.iid}: could not fetch VERSION`);
|
||||
warnings.push(
|
||||
`MR !${mr.iid}: could not fetch ${versionPath} (wrong path? — try --version-path or .gstack/version-path)`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
@@ -285,7 +322,7 @@ function currentRepoSlug(): string {
|
||||
return m ? m[1] : "";
|
||||
}
|
||||
|
||||
function scanSiblings(root: string | null, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
|
||||
function scanSiblings(root: string | null, versionPath: string, claimed: ClaimedPR[], warnings: string[]): Sibling[] {
|
||||
if (!root || !existsSync(root)) return [];
|
||||
const mySlug = currentRepoSlug();
|
||||
if (!mySlug) {
|
||||
@@ -308,7 +345,7 @@ function scanSiblings(root: string | null, claimed: ClaimedPR[], warnings: strin
|
||||
continue;
|
||||
}
|
||||
if (!existsSync(join(p, ".git")) && !existsSync(join(p, ".git/HEAD"))) continue;
|
||||
const versionFile = join(p, "VERSION");
|
||||
const versionFile = join(p, versionPath);
|
||||
if (!existsSync(versionFile)) continue;
|
||||
let version: string;
|
||||
try {
|
||||
@@ -346,12 +383,13 @@ function markActiveSiblings(siblings: Sibling[], baseVersion: Version): Sibling[
|
||||
});
|
||||
}
|
||||
|
||||
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; help: boolean } {
|
||||
function parseArgs(argv: string[]): { base: string; bump: Bump; current: string; workspaceRoot?: string; excludePR: number | null; versionPath?: string; help: boolean } {
|
||||
let base = "";
|
||||
let bump: Bump | "" = "";
|
||||
let current = "";
|
||||
let workspaceRoot: string | undefined;
|
||||
let excludePR: number | null = null;
|
||||
let versionPath: string | undefined;
|
||||
let help = false;
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const a = argv[i];
|
||||
@@ -359,6 +397,7 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
|
||||
else if (a === "--bump") bump = (argv[++i] ?? "") as Bump;
|
||||
else if (a === "--current-version") current = argv[++i] ?? "";
|
||||
else if (a === "--workspace-root") workspaceRoot = argv[++i];
|
||||
else if (a === "--version-path") versionPath = argv[++i];
|
||||
else if (a === "--exclude-pr") {
|
||||
const n = Number(argv[++i]);
|
||||
excludePR = Number.isFinite(n) && n > 0 ? n : null;
|
||||
@@ -375,7 +414,7 @@ function parseArgs(argv: string[]): { base: string; bump: Bump; current: string;
|
||||
console.error(`Error: --bump must be major|minor|patch|micro (got ${bump})`);
|
||||
process.exit(2);
|
||||
}
|
||||
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, help: false };
|
||||
return { base, bump: bump as Bump, current, workspaceRoot, excludePR, versionPath, help: false };
|
||||
}
|
||||
|
||||
// Auto-detect: if --exclude-pr wasn't passed, check whether the current branch
|
||||
@@ -392,13 +431,14 @@ async function main() {
|
||||
const args = parseArgs(process.argv.slice(2));
|
||||
if (args.help) {
|
||||
console.log(
|
||||
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>]",
|
||||
"Usage: gstack-next-version --base <branch> --bump <level> --current-version <X.Y.Z.W> [--workspace-root <path|null>] [--version-path <path>]",
|
||||
);
|
||||
process.exit(0);
|
||||
}
|
||||
const warnings: string[] = [];
|
||||
const host = detectHost();
|
||||
const baseVersion = args.current || readBaseVersion(args.base, warnings);
|
||||
const versionPath = resolveVersionPath(args.versionPath, repoToplevel());
|
||||
const baseVersion = args.current || readBaseVersion(args.base, versionPath, warnings);
|
||||
const baseParsed = parseVersion(baseVersion);
|
||||
if (!baseParsed) {
|
||||
console.error(`Error: could not parse base version '${baseVersion}'`);
|
||||
@@ -413,9 +453,9 @@ async function main() {
|
||||
let claimed: ClaimedPR[] = [];
|
||||
let offline = false;
|
||||
if (host === "github") {
|
||||
({ claimed, offline } = await fetchGithubClaimed(args.base, excludePR, warnings));
|
||||
({ claimed, offline } = await fetchGithubClaimed(args.base, versionPath, excludePR, warnings));
|
||||
} else if (host === "gitlab") {
|
||||
({ claimed, offline } = await fetchGitlabClaimed(args.base, excludePR, warnings));
|
||||
({ claimed, offline } = await fetchGitlabClaimed(args.base, versionPath, excludePR, warnings));
|
||||
} else {
|
||||
warnings.push("host unknown; queue-awareness unavailable");
|
||||
}
|
||||
@@ -433,7 +473,7 @@ async function main() {
|
||||
const { version: picked, reason } = pickNextSlot(baseParsed, claimedVersions, args.bump);
|
||||
|
||||
const workspaceRoot = resolveWorkspaceRoot(args.workspaceRoot);
|
||||
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, claimed, warnings), baseParsed);
|
||||
const siblings = markActiveSiblings(scanSiblings(workspaceRoot, versionPath, claimed, warnings), baseParsed);
|
||||
const activeSiblings = siblings.filter((s) => s.is_active);
|
||||
|
||||
// If an active sibling outranks our pick, bump past it (same bump level).
|
||||
@@ -453,6 +493,7 @@ async function main() {
|
||||
version: fmtVersion(finalVersion),
|
||||
current_version: args.current || baseVersion,
|
||||
base_version: baseVersion,
|
||||
version_path: versionPath,
|
||||
bump: args.bump,
|
||||
host,
|
||||
offline,
|
||||
@@ -466,7 +507,7 @@ async function main() {
|
||||
}
|
||||
|
||||
// Pure-function exports for testing
|
||||
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings };
|
||||
export { parseVersion, fmtVersion, bumpVersion, cmpVersion, pickNextSlot, markActiveSiblings, resolveVersionPath };
|
||||
|
||||
// Only run main() when invoked as a script, not when imported by tests.
|
||||
if (import.meta.main) {
|
||||
|
||||
+6
-2
@@ -9,7 +9,7 @@
|
||||
# CI / container env where HOME may be unset.
|
||||
#
|
||||
# Chains:
|
||||
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA -> $HOME/.gstack -> .gstack
|
||||
# GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA (only when CLAUDE_PLUGIN_ROOT=*gstack*) -> $HOME/.gstack -> .gstack
|
||||
# PLAN_ROOT: GSTACK_PLAN_DIR -> CLAUDE_PLANS_DIR -> $HOME/.claude/plans -> .claude/plans
|
||||
# TMP_ROOT: TMPDIR -> TMP -> .gstack/tmp (and mkdir -p, best-effort)
|
||||
#
|
||||
@@ -21,7 +21,11 @@ set -u
|
||||
# State root: where gstack writes projects/, sessions/, analytics/.
|
||||
if [ -n "${GSTACK_HOME:-}" ]; then
|
||||
_state_root="$GSTACK_HOME"
|
||||
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ]; then
|
||||
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ] && echo "${CLAUDE_PLUGIN_ROOT:-}" | grep -qi "gstack"; then
|
||||
# Guard: only trust CLAUDE_PLUGIN_DATA when CLAUDE_PLUGIN_ROOT confirms we are
|
||||
# running as the gstack plugin. Without this, a CLAUDE_PLUGIN_DATA from another
|
||||
# plugin (e.g. codex) that leaked into the session env via CLAUDE_ENV_FILE would
|
||||
# be picked up, writing all gstack state into the wrong directory.
|
||||
_state_root="$CLAUDE_PLUGIN_DATA"
|
||||
elif [ -n "${HOME:-}" ]; then
|
||||
_state_root="$HOME/.gstack"
|
||||
|
||||
+82
-3
@@ -28,7 +28,8 @@
|
||||
set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null)"
|
||||
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
|
||||
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
mkdir -p "$GSTACK_HOME/projects/$SLUG"
|
||||
|
||||
INPUT="$1"
|
||||
@@ -49,12 +50,48 @@ if (!j.skill || !/^[a-z0-9-]+\$/.test(j.skill)) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Required: question_id (kebab-case, <=64 chars)
|
||||
// Required: question_id (kebab-case, <=64 chars).
|
||||
// Cathedral T5: hook-sourced events use 'hook-<10-char-hash>' which is
|
||||
// kebab-case-compatible and passes the same regex.
|
||||
if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
|
||||
process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Optional: source — tags which writer produced this event.
|
||||
// 'agent' (default) — preamble-driven write from inside the running agent
|
||||
// 'hook' — PostToolUse hook captured it deterministically (T5)
|
||||
// 'auq-other' — user picked 'Other' and typed free text (Layer 8)
|
||||
// 'auto-decided' — PreToolUse enforcement hook substituted the answer (T6)
|
||||
// 'codex-import-marker' / 'codex-import-pattern' — T9 backfill from Codex
|
||||
const ALLOWED_SOURCES = ['agent', 'hook', 'auq-other', 'auto-decided', 'codex-import-marker', 'codex-import-pattern'];
|
||||
if (j.source !== undefined) {
|
||||
if (!ALLOWED_SOURCES.includes(j.source)) {
|
||||
process.stderr.write('gstack-question-log: invalid source, must be one of: ' + ALLOWED_SOURCES.join(', ') + '\n');
|
||||
process.exit(1);
|
||||
}
|
||||
} else {
|
||||
j.source = 'agent';
|
||||
}
|
||||
|
||||
// Optional: tool_use_id — Claude Code hook stdin field; used for dedup.
|
||||
if (j.tool_use_id !== undefined) {
|
||||
if (typeof j.tool_use_id !== 'string' || j.tool_use_id.length > 128) {
|
||||
process.stderr.write('gstack-question-log: tool_use_id must be string <=128 chars\n');
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Optional: free_text — sanitize (no newlines, <=300 chars).
|
||||
if (j.free_text !== undefined) {
|
||||
if (typeof j.free_text !== 'string') {
|
||||
process.stderr.write('gstack-question-log: free_text must be string\n');
|
||||
process.exit(1);
|
||||
}
|
||||
if (j.free_text.length > 300) j.free_text = j.free_text.slice(0, 300);
|
||||
j.free_text = j.free_text.replace(/\n+/g, ' ');
|
||||
}
|
||||
|
||||
// Required: question_summary (non-empty, <=200 chars, no newlines)
|
||||
if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
|
||||
process.stderr.write('gstack-question-log: question_summary required\n');
|
||||
@@ -164,7 +201,49 @@ if [ $VALIDATE_RC -ne 0 ] || [ -z "$VALIDATED" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "$VALIDATED" >> "$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
|
||||
LOG_FILE="$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
|
||||
|
||||
# Cathedral T5: composite-source dedup. If this exact (source, tool_use_id)
|
||||
# was already logged within the last 100 lines, skip — protects against
|
||||
# hook + agent both writing the same fire (D3 plan-tune cathedral decision).
|
||||
# Lookup is bounded so the bin stays cheap on hot paths.
|
||||
DEDUP_SKIP=""
|
||||
if [ -f "$LOG_FILE" ]; then
|
||||
DEDUP_SKIP=$(VALIDATED_JSON="$VALIDATED" LOG_FILE_PATH="$LOG_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
const j = JSON.parse(process.env.VALIDATED_JSON);
|
||||
if (!j.tool_use_id) { console.log(""); process.exit(0); }
|
||||
const want = j.source + ":" + j.tool_use_id;
|
||||
const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").slice(-100);
|
||||
for (const ln of lines) {
|
||||
try {
|
||||
const p = JSON.parse(ln);
|
||||
if (p.source && p.tool_use_id && (p.source + ":" + p.tool_use_id) === want) {
|
||||
console.log("dup");
|
||||
process.exit(0);
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
console.log("");
|
||||
' 2>/dev/null)
|
||||
fi
|
||||
|
||||
if [ "$DEDUP_SKIP" = "dup" ]; then
|
||||
echo "DEDUP: skipped (source=$(echo "$VALIDATED" | bun -e 'const j=JSON.parse(await Bun.stdin.text()); console.log(j.source);'), tool_use_id duplicate)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "$VALIDATED" >> "$LOG_FILE"
|
||||
|
||||
# Cathedral T5: fire-and-forget --derive so inferred dimensions stay current
|
||||
# without per-event latency (D17). Sub-second op; output suppressed; never
|
||||
# blocks the hook caller. Skipped via GSTACK_QUESTION_LOG_NO_DERIVE=1 for
|
||||
# tests that don't want the side effect.
|
||||
if [ -z "${GSTACK_QUESTION_LOG_NO_DERIVE:-}" ]; then
|
||||
(
|
||||
nohup "$SCRIPT_DIR/gstack-developer-profile" --derive >/dev/null 2>&1 &
|
||||
) >/dev/null 2>&1
|
||||
fi
|
||||
|
||||
# NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
|
||||
# Per Codex v2 review, audit/derivation data stays local alongside the
|
||||
|
||||
@@ -23,7 +23,8 @@ set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
|
||||
# GSTACK_STATE_ROOT takes precedence over GSTACK_HOME (test isolation per D16).
|
||||
GSTACK_HOME="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
|
||||
eval "$("$SCRIPT_DIR/gstack-slug" 2>/dev/null || true)"
|
||||
SLUG="${SLUG:-unknown}"
|
||||
PREF_FILE="$GSTACK_HOME/projects/$SLUG/question-preferences.json"
|
||||
@@ -68,6 +69,21 @@ do_check() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Split-chain carve-out: per-option calls in N-option splits emit
|
||||
// question_ids of the form <skill>-split-<option-slug>. These are
|
||||
// NEVER AUTO_DECIDE-eligible regardless of stored preferences — the
|
||||
// whole point of splitting is restoring user sovereignty over the
|
||||
// option set. See scripts/resolvers/preamble/generate-ask-user-format.ts
|
||||
// \"Handling 5+ options — split, never drop\" for the surrounding
|
||||
// mechanism that generates these ids.
|
||||
if (/-split-/.test(qid)) {
|
||||
console.log('ASK_NORMALLY');
|
||||
if (pref === 'never-ask' || pref === 'ask-only-for-one-way') {
|
||||
console.log('NOTE: split-chain per-option calls always ASK_NORMALLY; your ' + pref + ' preference does not apply to options inside a sequential split.');
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
switch (pref) {
|
||||
case 'never-ask':
|
||||
console.log('AUTO_DECIDE');
|
||||
|
||||
Executable
+241
@@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-redact — scan text for secrets/PII/legal content via the shared engine.
|
||||
*
|
||||
* Skill-facing CLI over lib/redact-engine.ts. Reads from stdin (default) or
|
||||
* --from-file, scans, and prints findings as JSON (--json) or a human table.
|
||||
*
|
||||
* Exit codes (consumed by skill bash to gate dispatch/file/edit/commit):
|
||||
* 0 clean (no HIGH, no MEDIUM)
|
||||
* 2 MEDIUM present (no HIGH) — skill runs the per-finding AskUserQuestion
|
||||
* 3 HIGH present — skill blocks
|
||||
*
|
||||
* WARN findings (tool-fence-degraded credentials) never change the exit code.
|
||||
*
|
||||
* Flags:
|
||||
* --json Emit JSON {findings, counts, repoVisibility, oversize}
|
||||
* --repo-visibility V public | private | unknown (default unknown=public-strict wording)
|
||||
* --from-file PATH Read input from PATH instead of stdin
|
||||
* --allowlist PATH Newline-delimited exact spans to suppress
|
||||
* --self-email EMAIL Suppress this email (the invoking user's own)
|
||||
* --repo-public-emails PATH Newline-delimited repo-public emails to suppress
|
||||
* --auto-redact IDS Comma-separated finding ids to auto-redact;
|
||||
* prints the redacted body to stdout + diff to stderr.
|
||||
* --max-bytes N Override the fail-closed size cap (default 1 MiB).
|
||||
*
|
||||
* Security note: this is a GUARDRAIL, not airtight enforcement. A determined
|
||||
* user can always bypass it (direct gh/git). It catches accidents.
|
||||
*/
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
import { spawnSync } from "child_process";
|
||||
import {
|
||||
scan,
|
||||
applyRedactions,
|
||||
exitCodeFor,
|
||||
type RepoVisibility,
|
||||
type ScanOptions,
|
||||
type Finding,
|
||||
} from "../lib/redact-engine";
|
||||
|
||||
const MAX_STDIN_BYTES = 16 * 1024 * 1024; // hard ceiling before the engine cap
|
||||
|
||||
// ── pre-push hook install/uninstall (chains any existing hook) ────────────────
|
||||
|
||||
const MANAGED_MARKER = "# gstack-redact pre-push (managed)";
|
||||
|
||||
/**
 * Ask git where this repository's hooks directory is
 * (`git rev-parse --git-path hooks`) and return the trimmed answer.
 *
 * If git does not exit with status 0, prints an error to stderr and
 * terminates the process with exit code 1.
 */
function hooksPath(): string {
  const { status, stdout } = spawnSync("git", ["rev-parse", "--git-path", "hooks"], {
    encoding: "utf8",
  });
  if (status === 0) return stdout.trim();

  process.stderr.write("gstack-redact: not in a git repo\n");
  process.exit(1);
}
|
||||
|
||||
/**
 * Install the managed pre-push wrapper into the repository's hooks directory.
 *
 * - If a hook carrying MANAGED_MARKER is already present, this is a no-op.
 * - If an unmanaged `pre-push` exists, it is renamed to `pre-push.local`, made
 *   executable, and chained: the wrapper runs it first and aborts the push with
 *   its exit status if it fails.
 * - The wrapper then pipes the same stdin to `gstack-redact-prepush` via bun.
 *
 * Exits the process (via hooksPath) when not inside a git repository.
 */
function installPrepushHook(): void {
  const dir = hooksPath();
  fs.mkdirSync(dir, { recursive: true });
  const hookPath = path.join(dir, "pre-push");
  const prepushBin = path.join(import.meta.dir, "gstack-redact-prepush");
  // Single-quote the scanner path for bash so spaces, `$`, backticks or quotes
  // in the install location are inert instead of being expanded by the shell.
  const quotedPrepushBin = "'" + prepushBin.replace(/'/g, "'\\''") + "'";

  // If a non-managed hook exists, preserve it as pre-push.local and chain it.
  if (fs.existsSync(hookPath)) {
    const existing = fs.readFileSync(hookPath, "utf8");
    if (existing.includes(MANAGED_MARKER)) {
      process.stdout.write("gstack-redact: pre-push hook already installed.\n");
      return;
    }
    const localPath = path.join(dir, "pre-push.local");
    fs.renameSync(hookPath, localPath);
    fs.chmodSync(localPath, 0o755);
    process.stdout.write("gstack-redact: preserved existing hook as pre-push.local (chained).\n");
  }

  // stdin is single-consume: capture it once, feed both the chained hook and ours.
  // Command substitution strips the trailing newline, so _feed re-appends one;
  // otherwise a chained hook that iterates with "while read" would silently
  // skip the last (often the only) ref line.
  const wrapper = `#!/usr/bin/env bash
${MANAGED_MARKER}
set -euo pipefail
_input="$(cat)"
# Command substitution strips trailing newlines; re-append one so line-oriented
# readers (for example a "while read" loop) still see the final ref line.
_feed() { [ -z "$_input" ] || printf '%s\\n' "$_input"; }
_local="$(git rev-parse --git-path hooks/pre-push.local)"
if [ -x "$_local" ]; then
  _feed | "$_local" "$@" || exit $?
fi
_feed | bun ${quotedPrepushBin} "$@"
`;
  fs.writeFileSync(hookPath, wrapper, { mode: 0o755 });
  // writeFileSync's mode only applies when the file is created; force it either way.
  fs.chmodSync(hookPath, 0o755);
  process.stdout.write(`gstack-redact: installed pre-push hook at ${hookPath}\n`);
}
|
||||
|
||||
/**
 * Remove the managed pre-push hook, if one is installed.
 *
 * A hook counts as managed only when the `pre-push` file exists and contains
 * MANAGED_MARKER; anything else is left alone. When a chained
 * `pre-push.local` is present it is moved back to `pre-push`; otherwise the
 * managed hook file is deleted.
 */
function uninstallPrepushHook(): void {
  const dir = hooksPath();
  const managedHook = path.join(dir, "pre-push");
  const chainedHook = path.join(dir, "pre-push.local");

  const isManaged =
    fs.existsSync(managedHook) && fs.readFileSync(managedHook, "utf8").includes(MANAGED_MARKER);
  if (!isManaged) {
    process.stdout.write("gstack-redact: no managed pre-push hook to remove.\n");
    return;
  }

  if (!fs.existsSync(chainedHook)) {
    fs.unlinkSync(managedHook);
    process.stdout.write("gstack-redact: removed managed pre-push hook.\n");
    return;
  }

  // Renaming over the managed hook restores the user's original in one step.
  fs.renameSync(chainedHook, managedHook);
  process.stdout.write("gstack-redact: removed managed hook, restored pre-push.local.\n");
}
|
||||
|
||||
/**
 * Look up the value of a space-separated CLI option in `process.argv`.
 *
 * @param name Exact flag token to search for (e.g. `--from-file`).
 * @returns The token immediately after the first occurrence of `name`, or
 *   `undefined` when the flag is absent or is the last token.
 */
function arg(name: string): string | undefined {
  const argv = process.argv;
  const at = argv.indexOf(name);
  if (at < 0) return undefined;
  return argv[at + 1];
}
|
||||
/**
 * Report whether a boolean CLI switch is present.
 *
 * @param name Exact flag token to look for (e.g. `--json`).
 * @returns `true` when some element of `process.argv` equals `name`.
 */
function flag(name: string): boolean {
  return process.argv.some((token) => token === name);
}
|
||||
|
||||
/**
 * Read the text to scan, either from `--from-file PATH` or from stdin.
 *
 * Both sources are capped at MAX_STDIN_BYTES. Exceeding the cap prints an
 * error to stderr and exits with code 3 before the content is scanned
 * (fail closed).
 *
 * @returns The input decoded as UTF-8.
 */
function readInput(): string {
  const file = arg("--from-file");
  if (file) {
    const { size } = fs.statSync(file);
    if (size > MAX_STDIN_BYTES) {
      // Don't even read it — fail closed at the CLI boundary.
      process.stderr.write(`gstack-redact: input file too large (${size} bytes)\n`);
      process.exit(3);
    }
    return fs.readFileSync(file, "utf8");
  }

  // No file given: drain stdin (fd 0) synchronously in 64 KiB slices.
  const STDIN_FD = 0;
  const scratch = Buffer.alloc(65536);
  const pieces: Buffer[] = [];
  let received = 0;
  for (;;) {
    let count: number;
    try {
      count = fs.readSync(STDIN_FD, scratch, 0, scratch.length, null);
    } catch (e: any) {
      if (e.code === "EAGAIN") continue; // nothing available yet; retry the read
      if (e.code === "EOF") break; // treated the same as a zero-length read
      throw e;
    }
    if (count === 0) break;

    received += count;
    if (received > MAX_STDIN_BYTES) {
      process.stderr.write("gstack-redact: stdin too large\n");
      process.exit(3);
    }
    // Copy the slice: `scratch` is overwritten by the next read.
    pieces.push(Buffer.from(scratch.subarray(0, count)));
  }
  return Buffer.concat(pieces).toString("utf8");
}
|
||||
|
||||
/**
 * Load a newline-delimited list file.
 *
 * @param path File to read; may be undefined when the option was not given.
 * @returns The file's lines, each trimmed, with blank lines removed; or
 *   `undefined` when no path was given or the file does not exist.
 */
function readLines(path: string | undefined): string[] | undefined {
  if (path === undefined || path === "" || !fs.existsSync(path)) return undefined;

  const kept: string[] = [];
  for (const raw of fs.readFileSync(path, "utf8").split("\n")) {
    const line = raw.trim(); // also strips the "\r" of CRLF files
    if (line) kept.push(line);
  }
  return kept;
}
|
||||
|
||||
/**
 * Translate CLI flags into the options object handed to the scan engine.
 *
 * - `--repo-visibility`: accepted only as public | private | unknown; any
 *   other value (or no value) becomes "unknown".
 * - `--max-bytes`: must be a clean positive decimal integer, otherwise an
 *   error is printed and the process exits with code 1. When the flag is
 *   absent the `maxBytes` key is omitted entirely.
 * - `--allowlist`, `--repo-public-emails`: newline-delimited files.
 * - `--self-email`: passed through as given.
 */
function buildOpts(): ScanOptions {
  const requestedVisibility = arg("--repo-visibility");
  const rawMaxBytes = arg("--max-bytes");

  // #1824: validate the RAW string, not the parse result. parseInt("123abc")
  // is 123 and parseInt("foo") is NaN — both silently corrupt the fail-closed
  // oversize guard. Require a clean positive integer or reject before scanning.
  let maxBytesOpt: number | undefined;
  if (rawMaxBytes !== undefined) {
    const parsed = /^\d+$/.test(rawMaxBytes) ? Number(rawMaxBytes) : NaN;
    if (!(parsed > 0)) {
      process.stderr.write(
        `gstack-redact: --max-bytes must be a positive integer (got "${rawMaxBytes}")\n`,
      );
      process.exit(1);
    }
    maxBytesOpt = parsed;
  }

  const knownVisibilities = ["public", "private", "unknown"];
  const repoVisibility = (
    requestedVisibility !== undefined && knownVisibilities.includes(requestedVisibility)
      ? requestedVisibility
      : "unknown"
  ) as RepoVisibility;

  return {
    repoVisibility,
    allowlist: readLines(arg("--allowlist")),
    selfEmail: arg("--self-email"),
    repoPublicEmails: readLines(arg("--repo-public-emails")),
    ...(maxBytesOpt !== undefined ? { maxBytes: maxBytesOpt } : {}),
  };
}
|
||||
|
||||
/**
 * Render findings as a fixed-width, human-readable table.
 *
 * Each row is: severity (padded to 6), id (padded to 24), `line:col`
 * (line right-aligned to 4, col padded to 3), then the preview text.
 *
 * @param findings Findings to render.
 * @returns One row per finding joined by newlines, or a placeholder line when
 *   there are no findings.
 */
function humanTable(findings: Finding[]): string {
  if (findings.length === 0) return " (no findings)";

  const lines: string[] = [];
  for (const f of findings) {
    const severity = f.severity.padEnd(6);
    const id = f.id.padEnd(24);
    const line = String(f.line).padStart(4);
    const col = String(f.col).padEnd(3);
    lines.push(` ${severity} ${id} ${line}:${col} ${f.preview}`);
  }
  return lines.join("\n");
}
|
||||
|
||||
/**
 * CLI entry point.
 *
 * 1. Positional subcommands `install-prepush-hook` / `uninstall-prepush-hook`
 *    are dispatched and return immediately.
 * 2. With `--auto-redact IDS`, the redacted body goes to stdout, the diff and
 *    any findings that could not be redacted go to stderr, and the process
 *    exits 0.
 * 3. Otherwise the input is scanned and reported as JSON (`--json`) or as a
 *    human table, and the process exits with the code from exitCodeFor.
 */
function main() {
  // Subcommands (positional, not flags).
  switch (process.argv[2]) {
    case "install-prepush-hook":
      return installPrepushHook();
    case "uninstall-prepush-hook":
      return uninstallPrepushHook();
  }

  const opts = buildOpts();
  const input = readInput();

  // Auto-redact mode: print redacted body to stdout, diff to stderr, exit 0.
  const autoIds = arg("--auto-redact");
  if (autoIds) {
    const { body, diff, skipped } = applyRedactions(input, autoIds.split(","), opts);
    process.stdout.write(body);
    if (diff) process.stderr.write(diff + "\n");
    if (skipped.length) {
      const locations = skipped.map((f) => ` ${f.id} @ ${f.line}:${f.col}`).join("\n");
      process.stderr.write(
        `\ngstack-redact: ${skipped.length} finding(s) could not be auto-redacted (structural) — edit manually:\n` +
          locations +
          "\n",
      );
    }
    process.exit(0);
  }

  const result = scan(input, opts);
  const code = exitCodeFor(result);

  if (flag("--json")) {
    process.stdout.write(JSON.stringify(result, null, 2) + "\n");
    process.exit(code);
  }

  // Human-readable report.
  const visibilityLabel = result.repoVisibility.toUpperCase();
  process.stdout.write(`gstack-redact scan — repo ${visibilityLabel}\n`);
  if (result.oversize) {
    process.stdout.write(" BLOCKED — input too large to scan safely (fail-closed)\n");
  } else {
    const { HIGH, MEDIUM, LOW, WARN } = result.counts;
    process.stdout.write(humanTable(result.findings) + "\n");
    process.stdout.write(` HIGH=${HIGH} MEDIUM=${MEDIUM} LOW=${LOW} WARN=${WARN}\n`);
  }
  process.exit(code);
}
|
||||
|
||||
main();
|
||||
Executable
+146
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* gstack-redact-prepush — git pre-push hook that scans the diff being pushed for
|
||||
* HIGH-severity credentials and blocks the push on a hit.
|
||||
*
|
||||
* THIS IS A GUARDRAIL, NOT ENFORCEMENT. `git push --no-verify` bypasses it, as
|
||||
* does `GSTACK_REDACT_PREPUSH=skip`. It catches accidental credential pushes,
|
||||
* the most common real-world leak. It does NOT scan history, binary/LFS/submodule
|
||||
* files, or non-added lines. History scanning is /cso's job.
|
||||
*
|
||||
* Git pre-push interface: refs are read from STDIN, one per line:
|
||||
* <local ref> <local sha> <remote ref> <remote sha>
|
||||
* We scan the ADDED lines of <remote sha>..<local sha> per ref (what's being
|
||||
* pushed). Special cases:
|
||||
* - remote sha all-zeroes → new branch: diff against merge-base with the
|
||||
* remote's default branch (fallback: scan all commits unique to local ref).
|
||||
* - local sha all-zeroes → branch delete: nothing to scan, skip.
|
||||
* - force-push → remote..local still gives the net new content.
|
||||
*
|
||||
* Behavior:
|
||||
* - HIGH finding in added lines → print + exit 1 (block), for public AND private.
|
||||
* - MEDIUM → warn (non-blocking). LOW/WARN → silent.
|
||||
* - GSTACK_REDACT_PREPUSH=skip → log + exit 0 (escape valve).
|
||||
*
|
||||
* Installed/uninstalled via `gstack-redact install-prepush-hook` (see the
|
||||
* gstack-redact CLI), which chains any pre-existing hook.
|
||||
*/
|
||||
import { spawnSync } from "child_process";
|
||||
import * as fs from "fs";
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import { scan, type Finding } from "../lib/redact-engine";
|
||||
|
||||
const ZERO = /^0+$/;
|
||||
// The canonical empty-tree object; diffing against it yields all content as added.
|
||||
const EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904";
|
||||
|
||||
/**
 * Run `git` synchronously with the given arguments.
 *
 * @param args - argument vector passed to git (no shell involved).
 * @returns git's stdout when it exits with status 0; the empty string for any
 *   other outcome, so callers cannot tell "failed" from "printed nothing".
 */
function git(args: string[]): string {
  const proc = spawnSync("git", args, { encoding: "utf8", maxBuffer: 64 * 1024 * 1024 });
  if (proc.status !== 0) return "";
  return proc.stdout ?? "";
}
|
||||
|
||||
/**
 * Name the remote's default branch as `origin/<branch>`.
 *
 * Resolution order: the `refs/remotes/origin/HEAD` symbolic ref, then the first
 * of `origin/main` / `origin/master` that `git rev-parse --verify` accepts, and
 * finally the literal `origin/main` when nothing resolves.
 */
function defaultRemoteBranch(): string {
  const head = git(["symbolic-ref", "refs/remotes/origin/HEAD"]).trim();
  if (head) return head.replace("refs/remotes/", "");

  const resolvable = ["origin/main", "origin/master"].find(
    (candidate) => git(["rev-parse", "--verify", candidate]).trim() !== "",
  );
  return resolvable ?? "origin/main";
}
|
||||
|
||||
/**
 * Return the added-line text for a ref update being pushed.
 *
 * @param localSha - commit being pushed.
 * @param remoteSha - commit the remote ref currently points at; all zeroes for a
 *   new branch.
 * @returns the content of every added line in the diff, joined with "\n" and
 *   without the leading '+'. Empty when there is nothing added or git failed.
 *
 * The diff base is chosen fail-safe (scan more, never less):
 *   - remote sha present locally → `remote..local` (incl. force-push);
 *   - new branch, or remote sha missing from the local object store (the remote
 *     has commits that were never fetched) → merge-base with the remote default
 *     branch, else the empty tree so ALL branch content counts as added.
 */
function addedLinesFor(localSha: string, remoteSha: string): string {
  // `git diff <missing>..<local>` would fail and yield "", silently skipping the
  // scan, so only use the remote sha as a base when it resolves to a commit here.
  const remoteUsable =
    !ZERO.test(remoteSha) &&
    git(["rev-parse", "--verify", "--quiet", `${remoteSha}^{commit}`]).trim() !== "";

  let range: string;
  if (remoteUsable) {
    // Existing branch (incl. force-push): net new content remote..local.
    range = `${remoteSha}..${localSha}`;
  } else {
    // Prefer what's unique to localSha vs the remote default branch; with no
    // merge-base (e.g. no remote yet) diff against the empty tree.
    const base = git(["merge-base", localSha, defaultRemoteBranch()]).trim();
    range = base ? `${base}..${localSha}` : `${EMPTY_TREE}..${localSha}`;
  }

  // -U0: only changed lines. Track whether we are inside a hunk instead of
  // filtering on a "+++" prefix: the "+++ b/file" header only appears before the
  // first "@@" of a file, while an added line whose own content starts with "++"
  // also begins with "+++" and must still be scanned.
  const diff = git(["diff", "--unified=0", "--no-color", range]);
  const added: string[] = [];
  let inHunk = false;
  for (const line of diff.split("\n")) {
    if (line.startsWith("diff --git ")) {
      inHunk = false; // new file section: header lines follow until the next "@@"
    } else if (line.startsWith("@@")) {
      inHunk = true;
    } else if (inHunk && line.startsWith("+")) {
      added.push(line.slice(1));
    }
  }
  return added.join("\n");
}
|
||||
|
||||
/**
 * Append a `{ ts, reason }` JSON line to `<home>/security/prepush-skip.jsonl`,
 * where `<home>` is `$GSTACK_HOME` or `~/.gstack`.
 *
 * Best-effort by design: any failure (unwritable directory, etc.) is swallowed
 * so that logging can never be the reason a push is blocked.
 *
 * @param reason - free-form explanation recorded with the timestamp.
 */
function logSkip(reason: string): void {
  try {
    const root = process.env.GSTACK_HOME || path.join(os.homedir(), ".gstack");
    const securityDir = path.join(root, "security");
    const logFile = path.join(securityDir, "prepush-skip.jsonl");
    fs.mkdirSync(securityDir, { recursive: true });
    const entry = { ts: new Date().toISOString(), reason };
    fs.appendFileSync(logFile, JSON.stringify(entry) + "\n");
  } catch {
    // best-effort; never block a push because logging failed
  }
}
|
||||
|
||||
/**
 * Pre-push hook entry point.
 *
 * Reads the ref updates git supplies on stdin (`<local ref> <local sha>
 * <remote ref> <remote sha>` per line), scans the added lines of each update,
 * and decides the push:
 *   - GSTACK_REDACT_PREPUSH=skip → log the skip, exit 0 without scanning.
 *   - any HIGH finding → list them on stderr and exit 1 (push blocked).
 *   - MEDIUM findings → one non-blocking warning on stderr.
 *   - an update the scanner reports as oversize → non-blocking warning, so an
 *     unscanned diff is never passed off as a clean one.
 *   - otherwise exit 0.
 */
function main() {
  if ((process.env.GSTACK_REDACT_PREPUSH || "").toLowerCase() === "skip") {
    logSkip(process.env.GSTACK_REDACT_PREPUSH_REASON || "env-skip");
    process.stderr.write("gstack-redact-prepush: skipped via GSTACK_REDACT_PREPUSH=skip\n");
    process.exit(0);
  }

  // fd 0 = stdin; each non-blank line becomes its whitespace-separated fields.
  const stdin = fs.readFileSync(0, "utf8");
  const refs = stdin
    .split("\n")
    .map((l) => l.trim())
    .filter(Boolean)
    .map((l) => l.split(/\s+/));

  const allHigh: Finding[] = [];
  let mediumCount = 0;
  let oversizeCount = 0;

  for (const [, localSha, , remoteSha] of refs) {
    if (!localSha || ZERO.test(localSha)) continue; // branch delete → nothing pushed
    const added = addedLinesFor(localSha, remoteSha || "0");
    if (!added.trim()) continue;
    // Visibility doesn't change HIGH behavior; pass private so nothing is treated
    // as public-strict (HIGH blocks regardless either way).
    const result = scan(added, { repoVisibility: "private" });
    // NOTE(review): relies on scan() flagging input it refused to scan via
    // `oversize`, as the gstack-redact CLI reports it — confirm against redact-engine.
    if (result.oversize) oversizeCount++;
    for (const f of result.findings) {
      if (f.severity === "HIGH") allHigh.push(f);
      else if (f.severity === "MEDIUM") mediumCount++;
    }
  }

  if (oversizeCount > 0) {
    process.stderr.write(
      `gstack-redact-prepush: ${oversizeCount} ref update(s) too large to scan — NOT checked for credentials. ` +
        "Not blocking. Review the pushed changes manually.\n",
    );
  }

  if (mediumCount > 0) {
    process.stderr.write(
      `gstack-redact-prepush: ${mediumCount} MEDIUM finding(s) in pushed diff (PII/internal). ` +
        "Not blocking. Review before this becomes public.\n",
    );
  }

  if (allHigh.length > 0) {
    process.stderr.write(
      "\n⛔ gstack-redact-prepush BLOCKED the push — credential(s) in the pushed diff:\n\n",
    );
    for (const f of allHigh) {
      process.stderr.write(` HIGH ${f.id} ${f.preview}\n`);
    }
    process.stderr.write(
      "\nRotate the credential (a pushed secret is compromised) and remove it from the diff.\n" +
        "This is a guardrail: `git push --no-verify` or `GSTACK_REDACT_PREPUSH=skip git push` bypass it.\n",
    );
    process.exit(1);
  }

  process.exit(0);
}
|
||||
|
||||
main();
|
||||
@@ -46,6 +46,17 @@ _cleanup_skill_entry() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Expose the install root's SKILL.md as $SKILLS_DIR/_gstack-command/SKILL.md.
# No-op when the root SKILL.md is absent. If the alias path is currently a
# symlink it is removed first so a real directory can take its place.
_link_root_skill_alias() {
  local target="$SKILLS_DIR/_gstack-command"

  if [ ! -f "$INSTALL_DIR/SKILL.md" ]; then
    return 0
  fi
  if [ -L "$target" ]; then
    rm -f "$target"
  fi
  mkdir -p "$target"
  ln -snf "$INSTALL_DIR/SKILL.md" "$target/SKILL.md"
}
|
||||
|
||||
_link_root_skill_alias
|
||||
|
||||
# Discover skills (directories with SKILL.md, excluding meta dirs)
|
||||
SKILL_COUNT=0
|
||||
for skill_dir in "$INSTALL_DIR"/*/; do
|
||||
|
||||
Executable
+53
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env bash
# gstack-session-kind — classify the current agent session so skills know whether
# a human can answer an interactive prompt (AskUserQuestion).
#
# Usage: gstack-session-kind → prints one of: spawned | headless | interactive
#
# Used by the preamble (generate-preamble-bash.ts) which echoes
#   SESSION_KIND: <value>
# so the AskUserQuestion-failure fallback rule can branch without a shell-out at
# failure time:
#   spawned     → orchestrator session (OpenClaw). Auto-choose recommended option
#                 per the skill's SPAWNED_SESSION block. Never prose, never BLOCKED.
#   headless    → no human present (claude -p evals / CI). BLOCK on AUQ failure.
#   interactive → a human is present. Prose-fallback on AUQ failure.
#
# Detection is best-effort. On ANY ambiguity it prints `interactive` — BLOCK only on
# a positive headless signal, since a stray prose message in an unmarked one-shot
# `-p` run just ends the turn (harmless), whereas wrongly BLOCKING a real human is not.
#
# Why env vars and not TTY/entrypoint: an interactive Conductor session reports
# CLAUDE_CODE_ENTRYPOINT=sdk-ts with no TTY — identical to a headless SDK eval. The
# signals that actually discriminate are the host/orchestrator/CI env markers below.
set -euo pipefail

# Print the classification and stop. Rules are checked in priority order and the
# first one that matches wins.
classified() {
  echo "$1"
  exit 0
}

# 1. Orchestrator-spawned session (OpenClaw). Authoritative block lives in the skill;
#    we only surface the classification.
[ -z "${OPENCLAW_SESSION:-}" ] || classified "spawned"

# 2. Explicit headless override (set by the eval/E2E harness for determinism).
[ -z "${GSTACK_HEADLESS:-}" ] || classified "headless"

# 3. Positive interactive-host signals: a human-driven host is present.
#    - Conductor app sets CONDUCTOR_* workspace vars (either one being non-empty counts).
#    - Plain interactive `claude` CLI sets CLAUDE_CODE_ENTRYPOINT=cli.
if [ -n "${CONDUCTOR_WORKSPACE_PATH:-}${CONDUCTOR_PORT:-}" ]; then
  classified "interactive"
fi
if [ "${CLAUDE_CODE_ENTRYPOINT:-}" = "cli" ]; then
  classified "interactive"
fi

# 4. CI / automation markers with no interactive host → headless.
[ -z "${CI:-}${GITHUB_ACTIONS:-}" ] || classified "headless"

# 5. No positive headless signal → assume a human is present (degrade-safe default).
classified "interactive"
|
||||
+237
-34
@@ -1,21 +1,44 @@
|
||||
#!/usr/bin/env bash
|
||||
# gstack-settings-hook — add/remove SessionStart hooks in Claude Code settings.json
|
||||
# gstack-settings-hook — manage Claude Code hooks in ~/.claude/settings.json
|
||||
#
|
||||
# Usage:
|
||||
# gstack-settings-hook add <hook-command> # add SessionStart hook
|
||||
# gstack-settings-hook remove <hook-command> # remove SessionStart hook
|
||||
# Two shapes:
|
||||
#
|
||||
# 1. Legacy (SessionStart only — used by setup --team and gstack-uninstall):
|
||||
# gstack-settings-hook add <cmd> # adds SessionStart hook
|
||||
# gstack-settings-hook remove <cmd> # removes matching SessionStart hook
|
||||
#
|
||||
# 2. Schema-aware (plan-tune cathedral T3 — supports PreToolUse + PostToolUse):
|
||||
# gstack-settings-hook add-event --event <SessionStart|PreToolUse|PostToolUse> \
|
||||
# --command <cmd> --source <tag> [--matcher <regex>] [--timeout <s>]
|
||||
# gstack-settings-hook remove-source --source <tag>
|
||||
# gstack-settings-hook diff-event --event ... --command ... --source ... [--matcher ...]
|
||||
# gstack-settings-hook rollback # restore latest backup
|
||||
# gstack-settings-hook list-sources # show all gstack-tagged hook entries
|
||||
#
|
||||
# Every add-event/remove-source writes a backup to ~/.claude/settings.json.bak.<ts>
|
||||
# before mutating (Codex correction — silent settings.json mutation is wrong).
|
||||
#
|
||||
# Dedup: legacy `add`/`remove` dedupe by the historical `gstack-session-update`
|
||||
# substring. Schema-aware `add-event` dedupes by (event, matcher, _gstack_source) so
|
||||
# multiple gstack registrations (plan-tune, ...) don't collide.
|
||||
#
|
||||
# Requires: bun (already a gstack hard dependency)
|
||||
# Writes atomically: .tmp + rename to prevent corruption on crash/disk-full.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
ACTION="${1:-}"
|
||||
HOOK_CMD="${2:-}"
|
||||
SETTINGS_FILE="${GSTACK_SETTINGS_FILE:-$HOME/.claude/settings.json}"
|
||||
|
||||
if [ -z "$ACTION" ] || [ -z "$HOOK_CMD" ]; then
|
||||
echo "Usage: gstack-settings-hook {add|remove} <hook-command>" >&2
|
||||
if [ -z "$ACTION" ]; then
|
||||
cat <<EOF >&2
|
||||
Usage:
|
||||
gstack-settings-hook add <hook-command> # legacy SessionStart add
|
||||
gstack-settings-hook remove <hook-command> # legacy SessionStart remove
|
||||
gstack-settings-hook add-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
|
||||
gstack-settings-hook remove-source --source <tag>
|
||||
gstack-settings-hook diff-event --event <name> --command <cmd> --source <tag> [--matcher <re>] [--timeout <s>]
|
||||
gstack-settings-hook rollback
|
||||
gstack-settings-hook list-sources
|
||||
EOF
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -24,59 +47,239 @@ if ! command -v bun >/dev/null 2>&1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
backup_settings() {
|
||||
if [ -f "$SETTINGS_FILE" ]; then
|
||||
local ts
|
||||
ts=$(date +%Y%m%d-%H%M%S)
|
||||
cp "$SETTINGS_FILE" "$SETTINGS_FILE.bak.$ts"
|
||||
echo "$SETTINGS_FILE.bak.$ts" > "$SETTINGS_FILE.bak-latest"
|
||||
fi
|
||||
}
|
||||
|
||||
# --- legacy SessionStart add/remove (backwards compat) -----------------
|
||||
|
||||
case "$ACTION" in
|
||||
add)
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e "
|
||||
const fs = require('fs');
|
||||
HOOK_CMD="${2:-}"
|
||||
if [ -z "$HOOK_CMD" ]; then
|
||||
echo "Usage: gstack-settings-hook add <hook-command>" >&2
|
||||
exit 1
|
||||
fi
|
||||
backup_settings
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_HOOK_CMD="$HOOK_CMD" bun -e '
|
||||
const fs = require("fs");
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
const hookCmd = process.env.GSTACK_HOOK_CMD;
|
||||
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch {}
|
||||
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
|
||||
if (!settings.hooks) settings.hooks = {};
|
||||
if (!settings.hooks.SessionStart) settings.hooks.SessionStart = [];
|
||||
|
||||
// Dedup: check if hook command already registered
|
||||
const exists = settings.hooks.SessionStart.some(entry =>
|
||||
entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update'))
|
||||
entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update"))
|
||||
);
|
||||
|
||||
if (!exists) {
|
||||
settings.hooks.SessionStart.push({
|
||||
hooks: [{ type: 'command', command: hookCmd }]
|
||||
hooks: [{ type: "command", command: hookCmd }]
|
||||
});
|
||||
}
|
||||
|
||||
const tmp = settingsPath + '.tmp';
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
" 2>/dev/null
|
||||
' 2>/dev/null
|
||||
;;
|
||||
|
||||
remove)
|
||||
HOOK_CMD="${2:-}"
|
||||
if [ -z "$HOOK_CMD" ]; then
|
||||
echo "Usage: gstack-settings-hook remove <hook-command>" >&2
|
||||
exit 1
|
||||
fi
|
||||
[ -f "$SETTINGS_FILE" ] || exit 1
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e "
|
||||
const fs = require('fs');
|
||||
backup_settings
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch { process.exit(0); }
|
||||
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
|
||||
if (settings.hooks && settings.hooks.SessionStart) {
|
||||
settings.hooks.SessionStart = settings.hooks.SessionStart.filter(entry =>
|
||||
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes('gstack-session-update')))
|
||||
!(entry.hooks && entry.hooks.some(h => h.command && h.command.includes("gstack-session-update")))
|
||||
);
|
||||
if (settings.hooks.SessionStart.length === 0) delete settings.hooks.SessionStart;
|
||||
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
|
||||
}
|
||||
|
||||
const tmp = settingsPath + '.tmp';
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + '\n');
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
" 2>/dev/null
|
||||
' 2>/dev/null
|
||||
;;
|
||||
|
||||
add-event|diff-event)
|
||||
EVENT=""
|
||||
COMMAND=""
|
||||
SOURCE=""
|
||||
MATCHER=""
|
||||
TIMEOUT=""
|
||||
shift
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--event) EVENT="$2"; shift 2 ;;
|
||||
--command) COMMAND="$2"; shift 2 ;;
|
||||
--source) SOURCE="$2"; shift 2 ;;
|
||||
--matcher) MATCHER="$2"; shift 2 ;;
|
||||
--timeout) TIMEOUT="$2"; shift 2 ;;
|
||||
*) echo "unknown flag: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
if [ -z "$EVENT" ] || [ -z "$COMMAND" ] || [ -z "$SOURCE" ]; then
|
||||
echo "add-event/diff-event require --event, --command, --source" >&2
|
||||
exit 1
|
||||
fi
|
||||
case "$EVENT" in
|
||||
SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification) ;;
|
||||
*) echo "invalid --event '$EVENT'; must be one of SessionStart|PreToolUse|PostToolUse|UserPromptSubmit|Stop|Notification" >&2; exit 1 ;;
|
||||
esac
|
||||
if [ "$ACTION" = "add-event" ]; then
|
||||
backup_settings
|
||||
fi
|
||||
DIFF_ONLY=""
|
||||
if [ "$ACTION" = "diff-event" ]; then DIFF_ONLY=1; fi
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" \
|
||||
GSTACK_EVENT="$EVENT" \
|
||||
GSTACK_COMMAND="$COMMAND" \
|
||||
GSTACK_SOURCE="$SOURCE" \
|
||||
GSTACK_MATCHER="$MATCHER" \
|
||||
GSTACK_TIMEOUT="$TIMEOUT" \
|
||||
GSTACK_DIFF_ONLY="$DIFF_ONLY" \
|
||||
bun -e '
|
||||
const fs = require("fs");
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
const event = process.env.GSTACK_EVENT;
|
||||
const cmd = process.env.GSTACK_COMMAND;
|
||||
const source = process.env.GSTACK_SOURCE;
|
||||
const matcher = process.env.GSTACK_MATCHER || "";
|
||||
const timeoutRaw = process.env.GSTACK_TIMEOUT || "";
|
||||
const diffOnly = process.env.GSTACK_DIFF_ONLY === "1";
|
||||
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch {}
|
||||
|
||||
const before = JSON.stringify(settings, null, 2);
|
||||
|
||||
if (!settings.hooks) settings.hooks = {};
|
||||
if (!settings.hooks[event]) settings.hooks[event] = [];
|
||||
|
||||
const matchesEntry = (entry) => {
|
||||
const sameMatcher = (entry.matcher || "") === matcher;
|
||||
const sameSource = entry._gstack_source === source;
|
||||
return sameMatcher && sameSource;
|
||||
};
|
||||
|
||||
let existing = settings.hooks[event].find(matchesEntry);
|
||||
const hookEntry = { type: "command", command: cmd };
|
||||
if (timeoutRaw) {
|
||||
const n = Number(timeoutRaw);
|
||||
if (Number.isFinite(n) && n > 0) hookEntry.timeout = n;
|
||||
}
|
||||
|
||||
if (existing) {
|
||||
existing.hooks = [hookEntry];
|
||||
} else {
|
||||
const newEntry = { _gstack_source: source, hooks: [hookEntry] };
|
||||
if (matcher) newEntry.matcher = matcher;
|
||||
settings.hooks[event].push(newEntry);
|
||||
}
|
||||
|
||||
const after = JSON.stringify(settings, null, 2);
|
||||
|
||||
if (diffOnly) {
|
||||
console.log("--- BEFORE");
|
||||
console.log(before);
|
||||
console.log("--- AFTER");
|
||||
console.log(after);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, after + "\n");
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
console.log("OK: " + event + " hook registered (source: " + source + ")");
|
||||
'
|
||||
;;
|
||||
|
||||
remove-source)
|
||||
SOURCE=""
|
||||
shift
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--source) SOURCE="$2"; shift 2 ;;
|
||||
*) echo "unknown flag: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
if [ -z "$SOURCE" ]; then
|
||||
echo "remove-source requires --source <tag>" >&2
|
||||
exit 1
|
||||
fi
|
||||
[ -f "$SETTINGS_FILE" ] || exit 0
|
||||
backup_settings
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" GSTACK_SOURCE="$SOURCE" bun -e '
|
||||
const fs = require("fs");
|
||||
const settingsPath = process.env.GSTACK_SETTINGS_PATH;
|
||||
const source = process.env.GSTACK_SOURCE;
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(settingsPath, "utf8")); } catch { process.exit(0); }
|
||||
if (!settings.hooks) { process.exit(0); }
|
||||
let removed = 0;
|
||||
for (const event of Object.keys(settings.hooks)) {
|
||||
const before = settings.hooks[event].length;
|
||||
settings.hooks[event] = settings.hooks[event].filter(entry => entry._gstack_source !== source);
|
||||
removed += before - settings.hooks[event].length;
|
||||
if (settings.hooks[event].length === 0) delete settings.hooks[event];
|
||||
}
|
||||
if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
|
||||
const tmp = settingsPath + ".tmp";
|
||||
fs.writeFileSync(tmp, JSON.stringify(settings, null, 2) + "\n");
|
||||
fs.renameSync(tmp, settingsPath);
|
||||
console.log("OK: removed " + removed + " hook entry/entries tagged source=" + source);
|
||||
'
|
||||
;;
|
||||
|
||||
rollback)
|
||||
if [ ! -f "$SETTINGS_FILE.bak-latest" ]; then
|
||||
echo "rollback: no backup pointer at $SETTINGS_FILE.bak-latest" >&2
|
||||
exit 1
|
||||
fi
|
||||
LATEST=$(cat "$SETTINGS_FILE.bak-latest")
|
||||
if [ ! -f "$LATEST" ]; then
|
||||
echo "rollback: pointer references missing backup $LATEST" >&2
|
||||
exit 1
|
||||
fi
|
||||
cp "$LATEST" "$SETTINGS_FILE"
|
||||
echo "OK: restored $SETTINGS_FILE from $LATEST"
|
||||
;;
|
||||
|
||||
list-sources)
|
||||
[ -f "$SETTINGS_FILE" ] || { echo "(no settings file)"; exit 0; }
|
||||
GSTACK_SETTINGS_PATH="$SETTINGS_FILE" bun -e '
|
||||
const fs = require("fs");
|
||||
let settings = {};
|
||||
try { settings = JSON.parse(fs.readFileSync(process.env.GSTACK_SETTINGS_PATH, "utf8")); } catch { process.exit(0); }
|
||||
const hooks = settings.hooks || {};
|
||||
let any = false;
|
||||
for (const event of Object.keys(hooks)) {
|
||||
for (const entry of hooks[event]) {
|
||||
if (entry._gstack_source) {
|
||||
any = true;
|
||||
console.log(event + "\t" + entry._gstack_source + "\t" + (entry.matcher || "(no matcher)"));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!any) console.log("(no gstack-tagged hooks)");
|
||||
'
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown action: $ACTION (expected add or remove)" >&2
|
||||
echo "Unknown action: $ACTION" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -31,6 +31,14 @@ fi
|
||||
# 3. Fallback to basename only when there's truly no git remote configured
|
||||
SLUG="${SLUG:-$(basename "$PWD" | tr -cd 'a-zA-Z0-9._-')}"
|
||||
|
||||
# 3b. Re-sanitize unconditionally before the value is echoed into `eval`/`source`
|
||||
# output. The compute (2) and fallback (3) paths already filter, but a value
|
||||
# read straight from the cache file (1) does NOT — a poisoned
|
||||
# ~/.gstack/slug-cache/<key> would otherwise inject shell into
|
||||
# `eval "$(gstack-slug)"`. Filtering here honors the [a-zA-Z0-9._-] invariant
|
||||
# promised in the header on every path, and heals a poisoned cache on write (4).
|
||||
SLUG=$(printf '%s' "$SLUG" | tr -cd 'a-zA-Z0-9._-')
|
||||
|
||||
# 4. Cache the slug for future sessions (atomic write, fail silently)
|
||||
if [[ -n "$SLUG" ]]; then
|
||||
mkdir -p "$CACHE_DIR" 2>/dev/null || true
|
||||
|
||||
@@ -107,7 +107,13 @@ BATCH="$BATCH]"
|
||||
[ "$COUNT" -eq 0 ] && exit 0
|
||||
|
||||
# ─── POST to edge function ───────────────────────────────────
|
||||
RESP_FILE="$(mktemp /tmp/gstack-sync-XXXXXX 2>/dev/null || echo "/tmp/gstack-sync-$$")"
|
||||
# Create response file atomically. If mktemp fails, refuse to continue rather
|
||||
# than fall back to a predictable $$-based path (race + overwrite footgun).
|
||||
RESP_FILE="$(mktemp "${TMPDIR:-/tmp}/gstack-sync-XXXXXX")" || {
|
||||
echo "gstack-telemetry-sync: mktemp failed — skipping this run" >&2
|
||||
exit 0
|
||||
}
|
||||
trap 'rm -f "$RESP_FILE"' EXIT
|
||||
HTTP_CODE="$(curl -s -w '%{http_code}' --max-time 10 \
|
||||
-X POST "${SUPABASE_URL}/functions/v1/telemetry-ingest" \
|
||||
-H "Content-Type: application/json" \
|
||||
|
||||
@@ -29,11 +29,13 @@ if [ ! -f "$TIMELINE_FILE" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
cat "$TIMELINE_FILE" 2>/dev/null | bun -e "
|
||||
cat "$TIMELINE_FILE" 2>/dev/null | GSTACK_TIMELINE_SINCE="$SINCE" GSTACK_TIMELINE_BRANCH="$BRANCH" GSTACK_TIMELINE_LIMIT="$LIMIT" bun -e "
|
||||
const lines = (await Bun.stdin.text()).trim().split('\n').filter(Boolean);
|
||||
const since = '${SINCE}';
|
||||
const branch = '${BRANCH}';
|
||||
const limit = ${LIMIT};
|
||||
const since = process.env.GSTACK_TIMELINE_SINCE || '';
|
||||
const branch = process.env.GSTACK_TIMELINE_BRANCH || '';
|
||||
const limitRaw = process.env.GSTACK_TIMELINE_LIMIT || '20';
|
||||
const parsedLimit = Number.parseInt(limitRaw, 10);
|
||||
const limit = Number.isSafeInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 20;
|
||||
|
||||
let sinceMs = 0;
|
||||
if (since) {
|
||||
|
||||
@@ -232,6 +232,10 @@ SETTINGS_HOOK="$(dirname "$0")/gstack-settings-hook"
|
||||
SESSION_UPDATE="$(dirname "$0")/gstack-session-update"
|
||||
if [ -x "$SETTINGS_HOOK" ]; then
|
||||
"$SETTINGS_HOOK" remove "$SESSION_UPDATE" 2>/dev/null && REMOVED+=("SessionStart hook") || true
|
||||
# Cathedral T8 cleanup: also remove plan-tune PreToolUse + PostToolUse hooks.
|
||||
if "$SETTINGS_HOOK" remove-source --source plan-tune-cathedral 2>/dev/null | grep -q "removed [1-9]"; then
|
||||
REMOVED+=("plan-tune cathedral hooks")
|
||||
fi
|
||||
fi
|
||||
|
||||
# ─── Remove global state ────────────────────────────────────
|
||||
|
||||
Executable
+212
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env bun
|
||||
// gstack-version-bump — deterministic version-state classifier + writer for /ship.
|
||||
//
|
||||
// Extracted from ship Step 12 prose (v2 plan T9, hybrid CLI extraction). The
|
||||
// idempotency classification and the dual-write to VERSION + package.json are
|
||||
// pure deterministic logic; running them as tested code removes the single
|
||||
// worst /ship footgun — re-bumping an already-shipped branch — from prose the
|
||||
// agent could skip or misread when the step lives in a lazy-loaded section.
|
||||
//
|
||||
// What STAYS agent judgment (NOT here): the bump-LEVEL decision (micro/patch vs
|
||||
// minor/major, which may AskUserQuestion on feature signals) and the queue
|
||||
// collision prompt. The slot pick itself is bin/gstack-next-version. This CLI
|
||||
// only answers "what state am I in?" and "write this exact version".
|
||||
//
|
||||
// Subcommands:
|
||||
// classify --base <branch> [--version-path <p>]
|
||||
// Compares VERSION vs origin/<base>:VERSION vs package.json.version.
|
||||
// Emits JSON: { state, baseVersion, currentVersion, pkgVersion, pkgExists }
|
||||
// state ∈ FRESH | ALREADY_BUMPED | DRIFT_STALE_PKG | DRIFT_UNEXPECTED
|
||||
// Exit 0 on a decidable state (incl. DRIFT_UNEXPECTED — it's a real state
|
||||
// the caller must handle), exit 2 on bad args / unresolvable base.
|
||||
//
|
||||
// write --version <X.Y.Z.W> [--version-path <p>]
|
||||
// Validates the 4-digit pattern, writes VERSION + package.json.version.
|
||||
// Use for the FRESH bump (or an approved queue rebump). Exit 3 on a
|
||||
// half-write (VERSION written, package.json failed) so the caller knows
|
||||
// drift exists; the next classify() will report DRIFT_STALE_PKG.
|
||||
//
|
||||
// repair [--version-path <p>]
|
||||
// DRIFT_STALE_PKG path: sync package.json.version to the current VERSION
|
||||
// file. No bump. Validates the VERSION pattern first.
|
||||
//
|
||||
// Contract: classify NEVER writes. write/repair mutate VERSION + package.json
|
||||
// only. No git mutation, no network. Mirrors gstack-next-version's reader/writer
|
||||
// split so /ship composes them.
|
||||
|
||||
import { execFileSync } from "node:child_process";
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { join, relative, sep } from "node:path";
|
||||
|
||||
const VERSION_RE = /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/;
|
||||
const DEFAULT = "0.0.0.0";
|
||||
|
||||
type State = "FRESH" | "ALREADY_BUMPED" | "DRIFT_STALE_PKG" | "DRIFT_UNEXPECTED";
|
||||
|
||||
/**
 * Print `msg` to stderr with the tool-name prefix and terminate the process.
 *
 * @param msg - human-readable error text (a newline is appended).
 * @param code - process exit status; defaults to 2.
 */
function fail(msg: string, code = 2): never {
  const line = "gstack-version-bump: " + msg + "\n";
  process.stderr.write(line);
  process.exit(code);
}
|
||||
|
||||
/**
 * Look up the value that follows `flag` in an argument vector.
 *
 * @returns the element right after the first occurrence of `flag`, or
 *   `undefined` when the flag is absent or is the last element. The value is
 *   returned as-is, even if it looks like another flag.
 */
function argVal(args: string[], flag: string): string | undefined {
  const at = args.indexOf(flag);
  if (at < 0) return undefined;
  const valueAt = at + 1;
  if (valueAt >= args.length) return undefined;
  return args[valueAt];
}
|
||||
|
||||
/**
 * Resolve the VERSION file path under `cwd`.
 *
 * Precedence: the `explicit` argument (from --version-path), then the trimmed
 * contents of `.gstack/version-path` when that file exists and is non-blank,
 * then plain "VERSION". Every result is joined onto `cwd`.
 */
function resolveVersionPath(cwd: string, explicit?: string): string {
  if (explicit) return join(cwd, explicit);

  const pinFile = join(cwd, ".gstack", "version-path");
  const pinned = existsSync(pinFile) ? readFileSync(pinFile, "utf-8").trim() : "";
  return join(cwd, pinned || "VERSION");
}
|
||||
|
||||
/**
 * Read a VERSION file and return its contents with all whitespace removed.
 *
 * @param p - path of the file to read.
 * @returns the compacted text, or DEFAULT when the file cannot be read or holds
 *   nothing but whitespace.
 */
function readVersionFile(p: string): string {
  let text: string;
  try {
    text = readFileSync(p, "utf-8");
  } catch {
    return DEFAULT;
  }
  const compact = text.replace(/[\r\n\s]/g, "");
  return compact === "" ? DEFAULT : compact;
}
|
||||
|
||||
/**
 * Report whether `cwd/package.json` exists and what its `version` field holds,
 * parsed in-process (no node subprocess).
 *
 * @returns `exists: false` when the file is absent. Otherwise `exists: true`
 *   with `version` set to the field's value, or "" when the file is unreadable
 *   or the field is missing / not a string.
 * Invalid JSON is fatal: it is reported through `fail` with exit code 2.
 */
function readPkgVersion(cwd: string): { exists: boolean; version: string } {
  const manifest = join(cwd, "package.json");
  if (!existsSync(manifest)) return { exists: false, version: "" };

  let text: string;
  try {
    text = readFileSync(manifest, "utf-8");
  } catch {
    return { exists: true, version: "" };
  }

  let field: unknown;
  try {
    // `?.` keeps a top-level `null` document from throwing on property access.
    field = (JSON.parse(text) as { version?: unknown } | null)?.version;
  } catch {
    fail("package.json is not valid JSON. Fix the file before re-running /ship.", 2);
  }

  if (typeof field !== "string") return { exists: true, version: "" };
  return { exists: true, version: field };
}
|
||||
|
||||
/**
 * Set the `version` field of `cwd/package.json` and rewrite the file as
 * 2-space-indented JSON with a trailing newline.
 *
 * @param cwd - directory containing package.json.
 * @param version - value to store in the `version` field.
 * @throws if the file cannot be read or written, is not valid JSON, or its
 *   top-level value is not a JSON object. The file is left untouched in the
 *   parse/shape failure cases.
 */
function writePkgVersion(cwd: string, version: string): void {
  const pkgPath = join(cwd, "package.json");
  const parsed: unknown = JSON.parse(readFileSync(pkgPath, "utf-8"));

  // A top-level array would accept `.version = …` but JSON.stringify drops the
  // property, so the write would "succeed" without recording the version.
  // Reject anything that is not a plain JSON object so the caller sees the failure.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("package.json top-level value is not a JSON object");
  }

  const manifest = parsed as Record<string, unknown>;
  manifest.version = version;
  writeFileSync(pkgPath, JSON.stringify(manifest, null, 2) + "\n");
}
|
||||
|
||||
/**
 * Read the version recorded at `origin/<base>:<versionRel>`.
 *
 * @param cwd - directory git is run from.
 * @param base - base branch name (without the `origin/` prefix).
 * @param versionRel - path of the VERSION file as passed to `git show`.
 * @returns the file's contents with whitespace removed, or DEFAULT when the
 *   file is absent on the base ref or is blank.
 * An unresolvable `origin/<base>` is fatal: reported through `fail`, exit code 2.
 */
function baseVersion(cwd: string, base: string, versionRel: string): string {
  // Verify the base ref resolves, mirroring the Step 12 guard.
  try {
    execFileSync("git", ["rev-parse", "--verify", `origin/${base}`], { cwd, stdio: "ignore" });
  } catch {
    fail(`Unable to resolve origin/${base}. Run 'git fetch origin' or verify the base branch exists.`, 2);
  }

  let shown: string;
  try {
    shown = execFileSync("git", ["show", `origin/${base}:${versionRel}`], { cwd }).toString();
  } catch {
    // VERSION absent on base (new repo / new file) → treat as 0.0.0.0.
    return DEFAULT;
  }
  const compact = shown.replace(/[\r\n\s]/g, "");
  return compact === "" ? DEFAULT : compact;
}
|
||||
|
||||
/**
 * Classify the branch's version state from three observations.
 *
 * A package.json "disagrees" only when it exists, carries a non-empty version,
 * and that version differs from `current`.
 *
 *   VERSION == base, package.json agrees/absent → FRESH
 *   VERSION == base, package.json disagrees     → DRIFT_UNEXPECTED (hand-edited
 *       package.json bypassing /ship — unsafe to guess which is authoritative)
 *   VERSION != base, package.json agrees/absent → ALREADY_BUMPED
 *   VERSION != base, package.json disagrees     → DRIFT_STALE_PKG
 */
function classifyState(current: string, base: string, pkgExists: boolean, pkgVersion: string): State {
  const pkgDisagrees = pkgExists && pkgVersion !== "" && pkgVersion !== current;
  const movedPastBase = current !== base;

  if (movedPastBase) {
    return pkgDisagrees ? "DRIFT_STALE_PKG" : "ALREADY_BUMPED";
  }
  return pkgDisagrees ? "DRIFT_UNEXPECTED" : "FRESH";
}
|
||||
|
||||
/**
 * `classify` subcommand: compare the working-tree VERSION, the VERSION on
 * `origin/<base>`, and package.json's version, then print one JSON line
 * `{ state, baseVersion, currentVersion, pkgVersion, pkgExists }` to stdout.
 * Never writes any file.
 *
 * @param args - CLI arguments; requires `--base <branch>`, accepts `--version-path <p>`.
 * @param cwd - directory the paths are resolved against and git is run from.
 */
function cmdClassify(args: string[], cwd: string): void {
  const base = argVal(args, "--base") ?? "";
  if (!base) fail("classify requires --base <branch>", 2);

  const explicit = argVal(args, "--version-path");
  const versionPath = resolveVersionPath(cwd, explicit);
  // The base side must read the SAME file as the working-tree side. An explicit
  // --version-path is passed through unchanged; otherwise derive the path from
  // the resolved location so a `.gstack/version-path` pin is honored on both
  // sides (falling back to "VERSION" would compare two different files).
  // `git show <ref>:<path>` expects forward slashes.
  const versionRel = explicit ?? relative(cwd, versionPath).split(sep).join("/");

  const current = readVersionFile(versionPath);
  const baseV = baseVersion(cwd, base, versionRel);
  const pkg = readPkgVersion(cwd);
  const state = classifyState(current, baseV, pkg.exists, pkg.version);
  process.stdout.write(
    JSON.stringify({
      state,
      baseVersion: baseV,
      currentVersion: current,
      pkgVersion: pkg.version || null,
      pkgExists: pkg.exists,
    }) + "\n",
  );
  // DRIFT_UNEXPECTED is a real, decidable state — the caller stops on it, but the
  // classification itself succeeded, so exit 0. (Bad args / unresolvable base are
  // the only exit-2 cases.)
}
|
||||
|
||||
/**
 * `write` subcommand: persist a new version to the VERSION file and, when a
 * package.json exists in `cwd`, to package.json as well.
 *
 * Flags read from `args`:
 *  - `--version <X.Y.Z.W>`    required; must match `VERSION_RE`, otherwise `fail(..., 2)`.
 *  - `--version-path <path>`  optional; forwarded to `resolveVersionPath`.
 *
 * VERSION is written first. If the package.json update then throws, `fail` is
 * called with code 3 and a message explaining that package.json is stale.
 *
 * Output (stdout): `{ wrote, packageJson }` followed by a newline.
 *
 * @param args Arguments following the subcommand name.
 * @param cwd  Working directory containing VERSION / package.json.
 */
function cmdWrite(args: string[], cwd: string): void {
  const version = argVal(args, "--version");
  if (!version) fail("write requires --version <X.Y.Z.W>", 2);

  const wellFormed = VERSION_RE.test(version!);
  if (!wellFormed) {
    fail(`NEW_VERSION (${version}) does not match MAJOR.MINOR.PATCH.MICRO. Aborting.`, 2);
  }

  const versionPath = resolveVersionPath(cwd, argVal(args, "--version-path"));
  writeFileSync(versionPath, version + "\n");

  const pkgJsonPath = join(cwd, "package.json");
  if (existsSync(pkgJsonPath)) {
    try {
      writePkgVersion(cwd, version!);
    } catch {
      // VERSION is already on disk at this point, so report the half-written
      // state explicitly rather than a generic failure.
      fail(
        "failed to update package.json. VERSION was written but package.json is now stale. " +
          "Re-run — classify will report DRIFT_STALE_PKG and repair will sync it.",
        3,
      );
    }
  }

  const summary = { wrote: version, packageJson: existsSync(pkgJsonPath) };
  process.stdout.write(JSON.stringify(summary) + "\n");
}
|
||||
|
||||
/**
 * `repair` subcommand: copy the version in the VERSION file into package.json.
 *
 * Steps, in order:
 *  1. Read VERSION (path from optional `--version-path`).
 *  2. Reject contents that do not match `VERSION_RE` → `fail(..., 2)`.
 *  3. Reject a missing package.json → `fail(..., 2)`.
 *  4. Write the version into package.json; a thrown error → `fail(..., 3)`.
 *
 * Output (stdout): `{ repaired }` followed by a newline.
 *
 * @param args Arguments following the subcommand name.
 * @param cwd  Working directory containing VERSION / package.json.
 */
function cmdRepair(args: string[], cwd: string): void {
  const versionPath = resolveVersionPath(cwd, argVal(args, "--version-path"));
  const current = readVersionFile(versionPath);

  const wellFormed = VERSION_RE.test(current);
  if (!wellFormed) {
    fail(
      `VERSION file contents (${current}) do not match MAJOR.MINOR.PATCH.MICRO. ` +
        "Refusing to propagate invalid semver into package.json. Fix VERSION, then re-run /ship.",
      2,
    );
  }

  const pkgJsonPresent = existsSync(join(cwd, "package.json"));
  if (!pkgJsonPresent) {
    fail("repair: no package.json to sync.", 2);
  }

  try {
    writePkgVersion(cwd, current);
  } catch {
    fail("drift repair failed — could not update package.json.", 3);
  }

  process.stdout.write(JSON.stringify({ repaired: current }) + "\n");
}
|
||||
|
||||
// Exported for unit tests (pure logic, no I/O).
|
||||
export { classifyState, VERSION_RE, type State };
|
||||
|
||||
// CLI entry point: only dispatch when this module is executed directly, so
// importing it (e.g. from unit tests) has no side effects.
if (import.meta.main) {
  const argv = process.argv.slice(2);
  const sub = argv[0];
  const rest = argv.slice(1);
  const cwd = process.cwd();

  // Strict-equality dispatch on the subcommand name; anything else (including
  // no subcommand at all) prints usage via fail with code 2.
  if (sub === "classify") {
    cmdClassify(rest, cwd);
  } else if (sub === "write") {
    cmdWrite(rest, cwd);
  } else if (sub === "repair") {
    cmdRepair(rest, cwd);
  } else {
    fail("usage: gstack-version-bump <classify|write|repair> [flags]", 2);
  }
}
|
||||
+97
-16
@@ -2,13 +2,7 @@
|
||||
name: browse
|
||||
preamble-tier: 1
|
||||
version: 1.1.0
|
||||
description: |
|
||||
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
|
||||
elements, verify page state, diff before/after actions, take annotated screenshots, check
|
||||
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
|
||||
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
|
||||
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
|
||||
site", "take a screenshot", or "dogfood this". (gstack)
|
||||
description: Fast headless browser for QA testing and site dogfooding. (gstack)
|
||||
triggers:
|
||||
- browse a page
|
||||
- headless browser
|
||||
@@ -22,6 +16,16 @@ allowed-tools:
|
||||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Navigate any URL, interact with
|
||||
elements, verify page state, diff before/after actions, take annotated screenshots, check
|
||||
responsive layouts, test forms and uploads, handle dialogs, and assert element states.
|
||||
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
|
||||
user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
|
||||
site", "take a screenshot", or "dogfood this".
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
@@ -42,6 +46,16 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX"
|
||||
source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true
|
||||
REPO_MODE=${REPO_MODE:-unknown}
|
||||
echo "REPO_MODE: $REPO_MODE"
|
||||
_SESSION_KIND=$(~/.claude/skills/gstack/bin/gstack-session-kind 2>/dev/null || echo "interactive")
|
||||
case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac
|
||||
echo "SESSION_KIND: $_SESSION_KIND"
|
||||
# Conductor host: AskUserQuestion is unreliable here (native disabled, MCP
|
||||
# variant flaky), so skills render decisions as prose instead of calling the
|
||||
# tool. Gated on !headless so an eval/CI run INSIDE Conductor (GSTACK_HEADLESS)
|
||||
# still BLOCKs rather than rendering prose to nobody.
|
||||
if [ "$_SESSION_KIND" != "headless" ] && { [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ]; }; then
|
||||
echo "CONDUCTOR_SESSION: true"
|
||||
fi
|
||||
_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no")
|
||||
echo "LAKE_INTRO: $_LAKE_SEEN"
|
||||
_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true)
|
||||
@@ -57,7 +71,7 @@ _QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning
|
||||
echo "QUESTION_TUNING: $_QUESTION_TUNING"
|
||||
mkdir -p ~/.gstack/analytics
|
||||
if [ "$_TEL" != "off" ]; then
|
||||
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(_repo=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null | tr -cd 'a-zA-Z0-9._-'); echo "${_repo:-unknown}")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
|
||||
fi
|
||||
for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
|
||||
if [ -f "$_PF" ]; then
|
||||
@@ -99,6 +113,19 @@ _CHECKPOINT_MODE=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_mode
|
||||
_CHECKPOINT_PUSH=$(~/.claude/skills/gstack/bin/gstack-config get checkpoint_push 2>/dev/null || echo "false")
|
||||
echo "CHECKPOINT_MODE: $_CHECKPOINT_MODE"
|
||||
echo "CHECKPOINT_PUSH: $_CHECKPOINT_PUSH"
|
||||
# Plan-mode hint for skills like /spec that branch behavior on plan-mode state.
|
||||
# Claude Code exposes plan mode via system reminders; we detect best-effort
|
||||
# from CLAUDE_PLAN_FILE (set by the harness when plan mode is active) and
|
||||
# fall back to "inactive". Codex hosts and Claude execution mode both end up
|
||||
# inactive, which is the safe default (defaults to file+execute pipeline).
|
||||
if [ -n "${CLAUDE_PLAN_FILE:-}${GSTACK_PLAN_MODE_FORCE:-}" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
elif [ "${GSTACK_PLAN_MODE:-}" = "active" ]; then
|
||||
export GSTACK_PLAN_MODE="active"
|
||||
else
|
||||
export GSTACK_PLAN_MODE="inactive"
|
||||
fi
|
||||
echo "GSTACK_PLAN_MODE: $GSTACK_PLAN_MODE"
|
||||
[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true
|
||||
```
|
||||
|
||||
@@ -108,7 +135,7 @@ In plan mode, allowed because they inform the plan: `$B`, `$D`, `codex exec`/`co
|
||||
|
||||
## Skill Invocation During Plan Mode
|
||||
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If no variant is callable, the skill is BLOCKED — stop and report `BLOCKED — AskUserQuestion unavailable` per the AskUserQuestion Format rule. At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
If the user invokes a skill in plan mode, the skill takes precedence over generic plan mode behavior. **Treat the skill file as executable instructions, not reference.** Follow it step by step starting from Step 0; the first AskUserQuestion is the workflow entering plan mode, not a violation of it. AskUserQuestion (any variant — `mcp__*__AskUserQuestion` or native; see "AskUserQuestion Format → Tool resolution") satisfies plan mode's end-of-turn requirement. If AskUserQuestion is unavailable or a call fails, follow the AskUserQuestion Format failure fallback: `headless` → BLOCKED; `interactive` → the prose fallback (also satisfies end-of-turn). At a STOP point, stop immediately. Do not continue the workflow or call ExitPlanMode there. Commands marked "PLAN MODE EXCEPTION — ALWAYS RUN" execute. Call ExitPlanMode only after the skill workflow completes, or if the user tells you to cancel the skill or leave plan mode.
|
||||
|
||||
If `PROACTIVE` is `"false"`, do not auto-invoke or proactively suggest skills. If a skill seems useful, ask: "I think /skillname might help here — want me to run it?"
|
||||
|
||||
@@ -143,7 +170,7 @@ touch ~/.gstack/.writing-style-prompted
|
||||
|
||||
Skip if `WRITING_STYLE_PENDING` is `no`.
|
||||
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Lake** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
If `LAKE_INTRO` is `no`: say "gstack follows the **Boil the Ocean** principle — do the complete thing when AI makes marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" Offer to open:
|
||||
|
||||
```bash
|
||||
open https://garryslist.org/posts/boil-the-ocean
|
||||
@@ -154,7 +181,7 @@ Only run `open` if yes. Always run `touch`.
|
||||
|
||||
If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: ask telemetry once via AskUserQuestion:
|
||||
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code, file paths, or repo names.
|
||||
> Help gstack get better. Share usage data only: skill, duration, crashes, stable device ID. No code or file paths. Your repo name is recorded locally only and stripped before any upload.
|
||||
|
||||
Options:
|
||||
- A) Help gstack get better! (recommended)
|
||||
@@ -230,6 +257,7 @@ Key routing rules:
|
||||
- Ship/deploy/PR → invoke /ship or /land-and-deploy
|
||||
- Save progress → invoke /context-save
|
||||
- Resume context → invoke /context-restore
|
||||
- Author a backlog-ready spec/issue → invoke /spec
|
||||
```
|
||||
|
||||
Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"`
|
||||
@@ -474,9 +502,7 @@ Replace `SKILL_NAME`, `OUTCOME`, and `USED_BROWSE` before running.
|
||||
|
||||
## Plan Status Footer
|
||||
|
||||
In plan mode before ExitPlanMode: if the plan file lacks `## GSTACK REVIEW REPORT`, run `~/.claude/skills/gstack/bin/gstack-review-read` and append the standard runs/status/findings table. With `NO_REVIEWS` or empty, append a 5-row placeholder with verdict "NO REVIEWS YET — run `/autoplan`". If a richer report exists, skip.
|
||||
|
||||
PLAN MODE EXCEPTION — always allowed (it's the plan file).
|
||||
Skills that run plan reviews (`/plan-*-review`, `/codex review`) include the EXIT PLAN MODE GATE blocking checklist at the end of the skill, which verifies the plan file ends with `## GSTACK REVIEW REPORT` before ExitPlanMode is called. Skills that don't run plan reviews (operational skills like `/ship`, `/qa`, `/review`) typically don't operate in plan mode and have no review report to verify; this footer is a no-op for them. Writing the plan file is the one edit allowed in plan mode.
|
||||
|
||||
# browse: QA Testing & Dogfooding
|
||||
|
||||
@@ -625,6 +651,51 @@ $B screenshot /tmp/out.png --selector .tweet-card
|
||||
```
|
||||
Scale must be 1-3 (gstack policy cap). Changing `--scale` recreates the browser context; refs from `snapshot` are invalidated (rerun `snapshot`), but `load-html` content is replayed automatically. Not supported in headed mode.
|
||||
|
||||
### 14. Offline render mode (rasterize your own HTML/JSON, zero network)
|
||||
|
||||
This is the blessed path for "I just want to turn my own local HTML or JSON into a
|
||||
PNG/PDF/bytes on disk" — Excalidraw diagrams, tweet/quote cards, og-images,
|
||||
report rasterization. It is **plain headless, shared Chromium, no proxy, no Xvfb,
|
||||
no anti-bot stealth**. Default `$B` is already exactly this; you do not pass
|
||||
`--headed` or `--proxy`. One Chromium per box, shared by every skill — **do not
|
||||
`npm i puppeteer` and ship a second browser** (see the note under the cheatsheet).
|
||||
|
||||
Two output shapes, pick by what you have:
|
||||
|
||||
**A) Visual output → `screenshot --selector` (preferred).** If the thing you want
|
||||
is a picture of something on the page, screenshot it. The PNG is written from the
|
||||
browser process straight to disk — the image bytes never cross the CDP wire.
|
||||
|
||||
```bash
|
||||
echo '<div id="card" style="width:400px;height:200px;background:#1da1f2;color:#fff;padding:20px">hi</div>' > /tmp/card.html
|
||||
$B viewport 480x600 --scale 2
|
||||
$B load-html /tmp/card.html
|
||||
$B screenshot /tmp/card.png --selector '#card' # disk path — no megabytes over CDP
|
||||
```
|
||||
(Use the disk path, NOT `screenshot --base64` — base64 serializes the bytes back
|
||||
through the command channel, which is the cost you're trying to avoid.)
|
||||
|
||||
**B) Bytes a function returns → `js --out` / `eval --out`.** When a library hands
|
||||
you the result as a return value (a base64 data URL, a blob, computed JSON) rather
|
||||
than painting a stable element — e.g. Excalidraw's export function returns a PNG
|
||||
data URL — write the evaluate result straight to disk. `--out` decodes a
|
||||
`data:*;base64,...` result to raw bytes automatically (pass `--raw` to write the
|
||||
literal string). The payload is written by the daemon and never serialized back
|
||||
out to the CLI/stdout.
|
||||
|
||||
```bash
|
||||
# Load the render bundle, signal readiness, then render-to-file.
|
||||
$B load-html /tmp/excalidraw-export.html # bundle sets window.__render + a #done flag
|
||||
$B wait '#done' # deterministic ready handshake
|
||||
$B js "window.__render(SCENE_JSON)" --out /tmp/diagram.png # data URL → decoded PNG on disk
|
||||
```
|
||||
|
||||
`--out` is a WRITE: it needs the `write` scope and is never allowed over the
|
||||
pair-agent tunnel (a remote agent can't write to your disk). Parent directories
|
||||
are created; malformed base64 errors instead of writing corrupt bytes. Pick A when
|
||||
you can (no CDP transfer at all); reach for B only when the bytes come back as a
|
||||
return value.
|
||||
|
||||
## Puppeteer → browse cheatsheet
|
||||
|
||||
Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
|
||||
@@ -638,6 +709,8 @@ Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
|
||||
| `await (await page.$('.x')).screenshot({path})` | `$B screenshot <path> --selector .x` |
|
||||
| `await page.screenshot({fullPage: true, path})` | `$B screenshot <path>` (full page default) |
|
||||
| `await page.screenshot({clip: {x, y, w, h}, path})` | `$B screenshot <path> --clip x,y,w,h` |
|
||||
| `const r = await page.evaluate(fn)` | `$B js "<expr>"` (result to stdout) |
|
||||
| `fs.writeFileSync(out, Buffer.from(dataUrl.split(',')[1],'base64'))` | `$B js "<expr>" --out <file>` (data URL auto-decoded) |
|
||||
|
||||
Worked example (the tweet-renderer flow — Puppeteer → browse):
|
||||
|
||||
@@ -652,6 +725,13 @@ $B screenshot /tmp/out.png --selector .tweet-card
|
||||
|
||||
Aliases: typing `setcontent` or `set-content` routes to `load-html` automatically. Typing a typo (`load-htm`) returns `Did you mean 'load-html'?`.
|
||||
|
||||
**Don't bundle your own puppeteer/Chromium.** `browse` is the one shared Chromium
|
||||
per box. Skills that need to rasterize local HTML/JSON (diagrams, cards, og-images)
|
||||
should route through `browse` — `screenshot --selector` for visual output,
|
||||
`load-html` + `js --out` for bytes a function returns — instead of
|
||||
`npm i puppeteer` and downloading a second Chromium that drifts out of version sync.
|
||||
One install to pin, one daemon's lifecycle to manage.
|
||||
|
||||
## User Handoff
|
||||
|
||||
When you hit something you can't handle in headless mode (CAPTCHA, complex auth, multi-factor
|
||||
@@ -856,10 +936,10 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero
|
||||
| `cookies` | All cookies as JSON |
|
||||
| `css <sel> <prop>` | Computed CSS value |
|
||||
| `dialog [--clear]` | Dialog messages |
|
||||
| `eval <file>` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. |
|
||||
| `eval <file> [--out <file>] [--raw]` | Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
|
||||
| `inspect [selector] [--all] [--history]` | Deep CSS inspection via CDP — full rule cascade, box model, computed styles |
|
||||
| `is <prop> <sel|@ref>` | State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected. |
|
||||
| `js <expr>` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. |
|
||||
| `js <expr> [--out <file>] [--raw]` | Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel). |
|
||||
| `network [--clear]` | Network requests |
|
||||
| `perf` | Page load timings |
|
||||
| `storage | storage set <key> <value>` | Read both localStorage and sessionStorage as JSON. With "set <key> <value>", write to localStorage only (sessionStorage is read-only via this command — set it with `js sessionStorage.setItem(...)`). |
|
||||
@@ -905,6 +985,7 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero
|
||||
| `disconnect` | Disconnect headed browser, return to headless mode |
|
||||
| `focus [@ref]` | Bring headed browser window to foreground (macOS) |
|
||||
| `handoff [message]` | Open visible Chrome at current page for user takeover |
|
||||
| `memory [--json]` | Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json. |
|
||||
| `restart` | Restart server |
|
||||
| `resume` | Re-snapshot after user takeover, return control to AI |
|
||||
| `state save|load <name>` | Save/load browser state (cookies + URLs) |
|
||||
|
||||
@@ -135,6 +135,51 @@ $B screenshot /tmp/out.png --selector .tweet-card
|
||||
```
|
||||
Scale must be 1-3 (gstack policy cap). Changing `--scale` recreates the browser context; refs from `snapshot` are invalidated (rerun `snapshot`), but `load-html` content is replayed automatically. Not supported in headed mode.
|
||||
|
||||
### 14. Offline render mode (rasterize your own HTML/JSON, zero network)
|
||||
|
||||
This is the blessed path for "I just want to turn my own local HTML or JSON into a
|
||||
PNG/PDF/bytes on disk" — Excalidraw diagrams, tweet/quote cards, og-images,
|
||||
report rasterization. It is **plain headless, shared Chromium, no proxy, no Xvfb,
|
||||
no anti-bot stealth**. Default `$B` is already exactly this; you do not pass
|
||||
`--headed` or `--proxy`. One Chromium per box, shared by every skill — **do not
|
||||
`npm i puppeteer` and ship a second browser** (see the note under the cheatsheet).
|
||||
|
||||
Two output shapes, pick by what you have:
|
||||
|
||||
**A) Visual output → `screenshot --selector` (preferred).** If the thing you want
|
||||
is a picture of something on the page, screenshot it. The PNG is written from the
|
||||
browser process straight to disk — the image bytes never cross the CDP wire.
|
||||
|
||||
```bash
|
||||
echo '<div id="card" style="width:400px;height:200px;background:#1da1f2;color:#fff;padding:20px">hi</div>' > /tmp/card.html
|
||||
$B viewport 480x600 --scale 2
|
||||
$B load-html /tmp/card.html
|
||||
$B screenshot /tmp/card.png --selector '#card' # disk path — no megabytes over CDP
|
||||
```
|
||||
(Use the disk path, NOT `screenshot --base64` — base64 serializes the bytes back
|
||||
through the command channel, which is the cost you're trying to avoid.)
|
||||
|
||||
**B) Bytes a function returns → `js --out` / `eval --out`.** When a library hands
|
||||
you the result as a return value (a base64 data URL, a blob, computed JSON) rather
|
||||
than painting a stable element — e.g. Excalidraw's export function returns a PNG
|
||||
data URL — write the evaluate result straight to disk. `--out` decodes a
|
||||
`data:*;base64,...` result to raw bytes automatically (pass `--raw` to write the
|
||||
literal string). The payload is written by the daemon and never serialized back
|
||||
out to the CLI/stdout.
|
||||
|
||||
```bash
|
||||
# Load the render bundle, signal readiness, then render-to-file.
|
||||
$B load-html /tmp/excalidraw-export.html # bundle sets window.__render + a #done flag
|
||||
$B wait '#done' # deterministic ready handshake
|
||||
$B js "window.__render(SCENE_JSON)" --out /tmp/diagram.png # data URL → decoded PNG on disk
|
||||
```
|
||||
|
||||
`--out` is a WRITE: it needs the `write` scope and is never allowed over the
|
||||
pair-agent tunnel (a remote agent can't write to your disk). Parent directories
|
||||
are created; malformed base64 errors instead of writing corrupt bytes. Pick A when
|
||||
you can (no CDP transfer at all); reach for B only when the bytes come back as a
|
||||
return value.
|
||||
|
||||
## Puppeteer → browse cheatsheet
|
||||
|
||||
Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
|
||||
@@ -148,6 +193,8 @@ Migrating from Puppeteer? Here's the 1:1 mapping for the core workflow:
|
||||
| `await (await page.$('.x')).screenshot({path})` | `$B screenshot <path> --selector .x` |
|
||||
| `await page.screenshot({fullPage: true, path})` | `$B screenshot <path>` (full page default) |
|
||||
| `await page.screenshot({clip: {x, y, w, h}, path})` | `$B screenshot <path> --clip x,y,w,h` |
|
||||
| `const r = await page.evaluate(fn)` | `$B js "<expr>"` (result to stdout) |
|
||||
| `fs.writeFileSync(out, Buffer.from(dataUrl.split(',')[1],'base64'))` | `$B js "<expr>" --out <file>` (data URL auto-decoded) |
|
||||
|
||||
Worked example (the tweet-renderer flow — Puppeteer → browse):
|
||||
|
||||
@@ -162,6 +209,13 @@ $B screenshot /tmp/out.png --selector .tweet-card
|
||||
|
||||
Aliases: typing `setcontent` or `set-content` routes to `load-html` automatically. Typing a typo (`load-htm`) returns `Did you mean 'load-html'?`.
|
||||
|
||||
**Don't bundle your own puppeteer/Chromium.** `browse` is the one shared Chromium
|
||||
per box. Skills that need to rasterize local HTML/JSON (diagrams, cards, og-images)
|
||||
should route through `browse` — `screenshot --selector` for visual output,
|
||||
`load-html` + `js --out` for bytes a function returns — instead of
|
||||
`npm i puppeteer` and downloading a second Chromium that drifts out of version sync.
|
||||
One install to pin, one daemon's lifecycle to manage.
|
||||
|
||||
## User Handoff
|
||||
|
||||
When you hit something you can't handle in headless mode (CAPTCHA, complex auth, multi-factor
|
||||
|
||||
+211
-21
@@ -18,9 +18,12 @@
|
||||
import { chromium, type Browser, type BrowserContext, type BrowserContextOptions, type Page, type Locator, type Cookie } from 'playwright';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
import { addConsoleEntry, addNetworkEntry, addDialogEntry, networkBuffer, type DialogEntry } from './buffers';
|
||||
import { emitActivity } from './activity';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import { TabSession, type RefEntry } from './tab-session';
|
||||
import { resolveChromiumProfile, cleanSingletonLocks } from './config';
|
||||
import { withCdpSession } from './cdp-bridge';
|
||||
import type { MemorySnapshot, MemoryStructureStats, MemoryTabSnapshot, MemoryProcess } from './memory-snapshot';
|
||||
|
||||
/**
|
||||
* Detect whether GSTACK_CHROMIUM_PATH points at a custom Chromium build that
|
||||
@@ -59,6 +62,13 @@ export function isCustomChromium(): boolean {
|
||||
*/
|
||||
export function shouldEnableChromiumSandbox(): boolean {
  const { platform, env } = process;

  // Never enable the sandbox on Windows.
  if (platform === 'win32') return false;

  // Explicit user opt-out (GSTACK_CHROMIUM_NO_SANDBOX=1) for Ubuntu/AppArmor
  // and similar hosts where unprivileged Chromium sandboxing is blocked even
  // for normal users: the sandbox needs unprivileged user namespaces that the
  // host policy denies, so /qa hangs without --no-sandbox. This forces the
  // sandbox off without changing the default for everyone else. See #1562.
  if (env.GSTACK_CHROMIUM_NO_SANDBOX === '1') return false;

  // Any non-empty CI or CONTAINER value disables the sandbox.
  if (env.CI || env.CONTAINER) return false;

  // getuid is not available on every platform, hence the typeof guard.
  const runningAsRoot = typeof process.getuid === 'function' && process.getuid() === 0;
  return !runningAsRoot;
}
|
||||
@@ -187,11 +197,60 @@ export class BrowserManager {
|
||||
private connectionMode: 'launched' | 'headed' = 'launched';
|
||||
private intentionalDisconnect = false;
|
||||
|
||||
// ─── Tab Count Guardrail (D5 + Codex single-tab flag) ───────
|
||||
// Idempotent threshold trackers: each guardrail fires exactly once per
|
||||
// upward crossing of its threshold and re-arms when the tab count drops
|
||||
// back below. Pre-guardrail, nothing tracked tab count growth and a
|
||||
// user could accumulate hundreds of tabs (each holding 50–300 MB of
|
||||
// Chromium-side RSS) without warning until the OS OOM-killer fired.
|
||||
// The toast UX lives in the sidebar (extension/sidepanel.js); the
|
||||
// server-side responsibility is the audit-trail activity entry that
|
||||
// appears in the activity feed even when the sidebar is closed.
|
||||
private static readonly TAB_GUARDRAIL_SOFT = 50;
|
||||
private static readonly TAB_GUARDRAIL_HARD = 200;
|
||||
private tabGuardrailSoftHit = false;
|
||||
private tabGuardrailHardHit = false;
|
||||
|
||||
/**
 * Compare the number of tracked pages against the soft and hard tab
 * thresholds and, for each threshold that is newly reached, log a message and
 * emit a `tab-guardrail` activity entry.
 *
 * Each threshold is latched by its `tabGuardrail*Hit` flag, so repeated calls
 * while the count stays at or above the threshold emit nothing further. Both
 * thresholds are evaluated independently on every call.
 */
private checkTabGuardrails(): void {
  const tabCount = this.pages.size;
  const softLimit = BrowserManager.TAB_GUARDRAIL_SOFT;
  const hardLimit = BrowserManager.TAB_GUARDRAIL_HARD;

  const softNewlyReached = !this.tabGuardrailSoftHit && tabCount >= softLimit;
  if (softNewlyReached) {
    this.tabGuardrailSoftHit = true;
    const warning = `Tab count crossed ${softLimit} (now ${tabCount}). Consider closing unused tabs — each Chromium tab holds 50–300 MB.`;
    console.warn(`[browse] ${warning}`);
    emitActivity({ type: 'error', command: 'tab-guardrail', error: warning, tabs: tabCount });
  }

  const hardNewlyReached = !this.tabGuardrailHardHit && tabCount >= hardLimit;
  if (hardNewlyReached) {
    this.tabGuardrailHardHit = true;
    const alert = `Tab count crossed ${hardLimit} (now ${tabCount}). OOM risk imminent. Open the sidebar to see top RAM consumers.`;
    console.error(`[browse] ${alert}`);
    emitActivity({ type: 'error', command: 'tab-guardrail', error: alert, tabs: tabCount });
  }
}
|
||||
|
||||
/**
 * Re-arm the tab guardrails: clear each latch once the tracked page count has
 * dropped below that latch's threshold, so the next upward crossing is
 * reported again. Meant to run from the page 'close' handler.
 */
private recheckTabGuardrailsOnClose(): void {
  const tabCount = this.pages.size;

  // Clearing an already-clear latch is a no-op, so only the count matters.
  if (tabCount < BrowserManager.TAB_GUARDRAIL_SOFT) {
    this.tabGuardrailSoftHit = false;
  }
  if (tabCount < BrowserManager.TAB_GUARDRAIL_HARD) {
    this.tabGuardrailHardHit = false;
  }
}
|
||||
|
||||
// Called when the headed browser disconnects without intentional teardown
|
||||
// (user closed the window). Wired up by server.ts to run full cleanup
|
||||
// (sidebar-agent, state file, profile locks) before exiting with code 2.
|
||||
// Returns void or a Promise; rejections are caught and fall back to exit(2).
|
||||
public onDisconnect: (() => void | Promise<void>) | null = null;
|
||||
// `exitCode` is the resolved process exit code from the disconnect cause:
|
||||
// 0 on clean user-initiated quit (e.g., Cmd+Q on headed Chromium), 2 on
|
||||
// crash/signal-kill. Callers (server.ts) forward it to their shutdown
|
||||
// pipeline so process supervisors (gbrowser's gbd) read the right signal.
|
||||
public onDisconnect: ((exitCode?: number) => void | Promise<void>) | null = null;
|
||||
|
||||
getConnectionMode(): 'launched' | 'headed' { return this.connectionMode; }
|
||||
|
||||
@@ -296,12 +355,16 @@ export class BrowserManager {
|
||||
}
|
||||
|
||||
if (extensionsDir) {
|
||||
launchArgs.push(
|
||||
`--disable-extensions-except=${extensionsDir}`,
|
||||
`--load-extension=${extensionsDir}`,
|
||||
'--window-position=-9999,-9999',
|
||||
'--window-size=1,1',
|
||||
);
|
||||
// Skip --load-extension when running against a custom Chromium build that
|
||||
// already bakes the extension in (e.g., GBrowser / GStack Browser.app).
|
||||
// Loading it twice causes a ServiceWorkerState::SetWorkerId DCHECK crash.
|
||||
if (!isCustomChromium()) {
|
||||
launchArgs.push(
|
||||
`--disable-extensions-except=${extensionsDir}`,
|
||||
`--load-extension=${extensionsDir}`,
|
||||
);
|
||||
}
|
||||
launchArgs.push('--window-position=-9999,-9999', '--window-size=1,1');
|
||||
useHeadless = false; // extensions require headed mode; off-screen window simulates headless
|
||||
console.log(`[browse] Extensions loaded from: ${extensionsDir}`);
|
||||
}
|
||||
@@ -621,6 +684,7 @@ export class BrowserManager {
|
||||
// Inject indicator on the new tab
|
||||
page.evaluate(indicatorScript).catch(() => {});
|
||||
console.log(`[browse] New tab detected (id=${id}, total=${this.pages.size})`);
|
||||
this.checkTabGuardrails();
|
||||
});
|
||||
|
||||
// Persistent context opens a default page — adopt it instead of creating a new one
|
||||
@@ -666,7 +730,7 @@ export class BrowserManager {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const result = this.onDisconnect();
|
||||
const result = this.onDisconnect(exitCode);
|
||||
if (result && typeof (result as Promise<void>).catch === 'function') {
|
||||
(result as Promise<void>).catch((err) => {
|
||||
console.error('[browse] onDisconnect rejected:', err);
|
||||
@@ -1005,6 +1069,116 @@ export class BrowserManager {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the diagnostic payload behind `$B memory` and the /memory endpoint.
 *
 * Contents of the returned snapshot:
 *   - `bunServer`  — `process.memoryUsage()` figures (rss, heapUsed,
 *                    heapTotal, external) for this process.
 *   - `tabs`       — one row per page in `this.pages`, filled from CDP
 *                    `Performance.getMetrics` (JS heap used/total, documents,
 *                    nodes, event listeners). A page whose metrics cannot be
 *                    read is left out; it never fails the whole snapshot.
 *   - `processes`  — rows from CDP `SystemInfo.getProcessInfo` (id, type,
 *                    cpuTime), or `null` when no browser-wide CDP session
 *                    could be used. RSS is not part of that payload; a
 *                    `notes[]` entry says so.
 *   - `structures` — passed through untouched from the caller, so this
 *                    module needs no dependency on the buffer owners.
 *   - `capturedAt` / `notes` — timestamp and human-readable caveats.
 */
async getMemorySnapshot(structures: MemoryStructureStats): Promise<MemorySnapshot> {
  const usage = process.memoryUsage();
  const notes: string[] = [];
  const tabs: MemoryTabSnapshot[] = [];

  // ── Per-tab metrics: only pages we already track ────────────────
  for (const [id, page] of this.pages) {
    try {
      let url = '';
      try {
        url = page.url();
      } catch {
        // Keep the empty-string placeholder.
      }
      const title = await page.title().catch(() => '');

      const samples = await withCdpSession(page, async (cdp) => {
        await cdp.send('Performance.enable').catch(() => undefined);
        const reply = (await cdp.send('Performance.getMetrics')) as {
          metrics?: Array<{ name: string; value: number }>;
        };
        return reply.metrics ?? [];
      });

      // Index by metric name; a later duplicate overrides an earlier one.
      const byName = new Map<string, number>();
      for (const sample of samples) byName.set(sample.name, sample.value);
      const read = (name: string): number => byName.get(name) ?? 0;

      tabs.push({
        id,
        url,
        title,
        jsHeapUsed: read('JSHeapUsedSize'),
        jsHeapTotal: read('JSHeapTotalSize'),
        documents: read('Documents'),
        nodes: read('Nodes'),
        listeners: read('JSEventListeners'),
      });
    } catch {
      // Target went away or CDP failed while sampling — omit this tab.
    }
  }

  // ── Chromium process list ──────────────────────────────────────
  // Prefer the `browser` field; otherwise ask the context for its browser.
  let processes: MemoryProcess[] | null = null;
  const browser: Browser | null = this.browser ?? (this.context ? this.context.browser() : null);

  if (!browser) {
    notes.push('Browser handle unavailable (server connection mode); per-process info skipped.');
  } else {
    try {
      // `newBrowserCDPSession` is looked up defensively at runtime; the
      // widened local type keeps the lookup free of @ts-expect-error.
      type BrowserCdpSession = {
        send: (method: string, params?: unknown) => Promise<unknown>;
        detach: () => Promise<void>;
      };
      const openSession = (
        browser as Browser & { newBrowserCDPSession?: () => Promise<BrowserCdpSession> }
      ).newBrowserCDPSession;

      if (typeof openSession !== 'function') {
        notes.push('Playwright build does not expose newBrowserCDPSession; per-process info skipped.');
      } else {
        const cdp = await openSession.call(browser);
        try {
          const reply = (await cdp.send('SystemInfo.getProcessInfo')) as {
            processInfo?: Array<{ id: number; type: string; cpuTime: number }>;
          };
          processes = (reply.processInfo ?? []).map(({ id, type, cpuTime }) => ({ id, type, cpuTime }));
          notes.push(
            'Per-Chromium-process RSS not collected — SystemInfo.getProcessInfo exposes PID+type+CPU only. ' +
              'See follow-up TODO "native/GPU memory breakdown" for the deferred fix.',
          );
        } finally {
          // Always release the browser-wide session, even if the query failed.
          await cdp.detach().catch(() => undefined);
        }
      }
    } catch (err: any) {
      notes.push(`CDP browser session unavailable: ${err?.message ?? String(err)}`);
    }
  }

  return {
    bunServer: {
      rss: usage.rss,
      heapUsed: usage.heapUsed,
      heapTotal: usage.heapTotal,
      external: usage.external,
    },
    tabs,
    processes,
    structures,
    capturedAt: Date.now(),
    notes,
  };
}
|
||||
|
||||
// ─── Ref Map (delegates to active session) ──────────────────
|
||||
setRefMap(refs: Map<string, RefEntry>) {
|
||||
this.getActiveSession().setRefMap(refs);
|
||||
@@ -1533,6 +1707,7 @@ export class BrowserManager {
|
||||
break;
|
||||
}
|
||||
}
|
||||
this.recheckTabGuardrailsOnClose();
|
||||
});
|
||||
|
||||
// Clear ref map on navigation — refs point to stale elements after page change
|
||||
@@ -1601,23 +1776,38 @@ export class BrowserManager {
|
||||
}
|
||||
});
|
||||
|
||||
// Capture response sizes via response finished
|
||||
// Capture response sizes via requestfinished — but DO NOT call
|
||||
// response.body() here. Pre-fix, this listener materialized every
|
||||
// response body across CDP just to read .length: multi-GB/hour of
|
||||
// Buffer churn on long-lived headed Chromium with media-heavy
|
||||
// pages, the primary Bun-side accelerant on the gbrowser-OOM
|
||||
// investigation. req.sizes() pulls from the Network.loadingFinished
|
||||
// event Chromium already emits — accurate for chunked transfer,
|
||||
// gzip-compressed responses, and streaming media, all the cases
|
||||
// where the previous Content-Length-header approach would have
|
||||
// missed the size.
|
||||
//
|
||||
// The "single context-level CDP listener" architecture (D10's
|
||||
// stretch goal — would reduce per-page listener count from N to 1
|
||||
// via Target.setAutoAttach) is deferred. TODOS.md tracks it.
|
||||
page.on('requestfinished', async (req) => {
|
||||
try {
|
||||
const res = await req.response();
|
||||
if (res) {
|
||||
const url = req.url();
|
||||
const body = await res.body().catch(() => null);
|
||||
const size = body ? body.length : 0;
|
||||
for (let i = networkBuffer.length - 1; i >= 0; i--) {
|
||||
const entry = networkBuffer.get(i);
|
||||
if (entry && entry.url === url && !entry.size) {
|
||||
networkBuffer.set(i, { ...entry, size });
|
||||
break;
|
||||
}
|
||||
const sizes = await req.sizes().catch(() => null);
|
||||
if (!sizes) return;
|
||||
const url = req.url();
|
||||
const size = sizes.responseBodySize ?? 0;
|
||||
for (let i = networkBuffer.length - 1; i >= 0; i--) {
|
||||
const entry = networkBuffer.get(i);
|
||||
if (entry && entry.url === url && !entry.size) {
|
||||
networkBuffer.set(i, { ...entry, size });
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
} catch {
|
||||
// Best-effort: requestfinished fires for aborted/cached requests too,
|
||||
// where sizes() is unavailable. Missing size is acceptable; an
|
||||
// unbounded throw would noise the console for every cache hit.
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,18 +25,84 @@ import { logTelemetry } from './telemetry';
|
||||
const CDP_TIMEOUT_MS = 5000;
|
||||
const CDP_ACQUIRE_TIMEOUT_MS = 5000;
|
||||
|
||||
// Per-page CDPSession cache. Created lazily on first allow-listed call,
|
||||
// cleaned up when the page closes.
|
||||
// ─── CDP session lifecycle helpers ─────────────────────────────
|
||||
//
|
||||
// Every direct `newCDPSession(page)` call needs a matching `session.detach()`
|
||||
// to release the Chromium-side CDP target. Forgetting the detach leaves the
|
||||
// target attached until the underlying transport drops (often process exit),
|
||||
// which on a long-lived headed browser shows up as steadily-climbing
|
||||
// browser-process RSS. To make the leak class unforgettable, callers should
|
||||
// go through one of these two helpers and a static-grep test
|
||||
// (browse/test/cdp-session-cleanup.test.ts) fails CI if any source file
|
||||
// calls `newCDPSession(` outside this module.
|
||||
|
||||
/**
 * Runs `fn` against a short-lived CDP session for `page` and always detaches
 * the session afterwards, whether `fn` resolved or threw.
 *
 * Intended for one-shot CDP work where the session is not reused. For
 * repeated use of the same page, use `getOrCreateCdpSession`, which caches
 * the session and detaches it when the page closes.
 *
 * @param page Page to attach the CDP session to.
 * @param fn   Work to perform with the session; its result is returned.
 * @returns Whatever `fn` resolves to. A rejection from `fn` propagates
 *          after the session has been detached.
 */
export async function withCdpSession<T>(
  page: Page,
  fn: (session: any) => Promise<T>,
): Promise<T> {
  const cdp = await page.context().newCDPSession(page);

  // Best-effort release: the session may already be gone (target closed,
  // context recreated, browser disconnected), so detach errors are ignored.
  const release = async (): Promise<void> => {
    try {
      await cdp.detach();
    } catch {
      // Intentionally ignored — cleanup must not mask the result of `fn`.
    }
  };

  try {
    return await fn(cdp);
  } finally {
    await release();
  }
}
|
||||
|
||||
/**
 * Returns the long-lived CDP session cached for `page`, creating it on first
 * use.
 *
 * On creation a `page.once('close', ...)` hook is registered that removes
 * the cache entry AND detaches the session, so the Chromium-side target is
 * released when the tab closes.
 *
 * Concurrency: session creation is asynchronous, so two callers can both
 * miss the cache before either has stored a session. The cache is therefore
 * re-checked after the await. The first caller to finish wins; a later
 * caller detaches its redundant session and returns the cached one, so every
 * caller shares a single session per page.
 *
 * @param page  Page the session is attached to.
 * @param cache Caller-owned pool. Passing it in lets each consumer keep its
 *              own sessions with its own invariants.
 * @returns The cached (or newly created) CDP session for `page`.
 */
export async function getOrCreateCdpSession(
  page: Page,
  cache: WeakMap<Page, any>,
): Promise<any> {
  const cached = cache.get(page);
  if (cached) return cached;

  const created = await page.context().newCDPSession(page);

  // Another caller may have populated the cache while we were awaiting.
  const winner = cache.get(page);
  if (winner) {
    // Release the redundant session so it does not stay attached.
    // Best-effort: it may already be gone.
    created.detach().catch(() => {});
    return winner;
  }

  cache.set(page, created);
  page.once('close', () => {
    cache.delete(page);
    // Best-effort cleanup — the session may already be detached.
    created.detach().catch(() => {});
  });
  return created;
}
|
||||
|
||||
// ─── $B cdp bridge ─────────────────────────────────────────────
|
||||
|
||||
// Per-page CDPSession cache. Lifecycle delegated to getOrCreateCdpSession
|
||||
// which registers a close hook that BOTH removes the cache entry AND calls
|
||||
// session.detach() — pre-helper code only did the former, leaving the
|
||||
// Chromium-side target attached.
|
||||
const sessionCache: WeakMap<Page, any> = new WeakMap();
|
||||
|
||||
async function getCdpSession(page: Page): Promise<any> {
|
||||
let s = sessionCache.get(page);
|
||||
if (s) return s;
|
||||
s = await page.context().newCDPSession(page);
|
||||
sessionCache.set(page, s);
|
||||
// Clear cache on detach so we don't hold a stale handle.
|
||||
page.once('close', () => sessionCache.delete(page));
|
||||
return s;
|
||||
return getOrCreateCdpSession(page, sessionCache);
|
||||
}
|
||||
|
||||
export interface CdpDispatchInput {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
*/
|
||||
|
||||
import type { Page } from 'playwright';
|
||||
import { getOrCreateCdpSession } from './cdp-bridge';
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────
|
||||
|
||||
@@ -106,15 +107,23 @@ async function getOrCreateSession(page: Page): Promise<any> {
|
||||
}
|
||||
}
|
||||
|
||||
session = await page.context().newCDPSession(page);
|
||||
cdpSessions.set(page, session);
|
||||
session = await getOrCreateCdpSession(page, cdpSessions);
|
||||
|
||||
// Enable DOM and CSS domains
|
||||
await session.send('DOM.enable');
|
||||
await session.send('CSS.enable');
|
||||
initializedPages.add(page);
|
||||
// Enable DOM and CSS domains on first init for this page. The session
|
||||
// itself is cached + close-detached by getOrCreateCdpSession; the
|
||||
// initializedPages WeakSet is inspector-layer state that needs its
|
||||
// own close hook to stay in sync.
|
||||
if (!initializedPages.has(page)) {
|
||||
await session.send('DOM.enable');
|
||||
await session.send('CSS.enable');
|
||||
initializedPages.add(page);
|
||||
page.once('close', () => initializedPages.delete(page));
|
||||
}
|
||||
|
||||
// Auto-detach on navigation
|
||||
// Auto-detach on navigation — DOM/CSS domain state is tied to the
|
||||
// document. Close-detach (from getOrCreateCdpSession) handles the
|
||||
// tab-close case; framenavigated catches in-tab navigation that
|
||||
// invalidates inspector state without closing the tab.
|
||||
page.once('framenavigated', () => {
|
||||
try {
|
||||
session.detach().catch(() => {});
|
||||
@@ -130,7 +139,41 @@ async function getOrCreateSession(page: Page): Promise<any> {
|
||||
|
||||
// ─── Modification History ───────────────────────────────────────
|
||||
|
||||
// Bounded FIFO of style modifications. Pre-cap, this was an unbounded
|
||||
// module-scoped array that grew for every CSS edit made through $B css
|
||||
// across the whole browser session — small per-entry footprint but no
|
||||
// upper bound, the kind of slow leak that compounds over multi-day
|
||||
// inspector use. The cap is 200 because per-session undo workflows
|
||||
// rarely walk back more than a handful of edits, and a user who really
|
||||
// wants to roll a long change back can `$B css reset` to revert all of
|
||||
// them. totalPushed is monotonic across the session so undoModification
|
||||
// can tell the user when their target index has been evicted, instead
|
||||
// of just "no modification at index N".
|
||||
/** Maximum number of style modifications retained for undo. */
const MOD_HISTORY_CAP = 200;

/** Retained modifications, oldest first. Mutated in place, never reassigned. */
const modificationHistory: StyleModification[] = [];

/** Count of every modification recorded since the last reset, including evicted ones. */
let modHistoryTotalPushed = 0;

/**
 * Records a modification and drops the oldest entries once the history
 * exceeds MOD_HISTORY_CAP.
 */
function pushModification(mod: StyleModification): void {
  modHistoryTotalPushed += 1;
  modificationHistory.push(mod);

  const overflow = modificationHistory.length - MOD_HISTORY_CAP;
  if (overflow > 0) {
    // Evict from the front so the newest entries survive.
    modificationHistory.splice(0, overflow);
  }
}

// Test-only surface for the cap mechanics (push, inspect, reset) that needs
// no CDP-driven Page. Production code must use modifyStyle /
// undoModification / resetModifications instead.
export const __testInternals = {
  pushModification,
  MOD_HISTORY_CAP,
  getRawHistory: () => [...modificationHistory],
  getTotalPushed: () => modHistoryTotalPushed,
  resetForTest: () => {
    modHistoryTotalPushed = 0;
    modificationHistory.length = 0;
  },
};
|
||||
|
||||
// ─── Specificity Calculation ────────────────────────────────────
|
||||
|
||||
@@ -559,7 +602,7 @@ export async function modifyStyle(
|
||||
method,
|
||||
};
|
||||
|
||||
modificationHistory.push(modification);
|
||||
pushModification(modification);
|
||||
return modification;
|
||||
}
|
||||
|
||||
@@ -569,7 +612,12 @@ export async function modifyStyle(
|
||||
export async function undoModification(page: Page, index?: number): Promise<void> {
|
||||
const idx = index ?? modificationHistory.length - 1;
|
||||
if (idx < 0 || idx >= modificationHistory.length) {
|
||||
throw new Error(`No modification at index ${idx}. History has ${modificationHistory.length} entries.`);
|
||||
const evictedNote = modHistoryTotalPushed > MOD_HISTORY_CAP
|
||||
? ` (most recent ${MOD_HISTORY_CAP} only — ${modHistoryTotalPushed - MOD_HISTORY_CAP} earlier entries evicted at the cap)`
|
||||
: '';
|
||||
throw new Error(
|
||||
`No modification at index ${idx}. History has ${modificationHistory.length} entries${evictedNote}.`,
|
||||
);
|
||||
}
|
||||
|
||||
const mod = modificationHistory[idx];
|
||||
@@ -622,6 +670,23 @@ export function getModificationHistory(): StyleModification[] {
|
||||
return [...modificationHistory];
|
||||
}
|
||||
|
||||
/**
 * Reports modification-history occupancy for the $B memory snapshot.
 *
 * @returns `current` — entries held right now; `cap` — the retention limit;
 *          `evicted` — how far the total recorded since the last reset
 *          exceeds the cap (never negative).
 */
export function getModificationHistoryStats(): {
  current: number;
  cap: number;
  evicted: number;
} {
  const current = modificationHistory.length;
  const overCap = modHistoryTotalPushed - MOD_HISTORY_CAP;
  const evicted = Math.max(0, overCap);
  return { current, cap: MOD_HISTORY_CAP, evicted };
}
|
||||
|
||||
/**
|
||||
* Reset all modifications, restoring original values.
|
||||
*/
|
||||
@@ -648,6 +713,7 @@ export async function resetModifications(page: Page): Promise<void> {
|
||||
}
|
||||
}
|
||||
modificationHistory.length = 0;
|
||||
modHistoryTotalPushed = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
+262
-92
@@ -11,11 +11,13 @@
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { spawn as nodeSpawn } from 'child_process';
|
||||
import { safeUnlink, safeUnlinkQuiet, safeKill, isProcessAlive } from './error-handling';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
|
||||
import { parseProxyConfig, computeConfigHash, ProxyConfigError } from './proxy-config';
|
||||
import { redactProxyUrl } from './proxy-redact';
|
||||
import { spawnTerminalAgent } from './terminal-agent-control';
|
||||
|
||||
const config = resolveConfig();
|
||||
const IS_WINDOWS = process.platform === 'win32';
|
||||
@@ -209,6 +211,86 @@ function cleanupLegacyState(): void {
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Chromium profile lock helpers (#1781) ─────────────────────
|
||||
/**
 * Path of the Chromium profile directory used by headed/connect sessions:
 * `<HOME>/.gstack/chromium-profile`, with `/tmp` standing in for HOME when
 * the variable is unset or empty.
 */
function chromiumProfileDir(): string {
  const home = process.env.HOME || '/tmp';
  return path.join(home, '.gstack', 'chromium-profile');
}
|
||||
|
||||
/**
 * Deletes Chromium's SingletonLock, SingletonSocket and SingletonCookie from
 * `profileDir` so a relaunch can acquire the profile. Uses the quiet unlink
 * helper, so it is safe to call when the files are absent.
 *
 * @param profileDir Profile directory to clean; defaults to chromiumProfileDir().
 */
function cleanChromiumProfileLocks(profileDir: string = chromiumProfileDir()): void {
  const singletonFiles = ['SingletonLock', 'SingletonSocket', 'SingletonCookie'];
  singletonFiles.forEach((name) => {
    safeUnlinkQuiet(path.join(profileDir, name));
  });
}
|
||||
|
||||
/**
 * Terminates an orphaned Chromium that still holds the profile's
 * SingletonLock.
 *
 * The lock is a symlink whose target looks like "hostname-PID". The trailing
 * PID is sent SIGTERM and, if it is still alive a second later, SIGKILL.
 * Does nothing when the lock is missing, is not a symlink, names no usable
 * PID, or names a process that is no longer alive.
 *
 * @param profileDir Profile directory to inspect; defaults to chromiumProfileDir().
 * @throws Any readlink error other than ENOENT / EINVAL.
 */
async function killOrphanChromium(profileDir: string = chromiumProfileDir()): Promise<void> {
  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));

  try {
    const target = fs.readlinkSync(path.join(profileDir, 'SingletonLock')); // "hostname-12345"
    const pid = parseInt(target.split('-').pop() || '', 10);
    if (!pid || !isProcessAlive(pid)) return;

    // Polite request first, then give it a moment to shut down.
    safeKill(pid, 'SIGTERM');
    await pause(1000);
    if (!isProcessAlive(pid)) return;

    // Still running — force it.
    safeKill(pid, 'SIGKILL');
    await pause(500);
  } catch (err: any) {
    // ENOENT: no lock file. EINVAL: lock exists but is not a symlink.
    if (err?.code !== 'ENOENT' && err?.code !== 'EINVAL') throw err;
  }
}
|
||||
|
||||
/**
 * Bounded health probe: checks the server up to `attempts` times, sleeping
 * `backoffMs` between consecutive checks (no sleep after the last one).
 * Lets callers tell a busy-but-alive daemon from a dead one (#1781) before
 * deciding to kill and restart it.
 *
 * @param port      Port the server listens on.
 * @param attempts  Maximum number of health checks.
 * @param backoffMs Delay between checks, in milliseconds.
 * @returns true as soon as one check succeeds; false if none did.
 */
async function probeHealthWithBackoff(port: number, attempts = 3, backoffMs = 250): Promise<boolean> {
  let remaining = attempts;
  while (remaining > 0) {
    const healthy = await isServerHealthy(port);
    if (healthy) return true;
    remaining -= 1;
    if (remaining > 0) await Bun.sleep(backoffMs);
  }
  return false;
}
|
||||
|
||||
/**
 * Computes the extra environment for an automatic server restart.
 *
 *   - BROWSE_PROXY_URL   — set from this invocation's `proxyUrl`, if any.
 *   - BROWSE_HEADED      — '1' when this invocation asked for headed mode OR
 *                          the persisted server state was headed, so a plain
 *                          command never downgrades a headed session (#1781).
 *   - BROWSE_CONFIG_HASH — this invocation's hash, else the persisted one.
 *
 * Pure (no I/O, inputs untouched) and exported for tests.
 *
 * @param globalFlags Flags parsed from the current invocation, if any.
 * @param oldState    Previously persisted server state, if any.
 * @returns Only the variables that apply; possibly an empty object.
 */
export function buildRestartEnv(
  globalFlags: GlobalFlags | null | undefined,
  oldState: ServerState | null,
): Record<string, string> {
  const proxyUrl = globalFlags?.proxyUrl;
  const wantsHeaded = Boolean(globalFlags?.headed) || oldState?.mode === 'headed';
  const configHash = globalFlags?.configHash || oldState?.configHash;

  const env: Record<string, string> = {};
  if (proxyUrl) {
    env.BROWSE_PROXY_URL = proxyUrl;
  }
  if (wantsHeaded) {
    env.BROWSE_HEADED = '1';
  }
  if (configHash) {
    env.BROWSE_CONFIG_HASH = configHash;
  }
  return env;
}
|
||||
|
||||
/**
 * macOS only: asks "Google Chrome for Testing" to activate so the headed
 * window comes to the front, since it often opens behind the active window
 * or on another Space (#1781). No-op on other platforms.
 *
 * Best-effort and fire-and-forget: the helper process is detached and
 * unref'd, and no failure reaches the caller. The app name is a fixed
 * literal with no interpolation.
 */
function raiseHeadedWindowMacOS(): void {
  if (process.platform !== 'darwin') return;
  try {
    const child = nodeSpawn('osascript', ['-e', 'tell application "Google Chrome for Testing" to activate'], {
      stdio: 'ignore',
      detached: true,
    });
    // A missing or unlaunchable `osascript` is reported through an
    // asynchronous 'error' event, not a synchronous throw. Without a listener
    // that event becomes an uncaught exception and kills the CLI, so absorb it.
    child.on('error', () => {
      // osascript missing or not executable — non-fatal
    });
    child.unref();
  } catch {
    // Synchronous spawn failure (e.g. invalid options) — non-fatal
  }
}
|
||||
|
||||
// ─── Server Lifecycle ──────────────────────────────────────────
|
||||
async function startServer(extraEnv?: Record<string, string>): Promise<ServerState> {
|
||||
ensureStateDir(config);
|
||||
@@ -217,7 +299,12 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
|
||||
safeUnlink(config.stateFile);
|
||||
safeUnlink(path.join(config.stateDir, 'browse-startup-error.log'));
|
||||
|
||||
let proc: any = null;
|
||||
// #1781: clear a stale Chromium profile lock (and kill the orphan still
|
||||
// holding it) before launch, so an auto-restart after an abrupt kill isn't
|
||||
// blocked by the previous Chromium's SingletonLock — the self-inflicted
|
||||
// crash-loop. Previously only the manual connect preamble did this.
|
||||
await killOrphanChromium();
|
||||
cleanChromiumProfileLocks();
|
||||
|
||||
// Allow the caller to opt out of the parent-process watchdog by setting
|
||||
// BROWSE_PARENT_PID=0 in the environment. Useful for CI, non-interactive
|
||||
@@ -240,12 +327,22 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
|
||||
`${extraEnvStr})}).unref()`;
|
||||
Bun.spawnSync(['node', '-e', launcherCode], { stdio: ['ignore', 'ignore', 'ignore'] });
|
||||
} else {
|
||||
// macOS/Linux: Bun.spawn + unref works correctly
|
||||
proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], {
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
// macOS/Linux: Bun.spawn().unref() only removes the child from Bun's event
|
||||
// loop — it does NOT call setsid(), so the spawned server stays in the
|
||||
// parent's process session. When the CLI runs inside a session-managed
|
||||
// shell (e.g. Claude Code's per-command Bash sandbox, Conductor, CI
|
||||
// step runners), the session leader's exit sends SIGHUP to every PID in
|
||||
// the session, killing the bun server (and its Chromium grandchildren).
|
||||
// Even with BROWSE_PARENT_PID=0 disabling the watchdog, SIGHUP still
|
||||
// reaps the server. Use Node's child_process.spawn with detached:true,
|
||||
// which calls setsid() so the server becomes its own session leader
|
||||
// (PPID=1, STAT=Ss) and survives the spawning shell's exit. Mirrors
|
||||
// the Windows path's rationale — same root cause, different OS API.
|
||||
nodeSpawn('bun', ['run', SERVER_SCRIPT], {
|
||||
detached: true,
|
||||
stdio: ['ignore', 'ignore', 'ignore'],
|
||||
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile, BROWSE_PARENT_PID: parentPid, ...extraEnv },
|
||||
});
|
||||
proc.unref();
|
||||
}).unref();
|
||||
}
|
||||
|
||||
// Wait for server to become healthy.
|
||||
@@ -260,27 +357,17 @@ async function startServer(extraEnv?: Record<string, string>): Promise<ServerSta
|
||||
await Bun.sleep(100);
|
||||
}
|
||||
|
||||
// Server didn't start in time — try to get error details
|
||||
if (proc?.stderr) {
|
||||
// macOS/Linux: read stderr from the spawned process
|
||||
const reader = proc.stderr.getReader();
|
||||
const { value } = await reader.read();
|
||||
if (value) {
|
||||
const errText = new TextDecoder().decode(value);
|
||||
throw new Error(`Server failed to start:\n${errText}`);
|
||||
}
|
||||
} else {
|
||||
// Windows: check startup error log (server writes errors to disk since
|
||||
// stderr is unavailable due to stdio: 'ignore' for detachment)
|
||||
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
|
||||
try {
|
||||
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
|
||||
if (errorLog) {
|
||||
throw new Error(`Server failed to start:\n${errorLog}`);
|
||||
}
|
||||
} catch (e: any) {
|
||||
if (e.code !== 'ENOENT') throw e;
|
||||
// Server didn't start in time — check the on-disk startup error log.
|
||||
// Both platforms now spawn with stdio: 'ignore', so the server writes
|
||||
// errors to disk for the CLI to read (see server.ts start().catch).
|
||||
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
|
||||
try {
|
||||
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
|
||||
if (errorLog) {
|
||||
throw new Error(`Server failed to start:\n${errorLog}`);
|
||||
}
|
||||
} catch (e: any) {
|
||||
if (e.code !== 'ENOENT') throw e;
|
||||
}
|
||||
throw new Error(`Server failed to start within ${MAX_START_WAIT / 1000}s`);
|
||||
}
|
||||
@@ -486,26 +573,42 @@ async function sendCommand(state: ServerState, command: string, args: string[],
|
||||
}
|
||||
} catch (err: any) {
|
||||
if (err.name === 'AbortError') {
|
||||
console.error('[browse] Command timed out after 30s');
|
||||
// #1781: a 30s timeout on a heavy page usually means busy, not dead.
|
||||
// Don't kill a live server (that's what triggered the crash-loop) — report
|
||||
// and exit so the user can retry rather than losing their (headed) window.
|
||||
const ts = readState();
|
||||
const alive = ts?.pid ? isProcessAlive(ts.pid) : false;
|
||||
console.error(alive
|
||||
? '[browse] Command timed out after 30s (server still alive — busy, not restarting). Retry, or raise load.'
|
||||
: '[browse] Command timed out after 30s');
|
||||
process.exit(1);
|
||||
}
|
||||
// Connection error — server may have crashed
|
||||
// Connection error — server may have crashed, OR may just be busy.
|
||||
if (err.code === 'ECONNREFUSED' || err.code === 'ECONNRESET' || err.message?.includes('fetch failed')) {
|
||||
const oldState = readState();
|
||||
// #1781 busy-vs-dead: a single-threaded daemon under beacon/extension load
|
||||
// can briefly stop answering HTTP while still alive. Before declaring a
|
||||
// crash, if the process is alive give /health a bounded chance to recover
|
||||
// and just retry the command — never kill+restart a live-but-busy server.
|
||||
if (oldState?.pid && isProcessAlive(oldState.pid) && await probeHealthWithBackoff(oldState.port)) {
|
||||
if (retries >= 1) throw new Error('[browse] Server unresponsive after retry — aborting');
|
||||
console.error('[browse] Server was briefly unresponsive (busy); retrying command...');
|
||||
return sendCommand(oldState, command, args, retries + 1);
|
||||
}
|
||||
// Truly dead (or health never recovered) → restart.
|
||||
if (retries >= 1) throw new Error('[browse] Server crashed twice in a row — aborting');
|
||||
console.error('[browse] Server connection lost. Restarting...');
|
||||
// Kill the old server to avoid orphaned chromium processes
|
||||
const oldState = readState();
|
||||
if (oldState && oldState.pid) {
|
||||
await killServer(oldState.pid);
|
||||
}
|
||||
// Reapply --proxy / --headed flags from this invocation when restarting
|
||||
// after a crash. Without this, a proxied daemon that dies mid-command
|
||||
// would silently restart in default direct/headless mode and bypass
|
||||
// the SOCKS bridge.
|
||||
const restartEnv: Record<string, string> = {};
|
||||
if (_globalFlags?.proxyUrl) restartEnv.BROWSE_PROXY_URL = _globalFlags.proxyUrl;
|
||||
if (_globalFlags?.headed) restartEnv.BROWSE_HEADED = '1';
|
||||
if (_globalFlags?.configHash) restartEnv.BROWSE_CONFIG_HASH = _globalFlags.configHash;
|
||||
// startServer() now clears the Chromium SingletonLock + reaps the orphan,
|
||||
// so the relaunch isn't blocked by the dead Chromium's profile lock (#1781).
|
||||
//
|
||||
// Reapply --proxy / --headed when restarting. headed comes from THIS
|
||||
// invocation OR the persisted server mode, so a restart triggered by a
|
||||
// plain command (goto/status, no --headed) never silently downgrades a
|
||||
// headed session to headless (#1781). Same for proxy/configHash.
|
||||
const restartEnv = buildRestartEnv(_globalFlags, oldState);
|
||||
const newState = await startServer(Object.keys(restartEnv).length ? restartEnv : undefined);
|
||||
return sendCommand(newState, command, args, retries + 1);
|
||||
}
|
||||
@@ -966,30 +1069,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
||||
}
|
||||
}
|
||||
|
||||
// Kill orphaned Chromium processes that may still hold the profile lock.
|
||||
// The server PID is the Bun process; Chromium is a child that can outlive it
|
||||
// if the server is killed abruptly (SIGKILL, crash, manual rm of state file).
|
||||
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
|
||||
try {
|
||||
const singletonLock = path.join(profileDir, 'SingletonLock');
|
||||
const lockTarget = fs.readlinkSync(singletonLock); // e.g. "hostname-12345"
|
||||
const orphanPid = parseInt(lockTarget.split('-').pop() || '', 10);
|
||||
if (orphanPid && isProcessAlive(orphanPid)) {
|
||||
safeKill(orphanPid, 'SIGTERM');
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
if (isProcessAlive(orphanPid)) {
|
||||
safeKill(orphanPid, 'SIGKILL');
|
||||
await new Promise(resolve => setTimeout(resolve, 500));
|
||||
}
|
||||
}
|
||||
} catch (err: any) {
|
||||
if (err?.code !== 'ENOENT' && err?.code !== 'EINVAL') throw err;
|
||||
}
|
||||
|
||||
// Clean up Chromium profile locks (can persist after crashes)
|
||||
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
|
||||
safeUnlinkQuiet(path.join(profileDir, lockFile));
|
||||
}
|
||||
// Kill an orphaned Chromium still holding the profile lock (the Bun server
|
||||
// PID's Chromium child can outlive an abrupt kill/crash), then clear the
|
||||
// lock files so the launch is clean. Shared with the auto-restart path (#1781).
|
||||
await killOrphanChromium();
|
||||
cleanChromiumProfileLocks();
|
||||
|
||||
// Delete stale state file
|
||||
safeUnlinkQuiet(config.stateFile);
|
||||
@@ -1027,38 +1111,29 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
||||
});
|
||||
const status = await resp.text();
|
||||
console.log(`Connected to real Chrome\n${status}`);
|
||||
// #1781: surface the window — it often opens behind/on another Space.
|
||||
raiseHeadedWindowMacOS();
|
||||
if (process.platform === 'darwin') {
|
||||
console.log('(If you still don\'t see it, check Mission Control / other Spaces.)');
|
||||
}
|
||||
|
||||
// sidebar-agent.ts spawn was here. Ripped alongside the chat queue —
|
||||
// the Terminal pane runs an interactive PTY now, no more one-shot
|
||||
// claude -p subprocesses to multiplex.
|
||||
|
||||
// Auto-start terminal agent (non-compiled bun process). Owns the PTY
|
||||
// WebSocket for the sidebar Terminal pane.
|
||||
let termAgentScript = path.resolve(__dirname, 'terminal-agent.ts');
|
||||
if (!fs.existsSync(termAgentScript)) {
|
||||
termAgentScript = path.resolve(path.dirname(process.execPath), '..', 'src', 'terminal-agent.ts');
|
||||
}
|
||||
// WebSocket for the sidebar Terminal pane. Routes through the shared
|
||||
// spawnTerminalAgent helper so the CLI cold-start path and the
|
||||
// server.ts watchdog respawn path share one implementation. The
|
||||
// helper handles prior-PID cleanup, script lookup, and env wiring.
|
||||
try {
|
||||
if (fs.existsSync(termAgentScript)) {
|
||||
// Kill old terminal-agents so a stale port file can't trick the
|
||||
// server into routing /pty-session at a dead listener.
|
||||
try {
|
||||
const { spawnSync } = require('child_process');
|
||||
spawnSync('pkill', ['-f', 'terminal-agent\\.ts'], { stdio: 'ignore', timeout: 3000 });
|
||||
} catch (err: any) {
|
||||
if (err?.code !== 'ENOENT') throw err;
|
||||
}
|
||||
const termProc = Bun.spawn(['bun', 'run', termAgentScript], {
|
||||
cwd: config.projectDir,
|
||||
env: {
|
||||
...process.env,
|
||||
BROWSE_STATE_FILE: config.stateFile,
|
||||
BROWSE_SERVER_PORT: String(newState.port),
|
||||
},
|
||||
stdio: ['ignore', 'ignore', 'ignore'],
|
||||
});
|
||||
termProc.unref();
|
||||
console.log(`[browse] Terminal agent started (PID: ${termProc.pid})`);
|
||||
const newPid = spawnTerminalAgent({
|
||||
stateFile: config.stateFile,
|
||||
serverPort: newState.port,
|
||||
cwd: config.projectDir,
|
||||
});
|
||||
if (newPid) {
|
||||
console.log(`[browse] Terminal agent started (PID: ${newPid})`);
|
||||
}
|
||||
} catch (err: any) {
|
||||
// Non-fatal: chat still works without the terminal agent.
|
||||
@@ -1068,6 +1143,96 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
||||
console.error(`[browse] Connect failed: ${err.message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// ─── Outer Supervisor (v1.44+, opt-in) ──────────────────────────
|
||||
//
|
||||
// Default: fire-and-forget (CLI exits, server runs detached). This is
|
||||
// the contract every existing call site relies on, including Claude
|
||||
// Code's Bash tool which expects `$B connect` to return promptly.
|
||||
//
|
||||
// Opt-in via `--supervise` flag or BROWSE_SUPERVISE=1 env: the CLI
|
||||
// stays attached, polls the spawned server's PID every 30s, and
|
||||
// respawns it through the same headed-mode startServer path on
|
||||
// unexpected exit. Crash-loop guard: 5 respawns inside 5 min →
|
||||
// give up and exit 1 with a clear error. SIGINT / SIGTERM cleanly
|
||||
// tear down the supervised server before exit.
|
||||
//
|
||||
// Out of scope for v1.44 minimum: routing the Chromium-disconnect
|
||||
// exit-code-1 path back through this supervisor. The terminal-agent
|
||||
// watchdog (T5) already covers the highest-frequency restart case;
|
||||
// Chromium-crash-respawn is documented as a follow-up so the
|
||||
// supervisor stays a tight, testable primitive.
|
||||
const superviseRequested = commandArgs.includes('--supervise')
|
||||
|| process.env.BROWSE_SUPERVISE === '1';
|
||||
if (!superviseRequested) {
|
||||
process.exit(0);
|
||||
}
|
||||
console.log('[browse] Supervisor mode: monitoring server. Ctrl-C to stop.');
|
||||
let supervisorExiting = false;
|
||||
const teardownAndExit = (signal: string) => {
|
||||
if (supervisorExiting) return;
|
||||
supervisorExiting = true;
|
||||
console.log(`\n[browse] ${signal} received — stopping server.`);
|
||||
const state = readState();
|
||||
if (state?.pid && isProcessAlive(state.pid)) {
|
||||
safeKill(state.pid, 'SIGTERM');
|
||||
}
|
||||
process.exit(0);
|
||||
};
|
||||
process.on('SIGINT', () => teardownAndExit('SIGINT'));
|
||||
process.on('SIGTERM', () => teardownAndExit('SIGTERM'));
|
||||
|
||||
const SUPERVISOR_TICK_MS = parseInt(
|
||||
process.env.GSTACK_SUPERVISOR_TICK_MS || '30000',
|
||||
10,
|
||||
);
|
||||
const SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000;
|
||||
const SUPERVISOR_GUARD_MAX = 5;
|
||||
const SUPERVISOR_BACKOFF_MS = (process.env.GSTACK_SUPERVISOR_BACKOFF || '1000,2000,4000,8000,30000')
|
||||
.split(',').map(s => parseInt(s.trim(), 10)).filter(n => Number.isFinite(n));
|
||||
const respawns: number[] = [];
|
||||
|
||||
while (!supervisorExiting) {
|
||||
await new Promise(resolve => setTimeout(resolve, SUPERVISOR_TICK_MS));
|
||||
if (supervisorExiting) break;
|
||||
const state = readState();
|
||||
if (state?.pid && isProcessAlive(state.pid)) continue;
|
||||
// Server died. Prune rolling window and check guard.
|
||||
const now = Date.now();
|
||||
while (respawns.length && now - respawns[0] > SUPERVISOR_GUARD_WINDOW_MS) {
|
||||
respawns.shift();
|
||||
}
|
||||
if (respawns.length >= SUPERVISOR_GUARD_MAX) {
|
||||
console.error(
|
||||
`[browse] Supervisor: ${SUPERVISOR_GUARD_MAX} crashes in ${SUPERVISOR_GUARD_WINDOW_MS / 1000}s — giving up.`,
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
const attempt = respawns.length;
|
||||
respawns.push(now);
|
||||
const backoff = SUPERVISOR_BACKOFF_MS[Math.min(attempt, SUPERVISOR_BACKOFF_MS.length - 1)] ?? 30_000;
|
||||
console.warn(`[browse] Supervisor: server PID gone — respawning in ${backoff}ms (attempt ${attempt + 1}/${SUPERVISOR_GUARD_MAX})...`);
|
||||
await new Promise(resolve => setTimeout(resolve, backoff));
|
||||
if (supervisorExiting) break;
|
||||
try {
|
||||
const respawned = await startServer(serverEnv);
|
||||
console.log(`[browse] Supervisor: server respawned (PID ${respawned.pid}, port ${respawned.port}).`);
|
||||
// Re-spawn the terminal-agent too; same env wiring as the initial connect.
|
||||
try {
|
||||
spawnTerminalAgent({
|
||||
stateFile: config.stateFile,
|
||||
serverPort: respawned.port,
|
||||
cwd: config.projectDir,
|
||||
});
|
||||
} catch (err: any) {
|
||||
console.warn(`[browse] Supervisor: terminal-agent respawn failed: ${err?.message || err}`);
|
||||
}
|
||||
} catch (err: any) {
|
||||
console.error(`[browse] Supervisor: server respawn failed: ${err?.message || err}`);
|
||||
// Let the next tick try again — the crash-loop guard already
|
||||
// bounded the retries via the rolling window.
|
||||
}
|
||||
}
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
@@ -1118,11 +1283,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
||||
safeKill(existingState.pid, 'SIGKILL');
|
||||
}
|
||||
}
|
||||
// Clean profile locks and state file
|
||||
const profileDir = path.join(process.env.HOME || '/tmp', '.gstack', 'chromium-profile');
|
||||
for (const lockFile of ['SingletonLock', 'SingletonSocket', 'SingletonCookie']) {
|
||||
safeUnlinkQuiet(path.join(profileDir, lockFile));
|
||||
}
|
||||
// #1781: killing the daemon can orphan its Chromium child tree, which keeps
|
||||
// holding the SingletonLock and makes the next `connect` fail to launch.
|
||||
// Reap the orphan via the lock, then clear the lock files + state.
|
||||
await killOrphanChromium();
|
||||
cleanChromiumProfileLocks();
|
||||
// Xvfb orphan cleanup: if the recorded PID still matches our Xvfb (by
|
||||
// cmdline AND start-time), kill it. PID-only would risk killing a
|
||||
// recycled PID belonging to an unrelated process.
|
||||
@@ -1182,6 +1347,11 @@ Refs: After 'snapshot', use @e1, @e2... as selectors:
|
||||
}
|
||||
|
||||
await sendCommand(state, command, commandArgs);
|
||||
|
||||
// #1781: `focus` means "show me the window". The server-side focus activates
|
||||
// the page via CDP, but on macOS the app can still sit on another Space — pull
|
||||
// it to the user's current Space too.
|
||||
if (command === 'focus') raiseHeadedWindowMacOS();
|
||||
}
|
||||
|
||||
if (import.meta.main) {
|
||||
|
||||
@@ -45,6 +45,7 @@ export const META_COMMANDS = new Set([
|
||||
'domain-skill',
|
||||
'skill',
|
||||
'cdp',
|
||||
'memory',
|
||||
]);
|
||||
|
||||
export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]);
|
||||
@@ -89,6 +90,7 @@ export function wrapUntrustedContent(result: string, url: string): string {
|
||||
|
||||
export const COMMAND_DESCRIPTIONS: Record<string, { category: string; description: string; usage?: string }> = {
|
||||
// Navigation
|
||||
'memory': { category: 'Server', description: 'Snapshot Bun heap + per-tab JS heap + Chromium process tree + bounded buffer sizes. JSON output with --json.', usage: 'memory [--json]' },
|
||||
'goto': { category: 'Navigation', description: 'Navigate to URL (http://, https://, or file:// scoped to cwd/TEMP_DIR)', usage: 'goto <url>' },
|
||||
'load-html': { category: 'Navigation', description: 'Load HTML via setContent. Accepts a file path under safe-dirs (validated), OR --from-file <payload.json> with {"html":"...","waitUntil":"..."} for large inline HTML (Windows argv safe).', usage: 'load-html <file> [--wait-until load|domcontentloaded|networkidle] [--tab-id <N>] | load-html --from-file <payload.json> [--tab-id <N>]' },
|
||||
'back': { category: 'Navigation', description: 'History back' },
|
||||
@@ -104,8 +106,8 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
|
||||
'media': { category: 'Reading', description: 'All media elements (images, videos, audio) with URLs, dimensions, types', usage: 'media [--images|--videos|--audio] [selector]' },
|
||||
'data': { category: 'Reading', description: 'Structured data: JSON-LD, Open Graph, Twitter Cards, meta tags', usage: 'data [--jsonld|--og|--meta|--twitter]' },
|
||||
// Inspection
|
||||
'js': { category: 'Inspection', description: 'Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file.', usage: 'js <expr>' },
|
||||
'eval': { category: 'Inspection', description: 'Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners.', usage: 'eval <file>' },
|
||||
'js': { category: 'Inspection', description: 'Run inline JavaScript expression in the page context and return result as string. Same JS sandbox as eval; the only difference is js takes an inline expr while eval reads from a file. With --out <file>, the result is written to disk instead of returned (a base64 data URL is decoded to raw bytes unless --raw is given) — ideal for rasterizing local renders to PNG without serializing megabytes back through the CLI. --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel).', usage: 'js <expr> [--out <file>] [--raw]' },
|
||||
'eval': { category: 'Inspection', description: 'Run JavaScript from a file in the page context and return result as string. Path must resolve under /tmp or cwd (no traversal). Use eval for multi-line scripts; use js for one-liners. With --out <file>, the result is written to disk (base64 data URL decoded to bytes unless --raw); --out makes the invocation a WRITE (needs write scope, never allowed over the tunnel).', usage: 'eval <file> [--out <file>] [--raw]' },
|
||||
'css': { category: 'Inspection', description: 'Computed CSS value', usage: 'css <sel> <prop>' },
|
||||
'attrs': { category: 'Inspection', description: 'Element attributes as JSON', usage: 'attrs <sel|@ref>' },
|
||||
'is': { category: 'Inspection', description: 'State check on element. Valid <prop> values: visible, hidden, enabled, disabled, checked, editable, focused (case-sensitive). <sel> accepts a CSS selector OR an @ref token from a prior snapshot (e.g. @e3, @c1) — refs are interchangeable with selectors anywhere a selector is expected.', usage: 'is <prop> <sel|@ref>' },
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* Outputs the absolute path to the browse binary on stdout, or exits 1 if not found.
|
||||
*/
|
||||
|
||||
import { existsSync } from 'fs';
|
||||
import { accessSync, constants } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { homedir } from 'os';
|
||||
|
||||
@@ -24,6 +24,35 @@ function getGitRoot(): string | null {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Report whether `p` can be executed by the current process.
 *
 * Uses accessSync with X_OK: on Linux/macOS that tests the executable bit;
 * on Windows (which has no true execute bit) it degrades to an existence
 * check. Mirrors make-pdf/src/browseClient.ts:159 and
 * make-pdf/src/pdftotext.ts:117.
 *
 * @param p Path to probe.
 * @returns true when the access check succeeds, false on any failure.
 */
function isExecutable(p: string): boolean {
  let ok = true;
  try {
    accessSync(p, constants.X_OK);
  } catch {
    // Any failure (missing file, no permission, ...) means "not executable".
    ok = false;
  }
  return ok;
}
|
||||
|
||||
/**
 * Resolve a bare binary path to the file that actually exists on disk.
 *
 * On Windows, `bun build --compile` appends `.exe` to the output filename,
 * so `browse` on disk is really `browse.exe`. The bare path is probed first;
 * on win32 only, the Windows extensions are tried afterwards. Linux/macOS
 * behavior is a single bare-path probe. Mirrors the helper in
 * make-pdf/src/browseClient.ts:89 and make-pdf/src/pdftotext.ts:52.
 *
 * @param base Path without any platform-specific extension.
 * @returns The first executable candidate, or null when none is executable.
 */
function findExecutable(base: string): string | null {
  // '' keeps the bare path as the first (and, off Windows, only) candidate.
  const suffixes = process.platform === 'win32' ? ['', '.exe', '.cmd', '.bat'] : [''];
  for (const suffix of suffixes) {
    const candidate = base + suffix;
    if (isExecutable(candidate)) return candidate;
  }
  return null;
}
|
||||
|
||||
export function locateBinary(): string | null {
|
||||
const root = getGitRoot();
|
||||
const home = homedir();
|
||||
@@ -33,14 +62,26 @@ export function locateBinary(): string | null {
|
||||
if (root) {
|
||||
for (const m of markers) {
|
||||
const local = join(root, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
|
||||
if (existsSync(local)) return local;
|
||||
const found = findExecutable(local);
|
||||
if (found) return found;
|
||||
}
|
||||
|
||||
// Source-checkout fallback (no installed skill layout — the binary
|
||||
// lives directly at <repo>/browse/dist/browse[.exe]). Hit by:
|
||||
// - gstack repo dev workflow before `./setup` runs
|
||||
// - the windows-setup-e2e.yml CI workflow which builds binaries
|
||||
// in place but never installs them under a marker dir
|
||||
// - make-pdf consumers running from a sibling source checkout
|
||||
const sourceCheckout = join(root, 'browse', 'dist', 'browse');
|
||||
const sourceFound = findExecutable(sourceCheckout);
|
||||
if (sourceFound) return sourceFound;
|
||||
}
|
||||
|
||||
// Global fallback
|
||||
for (const m of markers) {
|
||||
const global = join(home, m, 'skills', 'gstack', 'browse', 'dist', 'browse');
|
||||
if (existsSync(global)) return global;
|
||||
const found = findExecutable(global);
|
||||
if (found) return found;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
/**
|
||||
* find-security-sidecar — resolve the Node entry that runs the L4 ML
|
||||
* classifier sidecar.
|
||||
*
|
||||
* The sidecar can't be bundled into the compiled browse binary because
|
||||
* onnxruntime-node fails to dlopen from Bun's compile extract dir. It runs
|
||||
* as a separate Node subprocess instead. This module resolves the right
|
||||
* path + interpreter on each platform:
|
||||
*
|
||||
* 1. Prefer node on PATH + a bundled JS entry at
|
||||
* browse/dist/security-sidecar.js (built by package.json's
|
||||
* build:security-sidecar script).
|
||||
* 2. Dev fallback: node + browse/src/security-sidecar-entry.ts via tsx
|
||||
* (only available in the source checkout, not the compiled install).
|
||||
* 3. If Node is missing or no entry resolves, return null. The /pty-inject-scan
|
||||
* endpoint then responds with l4 { available: false } and the extension
|
||||
* degrades to WARN+confirm (D7).
|
||||
*/
|
||||
|
||||
import { existsSync } from "fs";
|
||||
import { join, dirname } from "path";
|
||||
import { execFileSync } from "child_process";
|
||||
|
||||
/** Resolved interpreter + entry script for the security sidecar subprocess. */
export interface SidecarLocation {
  /** Command used to launch Node. */
  node: string;
  /** Path of the sidecar entry file to run. */
  entry: string;
  /** "compiled" if running from browse/dist/, "dev" if running from src */
  mode: "compiled" | "dev";
}
|
||||
|
||||
/**
 * Check that a `node` command can be launched.
 *
 * Runs `node --version` with output discarded and a 2 s timeout.
 *
 * @returns The literal command name "node" when the probe succeeds,
 *          null when the spawn fails or times out.
 */
function nodeOnPath(): string | null {
  let command: string | null = null;
  try {
    execFileSync("node", ["--version"], { stdio: "ignore", timeout: 2000 });
    command = "node";
  } catch {
    command = null;
  }
  return command;
}
|
||||
|
||||
/**
 * Find the directory that sidecar entry paths are resolved against.
 *
 * Starts at the directory of import.meta.path (which, per the original note,
 * points at the Bun extract temp when running compiled) and walks up at most
 * six levels. A directory qualifies when it contains either
 * `browse/dist/security-sidecar.js` or `src/security-sidecar-entry.ts`.
 *
 * @returns The first qualifying directory, or process.cwd() when the walk
 *          reaches the filesystem root or the depth limit without a match.
 */
function browseRoot(): string {
  const maxDepth = 6;
  let dir = dirname(import.meta.path || "");
  let depth = 0;
  while (depth < maxDepth) {
    const hasCompiledEntry = existsSync(join(dir, "browse", "dist", "security-sidecar.js"));
    if (hasCompiledEntry || existsSync(join(dir, "src", "security-sidecar-entry.ts"))) {
      return dir;
    }
    const parent = dirname(dir);
    // dirname() is a fixed point at the filesystem root: nothing left to climb.
    if (parent === dir) break;
    dir = parent;
    depth += 1;
  }
  return process.cwd();
}
|
||||
|
||||
/**
 * Resolve how to launch the security sidecar, if it can be launched at all.
 *
 * Candidates are checked in priority order under browseRoot():
 *   1. browse/dist/security-sidecar.js  -> mode "compiled"
 *   2. src/security-sidecar-entry.ts    -> mode "dev" (compiled installs
 *      won't have src/ on disk, so this only resolves in a source checkout)
 *
 * @returns The node command, entry path and mode of the first existing
 *          candidate; null when Node is unavailable or no entry exists.
 */
export function findSecuritySidecar(): SidecarLocation | null {
  const node = nodeOnPath();
  if (!node) return null;

  const root = browseRoot();
  const candidates: Array<{ entry: string; mode: SidecarLocation["mode"] }> = [
    { entry: join(root, "browse", "dist", "security-sidecar.js"), mode: "compiled" },
    { entry: join(root, "src", "security-sidecar-entry.ts"), mode: "dev" },
  ];

  for (const { entry, mode } of candidates) {
    if (existsSync(entry)) return { node, entry, mode };
  }
  return null;
}
|
||||
@@ -0,0 +1,115 @@
|
||||
// `$B memory` — diagnostic snapshot of Bun heap + per-tab JS heap +
|
||||
// Chromium process tree + bounded buffer sizes. Lives in its own file
|
||||
// because the meta-commands dispatcher imports it lazily — projects
|
||||
// that never run the diagnostic don't pay the import-graph cost (CDP
|
||||
// bridge, memory-snapshot types, buffer accessors).
|
||||
|
||||
import type { BrowserManager } from './browser-manager';
|
||||
import { formatBytes, type MemorySnapshot, type MemoryStructureStats } from './memory-snapshot';
|
||||
import { getModificationHistoryStats } from './cdp-inspector';
|
||||
import { getSubscriberCount as getActivitySubscriberCount } from './activity';
|
||||
import { getInspectorSubscriberCount } from './server';
|
||||
import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers';
|
||||
import { getCaptureBuffer } from './network-capture';
|
||||
|
||||
/**
 * Gather the MemoryStructureStats from the modules that own each buffer.
 *
 * Browser-manager doesn't take a hard dep on every buffer-owning module;
 * the snapshot caller builds the stats here and passes them in.
 */
function collectStructureStats(): MemoryStructureStats {
  const modificationHistory = getModificationHistoryStats();
  const activitySubscribers = getActivitySubscriberCount();
  const inspectorSubscribers = getInspectorSubscriberCount();
  const consoleBufferLen = consoleBuffer.length;
  const networkBufferLen = networkBuffer.length;
  const dialogBufferLen = dialogBuffer.length;
  const captureBufferBytes = getCaptureBuffer().byteSize;

  return {
    modificationHistory,
    activitySubscribers,
    inspectorSubscribers,
    consoleBufferLen,
    networkBufferLen,
    dialogBufferLen,
    captureBufferBytes,
  };
}
|
||||
|
||||
/**
 * Render a MemorySnapshot as human-readable terminal text.
 *
 * Sections, in order: Bun server memory, Chromium process counts grouped by
 * type, per-tab renderer stats (top 10 by JS heap), the Bun-side in-memory
 * structures, and any notes. JSON mode (--json) does not come through here;
 * callers serialize the snapshot with JSON.stringify instead.
 *
 * @param s Snapshot to render.
 * @returns Newline-joined report.
 */
function formatSnapshotText(s: MemorySnapshot): string {
  const out: string[] = [];

  const { rss, heapUsed, heapTotal, external } = s.bunServer;
  out.push(
    [
      `Bun server: RSS: ${formatBytes(rss)} `,
      `heap: ${formatBytes(heapUsed)} / ${formatBytes(heapTotal)} `,
      `external: ${formatBytes(external)}`,
    ].join(''),
  );

  const processes = s.processes;
  if (processes === null) {
    out.push('Chromium processes: (unavailable — see notes)');
  } else if (!processes || processes.length === 0) {
    out.push('Chromium processes: 0');
  } else {
    // Collapse to per-type counts so the user sees "renderer=12" rather than 12 rows.
    const countsByType = processes.reduce<Record<string, number>>((acc, proc) => {
      acc[proc.type] = (acc[proc.type] ?? 0) + 1;
      return acc;
    }, {});
    const summary = Object.entries(countsByType)
      .map(([type, count]) => `${type}=${count}`)
      .join(' ');
    out.push(`Chromium processes: ${processes.length} total (${summary})`);
  }

  if (s.tabs.length > 0) {
    // Largest JS heap first; list the top 10 and summarize the remainder.
    const ranked = [...s.tabs].sort((a, b) => b.jsHeapUsed - a.jsHeapUsed);
    const top = ranked.slice(0, 10);
    out.push(`Renderers: ${s.tabs.length} tabs (top by JS heap):`);
    for (const tab of top) {
      const url = tab.url.length > 80 ? tab.url.slice(0, 77) + '...' : tab.url;
      const heap = formatBytes(tab.jsHeapUsed).padStart(8);
      const nodes = String(tab.nodes).padStart(6);
      const listeners = String(tab.listeners).padStart(5);
      out.push(` [${heap} JS, ${nodes} nodes, ${listeners} listeners] tab #${tab.id} — ${url}`);
    }
    const hidden = ranked.length - top.length;
    if (hidden > 0) {
      out.push(` ...and ${hidden} more`);
    }
  } else {
    out.push('Renderers: (no tabs tracked)');
  }

  const structures = s.structures;
  const history = structures.modificationHistory;
  const evictedSuffix = history.evicted > 0 ? ` (${history.evicted} evicted since reset)` : '';
  out.push(
    '─────────────────────────────────────────────────',
    'In-memory structures (Bun side):',
    ` modificationHistory: ${history.current} / ${history.cap} entries` + evictedSuffix,
    ` inspectorSubscribers: ${structures.inspectorSubscribers}`,
    ` activitySubscribers: ${structures.activitySubscribers}`,
    ` consoleBuffer: ${structures.consoleBufferLen} entries`,
    ` networkBuffer: ${structures.networkBufferLen} entries`,
    ` dialogBuffer: ${structures.dialogBufferLen} entries`,
    ` captureBuffer: ${formatBytes(structures.captureBufferBytes)}`,
  );

  if (s.notes.length > 0) {
    out.push('', 'Notes:');
    for (const note of s.notes) out.push(` - ${note}`);
  }

  return out.join('\n');
}
|
||||
|
||||
/**
 * Handler for the `memory` command.
 *
 * @param args Command arguments; `--json` selects JSON output.
 * @param bm Browser manager that produces the snapshot.
 * @returns JSON.stringify of the snapshot with `--json`, otherwise the
 *          formatted text report.
 */
export async function handleMemoryCommand(args: string[], bm: BrowserManager): Promise<string> {
  const wantsJson = args.includes('--json');
  const snapshot = await bm.getMemorySnapshot(collectStructureStats());
  return wantsJson ? JSON.stringify(snapshot) : formatSnapshotText(snapshot);
}
|
||||
|
||||
/**
 * Entry point used by the /memory HTTP endpoint: same data as the `memory`
 * command, returned as the snapshot object rather than formatted text.
 */
export async function buildMemorySnapshotJson(bm: BrowserManager): Promise<MemorySnapshot> {
  return bm.getMemorySnapshot(collectStructureStats());
}
|
||||
@@ -0,0 +1,73 @@
|
||||
// Shared types for the $B memory diagnostic command and the /memory
|
||||
// endpoint. Lives in its own module so server.ts, read-commands.ts, and
|
||||
// the extension footer poll can import without taking a circular dep on
|
||||
// browser-manager.ts.
|
||||
//
|
||||
// Background: the gbrowser-OOM investigation (160 GB Activity Monitor
|
||||
// reading on a friend's machine) needed a diagnostic that could land
|
||||
// before the next incident — measurement comes first, fixes come after.
|
||||
// $B memory is that diagnostic.
|
||||
|
||||
/**
 * Sizes of the bounded in-memory structures held on the Bun side:
 * entry/subscriber counts, plus a byte size for the capture buffer.
 */
export interface MemoryStructureStats {
  /** Modification-history occupancy: current entries, cap, and evictions. */
  modificationHistory: { current: number; cap: number; evicted: number };
  activitySubscribers: number;
  inspectorSubscribers: number;
  consoleBufferLen: number;
  networkBufferLen: number;
  dialogBufferLen: number;
  captureBufferBytes: number;
}
|
||||
|
||||
/** JS heap snapshot for a single tab (CDP Performance.getMetrics). */
export interface MemoryTabSnapshot {
  id: number;
  url: string;
  title: string;
  jsHeapUsed: number;
  jsHeapTotal: number;
  documents: number;
  nodes: number;
  listeners: number;
}
|
||||
|
||||
/** Metadata for one Chromium process, from CDP SystemInfo.getProcessInfo. */
export interface MemoryProcess {
  /** Chromium-internal process id (not the OS PID). */
  id: number;
  /** Process kind: 'browser' | 'renderer' | 'gpu' | 'utility' | 'extension' | ... */
  type: string;
  /** CPU time accumulated since process start, in seconds. */
  cpuTime: number;
}
|
||||
|
||||
/** Full result of the memory diagnostic: server, tabs, processes, structures. */
export interface MemorySnapshot {
  /** Memory figures for the Bun server process. */
  bunServer: {
    rss: number;
    heapUsed: number;
    heapTotal: number;
    external: number;
  };
  /** One entry per tracked tab. */
  tabs: MemoryTabSnapshot[];
  /**
   * Chromium process tree. `null` when no browser handle is available
   * (server in connection mode, or browser not yet launched).
   *
   * Per-process RSS is NOT included: SystemInfo.getProcessInfo returns
   * id+type+cpuTime but Chromium does not expose RSS via CDP. The
   * `notes[]` field tells the caller why — see the follow-up TODO
   * "native/GPU memory breakdown" for the deferred fix.
   */
  processes: MemoryProcess[] | null;
  /** Bun-side bounded structure sizes. */
  structures: MemoryStructureStats;
  capturedAt: number;
  /** Free-form caveats to show alongside the data. */
  notes: string[];
}
|
||||
|
||||
/**
 * Format a byte count as a short human string ("1.40 GB", "312.0 MB", "84.0 KB").
 *
 * Units are binary (1 KB = 1024 B). KB and MB are shown with one decimal,
 * GB with two; values under 1024 are printed as-is with a "B" suffix.
 *
 * A value just below a unit boundary is promoted to the next unit when its
 * rounded form would read "1024.0" (e.g. 1048575 bytes renders as "1.0 MB",
 * not "1024.0 KB").
 *
 * @param n Size in bytes.
 * @returns The formatted size string.
 */
export function formatBytes(n: number): string {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;
  // toFixed(1) output that means "rounded up to a full next unit".
  const ROLLOVER = '1024.0';

  if (n < KB) return `${n} B`;

  const kb = (n / KB).toFixed(1);
  if (n < MB && kb !== ROLLOVER) return `${kb} KB`;

  const mb = (n / MB).toFixed(1);
  if (n < GB && mb !== ROLLOVER) return `${mb} MB`;

  return `${(n / GB).toFixed(2)} GB`;
}
|
||||
@@ -11,6 +11,7 @@ import { handleSkillCommand } from './browser-skill-commands';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import { checkScope, type TokenInfo } from './token-registry';
|
||||
import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security';
|
||||
import { guardScreenshotBuffer, guardScreenshotPath } from './screenshot-size-guard';
|
||||
// Re-export for backward compatibility (tests import from meta-commands)
|
||||
export { validateOutputPath, escapeRegExp } from './path-security';
|
||||
import * as Diff from 'diff';
|
||||
@@ -136,7 +137,7 @@ function parsePdfArgs(args: string[]): ParsedPdfArgs {
|
||||
return result;
|
||||
}
|
||||
|
||||
function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
|
||||
export function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
|
||||
// Parity with load-html --from-file (browse/src/write-commands.ts) and
|
||||
// the direct load-html <file> path: every caller-supplied file path
|
||||
// must pass validateReadPath so the safe-dirs policy can't be skirted
|
||||
@@ -149,7 +150,16 @@ function parsePdfFromFile(payloadPath: string): ParsedPdfArgs {
|
||||
);
|
||||
}
|
||||
const raw = fs.readFileSync(payloadPath, 'utf8');
|
||||
const json = JSON.parse(raw);
|
||||
let json: any;
|
||||
try {
|
||||
json = JSON.parse(raw);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
throw new Error(`pdf: --from-file ${payloadPath} is not valid JSON (${msg}).`);
|
||||
}
|
||||
if (json === null || typeof json !== 'object' || Array.isArray(json)) {
|
||||
throw new Error(`pdf: --from-file ${payloadPath} must be a JSON object, got ${Array.isArray(json) ? 'array' : typeof json}.`);
|
||||
}
|
||||
const out: ParsedPdfArgs = {
|
||||
output: json.output || `${TEMP_DIR}/browse-page.pdf`,
|
||||
format: json.format,
|
||||
@@ -497,6 +507,10 @@ export async function handleMetaCommand(
|
||||
buffer = await page.screenshot({ clip: clipRect });
|
||||
} else {
|
||||
buffer = await page.screenshot({ fullPage: !viewportOnly });
|
||||
// Guard the most common API-bricking case (fullPage). Element /
|
||||
// clip captures usually stay within the cap; we still guard the
|
||||
// path-mode below for fullPage writes.
|
||||
({ buffer } = await guardScreenshotBuffer(buffer));
|
||||
}
|
||||
if (buffer.length > 10 * 1024 * 1024) {
|
||||
throw new Error('Screenshot too large for --base64 (>10MB). Use disk path instead.');
|
||||
@@ -517,6 +531,7 @@ export async function handleMetaCommand(
|
||||
}
|
||||
|
||||
await page.screenshot({ path: outputPath, fullPage: !viewportOnly });
|
||||
if (!viewportOnly) await guardScreenshotPath(outputPath);
|
||||
return `Screenshot saved${viewportOnly ? ' (viewport)' : ''}: ${outputPath}`;
|
||||
}
|
||||
|
||||
@@ -567,6 +582,7 @@ export async function handleMetaCommand(
|
||||
const screenshotPath = `${prefix}-${vp.name}.png`;
|
||||
validateOutputPath(screenshotPath);
|
||||
await page.screenshot({ path: screenshotPath, fullPage: true });
|
||||
await guardScreenshotPath(screenshotPath);
|
||||
results.push(`${vp.name} (${vp.width}x${vp.height}): ${screenshotPath}`);
|
||||
}
|
||||
|
||||
@@ -1145,6 +1161,13 @@ export async function handleMetaCommand(
|
||||
return await handleCdpCommand(args, bm);
|
||||
}
|
||||
|
||||
case 'memory': {
|
||||
// Lazy import — pulls in cdp-bridge + memory-snapshot + buffer accessors
|
||||
// that aren't useful for projects that never run the diagnostic.
|
||||
const { handleMemoryCommand } = await import('./memory-command');
|
||||
return await handleMemoryCommand(args, bm);
|
||||
}
|
||||
|
||||
default:
|
||||
throw new Error(`Unknown meta command: ${command}`);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* PTY session lease registry (v1.44+).
|
||||
*
|
||||
* Separates two concerns that pre-v1.44 were conflated under one token:
|
||||
*
|
||||
* - **sessionId** — stable, non-secret identifier for a single PTY session.
|
||||
* Safe to log, safe to include in URLs and server access logs, safe to
|
||||
* keep in DevTools. Identifies "this terminal," not "you're allowed to
|
||||
* use this terminal."
|
||||
*
|
||||
* - **attachToken** — secret, short-lived (30 s) bearer credential that
|
||||
* grants the WS upgrade for ONE attach attempt against a session. Minted
|
||||
* on every /pty-session and /pty-session/reattach call; revoked when
|
||||
* the WS upgrade consumes it. Kept out of logs.
|
||||
*
|
||||
* - **lease** — server-side bookkeeping that maps sessionId → expiresAt.
|
||||
* Re-attach within the lease window resumes the same PTY (and replays
|
||||
* the ring buffer from terminal-agent). Lease expiry tears down the
|
||||
* session.
|
||||
*
|
||||
* Codex outside-voice (T1 of the eng review) pushed for this separation:
|
||||
* "the auth token IS the session id" collapsed identity into a secret,
|
||||
* meaning re-attach URLs and logs carry the bearer credential. The lease
|
||||
* model fixes that without changing the user experience.
|
||||
*
|
||||
* Mint cadence:
|
||||
* - Initial /pty-session: mint sessionId + lease + attachToken (one round trip).
|
||||
* - /pty-session/reattach: validate sessionId/lease, mint fresh attachToken.
|
||||
* - /pty-restart: revoke old lease, mint fresh sessionId + lease + attachToken.
|
||||
* - /pty-dispose: revoke lease (and the terminal-agent disposes the PTY).
|
||||
*
|
||||
* Lease TTL is env-overridable so v1.44 e2e tests can compress detach
|
||||
* windows to 1 s instead of waiting 30 minutes per assertion.
|
||||
*/
|
||||
import * as crypto from 'crypto';
|
||||
|
||||
/** Server-side record for one PTY session lease. Timestamps are epoch milliseconds. */
interface Lease {
  /** When the lease was minted. */
  createdAt: number;
  /** Instant after which the lease is treated as expired. */
  expiresAt: number;
}

/** Default lease lifetime: 30 minutes; covers idle-but-engaged user sessions. */
const DEFAULT_LEASE_TTL_MS = 30 * 60 * 1000;

/**
 * Resolve the lease TTL from the raw GSTACK_PTY_LEASE_TTL_MS value.
 *
 * Falls back to the 30-minute default when the value is unset, empty, not a
 * number, or not positive. Without this guard a typo in the env var yields
 * NaN, every `expiresAt` becomes NaN, and because comparisons against NaN are
 * always false the leases would never expire or be pruned.
 *
 * @param raw - the env value as read from process.env (may be undefined)
 * @returns a finite, positive TTL in milliseconds
 */
function resolveLeaseTtlMs(raw: string | undefined): number {
  if (!raw) return DEFAULT_LEASE_TTL_MS;
  const parsed = parseInt(raw, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_LEASE_TTL_MS;
}

/** Lease lifetime in ms; env-overridable so tests can compress detach windows. */
const LEASE_TTL_MS = resolveLeaseTtlMs(process.env.GSTACK_PTY_LEASE_TTL_MS);

/** Hard cap on registry size; pruneExpired evicts oldest-inserted entries above it. */
const MAX_LEASES = 10_000;

/** sessionId → lease. Map iteration order is insertion order (oldest first). */
const leases = new Map<string, Lease>();
|
||||
|
||||
/**
 * Register a new lease under a freshly generated random sessionId.
 *
 * The sessionId is 32 random bytes, base64url-encoded. The lease expires
 * LEASE_TTL_MS after the moment of minting. After insertion the registry is
 * opportunistically pruned.
 *
 * @returns the non-secret sessionId and the lease's expiry timestamp (epoch ms);
 *          the caller surfaces both to the client.
 */
export function mintLease(): { sessionId: string; expiresAt: number } {
  const mintedAt = Date.now();
  const lease: Lease = { createdAt: mintedAt, expiresAt: mintedAt + LEASE_TTL_MS };
  const sessionId = crypto.randomBytes(32).toString('base64url');

  leases.set(sessionId, lease);
  pruneExpired(mintedAt);

  return { sessionId, expiresAt: lease.expiresAt };
}
|
||||
|
||||
/**
 * Report whether `sessionId` names a lease that exists and has not expired.
 *
 * A missing/empty sessionId is rejected without touching the registry. For an
 * unknown or expired sessionId the registry is lazily pruned before returning;
 * an expired lease is also removed on the spot.
 *
 * @param sessionId - identifier to check; null/undefined/empty is never valid
 * @returns `{ ok: true, expiresAt }` for a live lease, `{ ok: false }` otherwise
 */
export function validateLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
  if (!sessionId) return { ok: false };

  const entry = leases.get(sessionId);
  const expired = entry !== undefined && Date.now() > entry.expiresAt;

  if (entry !== undefined && !expired) {
    return { ok: true, expiresAt: entry.expiresAt };
  }

  // Unknown or expired: drop the stale entry (if any) and use the miss as a
  // cheap opportunity to sweep other expired leases.
  if (expired) leases.delete(sessionId);
  pruneExpired(Date.now());
  return { ok: false };
}
|
||||
|
||||
/**
 * Push a live lease's expiry out to `now + LEASE_TTL_MS`.
 *
 * Callers are expected to refresh lazily — only when the lease is close to
 * expiry — rather than on every keepalive.
 *
 * Security invariant: an expired lease is never resurrected. If the lease is
 * unknown or already past its expiry it is removed (when present) and the
 * call fails, so the TTL keeps bounding how long a leaked attach token can
 * matter.
 *
 * @param sessionId - identifier of the lease to extend
 * @returns `{ ok: true, expiresAt }` with the new expiry, or `{ ok: false }`
 *          when the lease is missing or expired (the agent should then close
 *          the WS and surface auth-invalid)
 */
export function refreshLease(sessionId: string | null | undefined): { ok: true; expiresAt: number } | { ok: false } {
  const lease = sessionId ? leases.get(sessionId) : undefined;
  if (!sessionId || !lease) return { ok: false };

  const refreshedAt = Date.now();
  if (refreshedAt > lease.expiresAt) {
    leases.delete(sessionId);
    return { ok: false };
  }

  const expiresAt = refreshedAt + LEASE_TTL_MS;
  lease.expiresAt = expiresAt;
  return { ok: true, expiresAt };
}
|
||||
|
||||
/**
 * Remove a lease from the registry. Used for explicit teardown
 * (/pty-dispose, /pty-restart, WS close with code 4001) and for session
 * timeout in terminal-agent. A missing/empty sessionId or an unknown lease
 * is a no-op.
 */
export function revokeLease(sessionId: string | null | undefined): void {
  if (sessionId) {
    leases.delete(sessionId);
  }
}
|
||||
|
||||
/**
 * Number of leases currently held in the registry (including any that have
 * expired but not yet been pruned). Intended for tests and observability.
 */
export function leaseCount(): number {
  const { size } = leases;
  return size;
}
|
||||
|
||||
/** Test-only: empty the lease registry so each test starts from a clean slate. */
export function __resetLeases(): void {
  leases.clear();
}
|
||||
|
||||
/**
 * Bounded, opportunistic cleanup of the lease registry.
 *
 * 1. Inspects at most the 20 oldest-inserted entries and deletes those whose
 *    expiry is at or before `now` (deleting during Map iteration is safe).
 * 2. If the registry still exceeds MAX_LEASES, evicts entries in insertion
 *    order until it fits, regardless of their expiry.
 *
 * @param now - current time in epoch milliseconds
 */
function pruneExpired(now: number): void {
  let budget = 20;
  for (const [id, { expiresAt }] of leases) {
    if (budget-- <= 0) break;
    if (expiresAt <= now) {
      leases.delete(id);
    }
  }

  while (leases.size > MAX_LEASES) {
    const oldest = leases.keys().next().value;
    if (!oldest) break;
    leases.delete(oldest);
  }
}
|
||||
+130
-7
@@ -13,7 +13,7 @@ import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { TEMP_DIR } from './platform';
|
||||
import { inspectElement, formatInspectorResult, getModificationHistory } from './cdp-inspector';
|
||||
import { validateReadPath } from './path-security';
|
||||
import { validateReadPath, validateOutputPath } from './path-security';
|
||||
import { stripLoneSurrogates } from './sanitize';
|
||||
// Re-export for backward compatibility (tests import from read-commands)
|
||||
export { validateReadPath } from './path-security';
|
||||
@@ -46,6 +46,117 @@ function wrapForEvaluate(code: string): string {
|
||||
: `(async()=>(${trimmed}))()`;
|
||||
}
|
||||
|
||||
/** Result of parseOutArgs: the `--out` / `--raw` flags plus everything else. */
export interface OutArgs {
  /** Destination given via `--out <path>` or `--out=<path>`; undefined when absent. */
  outPath?: string;
  /** True when `--raw` (or `--raw=true`) was the last raw setting seen. */
  raw: boolean;
  /** Remaining args in their original order, with the flags removed. */
  rest: string[];
}

/**
 * Split `--out <path>` / `--out=<path>` and `--raw` / `--raw=true|false` out
 * of an arg list.
 *
 * This is the single parser shared by the js/eval handlers and the
 * write-capability gate in server.ts, so both agree on what an `--out`
 * invocation looks like.
 *
 * @param args - raw command args
 * @returns the parsed flags and the remaining positional args
 * @throws Error when `--out` appears twice, `--out` has no value (or its value
 *         looks like another `--flag`), or `--raw=` is not `true`/`false`
 *         (case-insensitive)
 */
export function parseOutArgs(args: string[]): OutArgs {
  const OUT_PREFIX = '--out=';
  const RAW_PREFIX = '--raw=';
  const parsed: OutArgs = { outPath: undefined, raw: false, rest: [] };

  let index = 0;
  while (index < args.length) {
    const token = args[index++];

    if (token === '--out' || token.startsWith(OUT_PREFIX)) {
      if (parsed.outPath !== undefined) throw new Error('--out specified more than once');
      let value: string | undefined;
      if (token === '--out') {
        // Space-separated form: the path is the next arg, which is consumed.
        value = args[index++];
        if (value === undefined || value.startsWith('--')) throw new Error('--out requires a file path');
      } else {
        value = token.slice(OUT_PREFIX.length);
        if (value === '') throw new Error('--out requires a file path');
      }
      parsed.outPath = value;
      continue;
    }

    if (token === '--raw') {
      parsed.raw = true;
      continue;
    }

    if (token.startsWith(RAW_PREFIX)) {
      const setting = token.slice(RAW_PREFIX.length).toLowerCase();
      if (setting !== 'true' && setting !== 'false') throw new Error('--raw must be true or false');
      parsed.raw = setting === 'true';
      continue;
    }

    parsed.rest.push(token);
  }

  return parsed;
}
|
||||
|
||||
/**
 * Report whether an arg list carries an `--out` flag in either accepted
 * spelling (`--out <path>` or `--out=<path>`).
 *
 * The write-capability gate uses this to decide whether a normally read-only
 * command (`js`/`eval`) is being used to write a file. Recognition matches
 * parseOutArgs, but this function never throws: a malformed `--out=` still
 * counts as an out attempt so it gets gated (fail safe).
 */
export function hasOutArg(args: string[]): boolean {
  for (const arg of args) {
    if (arg === '--out' || arg.startsWith('--out=')) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Stringify an evaluate() result exactly the way `js`/`eval` did inline
 * before `--out` existed.
 *
 * - Anything with `typeof === 'object'` (objects, arrays, and `null`) is
 *   pretty-printed via JSON.stringify with 2-space indent, so `null` becomes
 *   the string `"null"`.
 * - Everything else goes through `String(result ?? '')`, so `undefined`
 *   becomes the empty string.
 *
 * JSON.stringify still throws on circular or BigInt-bearing values.
 */
export function resultToString(result: unknown): string {
  if (typeof result === 'object') {
    return JSON.stringify(result, null, 2);
  }
  return String(result ?? '');
}
|
||||
|
||||
/**
 * Decode a base64 data URL (`data:<type>;...;base64,<payload>`) to raw bytes.
 *
 * The scheme and header tokens are matched case-insensitively (URI schemes are
 * case-insensitive), and the header is split from the payload at the FIRST
 * comma. Whitespace inside the payload is ignored. The payload is validated
 * against the base64 alphabet before decoding because
 * `Buffer.from(_, 'base64')` silently drops invalid characters and would
 * otherwise yield corrupted bytes.
 *
 * @returns the decoded bytes, or null when `str` is not a base64 data URL
 * @throws Error when the URL is marked base64 but the payload is malformed
 */
function decodeBase64DataUrl(str: string): Buffer | null {
  const SCHEME = 'data:';
  if (str.slice(0, SCHEME.length).toLowerCase() !== SCHEME) return null;

  const comma = str.indexOf(',');
  if (comma === -1) return null;

  const headerTokens = str
    .slice(SCHEME.length, comma)
    .split(';')
    .map(t => t.trim().toLowerCase());
  if (!headerTokens.includes('base64')) return null;

  const payload = str.slice(comma + 1).replace(/\s+/g, '');
  if (!/^[A-Za-z0-9+/]*={0,2}$/.test(payload)) {
    throw new Error('--out: malformed base64 in data URL (decode would corrupt output)');
  }
  return Buffer.from(payload, 'base64');
}

/**
 * Write an evaluate result string to disk for `--out`, returning bytes written.
 *
 * When `opts.raw` is false and the result is a base64 data URL, the payload is
 * decoded and the raw bytes are written — the Excalidraw / og-image path where
 * a render function returns a PNG data URL. `--raw` forces a literal write even
 * for data URLs. Any other string is surrogate-sanitized (matching the stdout
 * egress path) and written as UTF-8.
 *
 * The bytes are prepared BEFORE the filesystem is touched, so a malformed
 * base64 payload fails without leaving freshly created directories behind.
 * Parent directories are created here; validateOutputPath is called first to
 * gate the location.
 *
 * @param outPath - destination file path
 * @param str - the stringified evaluate result
 * @param opts.raw - write `str` literally, skipping data-URL decoding
 * @returns number of bytes written
 * @throws Error from validateOutputPath, on malformed base64, or from the
 *         underlying fs calls
 */
export function writeEvalResult(outPath: string, str: string, opts: { raw: boolean }): number {
  validateOutputPath(outPath);

  const decoded = opts.raw ? null : decodeBase64DataUrl(str);
  const buf = decoded ?? Buffer.from(stripLoneSurrogates(str), 'utf-8');

  fs.mkdirSync(path.dirname(path.resolve(outPath)), { recursive: true });
  fs.writeFileSync(outPath, buf);
  return buf.length;
}
|
||||
|
||||
/**
|
||||
* Extract clean text from a page (strips script/style/noscript/svg).
|
||||
* Exported for DRY reuse in meta-commands (diff).
|
||||
@@ -179,24 +290,36 @@ export async function handleReadCommand(
|
||||
}
|
||||
|
||||
case 'js': {
|
||||
const expr = args[0];
|
||||
if (!expr) throw new Error('Usage: browse js <expression>');
|
||||
const { outPath, raw, rest } = parseOutArgs(args);
|
||||
const expr = rest[0];
|
||||
if (!expr) throw new Error('Usage: browse js <expression> [--out <file>] [--raw]');
|
||||
if (bm) assertJsOriginAllowed(bm, page.url());
|
||||
const wrapped = wrapForEvaluate(expr);
|
||||
const result = await target.evaluate(wrapped);
|
||||
return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
|
||||
const str = resultToString(result);
|
||||
if (outPath) {
|
||||
const n = writeEvalResult(outPath, str, { raw });
|
||||
return `JS result written: ${outPath} (${n} bytes)`;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
case 'eval': {
|
||||
const filePath = args[0];
|
||||
if (!filePath) throw new Error('Usage: browse eval <js-file>');
|
||||
const { outPath, raw, rest } = parseOutArgs(args);
|
||||
const filePath = rest[0];
|
||||
if (!filePath) throw new Error('Usage: browse eval <js-file> [--out <file>] [--raw]');
|
||||
if (bm) assertJsOriginAllowed(bm, page.url());
|
||||
validateReadPath(filePath);
|
||||
if (!fs.existsSync(filePath)) throw new Error(`File not found: ${filePath}`);
|
||||
const code = fs.readFileSync(filePath, 'utf-8');
|
||||
const wrapped = wrapForEvaluate(code);
|
||||
const result = await target.evaluate(wrapped);
|
||||
return typeof result === 'object' ? JSON.stringify(result, null, 2) : String(result ?? '');
|
||||
const str = resultToString(result);
|
||||
if (outPath) {
|
||||
const n = writeEvalResult(outPath, str, { raw });
|
||||
return `Eval result written: ${outPath} (${n} bytes)`;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
case 'css': {
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
* Screenshot size guard — keep full-page screenshots ≤ 2000px max-dim.
|
||||
*
|
||||
* The Anthropic vision API rejects images whose longest dimension exceeds
|
||||
* 2000 image-pixels (post deviceScaleFactor). Full-page screenshots of long
|
||||
* pages routinely exceed that, silently bricking the session: the agent
|
||||
* burns turns on a base64 blob that errors model-side with no useful
|
||||
* stderr surfacing on the browse side.
|
||||
*
|
||||
* This module centralizes the "after page.screenshot, check dimensions and
|
||||
* downscale if too big" path so every full-page caller in browse/src can
|
||||
* share the same enforcement. The cap is image-pixels, not CSS pixels,
|
||||
* matching the Anthropic API's own threshold.
|
||||
*
|
||||
* Used by: snapshot.ts (annotated, heatmap), meta-commands.ts (screenshot),
|
||||
* write-commands.ts (prettyscreenshot). See test/snapshot-meta-write-guard.test.ts.
|
||||
*
|
||||
* Closes #1214.
|
||||
*/
|
||||
|
||||
import { writeFileSync, readFileSync } from "fs";
|
||||
|
||||
/** Longest allowed image side, in image pixels (the Anthropic vision API cap). */
const MAX_DIMENSION_PX = 2000;

export interface SizeGuardResult {
  /** True if the input image exceeded MAX_DIMENSION_PX and was downscaled. */
  resized: boolean;
  /** Final width and height (pixels) of the image as written/returned. */
  width: number;
  height: number;
  /** Original dimensions before any downscale. */
  originalWidth: number;
  originalHeight: number;
}

/**
 * Compute the target size that brings the longest side down to
 * MAX_DIMENSION_PX while preserving aspect ratio.
 *
 * Each side is clamped to at least 1px: for extreme aspect ratios (e.g. a
 * 2x20000 strip) plain rounding yields 0, which sharp's resize() rejects.
 *
 * @param width - original width in pixels (longest side must exceed the cap)
 * @param height - original height in pixels
 */
function fitWithinMaxDimension(width: number, height: number): { width: number; height: number } {
  const scale = MAX_DIMENSION_PX / Math.max(width, height);
  return {
    width: Math.max(1, Math.round(width * scale)),
    height: Math.max(1, Math.round(height * scale)),
  };
}

/**
 * Inspect an image buffer and downscale it if its longest side exceeds the
 * 2000px cap. Aspect ratio is preserved and the downscaled image is re-encoded
 * as PNG. Images already within the cap (or whose metadata reports no
 * dimensions) are returned untouched.
 *
 * sharp is imported lazily so its module load cost only hits screenshot paths.
 *
 * @param input - encoded image bytes
 * @returns the (possibly downscaled) buffer plus a diagnostic describing the
 *          original and final dimensions. A downscale also logs one line to
 *          stderr.
 */
export async function guardScreenshotBuffer(input: Buffer): Promise<{ buffer: Buffer; result: SizeGuardResult }> {
  const sharpModule = await import("sharp");
  const sharp = sharpModule.default ?? sharpModule;
  const image = sharp(input);
  const metadata = await image.metadata();
  const width = metadata.width ?? 0;
  const height = metadata.height ?? 0;

  if (Math.max(width, height) <= MAX_DIMENSION_PX) {
    return {
      buffer: input,
      result: {
        resized: false,
        width,
        height,
        originalWidth: width,
        originalHeight: height,
      },
    };
  }

  const { width: newWidth, height: newHeight } = fitWithinMaxDimension(width, height);

  const resized = await image
    .resize(newWidth, newHeight, { fit: "inside" })
    .png()
    .toBuffer();

  process.stderr.write(
    `[screenshot-size-guard] image ${width}x${height} exceeded ${MAX_DIMENSION_PX}px max-dim; ` +
      `downscaled to ${newWidth}x${newHeight} to fit Anthropic vision API\n`,
  );

  return {
    buffer: resized,
    result: {
      resized: true,
      width: newWidth,
      height: newHeight,
      originalWidth: width,
      originalHeight: height,
    },
  };
}
|
||||
|
||||
/**
 * File-mode variant of the size guard: load the image at `filePath`, run it
 * through guardScreenshotBuffer, and overwrite the file in place only when a
 * downscale actually happened. Intended to be called right after
 * `await page.screenshot({ path, ... })`.
 *
 * @param filePath - path of the screenshot to check (and possibly rewrite)
 * @returns the diagnostic describing original and final dimensions
 */
export async function guardScreenshotPath(filePath: string): Promise<SizeGuardResult> {
  const guarded = await guardScreenshotBuffer(readFileSync(filePath));
  if (!guarded.result.resized) {
    return guarded.result;
  }
  writeFileSync(filePath, guarded.buffer);
  return guarded.result;
}
|
||||
|
||||
export const SCREENSHOT_MAX_DIMENSION_PX = MAX_DIMENSION_PX;
|
||||
@@ -135,7 +135,7 @@ export function getClassifierStatus(): ClassifierStatus {
|
||||
|
||||
// ─── Model download + staging ────────────────────────────────
|
||||
|
||||
async function downloadFile(url: string, dest: string): Promise<void> {
|
||||
export async function downloadFile(url: string, dest: string): Promise<void> {
|
||||
const res = await fetch(url);
|
||||
if (!res.ok || !res.body) {
|
||||
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
|
||||
@@ -144,16 +144,30 @@ async function downloadFile(url: string, dest: string): Promise<void> {
|
||||
const writer = fs.createWriteStream(tmp);
|
||||
// @ts-ignore — Node stream compat
|
||||
const reader = res.body.getReader();
|
||||
let done = false;
|
||||
while (!done) {
|
||||
const chunk = await reader.read();
|
||||
if (chunk.done) { done = true; break; }
|
||||
writer.write(chunk.value);
|
||||
try {
|
||||
let done = false;
|
||||
while (!done) {
|
||||
const chunk = await reader.read();
|
||||
if (chunk.done) { done = true; break; }
|
||||
writer.write(chunk.value);
|
||||
}
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
|
||||
});
|
||||
fs.renameSync(tmp, dest);
|
||||
} catch (err) {
|
||||
// Drop the half-written tmp so we don't ship a truncated model file to
|
||||
// a retry's renameSync. Wait for the writer to close fully before
|
||||
// unlinking: Node's createWriteStream lazily opens the FD and flushes
|
||||
// buffered writes during destroy(), so a naive unlinkSync hits ENOENT
|
||||
// first and the writer re-creates the file on the next tick.
|
||||
await new Promise<void>((resolve) => {
|
||||
writer.once('close', () => resolve());
|
||||
writer.destroy();
|
||||
});
|
||||
try { fs.unlinkSync(tmp); } catch { /* nothing to clean */ }
|
||||
throw err;
|
||||
}
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
writer.end((err?: Error | null) => (err ? reject(err) : resolve()));
|
||||
});
|
||||
fs.renameSync(tmp, dest);
|
||||
}
|
||||
|
||||
async function ensureTestsavantStaged(onProgress?: (msg: string) => void): Promise<void> {
|
||||
|
||||
@@ -0,0 +1,231 @@
|
||||
/**
|
||||
* Security sidecar client — IPC layer for the Node L4 classifier subprocess.
|
||||
*
|
||||
* Spawn model: lazy. First call to scan() spawns the sidecar, warms it (the
|
||||
* sidecar's loadTestsavant call on first scan-page-content), and reuses
|
||||
* the same process for every subsequent scan. The process dies when the
|
||||
* browse server exits (Node's stdin-close behavior).
|
||||
*
|
||||
* Reliability:
|
||||
* - 5s default timeout per scan. Caller can override per-call.
|
||||
* - 64KB request cap. Larger payloads short-circuit with `payload-too-large`.
|
||||
* - Respawn capped at 3 failures within 10 minutes; further failures
|
||||
* trip a circuit breaker that returns `available: false` until reset.
|
||||
* - Parent-exit cleanup: process.on('exit') sends SIGTERM to the child.
|
||||
*
|
||||
* Failure semantics:
|
||||
* - Node not on PATH → available() returns false; caller (the
|
||||
* /pty-inject-scan endpoint) returns l4: { available: false } and the
|
||||
* extension degrades to WARN + user confirm.
|
||||
* - Scan throws or times out → caller treats as L4-unavailable for that
|
||||
* request and falls through to L1-L3-only verdict.
|
||||
*
|
||||
* Single-process singleton. Multiple callers within the same browse
|
||||
* process share one sidecar.
|
||||
*/
|
||||
|
||||
import { ChildProcessByStdio, spawn } from "child_process";
|
||||
import { Readable, Writable } from "stream";
|
||||
import { findSecuritySidecar } from "./find-security-sidecar";
|
||||
|
||||
const REQUEST_CAP_BYTES = 64 * 1024;
|
||||
const DEFAULT_TIMEOUT_MS = 5000;
|
||||
const RESPAWN_WINDOW_MS = 10 * 60 * 1000;
|
||||
const RESPAWN_LIMIT = 3;
|
||||
|
||||
/** Bookkeeping for one in-flight request awaiting a sidecar response. */
interface PendingRequest {
  /** Settle the request with the parsed response line. */
  resolve: (response: unknown) => void;
  /** Fail the request (sidecar error, timeout, or sidecar death). */
  reject: (err: Error) => void;
  /** Per-request timeout timer; cleared whenever the request settles. */
  timer: ReturnType<typeof setTimeout>;
}

/** Mutable singleton state for the sidecar connection. */
interface SidecarState {
  /** The live sidecar process, or null when none is running. */
  child: ChildProcessByStdio<Writable, Readable, Readable> | null;
  /** In-flight requests keyed by request id. */
  pending: Map<string, PendingRequest>;
  /** Stdout data received so far that has not yet formed a complete line. */
  buffer: string;
  /** Timestamps (epoch ms) of recent failures. */
  failures: number[];
  available: boolean;
  /** True after circuit-breaker tripped; stays true until reset() */
  brokenCircuit: boolean;
  /** Next request id to hand out; ids are sent as decimal strings. */
  nextId: number;
}

let state: SidecarState | null = null;

/**
 * Return the process-wide sidecar state, creating a pristine one (no child,
 * no pending requests, breaker closed, ids starting at 1) on first use or
 * after the state has been reset to null.
 */
function getState(): SidecarState {
  if (state !== null) return state;

  const fresh: SidecarState = {
    child: null,
    pending: new Map(),
    buffer: "",
    failures: [],
    available: true,
    brokenCircuit: false,
    nextId: 1,
  };
  state = fresh;
  return fresh;
}
|
||||
|
||||
/**
 * Note a sidecar failure and trip the circuit breaker when too many pile up.
 *
 * Failures older than RESPAWN_WINDOW_MS are forgotten first; the new failure
 * is then appended. Once RESPAWN_LIMIT failures sit inside the window the
 * breaker trips (`brokenCircuit = true`, `available = false`).
 */
function recordFailure(): void {
  const sidecar = getState();
  const stamp = Date.now();

  const recent = sidecar.failures.filter((t) => stamp - t < RESPAWN_WINDOW_MS);
  recent.push(stamp);
  sidecar.failures = recent;

  if (recent.length < RESPAWN_LIMIT) return;
  sidecar.brokenCircuit = true;
  sidecar.available = false;
}
|
||||
|
||||
/**
 * Drain every complete NDJSON line from the shared stdout buffer and settle
 * the pending request each line answers.
 *
 * - Blank lines are skipped.
 * - Lines that are not valid JSON count as a failure but reject nothing,
 *   since there is no way to tell which request they belonged to.
 * - Valid JSON that is not an object (e.g. the literal `null`), has no string
 *   id, or names an id with no pending request is ignored. The explicit
 *   non-object check matters: `JSON.parse("null")` succeeds, and reading
 *   `.id` off the result would otherwise throw inside the stream handler.
 * - `ok: true` resolves the request with the whole parsed message; anything
 *   else records a failure and rejects with the message's `error` (or
 *   "sidecar-error").
 *
 * A trailing partial line stays in the buffer for the next chunk.
 */
function processBuffer(): void {
  const s = getState();
  let newline = s.buffer.indexOf("\n");
  while (newline !== -1) {
    const line = s.buffer.slice(0, newline).trim();
    s.buffer = s.buffer.slice(newline + 1);
    newline = s.buffer.indexOf("\n");
    if (!line) continue;

    let decoded: unknown;
    try {
      decoded = JSON.parse(line);
    } catch {
      // Malformed line — record as failure but don't reject any specific
      // pending request (we don't know which one this was meant for).
      recordFailure();
      continue;
    }
    // Bare JSON literals carry no id; `null` in particular must not be dereferenced.
    if (typeof decoded !== "object" || decoded === null) continue;

    const parsed = decoded as { id?: string; ok?: boolean; verdict?: unknown; status?: unknown; error?: string };
    const id = typeof parsed.id === "string" ? parsed.id : null;
    if (!id) continue;

    const pending = s.pending.get(id);
    if (!pending) continue;
    s.pending.delete(id);
    clearTimeout(pending.timer);

    if (parsed.ok) {
      pending.resolve(parsed);
    } else {
      recordFailure();
      pending.reject(new Error(parsed.error ?? "sidecar-error"));
    }
  }
}
|
||||
|
||||
/**
 * Tear down the current sidecar process, if any.
 *
 * Sends SIGTERM (ignoring errors from an already-dead process), forgets the
 * child, and rejects every in-flight request with "sidecar-died" after
 * clearing its timeout. A no-op when no child is running.
 */
function shutdownChild(): void {
  const sidecar = getState();
  const child = sidecar.child;
  if (child === null) return;

  try {
    child.kill("SIGTERM");
  } catch {
    // Already dead.
  }
  sidecar.child = null;

  sidecar.pending.forEach((request) => {
    clearTimeout(request.timer);
    request.reject(new Error("sidecar-died"));
  });
  sidecar.pending.clear();
}
|
||||
|
||||
/**
 * Spawn the sidecar process and wire up its stdio.
 *
 * Refuses to spawn while the circuit breaker is tripped, and marks the
 * sidecar unavailable when findSecuritySidecar() cannot locate it.
 *
 * Every event handler is scoped to the child it was registered on: once a
 * child has been replaced or shut down, its late `exit` / `error` / `data`
 * events are ignored instead of tearing down (or feeding garbage to) whatever
 * child is current by then.
 *
 * @returns true when a child was spawned and installed as the current
 *          sidecar; false otherwise (a thrown spawn records a failure)
 */
function spawnSidecar(): boolean {
  const s = getState();
  if (s.brokenCircuit) return false;
  const location = findSecuritySidecar();
  if (!location) {
    s.available = false;
    return false;
  }
  try {
    const child = spawn(location.node, [location.entry], {
      stdio: ["pipe", "pipe", "pipe"],
      detached: false,
    });
    const isCurrent = (): boolean => s.child === child;
    const failIfCurrent = (): void => {
      if (!isCurrent()) return;
      recordFailure();
      shutdownChild();
    };

    // A previous child may have died mid-line; don't let its leftover bytes
    // get glued onto this child's first response.
    s.buffer = "";

    // Decode at the stream level so a multi-byte UTF-8 sequence split across
    // two chunks is reassembled rather than corrupted.
    child.stdout.setEncoding("utf-8");
    child.stdout.on("data", (chunk: string | Buffer) => {
      if (!isCurrent()) return;
      s.buffer += typeof chunk === "string" ? chunk : chunk.toString("utf-8");
      processBuffer();
    });

    // stderr is piped but nothing reads it; drain it so the child can never
    // block on a full pipe buffer.
    child.stderr.resume();

    // Writing to a dead child surfaces EPIPE as an async 'error' event on
    // stdin, which would be an uncaught exception without a listener.
    child.stdin.on("error", failIfCurrent);

    child.on("exit", () => {
      if (isCurrent()) shutdownChild();
    });
    child.on("error", failIfCurrent);

    s.child = child;
    s.available = true;
    return true;
  } catch {
    recordFailure();
    return false;
  }
}
|
||||
|
||||
// Best-effort parent-exit cleanup. Node's "exit" event blocks async work, so
|
||||
// we send SIGTERM synchronously and let the OS reap the child.
|
||||
process.on("exit", () => shutdownChild());
|
||||
|
||||
/** Outcome of an availability probe; `reason` is set only when unavailable. */
export interface SidecarAvailability {
  available: boolean;
  reason?: string;
}

/**
 * Cheap availability probe that never spawns the sidecar.
 *
 * - Tripped circuit breaker → unavailable ("circuit-broken").
 * - A child is already running → available.
 * - Otherwise ask findSecuritySidecar(): when it resolves nothing (no node on
 *   PATH, no entry on disk) the sidecar is unavailable ("no-node-or-entry")
 *   until a setup re-run.
 */
export function isSidecarAvailable(): SidecarAvailability {
  const { brokenCircuit, child } = getState();

  if (brokenCircuit) {
    return { available: false, reason: "circuit-broken" };
  }
  if (child !== null) {
    return { available: true };
  }

  return findSecuritySidecar()
    ? { available: true }
    : { available: false, reason: "no-node-or-entry" };
}
|
||||
|
||||
/**
 * Send `text` to the sidecar for a "scan-page-content" scan and resolve with
 * its verdict.
 *
 * Spawns the sidecar on demand when none is running. The request is
 * registered in the pending map under a fresh id before it is written to the
 * child's stdin, so the response can be correlated when it arrives.
 *
 * @param text - content to scan; must be at most REQUEST_CAP_BYTES in UTF-8
 * @param opts.timeoutMs - per-call timeout; defaults to DEFAULT_TIMEOUT_MS
 * @returns `{ verdict }` taken from the sidecar's response
 * @throws Error "sidecar-circuit-broken" when the breaker is tripped,
 *         "payload-too-large" when `text` exceeds the cap,
 *         "sidecar-spawn-failed" when no sidecar could be started,
 *         "sidecar-timeout" when no response arrives in time (also records a
 *         failure), or the write error if sending the request throws
 */
export async function scanWithSidecar(text: string, opts?: { timeoutMs?: number }): Promise<{ verdict: unknown }> {
  const sidecar = getState();

  if (sidecar.brokenCircuit) throw new Error("sidecar-circuit-broken");
  if (Buffer.byteLength(text, "utf-8") > REQUEST_CAP_BYTES) throw new Error("payload-too-large");
  if (!sidecar.child && !spawnSidecar()) throw new Error("sidecar-spawn-failed");

  const requestId = String(sidecar.nextId++);
  const timeoutMs = opts?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const wireMessage = JSON.stringify({ id: requestId, op: "scan-page-content", text }) + "\n";

  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      sidecar.pending.delete(requestId);
      recordFailure();
      reject(new Error("sidecar-timeout"));
    }, timeoutMs);

    const entry: PendingRequest = {
      resolve: (response: unknown) => {
        const { verdict } = response as { verdict?: unknown };
        resolve({ verdict });
      },
      reject,
      timer,
    };
    sidecar.pending.set(requestId, entry);

    try {
      sidecar.child!.stdin.write(wireMessage);
    } catch (err) {
      // Synchronous write failure: undo the registration and fail fast.
      clearTimeout(timer);
      sidecar.pending.delete(requestId);
      recordFailure();
      reject(err instanceof Error ? err : new Error(String(err)));
    }
  });
}
|
||||
|
||||
/**
 * Test-only escape hatch: shut down any running sidecar (rejecting in-flight
 * requests) and discard the singleton state, which also clears the circuit
 * breaker and failure history. The next getState() starts fresh.
 */
export function resetSidecarForTests(): void {
  shutdownChild();
  state = null;
}
|
||||
@@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Security sidecar entry — Node script that hosts the L4 ML classifier on
|
||||
* behalf of the compiled browse server.
|
||||
*
|
||||
* Why a sidecar:
|
||||
* - browse/src/security-classifier.ts depends on @huggingface/transformers
|
||||
* which loads onnxruntime-node, a native module that fails to `dlopen`
|
||||
* from Bun's compile-binary temp extraction dir (CLAUDE.md "Sidebar
|
||||
* security stack" section). Importing the classifier into server.ts
|
||||
* would brick the compiled binary at startup.
|
||||
* - sidebar-agent.ts (the previous host of the classifier) was removed
|
||||
* when the PTY proved out. The classifier file still ships but had no
|
||||
* caller — exactly the gap codex flagged in #1370.
|
||||
*
|
||||
* This entry runs under plain Node (resolved by find-security-sidecar.ts).
|
||||
* It reads NDJSON requests from stdin and writes NDJSON responses to stdout.
|
||||
*
|
||||
* Protocol (one JSON object per line, both directions):
|
||||
* request: { id: string, op: "scan-page-content" | "ping" | "status", text?: string }
* response: { id: string, ok: true, verdict?: LayerSignal, status?: ClassifierStatus } |
* { id: string, ok: false, error: string }
|
||||
*
|
||||
* Lifecycle:
|
||||
* - Spawned lazily by security-sidecar-client.ts on first /pty-inject-scan
|
||||
* - Exits when stdin closes (parent gone) — standard Node behavior
|
||||
* - Exits on SIGTERM cleanly
|
||||
*
|
||||
* Failure modes:
|
||||
* - Model download fails → reply { ok: false, error: "model-load" } and
|
||||
* keep the loop alive for the next request (caller decides whether to
|
||||
* retry or fail-safe to L1-L3-only)
|
||||
*/
|
||||
|
||||
import * as readline from "readline";
|
||||
import { scanPageContent, getClassifierStatus, loadTestsavant } from "./security-classifier";
|
||||
|
||||
/** One request line read from stdin. */
interface Request {
  /** Correlation id echoed back on the response. */
  id: string;
  op: "scan-page-content" | "ping" | "status";
  /** Content to scan; required for "scan-page-content". */
  text?: string;
}

/** Successful response; carries `verdict` (scan/ping) or `status` (status op). */
interface OkResponse {
  id: string;
  ok: true;
  verdict?: unknown;
  status?: unknown;
}

/** Failed response with a short machine-readable error string. */
interface ErrResponse {
  id: string;
  ok: false;
  error: string;
}

/** Emit one response as a single NDJSON line on stdout. */
function write(obj: OkResponse | ErrResponse): void {
  const line = `${JSON.stringify(obj)}\n`;
  process.stdout.write(line);
}
|
||||
|
||||
/**
 * Serve one parsed request and write exactly one response for it.
 *
 * Requests without a string id are dropped silently (protocol invariant:
 * a response must be correlatable). Supported ops:
 * - "ping": replies with a fixed "alive" verdict.
 * - "status": replies with getClassifierStatus().
 * - "scan-page-content": requires a string `text` (else "missing-text");
 *   attempts to load the classifier, then replies with scanPageContent's
 *   verdict.
 * Any other op yields `unknown-op:<op>`. An exception thrown while handling
 * is reported as `{ ok: false, error: <message> }` rather than propagated.
 */
async function handle(req: Request): Promise<void> {
  if (!req || typeof req.id !== "string") {
    // Drop unidentifiable requests silently — protocol invariant.
    return;
  }
  const { id } = req;

  try {
    switch (req.op) {
      case "ping":
        write({ id, ok: true, verdict: { layer: "ping", verdict: "alive", score: 0 } });
        return;

      case "status":
        write({ id, ok: true, status: getClassifierStatus() });
        return;

      case "scan-page-content": {
        if (typeof req.text !== "string") {
          write({ id, ok: false, error: "missing-text" });
          return;
        }
        // Warm the classifier; a load rejection is deliberately ignored here
        // and the scan below is still attempted.
        await loadTestsavant().catch(() => {
          // loadTestsavant degrades gracefully; scanPageContent below will
          // return a fail-open verdict if the model never loaded.
        });
        const verdict = await scanPageContent(req.text);
        write({ id, ok: true, verdict });
        return;
      }

      default:
        write({ id, ok: false, error: `unknown-op:${(req as { op?: unknown }).op}` });
    }
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    write({ id, ok: false, error: msg });
  }
}
|
||||
|
||||
/**
 * Entry point: read NDJSON requests from stdin line by line and dispatch each
 * to handle(). The process exits with code 0 when stdin closes (parent gone)
 * or on SIGTERM / SIGINT.
 */
function main(): void {
  const exitCleanly = (): never => process.exit(0);

  // readline splits stdin into one-line chunks.
  const lines = readline.createInterface({ input: process.stdin });

  lines.on("line", (raw) => {
    if (raw.trim() === "") return;

    let parsed: Request;
    try {
      parsed = JSON.parse(raw) as Request;
    } catch {
      // Unparseable line: the request id is unknown, so reply under the
      // sentinel id "<malformed>" — it cannot match any real request.
      write({ id: "<malformed>", ok: false, error: "malformed-json" });
      return;
    }

    // Fire-and-forget; concurrent requests get id-correlated responses.
    void handle(parsed);
  });

  lines.on("close", () => exitCleanly());
  process.on("SIGTERM", () => exitCleanly());
  process.on("SIGINT", () => exitCleanly());
}
|
||||
|
||||
main();
|
||||
+750
-173
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,7 @@ import * as Diff from 'diff';
|
||||
import { TEMP_DIR, isPathWithin } from './platform';
|
||||
import { escapeEnvelopeSentinels } from './content-security';
|
||||
import { stripLoneSurrogates } from './sanitize';
|
||||
import { guardScreenshotPath } from './screenshot-size-guard';
|
||||
|
||||
// Roles considered "interactive" for the -i flag
|
||||
const INTERACTIVE_ROLES = new Set([
|
||||
@@ -418,6 +419,7 @@ export async function handleSnapshot(
|
||||
}, boxes);
|
||||
|
||||
await page.screenshot({ path: screenshotPath, fullPage: true });
|
||||
await guardScreenshotPath(screenshotPath);
|
||||
|
||||
// Always remove overlays
|
||||
await page.evaluate(() => {
|
||||
@@ -538,6 +540,7 @@ export async function handleSnapshot(
|
||||
}, boxes);
|
||||
|
||||
await page.screenshot({ path: heatmapPath, fullPage: true });
|
||||
await guardScreenshotPath(heatmapPath);
|
||||
|
||||
// Remove heatmap overlays
|
||||
await page.evaluate(() => {
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
// SSE endpoint helper — shared cleanup contract for stream endpoints.
|
||||
//
|
||||
// Pre-helper, /activity/stream and /inspector/events implemented the same
|
||||
// pattern in parallel and both leaked subscribers when enqueue failed
|
||||
// without a corresponding abort signal (e.g. Chromium MV3 service-worker
|
||||
// suspend dropped the TCP without an abort edge). The subscriber closure
|
||||
// stayed in the Set, capturing the ReadableStreamDefaultController plus
|
||||
// any payloads queued behind it. Over a multi-day sidebar session this
|
||||
// compounded into multi-MB of retained controllers per dead connection.
|
||||
//
|
||||
// Centralizing the cleanup contract here means any future SSE endpoint
|
||||
// inherits the invariant — cleanup runs on abort, enqueue failure, AND
|
||||
// heartbeat failure, exactly once, regardless of which edge fires first.
|
||||
|
||||
import { stripLoneSurrogates } from './sanitize';
|
||||
|
||||
/**
 * Replacer callback for JSON.stringify: string values are passed through
 * stripLoneSurrogates before encoding; every other value is returned
 * untouched. Intended for SSE payloads that the client will JSON.parse
 * back into JS strings.
 */
function sanitizeReplacer(_key: string, value: unknown): unknown {
  if (typeof value !== 'string') {
    return value;
  }
  return stripLoneSurrogates(value);
}
|
||||
|
||||
/** Send an SSE event. Handles JSON encoding + lone-surrogate sanitization. */
|
||||
export type SseSender = (event: string, data: unknown) => void;
|
||||
|
||||
export interface SseEndpointConfig<T> {
  /**
   * Optional hook invoked once when the stream opens, ahead of the live
   * subscription. Meant for replaying earlier events or emitting a
   * current-state snapshot. The supplied `send` accepts any event name and
   * payload and takes care of JSON encoding (with sanitizeReplacer) and
   * SSE framing.
   */
  initialReplay?: (send: SseSender) => void;

  /**
   * Connects the endpoint to the live event source. The endpoint passes in
   * a `notify` callback and expects an unsubscribe function back. Entries
   * handed to `notify` are sent through the endpoint's guarded send path,
   * so a failed write tears the subscription down rather than waiting for
   * an abort that may never fire.
   */
  subscribe: (notify: (entry: T) => void) => () => void;

  /**
   * Event name used for entries delivered via `notify`; each entry is
   * JSON-encoded into the `data:` field automatically.
   */
  liveEventName: string;

  /** Milliseconds between heartbeat comments. Falls back to 15000. */
  heartbeatMs?: number;
}
|
||||
|
||||
/**
 * Build a streaming `text/event-stream` Response that owns the cleanup
 * contract for an SSE endpoint.
 *
 * Cleanup (clear heartbeat, unsubscribe, detach the abort listener, close
 * the stream) is idempotent and runs on whichever edge fires first:
 *   - req.signal abort, including a signal that is already aborted on entry
 *   - an enqueue throw while sending a replay or live event
 *   - an enqueue throw while sending a heartbeat
 *   - the consumer cancelling the response body
 *
 * @param req     Incoming request; only `req.signal` is consulted.
 * @param config  Replay hook, live subscription, live event name and
 *                optional heartbeat interval (default 15000 ms).
 * @returns A Response whose body is the SSE stream.
 */
export function createSseEndpoint<T>(
  req: Request,
  config: SseEndpointConfig<T>,
): Response {
  const heartbeatMs = config.heartbeatMs ?? 15000;
  const encoder = new TextEncoder();

  // Assigned inside start(); lets cancel() reach the same idempotent teardown.
  let teardown: (() => void) | null = null;

  const stream = new ReadableStream({
    start(controller) {
      let cleanedUp = false;
      let heartbeat: ReturnType<typeof setInterval> | null = null;
      let unsubscribe: (() => void) | null = null;

      const cleanup = (): void => {
        if (cleanedUp) return;
        cleanedUp = true;
        // Drop the listener so a long-lived signal doesn't retain this closure.
        req.signal.removeEventListener('abort', cleanup);
        if (heartbeat !== null) {
          clearInterval(heartbeat);
          heartbeat = null;
        }
        if (unsubscribe !== null) {
          unsubscribe();
          unsubscribe = null;
        }
        try {
          controller.close();
        } catch {
          // Expected: stream already closed by the consumer.
        }
      };
      teardown = cleanup;

      // An 'abort' listener never fires for a signal that is already
      // aborted, so handle that case up front instead of subscribing a
      // consumer that is known to be gone.
      if (req.signal.aborted) {
        cleanup();
        return;
      }
      req.signal.addEventListener('abort', cleanup, { once: true });

      const send: SseSender = (event, data) => {
        if (cleanedUp) return;
        try {
          controller.enqueue(
            encoder.encode(
              `event: ${event}\ndata: ${JSON.stringify(data, sanitizeReplacer)}\n\n`,
            ),
          );
        } catch {
          // Consumer disconnected mid-write. Tear down so this subscriber
          // doesn't sit in the set forever.
          cleanup();
        }
      };

      // Initial replay (caller-provided).
      if (config.initialReplay) {
        try {
          config.initialReplay(send);
        } catch {
          cleanup();
          return;
        }
        if (cleanedUp) return;
      }

      // Subscribe for live events.
      const off = config.subscribe((entry) => {
        send(config.liveEventName, entry);
      });
      // If the source notified synchronously during subscribe() and that
      // write failed, cleanup already ran while `unsubscribe` was still
      // null. Release the subscription here and skip the heartbeat so
      // neither is orphaned.
      if (cleanedUp) {
        off();
        return;
      }
      unsubscribe = off;

      // Heartbeat keeps NAT boxes and proxies from dropping idle SSE,
      // and serves as a liveness probe: an enqueue failure here is the
      // cheapest way to learn the consumer is gone without waiting for
      // an abort signal that may never arrive.
      heartbeat = setInterval(() => {
        if (cleanedUp) return;
        try {
          controller.enqueue(encoder.encode(`: heartbeat\n\n`));
        } catch {
          cleanup();
        }
      }, heartbeatMs);
    },
    cancel() {
      // Consumer cancelled the body: release the subscriber immediately
      // rather than waiting for the next failed enqueue.
      teardown?.();
    },
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    },
  });
}
|
||||
+146
-3
@@ -239,18 +239,156 @@ export function buildStealthScript(hw: HostProfile): string {
|
||||
})();`;
|
||||
}
|
||||
|
||||
/**
 * Extended-mode init script — six detection-vector patches. Applied
 * AFTER the default mask, so the property-getter version remains in
 * place if any of the deletion paths fail.
 *
 * Each patch runs in its own try/catch so one failing patch does not
 * prevent the others from applying.
 *
 * Consistency notes:
 *   - The WebGL vendor/renderer spoof is installed on both the WebGL1 and
 *     (when present) WebGL2 context prototypes, so the two APIs report the
 *     same values.
 *   - The faked `navigator.plugins` and `navigator.mediaDevices` objects
 *     are built once and returned by identity on every access.
 *
 * Self-contained string so it can be passed to addInitScript({ content })
 * without bundling concerns.
 */
export const EXTENDED_STEALTH_SCRIPT = `
(() => {
  try {
    // 1. Fully delete navigator.webdriver from the prototype so
    //    \`"webdriver" in navigator\` returns false (not just falsy).
    delete Object.getPrototypeOf(navigator).webdriver;
  } catch {}

  try {
    // 2. WebGL renderer spoof — SwiftShader is the canonical software-GPU
    //    tell. Spoof to a plausible Apple M1 Pro string. Patch WebGL1 and
    //    WebGL2 alike so the two contexts cannot disagree.
    const spoofGetParameter = (proto) => {
      const getParameter = proto.getParameter;
      proto.getParameter = function (parameter) {
        // UNMASKED_VENDOR_WEBGL (37445) → 'Apple Inc.'
        if (parameter === 37445) return 'Apple Inc.';
        // UNMASKED_RENDERER_WEBGL (37446) → realistic Apple silicon string
        if (parameter === 37446) return 'Apple M1 Pro, OpenGL 4.1';
        return getParameter.call(this, parameter);
      };
    };
    spoofGetParameter(WebGLRenderingContext.prototype);
    if (typeof WebGL2RenderingContext !== 'undefined') {
      spoofGetParameter(WebGL2RenderingContext.prototype);
    }
  } catch {}

  try {
    // 3. navigator.plugins: real PluginArray with MimeType objects.
    const makePlugin = (name, filename, desc, mimes) => {
      const p = Object.create(Plugin.prototype);
      Object.defineProperties(p, {
        name: { get: () => name },
        filename: { get: () => filename },
        description: { get: () => desc },
        length: { get: () => mimes.length },
      });
      mimes.forEach((m, i) => { p[i] = m; });
      p.item = (i) => mimes[i];
      p.namedItem = (n) => mimes.find((m) => m.type === n);
      return p;
    };
    const makeMime = (type, suffixes, desc) => {
      const m = Object.create(MimeType.prototype);
      Object.defineProperties(m, {
        type: { get: () => type },
        suffixes: { get: () => suffixes },
        description: { get: () => desc },
      });
      return m;
    };
    const pdfMime = makeMime('application/pdf', 'pdf', '');
    const cpdfMime = makeMime('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');
    const plugins = [
      makePlugin('PDF Viewer', 'internal-pdf-viewer', '', [pdfMime]),
      makePlugin('Chrome PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
      makePlugin('Chromium PDF Viewer', 'internal-pdf-viewer', '', [cpdfMime]),
    ];
    // Build the array once so navigator.plugins === navigator.plugins.
    const arr = Object.create(PluginArray.prototype);
    Object.defineProperty(arr, 'length', { get: () => plugins.length });
    plugins.forEach((p, i) => { arr[i] = p; });
    arr.item = (i) => plugins[i];
    arr.namedItem = (n) => plugins.find((p) => p.name === n);
    arr.refresh = () => {};
    Object.defineProperty(navigator, 'plugins', {
      get: () => arr,
    });
  } catch {}

  try {
    // 4. window.chrome shape — chrome.app + chrome.runtime + loadTimes/csi.
    if (!window.chrome) {
      window.chrome = {};
    }
    if (!window.chrome.runtime) {
      window.chrome.runtime = { OnInstalledReason: {}, OnRestartRequiredReason: {} };
    }
    if (!window.chrome.app) {
      window.chrome.app = {
        isInstalled: false,
        InstallState: { DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' },
        RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },
      };
    }
    if (!window.chrome.loadTimes) {
      window.chrome.loadTimes = function () {
        return { commitLoadTime: Date.now() / 1000, finishLoadTime: Date.now() / 1000 };
      };
    }
    if (!window.chrome.csi) {
      window.chrome.csi = function () {
        return { startE: Date.now(), onloadT: Date.now(), pageT: 0, tran: 15 };
      };
    }
  } catch {}

  try {
    // 5. mediaDevices — some headless builds drop it entirely.
    if (!navigator.mediaDevices) {
      // Single shared object so repeated reads compare equal by identity.
      const mediaDevices = { enumerateDevices: () => Promise.resolve([]) };
      Object.defineProperty(navigator, 'mediaDevices', {
        get: () => mediaDevices,
      });
    }
  } catch {}

  try {
    // 6. CDP cdc_* property cleanup. Chromium under CDP sets cdc_*-prefixed
    //    globals (driver injection markers); a bot detector finds them by
    //    iterating window keys. Strip all matching keys.
    for (const k of Object.keys(window)) {
      if (k.startsWith('cdc_')) {
        try { delete window[k]; } catch {}
      }
    }
  } catch {}
})();
`;
|
||||
|
||||
/**
 * True when the GSTACK_STEALTH environment variable is exactly
 * 'extended', '1' or 'true'. Read from process.env on every call.
 */
function extendedModeEnabled(): boolean {
  switch (process.env.GSTACK_STEALTH) {
    case 'extended':
    case '1':
    case 'true':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Apply stealth patches to a fresh BrowserContext (or persistent context).
 * Called by browser-manager.launch() and launchHeaded().
 *
 * Always applies the always-on Layer C stealth script (built from the
 * per-install host profile) — the consistency-first default. When
 * GSTACK_STEALTH=extended is set, layers the opt-in EXTENDED_STEALTH_SCRIPT
 * on top: its window.chrome.* patches are `if (!...)`-guarded, so Layer C's
 * richer shapes win, while the extended-only additions (WebGL spoof, faked
 * navigator.plugins, mediaDevices, cdc_* cleanup) apply on top. Extended
 * mode actively LIES about the browser and can break sites that reflect on
 * these properties, so it stays off by default.
 *
 * Host profile is resolved from process.env at call time so per-install
 * values bake into the script before Playwright sends it to Chromium via
 * Page.addScriptToEvaluateOnNewDocument.
 *
 * @param context  Context that receives the init script(s).
 * @returns Resolves once every init script has been registered.
 */
export async function applyStealth(context: BrowserContext): Promise<void> {
  const hw = readHostProfile();
  // Register the Layer C script exactly once. Registering it a second time
  // would re-run the same patches on every new document.
  await context.addInitScript({ content: buildStealthScript(hw) });
  if (extendedModeEnabled()) {
    await context.addInitScript({ content: EXTENDED_STEALTH_SCRIPT });
  }
}
|
||||
|
||||
/**
|
||||
@@ -359,3 +497,8 @@ export const STEALTH_IGNORE_DEFAULT_ARGS = [
|
||||
'--disable-component-update',
|
||||
'--disable-default-apps',
|
||||
];
|
||||
|
||||
/**
 * Test-only helper exposing the module-private extended-mode check.
 *
 * @returns Whatever extendedModeEnabled() reports at call time.
 */
export function isExtendedStealthEnabled(): boolean {
  const active = extendedModeEnabled();
  return active;
}
|
||||
|
||||
@@ -0,0 +1,143 @@
|
||||
/**
|
||||
* terminal-agent process-control primitives shared by cli.ts spawn site,
|
||||
* server.ts shutdown teardown, and the v1.44 watchdog/respawn loop.
|
||||
*
|
||||
* Why this exists: pre-v1.44 used `pkill -f terminal-agent\.ts`, which
|
||||
* matches any process whose argv contains the string and would kill
|
||||
* sibling gstack sessions on the same host. The agent now writes a
|
||||
* structured `terminal-agent-pid` record (`{pid, gen, startedAt}`) and
|
||||
* every kill site routes through `killAgentByRecord` here — identity-based,
|
||||
* no regex.
|
||||
*
|
||||
* The `gen` field is a per-boot generation counter. Loopback /internal/*
|
||||
* calls from the parent server include `X-Browse-Gen` so a slow agent that
|
||||
* the watchdog respawned around can't accidentally service a stale grant
|
||||
* from the old generation.
|
||||
*/
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { safeUnlink, safeKill, isProcessAlive } from './error-handling';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
|
||||
/**
 * Find `terminal-agent.ts` on disk.
 *
 * Two locations are probed, in order, and the first that exists wins:
 *   1. `<metaDir>/terminal-agent.ts` (metaDir defaults to this module's
 *      directory — the dev layout).
 *   2. `<dirname(execPath)>/../src/terminal-agent.ts` (execPath defaults
 *      to process.execPath — the compiled-binary layout).
 *
 * @param searchHints  Optional overrides for either probe root.
 * @returns Absolute path of the first existing candidate, or null when
 *          neither exists.
 */
export function resolveTerminalAgentScript(searchHints: { metaDir?: string; execPath?: string } = {}): string | null {
  const moduleDir = searchHints.metaDir || __dirname;
  const binaryDir = path.dirname(searchHints.execPath || process.execPath);
  const locations = [
    path.resolve(moduleDir, 'terminal-agent.ts'),
    path.resolve(binaryDir, '..', 'src', 'terminal-agent.ts'),
  ];
  const hit = locations.find((location) => fs.existsSync(location));
  return hit === undefined ? null : hit;
}
|
||||
|
||||
/**
 * Start a new terminal-agent process, replacing any previously recorded one.
 *
 * Sequence:
 *   1. Read the agent record next to `opts.stateFile`; if one exists, send
 *      it SIGTERM via killAgentByRecord and delete the record.
 *   2. Pick the script (`opts.scriptPath`, else resolveTerminalAgentScript()).
 *   3. `Bun.spawn(['bun', 'run', script])` with stdio ignored and an env of
 *      process.env + BROWSE_STATE_FILE + BROWSE_SERVER_PORT + extraEnv
 *      (later entries override earlier ones).
 *
 * Note that step 1 runs before the script is located, so a prior agent is
 * terminated even when this call ends up returning null.
 *
 * @returns The child's pid, or null when no script could be found on disk
 *          (or the spawned handle exposes no pid).
 */
export function spawnTerminalAgent(opts: {
  stateFile: string;
  serverPort: number;
  cwd?: string;
  /** Optional extra env vars to add to the agent's process env. */
  extraEnv?: Record<string, string>;
  /** Override script lookup for tests. */
  scriptPath?: string;
}): number | null {
  const stateDir = path.dirname(opts.stateFile);

  const previous = readAgentRecord(stateDir);
  if (previous !== null) {
    killAgentByRecord(previous, 'SIGTERM');
    clearAgentRecord(stateDir);
  }

  const script = opts.scriptPath || resolveTerminalAgentScript();
  if (!script) return null;
  if (!fs.existsSync(script)) return null;

  const childEnv = {
    ...process.env,
    BROWSE_STATE_FILE: opts.stateFile,
    BROWSE_SERVER_PORT: String(opts.serverPort),
    ...(opts.extraEnv || {}),
  };
  const child = (Bun as any).spawn(['bun', 'run', script], {
    cwd: opts.cwd || process.cwd(),
    env: childEnv,
    stdio: ['ignore', 'ignore', 'ignore'],
  });
  // unref is optional on the handle; when present it is invoked so the
  // parent does not wait on the child.
  child.unref?.();
  return child.pid ?? null;
}
|
||||
|
||||
/** Shape of the JSON document stored in the `terminal-agent-pid` file. */
export interface AgentRecord {
  /** Process id of the recorded agent. */
  pid: number;
  /** Random per-boot identifier; the value callers send as `X-Browse-Gen`. */
  gen: string;
  /** Milliseconds since epoch. Reserved for future PID-reuse guards. */
  startedAt: number;
}
|
||||
|
||||
/**
 * Location of the agent record file inside a state directory.
 *
 * @param stateDir  Directory that holds the browse state files.
 * @returns `<stateDir>/terminal-agent-pid`.
 */
export function agentRecordPath(stateDir: string): string {
  const recordFileName = 'terminal-agent-pid';
  return path.join(stateDir, recordFileName);
}
|
||||
|
||||
/**
 * Read and validate the current agent record.
 *
 * A record is accepted only when `pid` is a positive integer, `gen` is a
 * string and `startedAt` is a number. The pid check matters because the
 * value is later handed to a kill call, where 0 and negative numbers
 * address process groups rather than a single process.
 *
 * @param stateDir  Directory containing the record file.
 * @returns The validated record (only the three known fields), or null
 *          when the file is missing, unreadable, not JSON, or malformed.
 */
export function readAgentRecord(stateDir: string): AgentRecord | null {
  try {
    const raw = fs.readFileSync(agentRecordPath(stateDir), 'utf-8');
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed !== 'object' || parsed === null) return null;
    const { pid, gen, startedAt } = parsed as Record<string, unknown>;
    if (typeof pid !== 'number' || !Number.isInteger(pid) || pid <= 0) return null;
    if (typeof gen !== 'string' || typeof startedAt !== 'number') return null;
    return { pid, gen, startedAt };
  } catch {
    // Missing file or invalid JSON both mean "no usable record".
    return null;
  }
}
|
||||
|
||||
/**
 * Persist `record` as the agent record for `stateDir`.
 *
 * The JSON is written to a per-process temp file next to the target and
 * then renamed over it, so readers never observe a partially written
 * record. The state directory is created on a best-effort basis first; if
 * that fails, the subsequent write surfaces the real error.
 *
 * If the write or rename fails, the temp file is removed before the error
 * is rethrown so failed attempts do not accumulate stray files.
 *
 * @throws Whatever writeSecureFile or fs.renameSync throws.
 */
export function writeAgentRecord(stateDir: string, record: AgentRecord): void {
  try { mkdirSecure(stateDir); } catch {}
  const target = agentRecordPath(stateDir);
  const tmp = `${target}.tmp-${process.pid}`;
  try {
    writeSecureFile(tmp, JSON.stringify(record));
    fs.renameSync(tmp, target);
  } catch (err) {
    // NOTE(review): relies on safeUnlink tolerating a missing file — confirm.
    safeUnlink(tmp);
    throw err;
  }
}
|
||||
|
||||
/**
 * Delete the agent record file for `stateDir` via safeUnlink.
 *
 * @param stateDir  Directory containing the record file.
 */
export function clearAgentRecord(stateDir: string): void {
  const recordFile = agentRecordPath(stateDir);
  safeUnlink(recordFile);
}
|
||||
|
||||
/**
 * Signal the agent identified by `record`.
 *
 * The PID is sanity-checked first: anything that is not a positive integer
 * is refused outright. A kill call treats 0 and negative PIDs as process
 * groups, so a corrupted record must never reach it.
 *
 * Liveness is then checked before signalling; a PID that is not alive is
 * a no-op. This is best-effort only — it cannot tell whether a live PID
 * still belongs to the agent that wrote the record.
 *
 * @param record  Record previously read from the agent record file.
 * @param signal  Signal to deliver. Defaults to SIGTERM so the agent can
 *                run its own shutdown handling.
 * @returns true when the PID was reported alive and safeKill was invoked;
 *          false when the PID was invalid or not alive.
 */
export function killAgentByRecord(
  record: AgentRecord,
  signal: NodeJS.Signals = 'SIGTERM',
): boolean {
  const { pid } = record;
  if (!Number.isInteger(pid) || pid <= 0) return false;
  if (!isProcessAlive(pid)) return false;
  safeKill(pid, signal);
  return true;
}
|
||||
+509
-73
@@ -25,16 +25,47 @@ import * as path from 'path';
|
||||
import * as crypto from 'crypto';
|
||||
import { writeSecureFile, mkdirSecure } from './file-permissions';
|
||||
import { safeUnlink } from './error-handling';
|
||||
import { writeAgentRecord, clearAgentRecord } from './terminal-agent-control';
|
||||
|
||||
const STATE_FILE = process.env.BROWSE_STATE_FILE || path.join(process.env.HOME || '/tmp', '.gstack', 'browse.json');
|
||||
const PORT_FILE = path.join(path.dirname(STATE_FILE), 'terminal-port');
|
||||
const BROWSE_SERVER_PORT = parseInt(process.env.BROWSE_SERVER_PORT || '0', 10);
|
||||
const EXTENSION_ID = process.env.BROWSE_EXTENSION_ID || ''; // optional: tighten Origin check
|
||||
const INTERNAL_TOKEN = crypto.randomBytes(32).toString('base64url'); // shared with parent server via env at spawn
|
||||
/**
|
||||
* Per-boot generation identifier. Loopback /internal/* callers include
|
||||
* `X-Browse-Gen: <CURRENT_GEN>` so a slow agent the watchdog respawned
|
||||
* around can't service a stale grant from the prior generation. Absent
|
||||
* header means "legacy caller" and is accepted (backward compat); a
|
||||
* present-but-mismatched header returns 409 stale generation.
|
||||
*/
|
||||
const CURRENT_GEN = crypto.randomBytes(16).toString('base64url');
|
||||
|
||||
// In-memory attach-token registry. Parent posts /internal/grant after
// /pty-session; we validate WS upgrades against this map.
//
// v1.44+: each token is bound to a v1.44 sessionId (the stable, non-secret
// identifier from browse/src/pty-session-lease.ts). The token grants ONE
// attach for ONE session — re-attach within the lease window comes through
// /pty-session/reattach, which mints a fresh token for the same sessionId.
//
// Legacy callers can still pass `{token}` without sessionId (the value
// stays null and the WS upgrade still works); those callers don't get
// re-attach because there's no stable identifier to match against.
//
// Declared exactly once: this Map supersedes the earlier Set-based token
// registry.
const validTokens = new Map<string, string | null>(); // token → sessionId
|
||||
|
||||
/**
|
||||
* Reverse index for re-attach lookups: sessionId → live PtySession.
|
||||
* Populated when a WS first attaches with a known sessionId; cleared when
|
||||
* the session is disposed or the lease expires. Used by:
|
||||
* - /ws upgrade: if the incoming attachToken maps to a sessionId that
|
||||
* already has a live session, REPLACE its ws ref instead of spawning.
|
||||
* - /internal/restart: enumerate by sessionId, dispose that one session.
|
||||
*
|
||||
* Kept separate from the WeakMap<ws,PtySession> so re-attach can find the
|
||||
* session by id even after the original ws has gone.
|
||||
*/
|
||||
const sessionsById = new Map<string, PtySession>();
|
||||
|
||||
// Active PTY session per WS. One terminal per connection. Codex finding #4:
|
||||
// uncaught handlers below catch bugs in framing/cleanup so they don't kill
|
||||
@@ -46,12 +77,154 @@ process.on('unhandledRejection', (reason) => {
|
||||
console.error('[terminal-agent] unhandledRejection:', reason);
|
||||
});
|
||||
|
||||
/**
 * State for one PTY-backed terminal session. Exported so the ring-buffer
 * and replay helpers (and their tests) can name the type.
 */
export interface PtySession {
  proc: any | null; // Bun.Subprocess once spawned
  cols: number;
  rows: number;
  cookie: string;
  /**
   * Current attached websocket. Swapped on re-attach (Commit 3): when a new
   * WS upgrade matches this session's sessionId, the old liveWs is gone
   * and the new ws takes its place. The PTY on-data callback closes over
   * `session`, not the original `ws`, so it always writes to the current
   * liveWs (or skips the write when detached and liveWs is null).
   */
  liveWs: any | null;
  /**
   * v1.44+ stable session identifier (from pty-session-lease). Null for
   * legacy /internal/grant callers that didn't pass one. Used for
   * targeted /internal/restart and Commit 3 re-attach lookups.
   */
  sessionId: string | null;
  spawned: boolean;
  /**
   * 25s server-side WS keepalive interval (v1.44+). Set in the WS `open`
   * handler, cleared in `close`. We send `{type:"ping",ts}` text frames so
   * NAT boxes, proxies, and Chrome's MV3 panel-suspend heuristics see the
   * connection as active; the client either replies with `{type:"pong"}`
   * or fires its own 25s `{type:"keepalive"}` cycle. Either path keeps
   * the underlying TCP from being silently dropped.
   */
  pingInterval: ReturnType<typeof setInterval> | null;
  /**
   * Commit 3 scrollback ring buffer. Each PTY write appends a frame; the
   * total byte count is capped at RING_BUFFER_MAX_BYTES with oldest frames
   * evicted first. On re-attach, the surviving frames are replayed as a
   * single binary frame (prefixed with the v1.44 reset sequence) so the
   * user sees their last screen of output. Frame boundaries preserve UTF-8
   * + ANSI-CSI boundaries because each frame is the exact buffer that
   * spawnClaude's on-data callback emitted.
   */
  ringBuffer: Buffer[];
  /** Sum of `length` over every frame currently held in `ringBuffer`. */
  ringBufferBytes: number;
  /**
   * Tracks whether the PTY is currently in xterm alt-screen mode. claude's
   * TUI enters alt-screen (CSI ?1049h) during tool calls and exits (CSI
   * ?1049l) when returning to the main prompt. On re-attach, the replay
   * prelude must re-enter alt-screen if the original PTY left it active,
   * otherwise the replay renders against the main screen and the cursor
   * + colors end up in the wrong place.
   */
  altScreenActive: boolean;
  /**
   * Detach state machine (Commit 3). When the WS closes for a reason OTHER
   * than the v1.44 intentional-restart code (4001), we keep the PtySession
   * alive for the detach window (default 60s) so a re-attach within the
   * window can resume the same PTY and replay the ring buffer. The timer
   * disposes the session if no re-attach arrives in time.
   */
  detached: boolean;
  detachTimer: ReturnType<typeof setTimeout> | null;
}
|
||||
|
||||
/**
 * Read a non-negative integer tunable from the environment.
 *
 * Unset or empty values yield `fallback`. Values are parsed with
 * parseInt(…, 10); anything that does not parse to a finite number >= 0
 * also yields `fallback`. Without this guard a typo in the env var would
 * produce NaN, which timers treat as a ~1ms delay and which makes every
 * `bytes > limit` comparison false (i.e. the ring buffer never evicts).
 */
function readNonNegativeIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw === '') return fallback;
  const parsed = parseInt(raw, 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * WS keepalive interval. 25s is comfortably under the lowest common NAT
 * idle timeout (typically 30-60s) and shorter than Chromium's WebSocket
 * dead-peer threshold. Test-overridable via env so the v1.44 e2e tests
 * can compress idle-window assertions to <1s without waiting half a
 * minute per assertion.
 */
const KEEPALIVE_INTERVAL_MS = readNonNegativeIntEnv('GSTACK_PTY_KEEPALIVE_INTERVAL_MS', 25000);

/**
 * Commit 3 scrollback ring buffer cap. 1 MB is enough for a full screen
 * of dense claude output (including a recent tool result), small enough
 * that a worst-case 10 detached sessions only cost ~10 MB of RSS.
 * Env-overridable so e2e tests can verify eviction without writing 1 MB
 * of fixture data per assertion.
 */
const RING_BUFFER_MAX_BYTES = readNonNegativeIntEnv('GSTACK_PTY_RING_BUFFER_BYTES', 1024 * 1024);

/**
 * Commit 3 detach window — how long to keep a session alive after WS
 * close (with any code other than 4001 intentional-restart) so a
 * re-attach can resume the same PTY. 60s is long enough to cover a
 * Chrome MV3 service-worker suspend cycle, a wifi blip, or a brief
 * laptop sleep; short enough that genuinely-closed sessions don't
 * stack up unbounded.
 */
const DETACH_WINDOW_MS = readNonNegativeIntEnv('GSTACK_PTY_DETACH_WINDOW_MS', 60000);
|
||||
|
||||
/**
 * Record one PTY output frame in the session's scrollback ring.
 *
 * The frame is appended and the running byte total updated; whole frames
 * are then dropped from the front while the total exceeds
 * RING_BUFFER_MAX_BYTES. At least one frame is always retained, so a
 * single oversized frame stays in the ring.
 *
 * The frame is also scanned for the xterm alt-screen enter (CSI ?1049h)
 * and exit (CSI ?1049l) sequences. Whichever occurs last in the frame
 * decides session.altScreenActive; a frame containing neither leaves the
 * flag alone.
 */
export function appendToRingBuffer(session: PtySession, frame: Buffer): void {
  const ring = session.ringBuffer;
  ring.push(frame);
  session.ringBufferBytes += frame.length;

  for (;;) {
    const overBudget = session.ringBufferBytes > RING_BUFFER_MAX_BYTES;
    if (!overBudget || ring.length <= 1) break;
    const oldest = ring.shift()!;
    session.ringBufferBytes -= oldest.length;
  }

  // latin1 gives a one-char-per-byte view; the sequences are 7-bit ASCII.
  const text = frame.toString('latin1');
  const lastEnter = text.lastIndexOf('\x1b[?1049h');
  const lastExit = text.lastIndexOf('\x1b[?1049l');
  // The two indices can only be equal when both are -1 (neither present).
  if (lastEnter === lastExit) return;
  session.altScreenActive = lastEnter > lastExit;
}
|
||||
|
||||
/**
 * Assemble the bytes sent to a re-attaching client.
 *
 * Layout of the returned buffer:
 *   1. `\x1b[!p` (DECSTR soft reset), always.
 *   2. `\x1b[?1049h` (enter alt-screen), only when session.altScreenActive.
 *   3. Every frame in session.ringBuffer, in stored order.
 *
 * The session itself is not modified.
 */
export function buildReplayPayload(session: PtySession): Buffer {
  const prelude: Buffer[] = [Buffer.from('\x1b[!p')];
  if (session.altScreenActive) {
    prelude.push(Buffer.from('\x1b[?1049h'));
  }
  return Buffer.concat([...prelude, ...session.ringBuffer]);
}
|
||||
|
||||
const sessions = new WeakMap<any, PtySession>(); // ws -> session
|
||||
@@ -201,6 +374,118 @@ function disposeSession(session: PtySession): void {
|
||||
*
|
||||
* Everything else returns 404. The listener binds 127.0.0.1 only.
|
||||
*/
|
||||
/**
 * Validate a loopback /internal/* request.
 *
 * Checks, in order:
 *   1. `Authorization` must equal `Bearer <INTERNAL_TOKEN>`; otherwise 403.
 *      The comparison is constant-time so response timing does not reveal
 *      how much of a guessed token matched.
 *   2. `X-Browse-Gen`, when present and non-empty, must equal CURRENT_GEN;
 *      otherwise 409. A missing header is accepted (legacy callers).
 *
 * @returns null when the request is allowed, otherwise the Response to
 *          send back.
 */
function checkInternalAuth(req: Request): Response | null {
  const expected = Buffer.from(`Bearer ${INTERNAL_TOKEN}`);
  const presented = Buffer.from(req.headers.get('authorization') ?? '');
  // timingSafeEqual requires equal lengths; the expected length is fixed
  // and not secret, so checking it first leaks nothing useful.
  const authorized =
    presented.length === expected.length && crypto.timingSafeEqual(presented, expected);
  if (!authorized) {
    return new Response('forbidden', { status: 403 });
  }
  const headerGen = req.headers.get('x-browse-gen');
  if (headerGen && headerGen !== CURRENT_GEN) {
    return new Response('stale generation', { status: 409 });
  }
  return null;
}
|
||||
|
||||
/**
 * Shared wrapper for JSON-bodied /internal/* routes.
 *
 * Steps:
 *   1. checkInternalAuth — a non-null result is returned as-is.
 *   2. Parse the request body as JSON; a parse failure yields 400 "bad".
 *   3. Await `fn(body)`:
 *        - a Response is passed through unchanged,
 *        - null/undefined becomes a plain "ok" response,
 *        - anything else is JSON-stringified into a 200 response with
 *          `Content-Type: application/json`.
 *      Any throw in this step (from `fn` or from serialization) yields
 *      400 "bad".
 */
async function internalHandler<T>(
  req: Request,
  fn: (body: any) => T | Promise<T> | Response | Promise<Response>,
): Promise<Response> {
  const rejection = checkInternalAuth(req);
  if (rejection) return rejection;

  const badRequest = (): Response => new Response('bad', { status: 400 });

  let payload: any;
  try {
    payload = await req.json();
  } catch {
    return badRequest();
  }

  try {
    const outcome = await fn(payload);
    if (outcome instanceof Response) {
      return outcome;
    }
    if (outcome == null) {
      return new Response('ok');
    }
    const jsonHeaders = { 'Content-Type': 'application/json' };
    return new Response(JSON.stringify(outcome), { status: 200, headers: jsonHeaders });
  } catch {
    return badRequest();
  }
}
|
||||
|
||||
/**
 * Return how many leading bytes of `buf` can be forwarded without cutting a
 * UTF-8 multibyte sequence in half (issue #1272).
 *
 * Only the last 3 bytes are inspected: a UTF-8 sequence is at most 4 bytes,
 * so an incomplete one at the tail has its lead byte within that window.
 * Walking backwards, the first non-continuation byte decides the result:
 *   - ASCII byte          → everything up to and including it is safe
 *   - lead byte, complete → the whole buffer is safe
 *   - lead byte, short    → stop just before the lead byte
 * If all inspected bytes are continuation bytes, the whole buffer is treated
 * as safe.
 */
function findUtf8FlushBoundary(buf: Buffer): number {
  const lowest = Math.max(0, buf.length - 3);
  for (let i = buf.length - 1; i >= lowest; i--) {
    const b = buf[i];
    if ((b & 0x80) === 0) return i + 1; // ASCII
    if ((b & 0xC0) === 0x80) continue; // continuation byte, keep looking for the lead
    const expected = (b & 0xE0) === 0xC0 ? 2 : (b & 0xF0) === 0xE0 ? 3 : 4;
    return buf.length - i >= expected ? buf.length : i;
  }
  return buf.length;
}

/**
 * Spawn the claude PTY for a session if it hasn't been spawned yet.
 * Used by both the legacy binary-frame spawn trigger and the v1.44 explicit
 * `{type:"start"}` text-frame trigger. Idempotent on `session.spawned`: the
 * spawn is attempted at most once per session.
 *
 * PTY output is split on UTF-8 character boundaries, always appended to the
 * session ring buffer, and forwarded to `session.liveWs` when one is
 * attached. The callback closes over `session`, not `ws`, so output follows
 * whichever socket is currently attached.
 *
 * @param ws      Socket that triggered the spawn; only used to report a
 *                spawn failure (error frame + close 4404).
 * @param session Session that will own the PTY process.
 * @returns true if the session has a PTY process, false if the spawn failed
 *          (e.g. claude binary not on PATH) — including on repeat calls after
 *          a failed attempt, which previously reported true.
 */
function maybeSpawnPty(ws: any, session: PtySession): boolean {
  // Already attempted: report whether a process actually exists instead of
  // unconditionally claiming success (a failed spawn leaves proc null).
  if (session.spawned) return session.proc != null;
  session.spawned = true;

  // Bytes of an incomplete trailing UTF-8 sequence, held back until the next
  // chunk completes them.
  let leftover = Buffer.alloc(0);
  const proc = spawnClaude(session.cols, session.rows, (chunk) => {
    const combined = Buffer.concat([leftover, Buffer.from(chunk)]);
    const safeEnd = findUtf8FlushBoundary(combined);
    // subarray is the non-deprecated spelling of Buffer.slice (same view semantics).
    const flush = combined.subarray(0, safeEnd);
    leftover = combined.subarray(safeEnd);
    if (flush.length) {
      // Always record into the ring buffer so a re-attach can replay, then
      // forward to the currently attached socket (skipped while detached).
      appendToRingBuffer(session, flush);
      if (session.liveWs) {
        try { session.liveWs.sendBinary(flush); } catch {}
      }
    }
  });

  if (!proc) {
    // Best-effort: the socket may already be gone.
    try {
      ws.send(JSON.stringify({
        type: 'error',
        code: 'CLAUDE_NOT_FOUND',
        message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
      }));
      ws.close(4404, 'claude not found');
    } catch {}
    return false;
  }

  session.proc = proc;
  // Close whichever socket is attached when the child exits.
  proc.exited?.then?.(() => {
    try { session.liveWs?.close(1000, 'pty exited'); } catch {}
  });
  return true;
}
|
||||
|
||||
function buildServer() {
|
||||
return Bun.serve({
|
||||
hostname: '127.0.0.1',
|
||||
@@ -211,29 +496,66 @@ function buildServer() {
|
||||
const url = new URL(req.url);
|
||||
|
||||
// /internal/grant — loopback-only handshake from parent server.
|
||||
// v1.44+: accepts `{token, sessionId?}`. The sessionId binding lets
|
||||
// the agent route re-attach attempts (same sessionId, fresh token)
|
||||
// back to the same PtySession. Legacy callers passing just `{token}`
|
||||
// still work — sessionId becomes null and re-attach is unavailable
|
||||
// for that grant.
|
||||
if (url.pathname === '/internal/grant' && req.method === 'POST') {
|
||||
const auth = req.headers.get('authorization');
|
||||
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
|
||||
return new Response('forbidden', { status: 403 });
|
||||
}
|
||||
return req.json().then((body: any) => {
|
||||
return internalHandler(req, (body) => {
|
||||
if (typeof body?.token === 'string' && body.token.length > 16) {
|
||||
validTokens.add(body.token);
|
||||
const sid = typeof body?.sessionId === 'string' && body.sessionId.length > 0
|
||||
? body.sessionId
|
||||
: null;
|
||||
validTokens.set(body.token, sid);
|
||||
}
|
||||
return new Response('ok');
|
||||
}).catch(() => new Response('bad', { status: 400 }));
|
||||
});
|
||||
}
|
||||
|
||||
// /internal/revoke — drop a token (called on WS close or bootstrap reload)
|
||||
if (url.pathname === '/internal/revoke' && req.method === 'POST') {
|
||||
const auth = req.headers.get('authorization');
|
||||
if (auth !== `Bearer ${INTERNAL_TOKEN}`) {
|
||||
return new Response('forbidden', { status: 403 });
|
||||
}
|
||||
return req.json().then((body: any) => {
|
||||
return internalHandler(req, (body) => {
|
||||
if (typeof body?.token === 'string') validTokens.delete(body.token);
|
||||
return new Response('ok');
|
||||
}).catch(() => new Response('bad', { status: 400 }));
|
||||
});
|
||||
}
|
||||
|
||||
// /internal/restart — dispose the PtySession for a specific sessionId.
|
||||
// Scoped to one caller (not enumerate-all). Server.ts /pty-restart
|
||||
// posts here with the caller's sessionId; we kill ONLY that PTY,
|
||||
// leaving any other live sidebar tabs untouched. Codex T2 of the
|
||||
// eng review caught this gap — pre-spec the route would have
|
||||
// disposed all sessions.
|
||||
if (url.pathname === '/internal/restart' && req.method === 'POST') {
|
||||
return internalHandler(req, (body) => {
|
||||
const sid = typeof body?.sessionId === 'string' ? body.sessionId : null;
|
||||
if (!sid) return { killed: 0 };
|
||||
const session = sessionsById.get(sid);
|
||||
if (!session) return { killed: 0 };
|
||||
// Cancel any pending detach timer before disposal — otherwise it
|
||||
// would fire later against an already-disposed session.
|
||||
if (session.detachTimer) {
|
||||
clearTimeout(session.detachTimer);
|
||||
session.detachTimer = null;
|
||||
}
|
||||
disposeSession(session);
|
||||
sessionsById.delete(sid);
|
||||
return { killed: 1 };
|
||||
});
|
||||
}
|
||||
|
||||
// /internal/healthz — liveness probe used by the v1.44 watchdog.
|
||||
// Returns this agent's pid + gen + active session count without
|
||||
// touching claude binary lookup (which can fail for non-process
|
||||
// reasons and isn't a useful liveness signal). GET — no body to parse,
|
||||
// so it stays on the bare checkInternalAuth gate.
|
||||
if (url.pathname === '/internal/healthz' && req.method === 'GET') {
|
||||
const denied = checkInternalAuth(req);
|
||||
if (denied) return denied;
|
||||
return new Response(JSON.stringify({
|
||||
pid: process.pid,
|
||||
gen: CURRENT_GEN,
|
||||
sessions: validTokens.size,
|
||||
}), { status: 200, headers: { 'Content-Type': 'application/json' } });
|
||||
}
|
||||
|
||||
// /claude-available — bootstrap card hits this when user clicks "I installed it".
|
||||
@@ -305,8 +627,13 @@ function buildServer() {
|
||||
return new Response('unauthorized', { status: 401 });
|
||||
}
|
||||
|
||||
// v1.44+: surface the token's sessionId binding to the upgraded ws.
|
||||
// open() reads it via ws.data and registers the session in
|
||||
// sessionsById so /internal/restart and (Commit 3) re-attach
|
||||
// lookups can find it.
|
||||
const sessionId = validTokens.get(token) ?? null;
|
||||
const upgraded = server.upgrade(req, {
|
||||
data: { cookie: token },
|
||||
data: { cookie: token, sessionId },
|
||||
// Echo the protocol back so the browser accepts the upgrade.
|
||||
// Required when the client sends Sec-WebSocket-Protocol — the
|
||||
// server MUST select one of the offered protocols, otherwise
|
||||
@@ -320,22 +647,105 @@ function buildServer() {
|
||||
},
|
||||
|
||||
websocket: {
|
||||
/**
 * Bind a freshly upgraded WebSocket to a PtySession.
 *
 * `ws.data` carries the attach token (`cookie`) and the optional
 * `sessionId` set at upgrade time. Two paths:
 *
 *  - Re-attach: a PtySession for this sessionId already exists in
 *    `sessionsById`. Its pending detach timer is cancelled, the new socket
 *    becomes `liveWs`, keepalive restarts on it, and the client receives a
 *    `reattach-begin` text frame followed by the ring-buffer replay as one
 *    binary frame. The PTY process itself is untouched.
 *  - Fresh: a new, not-yet-spawned PtySession is created, given a keepalive
 *    timer, and indexed by socket and (when present) by sessionId.
 *
 * No PTY is spawned here; `spawned` starts false on a fresh session.
 */
open(ws) {
  const sessionId = (ws.data as any)?.sessionId ?? null;
  const cookie = (ws.data as any)?.cookie || '';

  // Keepalive pings are tied to this socket; every attach starts a new timer.
  const startKeepalive = () => setInterval(() => {
    try {
      ws.send(JSON.stringify({ type: 'ping', ts: Date.now() }));
    } catch {
      // ws likely closed mid-tick; close handler clears the interval.
    }
  }, KEEPALIVE_INTERVAL_MS);

  if (sessionId) {
    const existing = sessionsById.get(sessionId);
    if (existing) {
      if (existing.detachTimer) {
        clearTimeout(existing.detachTimer);
        existing.detachTimer = null;
      }

      // The previous socket may still be registered: a reconnect can land
      // before the server notices the old connection died. Unregister it
      // first, otherwise its late close event would resolve to this session
      // and tear down the new attachment (clear the new keepalive, null
      // liveWs, arm the detach timer).
      const previousWs = existing.liveWs;
      if (previousWs && previousWs !== ws) {
        sessions.delete(previousWs);
        // close() normally drops the per-attach token; do it here since the
        // old socket's close will no longer find the session.
        if (existing.cookie && existing.cookie !== cookie) {
          validTokens.delete(existing.cookie);
        }
        // NOTE(review): 1000 is also used for 'pty exited'; confirm the
        // client treats it as terminal rather than retrying.
        try { previousWs.close(1000, 'superseded by re-attach'); } catch {}
      }

      existing.detached = false;
      existing.liveWs = ws;
      existing.cookie = cookie;
      // Re-bind the WS-keyed map so resize/close/message handlers can find
      // this session via the new ws.
      sessions.set(ws, existing);

      if (existing.pingInterval) clearInterval(existing.pingInterval);
      existing.pingInterval = startKeepalive();

      // Order matters: the text frame tells the client to prep its terminal,
      // and the binary frame right after it IS the replay.
      try { ws.send(JSON.stringify({ type: 'reattach-begin', sessionId })); } catch {}
      try { ws.sendBinary(buildReplayPayload(existing)); } catch {}
      return;
    }
  }

  const session: PtySession = {
    proc: null,
    cols: 80,
    rows: 24,
    cookie,
    liveWs: ws,
    sessionId,
    spawned: false,
    pingInterval: null,
    ringBuffer: [],
    ringBufferBytes: 0,
    altScreenActive: false,
    detached: false,
    detachTimer: null,
  };
  session.pingInterval = startKeepalive();
  sessions.set(ws, session);
  // Index by sessionId so /internal/restart and re-attach can find it.
  if (sessionId) sessionsById.set(sessionId, session);
},
|
||||
|
||||
message(ws, raw) {
|
||||
let session = sessions.get(ws);
|
||||
if (!session) {
|
||||
// Fallback for any path where open() didn't fire (shouldn't happen
|
||||
// in Bun.serve but keeps the spawn path safe). No keepalive on
|
||||
// this branch — open() is the supported entry point.
|
||||
session = {
|
||||
proc: null,
|
||||
cols: 80,
|
||||
rows: 24,
|
||||
cookie: (ws.data as any)?.cookie || '',
|
||||
liveWs: ws,
|
||||
sessionId: (ws.data as any)?.sessionId ?? null,
|
||||
spawned: false,
|
||||
pingInterval: null,
|
||||
ringBuffer: [],
|
||||
ringBufferBytes: 0,
|
||||
altScreenActive: false,
|
||||
detached: false,
|
||||
detachTimer: null,
|
||||
};
|
||||
sessions.set(ws, session);
|
||||
if (session.sessionId) sessionsById.set(session.sessionId, session);
|
||||
}
|
||||
|
||||
// Text frames are control messages: {type: "resize", cols, rows} or
|
||||
// {type: "tabSwitch", tabId, url, title}. Binary frames are raw input
|
||||
// bytes destined for the PTY stdin.
|
||||
// Text frames are control messages: {type: "resize", cols, rows},
|
||||
// {type: "tabSwitch", tabId, url, title}, {type: "tabState", ...},
|
||||
// or v1.44 keepalive frames: {type: "pong", ts}, {type: "keepalive"}.
|
||||
// Binary frames are raw input bytes destined for the PTY stdin.
|
||||
if (typeof raw === 'string') {
|
||||
let msg: any;
|
||||
try { msg = JSON.parse(raw); } catch { return; }
|
||||
@@ -355,50 +765,32 @@ function buildServer() {
|
||||
handleTabState(msg);
|
||||
return;
|
||||
}
|
||||
if (msg?.type === 'pong' || msg?.type === 'keepalive' || msg?.type === 'ping') {
|
||||
// Keepalive frames — accepted and silently dropped. The mere
|
||||
// fact that the WS carried this frame is the liveness signal;
|
||||
// there's no application-level state to update at this layer.
|
||||
// `ping` is acknowledged here too in case the client (or a
|
||||
// future agent peer) mirrors our server-side ping shape.
|
||||
return;
|
||||
}
|
||||
if (msg?.type === 'start') {
|
||||
// v1.44 explicit spawn trigger. forceRestart sends this
|
||||
// immediately on every fresh WS so claude boots without the
|
||||
// user having to type a keystroke (pre-v1.44, the lazy-binary
|
||||
// spawn made restart look stuck until the user typed). No-op
|
||||
// if already spawned.
|
||||
maybeSpawnPty(ws, session);
|
||||
return;
|
||||
}
|
||||
// Unknown text frame — ignore.
|
||||
return;
|
||||
}
|
||||
|
||||
// Binary input. Lazy-spawn claude on the first byte.
|
||||
// Binary input. Lazy-spawn claude on the first byte if `start`
|
||||
// wasn't sent first. Both paths land in the same maybeSpawnPty
|
||||
// helper for behavior parity.
|
||||
if (!session.spawned) {
|
||||
session.spawned = true;
|
||||
// UTF-8 boundary detection to prevent splitting multi-byte characters (issue #1272).
|
||||
// Buffer incomplete UTF-8 sequences until the next chunk completes them.
|
||||
let leftover = Buffer.alloc(0);
|
||||
const proc = spawnClaude(session.cols, session.rows, (chunk) => {
|
||||
const combined = Buffer.concat([leftover, Buffer.from(chunk)]);
|
||||
// Find the last index where a UTF-8 codepoint ends. Look back at most 3 bytes.
|
||||
let safeEnd = combined.length;
|
||||
for (let i = combined.length - 1; i >= Math.max(0, combined.length - 3); i--) {
|
||||
const b = combined[i];
|
||||
if ((b & 0x80) === 0) { safeEnd = i + 1; break; } // ASCII
|
||||
if ((b & 0xC0) === 0x80) continue; // continuation byte
|
||||
const expected = (b & 0xE0) === 0xC0 ? 2 : (b & 0xF0) === 0xE0 ? 3 : 4;
|
||||
safeEnd = (combined.length - i >= expected) ? combined.length : i;
|
||||
break;
|
||||
}
|
||||
const flush = combined.slice(0, safeEnd);
|
||||
leftover = combined.slice(safeEnd);
|
||||
if (flush.length) {
|
||||
try { ws.sendBinary(flush); } catch {}
|
||||
}
|
||||
});
|
||||
if (!proc) {
|
||||
try {
|
||||
ws.send(JSON.stringify({
|
||||
type: 'error',
|
||||
code: 'CLAUDE_NOT_FOUND',
|
||||
message: 'claude CLI not on PATH. Install: https://docs.anthropic.com/en/docs/claude-code',
|
||||
}));
|
||||
ws.close(4404, 'claude not found');
|
||||
} catch {}
|
||||
return;
|
||||
}
|
||||
session.proc = proc;
|
||||
// Watch for child exit so the WS closes cleanly when claude exits.
|
||||
proc.exited?.then?.(() => {
|
||||
try { ws.close(1000, 'pty exited'); } catch {}
|
||||
});
|
||||
if (!maybeSpawnPty(ws, session)) return;
|
||||
}
|
||||
try {
|
||||
// raw is a Uint8Array; Bun.Terminal.write accepts string|Buffer.
|
||||
@@ -409,16 +801,49 @@ function buildServer() {
|
||||
}
|
||||
},
|
||||
|
||||
close(ws) {
|
||||
close(ws, code, _reason) {
|
||||
const session = sessions.get(ws);
|
||||
if (session) {
|
||||
disposeSession(session);
|
||||
if (session.cookie) {
|
||||
// Drop the cookie so it can't be replayed against a new PTY.
|
||||
validTokens.delete(session.cookie);
|
||||
}
|
||||
sessions.delete(ws);
|
||||
if (!session) return;
|
||||
// Always drop the WS-keyed map entry and the per-attach
|
||||
// attachToken — the attach grant was single-use.
|
||||
sessions.delete(ws);
|
||||
if (session.cookie) validTokens.delete(session.cookie);
|
||||
// Keepalive lives with the WS — every attach starts a fresh one.
|
||||
if (session.pingInterval) {
|
||||
clearInterval(session.pingInterval);
|
||||
session.pingInterval = null;
|
||||
}
|
||||
|
||||
// Commit 3 detach state machine. If the close was intentional
|
||||
// (code 4001 = restart, 4404 = no-claude error), dispose
|
||||
// immediately — there's no value in keeping the PTY alive.
|
||||
// Otherwise enter the detach window: claude keeps running, the
|
||||
// ring buffer keeps accumulating, and a re-attach with the same
|
||||
// sessionId within DETACH_WINDOW_MS picks back up. If the timer
|
||||
// fires without a re-attach, the session is disposed normally.
|
||||
//
|
||||
// Sessions without a sessionId (legacy single-shot grants) can't
|
||||
// re-attach by definition — fall through to immediate dispose.
|
||||
const intentional = code === 4001 || code === 4404 || code === 1000;
|
||||
if (intentional || !session.sessionId) {
|
||||
disposeSession(session);
|
||||
if (session.sessionId) sessionsById.delete(session.sessionId);
|
||||
return;
|
||||
}
|
||||
|
||||
// Mark detached and start the disposal timer. The session stays
|
||||
// in sessionsById so the next /ws upgrade with the same
|
||||
// sessionId can find and reattach to it.
|
||||
session.detached = true;
|
||||
session.liveWs = null;
|
||||
session.detachTimer = setTimeout(() => {
|
||||
if (!session.detached) return; // re-attached in the meantime
|
||||
disposeSession(session);
|
||||
if (session.sessionId) sessionsById.delete(session.sessionId);
|
||||
}, DETACH_WINDOW_MS);
|
||||
// setTimeout returns a Bun Timer; unref so the detach window
|
||||
// doesn't keep the process alive past natural shutdown.
|
||||
(session.detachTimer as any)?.unref?.();
|
||||
},
|
||||
},
|
||||
});
|
||||
@@ -548,14 +973,25 @@ function main() {
|
||||
writeSecureFile(tmp, String(port));
|
||||
fs.renameSync(tmp, PORT_FILE);
|
||||
|
||||
// Write identity-based agent record (pid + per-boot gen). Replaces the
|
||||
// v1.43- `pkill -f terminal-agent\.ts` regex teardown that could kill
|
||||
// sibling gstack sessions. Callers (cli.ts spawn site, server.ts
|
||||
// shutdown, the v1.44 watchdog) now route through killAgentByRecord in
|
||||
// terminal-agent-control.ts.
|
||||
writeAgentRecord(dir, { pid: process.pid, gen: CURRENT_GEN, startedAt: Date.now() });
|
||||
|
||||
// Hand the parent the internal token so it can call /internal/grant.
|
||||
// Parent learns INTERNAL_TOKEN via env (TERMINAL_AGENT_INTERNAL_TOKEN below).
|
||||
// We just print it on stdout for the supervising process to pick up if it's
|
||||
// not already in env. Defense against env races at spawn time.
|
||||
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid}`);
|
||||
console.log(`[terminal-agent] listening on 127.0.0.1:${port} pid=${process.pid} gen=${CURRENT_GEN}`);
|
||||
|
||||
// Cleanup port file on exit.
|
||||
const cleanup = () => { safeUnlink(PORT_FILE); process.exit(0); };
|
||||
// Cleanup port file + agent record on exit.
|
||||
const cleanup = () => {
|
||||
safeUnlink(PORT_FILE);
|
||||
clearAgentRecord(dir);
|
||||
process.exit(0);
|
||||
};
|
||||
process.on('SIGTERM', cleanup);
|
||||
process.on('SIGINT', cleanup);
|
||||
}
|
||||
|
||||
@@ -11,12 +11,14 @@ import { findInstalledBrowsers, importCookies, importCookiesViaCdp, hasV20Cookie
|
||||
import { generatePickerCode } from './cookie-picker-routes';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import { validateOutputPath, validateReadPath } from './path-security';
|
||||
import { guardScreenshotPath } from './screenshot-size-guard';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { SetContentWaitUntil } from './tab-session';
|
||||
import { TEMP_DIR, isPathWithin } from './platform';
|
||||
import { SAFE_DIRECTORIES } from './path-security';
|
||||
import { modifyStyle, undoModification, resetModifications, getModificationHistory } from './cdp-inspector';
|
||||
import { withCdpSession } from './cdp-bridge';
|
||||
|
||||
/**
|
||||
* Aggressive page cleanup selectors and heuristics.
|
||||
@@ -1123,6 +1125,10 @@ export async function handleWriteCommand(
|
||||
|
||||
// Take screenshot
|
||||
await page.screenshot({ path: outputPath, fullPage: !scrollTo });
|
||||
// Guard against Anthropic vision API >2000px brick (#1214). Only
|
||||
// applies to fullPage captures; scrollTo viewport-bound shots are
|
||||
// already capped by the viewport size.
|
||||
if (!scrollTo) await guardScreenshotPath(outputPath);
|
||||
|
||||
// Restore viewport
|
||||
if (viewportWidth && originalViewport) {
|
||||
@@ -1404,9 +1410,10 @@ export async function handleWriteCommand(
|
||||
validateOutputPath(outputPath);
|
||||
|
||||
try {
|
||||
const cdp = await page.context().newCDPSession(page);
|
||||
const { data } = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
|
||||
await cdp.detach();
|
||||
const data = await withCdpSession(page, async (cdp) => {
|
||||
const result = await cdp.send('Page.captureSnapshot', { format: 'mhtml' });
|
||||
return (result as { data: string }).data;
|
||||
});
|
||||
fs.writeFileSync(outputPath, data);
|
||||
return `Archive saved: ${outputPath} (${Math.round(data.length / 1024)}KB, MHTML)`;
|
||||
} catch (err: any) {
|
||||
|
||||
@@ -29,17 +29,20 @@ describe('shouldEnableChromiumSandbox', () => {
|
||||
const origPlatform = process.platform;
|
||||
const origCI = process.env.CI;
|
||||
const origContainer = process.env.CONTAINER;
|
||||
const origNoSandbox = process.env.GSTACK_CHROMIUM_NO_SANDBOX;
|
||||
const origGetuid = process.getuid;
|
||||
|
||||
beforeEach(() => {
|
||||
delete process.env.CI;
|
||||
delete process.env.CONTAINER;
|
||||
delete process.env.GSTACK_CHROMIUM_NO_SANDBOX;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
Object.defineProperty(process, 'platform', { value: origPlatform });
|
||||
if (origCI === undefined) delete process.env.CI; else process.env.CI = origCI;
|
||||
if (origContainer === undefined) delete process.env.CONTAINER; else process.env.CONTAINER = origContainer;
|
||||
if (origNoSandbox === undefined) delete process.env.GSTACK_CHROMIUM_NO_SANDBOX; else process.env.GSTACK_CHROMIUM_NO_SANDBOX = origNoSandbox;
|
||||
process.getuid = origGetuid;
|
||||
});
|
||||
|
||||
@@ -90,6 +93,31 @@ describe('shouldEnableChromiumSandbox', () => {
|
||||
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
|
||||
expect(shouldEnableChromiumSandbox()).toBe(false);
|
||||
});
|
||||
|
||||
// #1562 — Ubuntu/AppArmor opt-in override
|
||||
it('linux + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (Ubuntu/AppArmor opt-out)', async () => {
|
||||
setPlatform('linux');
|
||||
process.env.GSTACK_CHROMIUM_NO_SANDBOX = '1';
|
||||
process.getuid = (() => 1000) as typeof process.getuid;
|
||||
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
|
||||
expect(shouldEnableChromiumSandbox()).toBe(false);
|
||||
});
|
||||
|
||||
it('darwin + GSTACK_CHROMIUM_NO_SANDBOX=1 → false (env override wins on any platform)', async () => {
|
||||
setPlatform('darwin');
|
||||
process.env.GSTACK_CHROMIUM_NO_SANDBOX = '1';
|
||||
process.getuid = (() => 501) as typeof process.getuid;
|
||||
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
|
||||
expect(shouldEnableChromiumSandbox()).toBe(false);
|
||||
});
|
||||
|
||||
it('GSTACK_CHROMIUM_NO_SANDBOX=0 → does NOT trigger override (must be exactly "1")', async () => {
|
||||
setPlatform('linux');
|
||||
process.env.GSTACK_CHROMIUM_NO_SANDBOX = '0';
|
||||
process.getuid = (() => 1000) as typeof process.getuid;
|
||||
const { shouldEnableChromiumSandbox } = await import('../src/browser-manager');
|
||||
expect(shouldEnableChromiumSandbox()).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── resolveDisconnectCause ──────────────────────────────────────
|
||||
@@ -163,3 +191,39 @@ describe('resolveDisconnectCause', () => {
|
||||
expect(await resolveDisconnectCause(null)).toBe('crash');
|
||||
});
|
||||
});
|
||||
|
||||
// ─── onDisconnect exit-code propagation (regression test) ──────────
|
||||
//
|
||||
// The contract: BrowserManager.onDisconnect is called with the resolved
|
||||
// exit code (0 for clean Cmd+Q, 2 for crash). server.ts then forwards
|
||||
// that code to activeShutdown(), which exits the process.
|
||||
//
|
||||
// Without this propagation, the headed-mode user-visible Cmd+Q respawn
|
||||
// bug returns: server.ts hardcoded `activeShutdown?.(2)` ignores the
|
||||
// resolved 0 and gbrowser's gbd HealthMonitor treats the clean quit as
|
||||
// a crash, restarting the window.
|
||||
describe('BrowserManager.onDisconnect exit-code propagation', () => {
  it('signature accepts an optional exitCode argument', async () => {
    const mod = await import('../src/browser-manager');
    const manager = new mod.BrowserManager();

    // Record every code the callback is invoked with, in order.
    const seen: Array<number | undefined> = [];
    manager.onDisconnect = (code?: number) => {
      seen.push(code);
    };

    manager.onDisconnect(0); // clean quit
    manager.onDisconnect(2); // crash
    manager.onDisconnect(undefined); // no code resolved

    expect(seen).toEqual([0, 2, undefined]);
  });

  it('server.ts callback forwards exitCode when provided, falls back to 2', async () => {
    // Local replica of the wiring described for browse/src/server.ts. If that
    // wiring reverts to a hardcoded `() => activeShutdown?.(2)`, this is the
    // shape that stops matching.
    const received: number[] = [];
    const activeShutdown = (code: number) => {
      received.push(code);
    };
    const onDisconnect = (code?: number) => activeShutdown(code ?? 2);

    [0, 2, undefined].forEach((code) => onDisconnect(code));

    // An explicit 0 must survive; only a missing code falls back to 2.
    expect(received).toEqual([0, 2, 2]);
  });
});
|
||||
|
||||
@@ -178,7 +178,17 @@ describe('buildSpawnEnv', () => {
|
||||
process.env.LANG = 'en_US.UTF-8';
|
||||
});
|
||||
afterEach(() => {
|
||||
process.env = origEnv;
|
||||
// process.env = origEnv replaces only the reference; the underlying
|
||||
// env stays mutated and leaks to later test files in the same Bun
|
||||
// process (e.g., breaks Bun.which('bash') in security.test.ts and
|
||||
// bun-spawn in pair-agent-tunnel-eval.test.ts). Delete every current
|
||||
// key then re-assign from the snapshot — restores the actual env.
|
||||
for (const k of Object.keys(process.env)) {
|
||||
if (!(k in origEnv)) delete process.env[k];
|
||||
}
|
||||
for (const [k, v] of Object.entries(origEnv)) {
|
||||
if (v !== undefined) process.env[k] = v;
|
||||
}
|
||||
});
|
||||
|
||||
it('untrusted: drops $HOME and secrets', () => {
|
||||
@@ -293,7 +303,15 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
|
||||
expect(parsed.gh).toBeNull();
|
||||
expect(parsed.gstack).toBeNull();
|
||||
} finally {
|
||||
process.env = origEnv;
|
||||
// See afterEach comment in `buildSpawnEnv` describe — direct
|
||||
// reassignment of process.env doesn't actually restore the
|
||||
// underlying env in Bun. Delete + re-assign instead.
|
||||
for (const k of Object.keys(process.env)) {
|
||||
if (!(k in origEnv)) delete process.env[k];
|
||||
}
|
||||
for (const [k, v] of Object.entries(origEnv)) {
|
||||
if (v !== undefined) process.env[k] = v;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -312,7 +330,12 @@ describe.skipIf(SKIP_SPAWN)('spawnSkill: lifecycle', () => {
|
||||
const parsed = JSON.parse(result.stdout);
|
||||
expect(parsed.home).toBe('/Users/test-user');
|
||||
} finally {
|
||||
process.env = origEnv;
|
||||
for (const k of Object.keys(process.env)) {
|
||||
if (!(k in origEnv)) delete process.env[k];
|
||||
}
|
||||
for (const [k, v] of Object.entries(origEnv)) {
|
||||
if (v !== undefined) process.env[k] = v;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
import { describe, test, expect, beforeEach } from 'bun:test';
|
||||
import type { Page } from 'playwright';
|
||||
import {
|
||||
__testInternals,
|
||||
undoModification,
|
||||
} from '../src/cdp-inspector';
|
||||
|
||||
// Regression tests for the modificationHistory cap (D6 / smoking gun #2).
|
||||
// Pre-cap, the module-scoped array grew unbounded across the session. Cap is
|
||||
// 200 entries, oldest evicted on push past the cap. undoModification reports
|
||||
// "evicted at the cap" in the error message so a user who asks for a
|
||||
// no-longer-available index understands what happened (instead of seeing the
|
||||
// pre-cap "No modification at index 500" with no context).
|
||||
|
||||
const { pushModification, MOD_HISTORY_CAP, getRawHistory, getTotalPushed, resetForTest } = __testInternals;
|
||||
|
||||
/**
 * Build a minimal modification record for the history-cap tests.
 *
 * `id` is embedded in both the selector (`#node-<id>`) and the timestamp so
 * a test can tell which entries survived eviction; every other field is a
 * fixed placeholder.
 */
function fakeMod(id: number) {
  const source = 'inline' as const;
  const method = 'setProperty' as const;
  const selector = '#node-' + id;
  return {
    selector,
    property: 'color',
    oldValue: 'red',
    newValue: 'blue',
    source,
    timestamp: id,
    method,
  };
}
|
||||
|
||||
beforeEach(() => {
|
||||
resetForTest();
|
||||
});
|
||||
|
||||
describe('modificationHistory cap', () => {
  // Push `count` fake modifications with timestamps 0..count-1.
  const fill = (count: number): void => {
    let next = 0;
    while (next < count) {
      pushModification(fakeMod(next));
      next += 1;
    }
  };

  test('1. push under cap keeps every entry', () => {
    fill(50);
    const history = getRawHistory();
    expect(history.length).toBe(50);
    expect(getTotalPushed()).toBe(50);
    expect(history[0].timestamp).toBe(0);
    expect(history[49].timestamp).toBe(49);
  });

  test('2. push exactly cap keeps every entry', () => {
    fill(MOD_HISTORY_CAP);
    const history = getRawHistory();
    expect(history.length).toBe(MOD_HISTORY_CAP);
    expect(getTotalPushed()).toBe(MOD_HISTORY_CAP);
    expect(history[0].timestamp).toBe(0);
  });

  test('3. push past cap evicts oldest, keeps length at cap', () => {
    const total = MOD_HISTORY_CAP + 50;
    fill(total);
    const history = getRawHistory();
    expect(history.length).toBe(MOD_HISTORY_CAP);
    expect(getTotalPushed()).toBe(total);
    // The first 50 pushes are gone, so the oldest surviving entry is #50
    // and the newest is the last one pushed.
    expect(history[0].timestamp).toBe(50);
    expect(history[MOD_HISTORY_CAP - 1].timestamp).toBe(total - 1);
  });

  test('4. resetForTest clears both buffer and totalPushed', () => {
    fill(10);
    resetForTest();
    // Read the history only after the reset so a replaced array is observed.
    expect(getRawHistory().length).toBe(0);
    expect(getTotalPushed()).toBe(0);
  });
});
|
||||
|
||||
describe('undoModification eviction-aware error', () => {
  // Placeholder Page. Per the original test notes, undoModification rejects
  // on an out-of-range index before it touches the page, so an empty object
  // is enough here.
  const stubPage = {} as unknown as Page;

  // Push `count` fake modifications with timestamps 0..count-1.
  const seed = (count: number): void => {
    Array.from({ length: count }, (_, n) => n).forEach((n) => pushModification(fakeMod(n)));
  };

  test('5. out-of-range BEFORE any eviction → no evicted note', async () => {
    seed(5);
    const attempt = undoModification(stubPage, 99);
    await expect(attempt).rejects.toThrow(
      'No modification at index 99. History has 5 entries.',
    );
  });

  test('6. out-of-range AFTER eviction → message names the evicted count', async () => {
    // Pushing cap + 73 leaves `cap` entries in the buffer and 73 evicted;
    // index 400 is then requested.
    const total = MOD_HISTORY_CAP + 73;
    seed(total);
    const expectedMessage =
      `No modification at index 400. History has ${MOD_HISTORY_CAP} entries ` +
      `(most recent ${MOD_HISTORY_CAP} only — 73 earlier entries evicted at the cap).`;
    await expect(undoModification(stubPage, 400)).rejects.toThrow(expectedMessage);
  });

  test('7. negative explicit index throws cleanly (no NaN propagation)', async () => {
    seed(10);
    const attempt = undoModification(stubPage, -1);
    await expect(attempt).rejects.toThrow(
      'No modification at index -1.',
    );
  });
});
|
||||
@@ -0,0 +1,171 @@
|
||||
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
import type { Page } from 'playwright';
import { withCdpSession, getOrCreateCdpSession } from '../src/cdp-bridge';
|
||||
|
||||
// Static-grep tripwire + behavior tests for the CDP session lifecycle
|
||||
// helpers introduced as part of the D11 EXPAND_SCOPE memory-leak fix.
|
||||
//
|
||||
// Direct calls to `page.context().newCDPSession(page)` are the leak class
|
||||
// the helpers exist to close — every direct call needs a matching
|
||||
// `session.detach()` and forgetting it leaves the Chromium-side target
|
||||
// attached until the underlying transport drops. The tripwire fails CI
|
||||
// if any source file calls `newCDPSession(` outside `cdp-bridge.ts`
|
||||
// (the file that owns the helpers).
|
||||
//
|
||||
// Pattern mirrors browse/test/terminal-agent-pid-identity.test.ts and
|
||||
// browse/test/server-sanitize-surrogates.test.ts: read source files
|
||||
// directly, assert an invariant on their contents.
|
||||
|
||||
// Absolute path of the `src` directory that sits next to this test's own
// directory (<this file>/../../src). The file URL is converted with
// fileURLToPath rather than read via `.pathname`: `.pathname` stays
// percent-encoded (a checkout under "My Projects/" becomes "My%20Projects/")
// and on Windows keeps a leading slash before the drive letter, either of
// which makes the readdirSync in readAllSourceFiles fail.
const SRC_DIR = path.resolve(fileURLToPath(import.meta.url), '..', '..', 'src');
|
||||
|
||||
/**
 * Loads every `.ts` file located directly inside `SRC_DIR`.
 *
 * Only the top level of the directory is listed (no recursion); entries whose
 * name does not end in `.ts` are ignored.
 *
 * @returns One `{ file, content }` record per file, where `file` is the bare
 *   entry name and `content` is the file text decoded as UTF-8.
 */
function readAllSourceFiles(): Array<{ file: string; content: string }> {
  return fs
    .readdirSync(SRC_DIR)
    .filter((name) => name.endsWith('.ts'))
    .map((name) => ({
      file: name,
      content: fs.readFileSync(path.join(SRC_DIR, name), 'utf-8'),
    }));
}
|
||||
|
||||
// Static tripwire: scans the source files returned by readAllSourceFiles() and
// fails if any of them (other than cdp-bridge.ts) contains a direct
// `newCDPSession(` call on a non-comment line.
describe('CDP session cleanup invariant', () => {
  test('1. no source file calls `newCDPSession(` outside cdp-bridge.ts', () => {
    const offenders: Array<{ file: string; line: number; text: string }> = [];

    for (const { file, content } of readAllSourceFiles()) {
      // The helper file is the ONE allowed home for direct newCDPSession calls.
      if (file === 'cdp-bridge.ts') continue;

      content.split('\n').forEach((line, index) => {
        if (!/newCDPSession\s*\(/.test(line)) return;

        // Documentation mentions are fine. Besides `//` lines and the `*`
        // continuation lines of a block comment, also skip lines that OPEN a
        // block comment (`/*`, `/**`): a one-line doc comment naming the API
        // would otherwise be reported as a call site.
        const trimmed = line.trim();
        const isCommentLine =
          trimmed.startsWith('//') || trimmed.startsWith('/*') || trimmed.startsWith('*');
        if (isCommentLine) return;

        // Report 1-based line numbers so the message matches editor gutters.
        offenders.push({ file, line: index + 1, text: trimmed });
      });
    }

    if (offenders.length > 0) {
      // Throw a descriptive error (rather than relying on the matcher diff) so
      // the failure names each offending call site and the helpers to use.
      const formatted = offenders
        .map((o) => `  ${o.file}:${o.line}  ${o.text}`)
        .join('\n');
      throw new Error(
        `Direct newCDPSession(...) calls found outside cdp-bridge.ts. ` +
          `Route through withCdpSession() (one-shot, finally-detach) or ` +
          `getOrCreateCdpSession() (cached, close-detach) instead:\n${formatted}`,
      );
    }
    expect(offenders).toEqual([]);
  });

  test('2. helper file exports the two documented entry points', () => {
    // Sanity: the tripwire is meaningless if the helpers themselves are gone.
    expect(typeof withCdpSession).toBe('function');
    expect(typeof getOrCreateCdpSession).toBe('function');
  });
});
|
||||
|
||||
// Behaviour tests for withCdpSession(): the session handed to the callback
// must be detached exactly once whether the callback resolves or throws, and a
// failing detach() must never replace the callback's own outcome.
describe('withCdpSession finally-detach', () => {
  /**
   * Builds the smallest Page-shaped stub these tests need: `context()` yields
   * an object whose `newCDPSession()` resolves to a session with a `detach()`.
   *
   * @param detachSpy Counter bumped on every detach(); when `rejected` is set,
   *   detach() throws that error after counting.
   */
  function makeFakePage(detachSpy: { called: number; rejected?: Error }) {
    const fakeSession = {
      async detach() {
        detachSpy.called++;
        if (detachSpy.rejected) throw detachSpy.rejected;
      },
    };
    const fakeContext = {
      newCDPSession: async (_p: unknown) => fakeSession,
    };
    const fake = { context: () => fakeContext };
    return fake as unknown as Page;
  }

  test('3. detaches on the success path', async () => {
    const spy = { called: 0 };
    const value = await withCdpSession(makeFakePage(spy), async (session) => {
      expect(session).toBeDefined();
      return 42;
    });
    expect(value).toBe(42);
    expect(spy.called).toBe(1);
  });

  test('4. detaches even when fn throws (the actual leak fix)', async () => {
    const spy = { called: 0 };
    const failing = withCdpSession(makeFakePage(spy), async () => {
      throw new Error('boom');
    });
    await expect(failing).rejects.toThrow('boom');
    expect(spy.called).toBe(1);
  });

  test('5. swallows detach errors so they do not mask fn errors', async () => {
    // Both the callback and detach() fail; the callback's error must win.
    const spy = { called: 0, rejected: new Error('already detached') };
    const failing = withCdpSession(makeFakePage(spy), async () => {
      throw new Error('original');
    });
    await expect(failing).rejects.toThrow('original');
    expect(spy.called).toBe(1);
  });

  test('6. swallows detach errors on the success path too', async () => {
    // Only detach() fails; the callback's return value must still come back.
    const spy = { called: 0, rejected: new Error('target closed') };
    const value = await withCdpSession(makeFakePage(spy), async () => 'ok');
    expect(value).toBe('ok');
    expect(spy.called).toBe(1);
  });
});
|
||||
|
||||
// Behaviour tests for getOrCreateCdpSession(): repeated calls with the same
// page and cache return the same session, and the page's 'close' event both
// detaches that session and removes it from the cache.
describe('getOrCreateCdpSession close-detach', () => {
  /**
   * Builds a Page-shaped stub that records `once('close', …)` listeners and
   * exposes a way to fire them, plus a session that counts detach() calls.
   */
  function makeFakePage() {
    const onClose: Array<() => void> = [];
    const session = {
      _detachCount: 0,
      async detach() {
        session._detachCount++;
      },
    };
    const fakeContext = {
      newCDPSession: async (_p: unknown) => session,
    };
    const page = {
      context: () => fakeContext,
      // Only 'close' registrations are kept; other events are ignored.
      once: (event: string, fn: () => void) => {
        if (event === 'close') onClose.push(fn);
      },
      _fireClose: () => {
        for (const listener of onClose) listener();
      },
    };
    return { page: page as unknown as Page, session, fireClose: page._fireClose };
  }

  test('7. caches the session across calls', async () => {
    const { page } = makeFakePage();
    const cache = new WeakMap<Page, any>();
    const first = await getOrCreateCdpSession(page, cache);
    const second = await getOrCreateCdpSession(page, cache);
    expect(first).toBe(second);
  });

  test('8. close hook detaches the session AND clears the cache', async () => {
    const fixture = makeFakePage();
    const cache = new WeakMap<Page, any>();
    await getOrCreateCdpSession(fixture.page, cache);
    expect(cache.get(fixture.page)).toBeDefined();

    fixture.fireClose();
    // Yield one macrotask so any asynchronous work started by the close
    // listener (registered inside cdp-bridge.ts) can settle before asserting.
    await new Promise((r) => setTimeout(r, 0));

    expect(cache.get(fixture.page)).toBeUndefined();
    expect(fixture.session._detachCount).toBe(1);
  });
});
|
||||
@@ -0,0 +1,75 @@
|
||||
/**
|
||||
* Coverage for #1612 — macOS/Linux server must survive sandboxed-shell
|
||||
* harnesses by becoming its own session leader (setsid).
|
||||
*
|
||||
* Pre-#1612, Bun.spawn().unref() removed the child from Bun's event loop
|
||||
* but did NOT call setsid(). When the CLI ran inside Claude Code's
|
||||
* per-command sandbox, Conductor, or CI step runners, the session leader's
|
||||
* exit sent SIGHUP to every PID in the session, killing the bun server.
|
||||
*
|
||||
* The fix routes macOS/Linux spawn through Node's child_process.spawn with
|
||||
* detached:true, which calls setsid() so the server becomes its own session
|
||||
* leader (PPID=1 on Linux, similar reparenting on Darwin).
|
||||
*
|
||||
* The actual setsid syscall is hard to assert in a unit test without a
|
||||
* real spawn — testing here is static: the cli.ts source must use the
|
||||
* Node spawn path on macOS/Linux, with detached:true and .unref(). If a
|
||||
* future refactor reverts to Bun.spawn().unref() on the macOS/Linux branch
|
||||
* the regression returns and these tests fail.
|
||||
*/
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, "..", "..");
|
||||
const CLI = path.join(ROOT, "browse", "src", "cli.ts");
|
||||
|
||||
/**
 * Reads the CLI entry point (`CLI`) from disk.
 *
 * @returns The full file text decoded as UTF-8.
 */
function read(): string {
  const contents = fs.readFileSync(CLI, { encoding: "utf-8" });
  return contents;
}
|
||||
|
||||
// Static checks on the text of cli.ts: the server spawn must go through the
// `nodeSpawn` alias with `detached: true` and `.unref()`, and the source must
// keep mentioning setsid/SIGHUP so the reason for the spawn path stays visible.
describe("#1612 macOS/Linux daemonize via Node setsid path", () => {
  test("cli.ts imports nodeSpawn from child_process (Node spawn alias)", () => {
    const cliSource = read();
    // Accept either `spawn as nodeSpawn` or a bare `nodeSpawn` binding
    // followed by `,` or `}`, so the exact import style is free to change.
    const aliasPattern = /(spawn as nodeSpawn|nodeSpawn\s*[,}])/;
    const modulePattern = /from\s+['"]child_process['"]/;
    expect(cliSource).toMatch(aliasPattern);
    expect(cliSource).toMatch(modulePattern);
  });

  test("non-Windows branch uses nodeSpawn(...).unref() with detached:true", () => {
    const cliSource = read();
    // Each pattern allows at most 500 characters between the `nodeSpawn(`
    // call and the token it must be paired with.
    const requiredPairs = [
      /nodeSpawn\([\s\S]{0,500}detached:\s*true/,
      /nodeSpawn\([\s\S]{0,500}\.unref\(\)/,
    ];
    for (const pattern of requiredPairs) {
      expect(cliSource).toMatch(pattern);
    }
  });

  test("non-Windows branch comment documents setsid/SIGHUP root cause", () => {
    const cliSource = read();
    // Both terms must appear somewhere in the file so a future refactor sees
    // the "why" before changing the spawn call.
    expect(cliSource).toMatch(/setsid/);
    expect(cliSource).toMatch(/SIGHUP/);
  });

  test("the spawn call on macOS/Linux is nodeSpawn, not Bun.spawn", () => {
    const cliSource = read();
    // Drop whole-line `//` comments first, so "Bun.spawn().unref()" mentions
    // inside explanatory comments cannot trigger a false positive.
    const codeOnly = cliSource
      .split("\n")
      .filter((line) => !line.trimStart().startsWith("//"))
      .join("\n");

    // Anchor on the first `nodeSpawn('bun'` call in the remaining code, then
    // inspect the 400 characters starting at that call: the window must not
    // contain a `Bun.spawn(` call.
    const anchor = codeOnly.indexOf("nodeSpawn('bun'");
    expect(anchor).toBeGreaterThan(-1);
    const windowText = codeOnly.slice(anchor, anchor + 400);
    expect(windowText).toMatch(/nodeSpawn\(/);
    expect(windowText).not.toMatch(/Bun\.spawn\(/);
  });
});
|
||||
@@ -0,0 +1,81 @@
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
// v1.44 outer supervisor — static-grep invariants.
|
||||
//
|
||||
// Pre-v1.44 `$B connect` was fire-and-forget: spawn server detached, CLI
|
||||
// exits, server runs unsupervised. If the server crashed, the user had to
|
||||
// re-run `$B connect`. The opt-in supervisor (--supervise or
|
||||
// BROWSE_SUPERVISE=1) keeps the CLI attached and respawns the server on
|
||||
// unexpected exit, with the same crash-loop guard shape as the v1.44
|
||||
// terminal-agent watchdog.
|
||||
//
|
||||
// Live respawn tests belong in the e2e tier (real Bun.spawn cycles take
|
||||
// 3-8s each). These tripwires defend the load-bearing invariants:
|
||||
// opt-in by default, signal handlers wired, crash-loop guard, env knobs.
|
||||
|
||||
// Absolute path of `src/cli.ts`, resolved relative to this test directory.
//
// Resolved from `import.meta.dir` (Bun: the directory of the current file as a
// plain filesystem path). `new URL(import.meta.url).pathname` is NOT a
// filesystem path: it stays percent-encoded (spaces become "%20") and keeps a
// leading "/" before Windows drive letters, so reading the file would fail
// with ENOENT in those checkouts.
const CLI_TS = path.resolve(import.meta.dir, '..', 'src', 'cli.ts');
|
||||
|
||||
// Static-grep tripwires over the text of cli.ts. Every test reads the file
// fresh and asserts that a specific literal or pattern is still present.
describe('CLI outer supervisor (v1.44+)', () => {
  // Loads the current text of cli.ts.
  const loadCli = (): string => fs.readFileSync(CLI_TS, 'utf-8');

  test('1. supervisor is opt-in via --supervise flag or BROWSE_SUPERVISE env', () => {
    const cli = loadCli();
    const optInChecks = [
      "commandArgs.includes('--supervise')",
      "process.env.BROWSE_SUPERVISE === '1'",
    ];
    for (const snippet of optInChecks) {
      expect(cli).toContain(snippet);
    }
    // Default path MUST still exit 0 promptly: callers of `$B connect`
    // (Claude Code Bash tool, scripts, CI) rely on a prompt return.
    expect(cli).toMatch(/if \(!superviseRequested\) \{\s*process\.exit\(0\);\s*\}/);
  });

  test('2. SIGINT and SIGTERM trigger clean teardown', () => {
    const cli = loadCli();
    // Both signals must reach the teardown path, otherwise Ctrl-C would
    // leave an orphaned server behind (worse than no supervisor).
    expect(cli).toMatch(/process\.on\('SIGINT'.*teardownAndExit/);
    expect(cli).toMatch(/process\.on\('SIGTERM'.*teardownAndExit/);
    // Teardown must signal the supervised server before exiting itself.
    expect(cli).toContain("safeKill(state.pid, 'SIGTERM')");
  });

  test('3. crash-loop guard with 5-in-5min rolling window', () => {
    const cli = loadCli();
    expect(cli).toContain('SUPERVISOR_GUARD_WINDOW_MS = 5 * 60_000');
    expect(cli).toContain('SUPERVISOR_GUARD_MAX = 5');
    // Window pruning: a long-lived daemon with sporadic crashes must NOT hit
    // the guard, so old respawn records have to be shifted out.
    expect(cli).toMatch(/respawns\.shift\(\)/);
  });

  test('4. exponential backoff schedule, env-overridable', () => {
    const cli = loadCli();
    expect(cli).toContain('GSTACK_SUPERVISOR_BACKOFF');
    // The default schedule starts with short waits (fast recovery from
    // transient crashes) and ends on a long one.
    expect(cli).toContain('1000,2000,4000,8000,30000');
  });

  test('5. tick interval is env-overridable for tests', () => {
    expect(loadCli()).toContain('GSTACK_SUPERVISOR_TICK_MS');
  });

  test('6. respawned server gets a fresh terminal-agent too', () => {
    const cli = loadCli();
    // After a server respawn the recorded terminal-agent is stale, so the
    // supervisor section itself must call spawnTerminalAgent again. Only the
    // text between the two markers is searched.
    const supervisorSection = sliceBetween(cli, 'Supervisor mode:', '// ─── Headed Disconnect');
    expect(supervisorSection).toContain('spawnTerminalAgent({');
  });
});
|
||||
|
||||
/**
 * Extracts the part of `source` that begins at the first occurrence of
 * `start` and stops just before the next occurrence of `end`.
 *
 * The returned text includes the `start` marker and excludes the `end`
 * marker. The search for `end` begins after the `start` marker, so an `end`
 * that appears earlier in the text is ignored.
 *
 * @param source Text to search.
 * @param start Marker that opens the slice.
 * @param end Marker that closes the slice.
 * @returns The substring from `start` (inclusive) up to `end` (exclusive).
 * @throws Error when either marker cannot be found.
 */
function sliceBetween(source: string, start: string, end: string): string {
  const from = source.indexOf(start);
  if (from < 0) {
    throw new Error(`marker not found: ${start}`);
  }

  const to = source.indexOf(end, from + start.length);
  if (to < 0) {
    throw new Error(`end marker not found: ${end}`);
  }

  return source.substring(from, to);
}
|
||||
@@ -9,7 +9,7 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { startTestServer } from './test-server';
|
||||
import { BrowserManager } from '../src/browser-manager';
|
||||
import { resolveServerScript } from '../src/cli';
|
||||
import { handleReadCommand as _handleReadCommand } from '../src/read-commands';
|
||||
import { handleReadCommand as _handleReadCommand, parseOutArgs, hasOutArg, resultToString } from '../src/read-commands';
|
||||
import { handleWriteCommand as _handleWriteCommand } from '../src/write-commands';
|
||||
import { handleMetaCommand } from '../src/meta-commands';
|
||||
import { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetworkEntry, addDialogEntry, CircularBuffer } from '../src/buffers';
|
||||
@@ -23,6 +23,65 @@ const handleReadCommand = (cmd: string, args: string[], b: BrowserManager) =>
|
||||
const handleWriteCommand = (cmd: string, args: string[], b: BrowserManager) =>
|
||||
_handleWriteCommand(cmd, args, b.getActiveSession(), b);
|
||||
|
||||
// ─── Pure arg-parser + result-conversion unit tests (no browser) ───
|
||||
// Pure unit tests (no browser) for the `--out` / `--raw` argument helpers.
describe('parseOutArgs / hasOutArg', () => {
  // What every well-formed `--out /tmp/x` parse of a lone `expr` must yield.
  const parsedExpr = { outPath: '/tmp/x', raw: false, rest: ['expr'] };

  test('--out <path> splits the flag from the positional', () => {
    const parsed = parseOutArgs(['expr', '--out', '/tmp/x']);
    expect(parsed).toEqual(parsedExpr);
  });

  test('--out=<path> form is equivalent', () => {
    const parsed = parseOutArgs(['expr', '--out=/tmp/x']);
    expect(parsed).toEqual(parsedExpr);
  });

  test('flag ordering does not matter', () => {
    const parsed = parseOutArgs(['--out', '/tmp/x', 'expr']);
    expect(parsed).toEqual(parsedExpr);
  });

  test('--raw and --raw=true|false', () => {
    // [raw flag spelling, expected parsed value]
    const spellings: Array<[string, boolean]> = [
      ['--raw', true],
      ['--raw=true', true],
      ['--raw=false', false],
    ];
    for (const [flag, expected] of spellings) {
      expect(parseOutArgs(['e', '--out', '/tmp/x', flag]).raw).toBe(expected);
    }
  });

  test('repeated --out throws', () => {
    const parseTwice = () => parseOutArgs(['e', '--out', '/a', '--out', '/b']);
    expect(parseTwice).toThrow(/more than once/);
  });

  test('--out with a missing value throws', () => {
    // Trailing flag, flag followed by another flag, and empty `--out=`.
    const missingValue = [
      ['e', '--out'],
      ['e', '--out', '--raw'],
      ['e', '--out='],
    ];
    for (const argv of missingValue) {
      expect(() => parseOutArgs(argv)).toThrow(/requires a file path/);
    }
  });

  test('bad --raw value throws', () => {
    const parseBadRaw = () => parseOutArgs(['e', '--out', '/a', '--raw=maybe']);
    expect(parseBadRaw).toThrow(/--raw must be true or false/);
  });

  test('hasOutArg matches --out and --out= exactly, not lookalikes', () => {
    // [argv, whether it should count as carrying an --out flag]
    const samples: Array<[string[], boolean]> = [
      [['a', '--out', 'b'], true],
      [['a', '--out=b'], true],
      [['a'], false],
      [['a', '--output', 'b'], false],
      [['a', '--outx'], false],
    ];
    for (const [argv, expected] of samples) {
      expect(hasOutArg(argv)).toBe(expected);
    }
  });
});
|
||||
|
||||
// Pins the string conversion applied to evaluation results, one case per
// category of value.
describe('resultToString — byte-for-byte with pre-refactor behavior', () => {
  test('null becomes "null" (typeof null === object → JSON.stringify)', () => {
    const text = resultToString(null);
    expect(text).toBe('null');
  });

  test('undefined becomes empty string', () => {
    const text = resultToString(undefined);
    expect(text).toBe('');
  });

  test('objects are pretty-printed JSON', () => {
    // Expected form is JSON with a two-space indent.
    const prettyPrinted = JSON.stringify({ a: 1 }, null, 2);
    expect(resultToString({ a: 1 })).toBe(prettyPrinted);
  });

  test('primitives use String()', () => {
    const numeric = resultToString(42);
    const boolean = resultToString(true);
    expect(numeric).toBe('42');
    expect(boolean).toBe('true');
  });
});
|
||||
|
||||
let testServer: ReturnType<typeof startTestServer>;
|
||||
let bm: BrowserManager;
|
||||
let baseUrl: string;
|
||||
@@ -225,6 +284,102 @@ describe('Inspection', () => {
|
||||
expect(result).toBe('3');
|
||||
});
|
||||
|
||||
// ─── js/eval --out (render-to-file) ───────────────────────────
|
||||
|
||||
// Handler-level guarantee: without --out the full result string comes back,
// not a sliced or capped version. (Only the handler is exercised here.)
test('js (no --out) returns a multi-MB string without truncation', async () => {
  const threeMiB = 3 * 1024 * 1024;
  const payload = await handleReadCommand('js', ["'x'.repeat(3 * 1024 * 1024)"], bm);
  expect(payload.length).toBe(threeMiB);
});
|
||||
|
||||
// With --out the payload goes to the file and the handler returns only a
// short status line naming the path and the byte count.
test('js --out writes the result to disk and returns a short status, not the payload', async () => {
  const twoMiB = 2 * 1024 * 1024;
  const target = `/tmp/browse-out-large-${Date.now()}.txt`;
  try {
    const status = await handleReadCommand('js', ["'y'.repeat(2 * 1024 * 1024)", '--out', target], bm);
    expect(status).toContain('JS result written:');
    expect(status).toContain(target);
    expect(status).toContain(`(${twoMiB} bytes)`);
    expect(status.length).toBeLessThan(200); // status, not the 2MB payload
    expect(fs.statSync(target).size).toBe(twoMiB);
  } finally {
    // Always remove the temp file, even when an assertion above failed.
    fs.rmSync(target, { force: true });
  }
});
|
||||
|
||||
// A base64 data URL result must be written as the decoded bytes, not as the
// URL text: checked via the PNG signature and the decoded length.
test('js --out decodes a base64 PNG data URL to real bytes', async () => {
  // 1x1 transparent PNG.
  const b64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
  const target = `/tmp/browse-out-png-${Date.now()}.png`;
  try {
    const expression = `'data:image/png;base64,' + '${b64}'`;
    const status = await handleReadCommand('js', [expression, '--out', target], bm);
    const written = fs.readFileSync(target);

    // PNG magic bytes: 89 50 4E 47
    const signature = [written[0], written[1], written[2], written[3]];
    expect(signature).toEqual([0x89, 0x50, 0x4e, 0x47]);

    const decodedLength = Buffer.from(b64, 'base64').length;
    expect(written.length).toBe(decodedLength);
    expect(status).toContain(`(${decodedLength} bytes)`);
  } finally {
    fs.rmSync(target, { force: true });
  }
});
|
||||
|
||||
// --raw turns data-URL decoding off: the file holds the URL text verbatim.
test('js --out --raw writes the literal data-URL string (no decode)', async () => {
  const dataUrl = 'data:text/plain;base64,aGVsbG8=';
  const target = `/tmp/browse-out-raw-${Date.now()}.txt`;
  try {
    const argv = [`'${dataUrl}'`, '--out', target, '--raw'];
    await handleReadCommand('js', argv, bm);
    const written = fs.readFileSync(target, 'utf-8');
    expect(written).toBe(dataUrl);
  } finally {
    fs.rmSync(target, { force: true });
  }
});
|
||||
|
||||
// An invalid base64 payload must reject and must leave no file behind.
test('js --out throws on a malformed base64 data URL instead of writing corrupt bytes', async () => {
  const target = `/tmp/browse-out-bad-${Date.now()}.png`;
  try {
    const attempt = handleReadCommand('js', ["'data:image/png;base64,!!!not-base64!!!'", '--out', target], bm);
    await expect(attempt).rejects.toThrow(/malformed base64/);
    expect(fs.existsSync(target)).toBe(false);
  } finally {
    // Clean up in case the handler did write something despite the contract.
    fs.rmSync(target, { force: true });
  }
});
|
||||
|
||||
// A target under /etc must be refused; any rejection satisfies the test.
test('js --out rejects a path outside the safe directories', async () => {
  const forbidden = '/etc/browse-should-not-write.txt';
  const attempt = handleReadCommand('js', ['1 + 1', '--out', forbidden], bm);
  await expect(attempt).rejects.toThrow();
});
|
||||
|
||||
// The output file's parent directory does not exist yet; the handler is
// expected to create it. (Per the original author's note, validateOutputPath
// tolerates one missing directory level under a safe root such as /tmp —
// that helper is not visible here.)
test('js --out creates a missing parent directory', async () => {
  const freshDir = `/tmp/browse-out-nested-${Date.now()}`;
  const target = `${freshDir}/result.txt`;
  try {
    await handleReadCommand('js', ["'nested'", '--out', target], bm);
    const written = fs.readFileSync(target, 'utf-8');
    expect(written).toBe('nested');
  } finally {
    // Remove the whole directory, not just the file inside it.
    fs.rmSync(freshDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// `eval` takes a script file; with --out its result must land on disk and the
// status line must use the "Eval result written:" prefix.
test('eval --out writes the file result to disk (parity with js)', async () => {
  const scriptPath = `/tmp/browse-eval-out-src-${Date.now()}.js`;
  const target = `/tmp/browse-eval-out-${Date.now()}.txt`;
  fs.writeFileSync(scriptPath, "'from eval'");
  try {
    const status = await handleReadCommand('eval', [scriptPath, '--out', target], bm);
    expect(status).toContain('Eval result written:');
    const written = fs.readFileSync(target, 'utf-8');
    expect(written).toBe('from eval');
  } finally {
    // Remove the script first, then the output, whether or not the test passed.
    for (const leftover of [scriptPath, target]) {
      fs.rmSync(leftover, { force: true });
    }
  }
});
|
||||
|
||||
test('css returns computed property', async () => {
|
||||
const result = await handleReadCommand('css', ['h1', 'color'], bm);
|
||||
// Navy color
|
||||
|
||||
@@ -47,4 +47,15 @@ describe('locateBinary', () => {
|
||||
expect(typeof locateBinary).toBe('function');
|
||||
expect(locateBinary.length).toBe(0);
|
||||
});
|
||||
|
||||
// Static pin on the text of find-browse.ts: the source-checkout fallback
// (binaries built directly under browse/dist/, before ./setup has run) must
// stay in place, so a refactor that drops it trips this test.
test('source-checkout fallback resolves <git-root>/browse/dist/browse[.exe]', () => {
  const nodeFs = require('fs');
  const nodePath = require('path');
  const findBrowseFile = nodePath.join(__dirname, '../src/find-browse.ts');
  const src = nodeFs.readFileSync(findBrowseFile, 'utf-8');

  const requiredSnippets = [
    'Source-checkout fallback',
    "join(root, 'browse', 'dist', 'browse')",
  ];
  for (const snippet of requiredSnippets) {
    expect(src).toContain(snippet);
  }
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as net from 'net';
|
||||
import * as path from 'path';
|
||||
import { __testInternals__ } from '../src/server';
|
||||
|
||||
const polyfillPath = path.resolve(import.meta.dir, '../src/bun-polyfill.cjs');
|
||||
|
||||
@@ -28,6 +29,47 @@ function getFreePort(): Promise<number> {
|
||||
}
|
||||
|
||||
describe('findPort / isPortAvailable', () => {
|
||||
// For an explicitly requested port the error text must separate "binding is
// not permitted" (EPERM) from "the port is taken" (EADDRINUSE).
test('explicit BROWSE_PORT diagnostic distinguishes bind denial from occupied port', () => {
  // Formats the error for port 34567 given a failed probe result.
  const messageFor = (code: string, message: string): string =>
    __testInternals__.formatExplicitPortUnavailableError(34567, {
      available: false,
      code,
      message,
    }).message;

  const blocked = messageFor('EPERM', 'operation not permitted');
  const blockedFragments = [
    'Cannot bind BROWSE_PORT=34567',
    'localhost port binding is blocked',
    'not that the port is occupied',
  ];
  for (const fragment of blockedFragments) {
    expect(blocked).toContain(fragment);
  }

  // The occupied case must match this message exactly.
  const occupied = messageFor('EADDRINUSE', 'address already in use');
  expect(occupied).toBe('[browse] Port 34567 (from BROWSE_PORT env) is in use');
});
|
||||
|
||||
// When at least one sampled port failed with EPERM, the message must point at
// a bind restriction and suggest BROWSE_PORT, not claim every port is busy.
test('random port diagnostic calls out sandbox-style bind denial', () => {
  const attempts = [
    { port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
    { port: 12002, result: { available: false, code: 'EPERM', message: 'operation not permitted' } },
  ];
  const message = __testInternals__.formatRandomPortUnavailableError(attempts).message;

  const expectedFragments = [
    'Cannot bind localhost ports after 2 attempts',
    'Last error: 12002 (EPERM: operation not permitted)',
    'not that every sampled port is occupied',
    'set BROWSE_PORT to an approved port',
  ];
  for (const fragment of expectedFragments) {
    expect(message).toContain(fragment);
  }
});
|
||||
|
||||
// When every sampled port failed with EADDRINUSE, the pre-existing
// "No available port" wording must be kept.
// NOTE(review): the expected text says "5 attempts" although only two attempts
// are passed in — presumably the formatter reports a fixed retry budget rather
// than the array length; confirm against formatRandomPortUnavailableError.
test('random port diagnostic preserves old busy-port meaning when all attempts are occupied', () => {
  const attempts = [
    { port: 11001, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
    { port: 12002, result: { available: false, code: 'EADDRINUSE', message: 'address already in use' } },
  ];
  const message = __testInternals__.formatRandomPortUnavailableError(attempts).message;

  expect(message).toContain('No available port after 5 attempts');
  expect(message).toContain('every sampled port was already in use');
});
|
||||
|
||||
test('isPortAvailable returns true for a free port', async () => {
|
||||
// Use the same isPortAvailable logic from server.ts
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user