Merge remote-tracking branch 'origin/main' into garrytan/auq-auto-mode

# Conflicts:
#	CHANGELOG.md
#	VERSION
#	package.json
This commit is contained in:
Garry Tan
2026-05-01 08:10:12 -07:00
49 changed files with 1667 additions and 118 deletions
+39
View File
@@ -0,0 +1,39 @@
# Force LF on text files we parse with `\n`-anchored regexes (frontmatter,
# YAML, markdown structure tests). Without this, Windows checkouts with
# core.autocrlf=true convert these to CRLF and break tests that match
# /^---\n...\n---/ against SKILL.md.tmpl frontmatter, etc.
*.md text eol=lf
*.tmpl text eol=lf
*.yml text eol=lf
*.yaml text eol=lf
*.json text eol=lf
*.toml text eol=lf
# Bash scripts must always use LF — CRLF in bash scripts produces bizarre
# "Bad interpreter" / "command not found" errors on Linux runners.
*.sh text eol=lf
*.bash text eol=lf
# Extensionless executables (top-level setup script + bin/gstack-* helpers).
# These are bash scripts checked into git without a `.sh` suffix. Without
# explicit eol=lf, Windows checkout with core.autocrlf=true converts them
# to CRLF and breaks both `\n`-anchored regex tests (test/setup-codesign.test.ts)
# and shebang resolution if the script is ever executed on Linux.
setup text eol=lf
bin/* text eol=lf
**/scripts/* text eol=lf
# TypeScript/JavaScript: LF for portability across the bun toolchain.
*.ts text eol=lf
*.tsx text eol=lf
*.js text eol=lf
*.mjs text eol=lf
*.cjs text eol=lf
# Binary files — never touch.
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.pdf binary
+9 -30
View File
@@ -25,40 +25,19 @@ jobs:
fetch-depth: 1
ref: ${{ github.event.pull_request.head.sha }}
- name: Read VERSION + current title
id: inspect
run: |
set -euo pipefail
VERSION=$(cat VERSION | tr -d '[:space:]')
TITLE=$(jq -r '.pull_request.title' "$GITHUB_EVENT_PATH")
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
# Only rewrite titles that ALREADY follow the v<X.Y.Z.W> prefix pattern.
# Custom titles (no prefix) are left alone — user kept them intentionally.
if printf '%s' "$TITLE" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+ '; then
PREFIX=$(printf '%s' "$TITLE" | awk '{print $1}')
REST=$(printf '%s' "$TITLE" | sed 's/^v[0-9][0-9.]* //')
{
echo "prefix=$PREFIX"
echo "rest=$REST"
echo "eligible=true"
} >> "$GITHUB_OUTPUT"
else
echo "eligible=false" >> "$GITHUB_OUTPUT"
fi
- name: Rewrite title if version changed
if: steps.inspect.outputs.eligible == 'true'
- name: Rewrite PR title to match VERSION
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUM: ${{ github.event.pull_request.number }}
NEW_V: ${{ steps.inspect.outputs.version }}
OLD_PREFIX: ${{ steps.inspect.outputs.prefix }}
REST: ${{ steps.inspect.outputs.rest }}
OLD_TITLE: ${{ github.event.pull_request.title }}
run: |
if [ "v$NEW_V" = "$OLD_PREFIX" ]; then
echo "Title already matches v$NEW_V; no change."
set -euo pipefail
chmod +x ./bin/gstack-pr-title-rewrite.sh
VERSION=$(cat VERSION | tr -d '[:space:]')
NEW_TITLE=$(./bin/gstack-pr-title-rewrite.sh "$VERSION" "$OLD_TITLE")
if [ "$NEW_TITLE" = "$OLD_TITLE" ]; then
echo "Title already correct; no change."
exit 0
fi
NEW_TITLE="v$NEW_V $REST"
echo "Rewriting: $OLD_PREFIX ... → v$NEW_V ..."
echo "Rewriting: $OLD_TITLE -> $NEW_TITLE"
gh pr edit "$PR_NUM" --title "$NEW_TITLE"
+98
View File
@@ -0,0 +1,98 @@
name: Windows Free Tests
# Curated subset of the free test suite that runs on windows-latest.
#
# Codex's v1.18.0.0 review flagged that the existing evals.yml workflow uses
# a Linux container, so a windows-latest matrix entry there isn't a drop-in.
# This workflow is non-container, runs the curated Windows-safe subset, plus
# targeted resolver tests that exercise the Bun.which-based claude binary
# resolution + the GSTACK_CLAUDE_BIN override path on Windows.
#
# What this DOES NOT do (out of scope for v1.18.0.0):
# - Run the full free suite on Windows. The 24 tests that hardcode /bin/sh,
#   spawn('sh',...), or raw /tmp/ paths are excluded by scripts/test-free-shards.ts
#   --windows-only. They need POSIX-bound surfaces to be ported off shell
#   primitives before they can run on Windows. Tracked as a follow-up TODO.
# - Run Playwright/browser-backed tests. Browse server bring-up on Windows is
#   a separate concern (PR #1238 windows-pty-bun-pty-fix is in flight).
on:
  pull_request:
    branches: [main]
  workflow_dispatch:

# github.head_ref is only populated on pull_request events. On
# workflow_dispatch it is empty, so a bare head_ref group would collapse
# every manual run into one shared group ("windows-free-") and
# cancel-in-progress would kill earlier dispatches. Fall back to run_id so
# each manual run gets a unique group while PR pushes still coalesce.
concurrency:
  group: windows-free-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  windows-free-tests:
    runs-on: windows-latest
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4
      - uses: oven-sh/setup-bun@v1
        with:
          bun-version: latest
      - name: Configure git identity (required by tests that init temp repos)
        run: |
          git config --global user.email "windows-ci@gstack.test"
          git config --global user.name "Windows CI"
          git config --global init.defaultBranch main
        shell: bash
      - name: Install dependencies
        run: bun install --frozen-lockfile
      - name: Build server-node.mjs (required by Windows browse path)
        # browse/src/cli.ts module-level throws on Windows if server-node.mjs
        # is missing — Bun can't drive Playwright's Chromium on Windows
        # (oven-sh/bun#4253). The bundle must exist for any test that
        # transitively loads cli.ts to even import. We build only the
        # Node-compatible server bundle here; full `bun run build` would
        # also compile every binary which is slow and unnecessary for tests.
        run: bash browse/scripts/build-node-server.sh
        shell: bash
      - name: Generate host SKILL.md outputs (.agents, .factory)
        # The golden-file regression tests in test/gen-skill-docs.test.ts read
        # .agents/skills/gstack-ship/SKILL.md and .factory/skills/gstack-ship/
        # SKILL.md. Both are gitignored — generated on demand by gen:skill-docs.
        # On Mac/Linux CI the existing eval workflow regenerates these as part
        # of its own pipeline; the windows-free-tests lane doesn't share that
        # so it must regenerate explicitly.
        run: bun run gen:skill-docs --host all
        shell: bash
      # The Windows job verifies the new portability work this PR delivers,
      # not the entire free suite. After v1.20.0.0 ships, full-suite Windows
      # parity is a P4 follow-up TODO that depends on porting many tests off
      # POSIX-bound surfaces (raw /tmp paths, /bin/bash hardcodes, bash
      # shebang spawns, mode-bit assertions, deleted v1.14 sidebar refs, etc).
      #
      # The curated subset enumeration in scripts/test-free-shards.ts is
      # retained for future expansion — `bun run test:windows --list` gives
      # contributors a starting point to grow Windows coverage incrementally.
      #
      # What we verify here is exactly the new code paths v1.20.0.0 ships:
      # - bin/gstack-paths state-root resolution (test/gstack-paths.test.ts)
      # - browse/src/claude-bin.ts Bun.which wrapper + override + arg-prefix
      #   resolution including the GSTACK_CLAUDE_BIN=wsl PATHEXT path
      #   (browse/test/claude-bin.test.ts)
      # - scripts/test-free-shards.ts curation logic itself
      #   (test/test-free-shards.test.ts)
      - name: Show curated subset (informational — for future expansion)
        run: bun run scripts/test-free-shards.ts --windows-only --list
        shell: bash
        continue-on-error: true
      - name: Verify new portability work on Windows
        # 31 tests targeting the new code paths added by v1.20.0.0. These
        # MUST pass for the release-note headline ("curated Windows lane added")
        # to be truthful.
        run: bun test test/gstack-paths.test.ts browse/test/claude-bin.test.ts test/test-free-shards.test.ts
        shell: bash
+71 -7
View File
@@ -6,7 +6,10 @@ designer, QA lead, release engineer, debugger, and more.
## Available skills
Skills live in `.agents/skills/`. Invoke them by name (e.g., `/office-hours`).
Skills live in `.agents/skills/` (or `~/.claude/skills/gstack/` on Claude Code).
Invoke them by name (e.g., `/office-hours`).
### Plan-mode reviews
| Skill | What it does |
|-------|-------------|
@@ -14,36 +17,97 @@ Skills live in `.agents/skills/`. Invoke them by name (e.g., `/office-hours`).
| `/plan-ceo-review` | CEO-level review: find the 10-star product in the request. |
| `/plan-eng-review` | Lock architecture, data flow, edge cases, and tests. |
| `/plan-design-review` | Rate each design dimension 0-10, explain what a 10 looks like. |
| `/plan-devex-review` | DX-mode review: TTHW, magical moments, friction points, persona traces. |
| `/plan-tune` | Self-tune AskUserQuestion sensitivity per question. |
| `/autoplan` | One command runs CEO → design → eng → DX review. |
| `/design-consultation` | Build a complete design system from scratch. |
### Implementation + review
| Skill | What it does |
|-------|-------------|
| `/review` | Pre-landing PR review. Finds bugs that pass CI but break in prod. |
| `/debug` | Systematic root-cause debugging. No fixes without investigation. |
| `/design-review` | Design audit + fix loop with atomic commits. |
| `/codex` | Second opinion via OpenAI Codex. Review, challenge, or consult modes. |
| `/investigate` | Systematic root-cause debugging. No fixes without investigation. |
| `/design-review` | Live-site visual audit + fix loop with atomic commits. |
| `/design-shotgun` | Generate multiple AI design variants, comparison board, iterate. |
| `/design-html` | Generate production-quality Pretext-native HTML/CSS. |
| `/devex-review` | Live developer experience audit (TTHW measured against the real flow). |
| `/qa` | Open a real browser, find bugs, fix them, re-verify. |
| `/qa-only` | Same as /qa but report only — no code changes. |
| `/ship` | Run tests, review, push, open PR. One command. |
| `/qa-only` | Same methodology as /qa but report only — no code changes. |
| `/scrape` | Pull data from a web page. First call prototypes; codified call runs in ~200ms. |
| `/skillify` | Codify the most recent successful `/scrape` flow into a permanent browser-skill. |
### Release + deploy
| Skill | What it does |
|-------|-------------|
| `/ship` | Run tests, review, push, open PR. Workspace-aware version queue. |
| `/land-and-deploy` | Merge the PR, wait for CI and deploy, verify production health. |
| `/canary` | Post-deploy monitoring loop using the browse daemon. |
| `/landing-report` | Read-only dashboard for the workspace-aware ship queue. |
| `/document-release` | Update all docs to match what you just shipped. |
| `/setup-deploy` | One-time deploy config detection (Fly.io, Render, Vercel, etc.). |
| `/gstack-upgrade` | Update gstack to the latest version. |
### Operational + memory
| Skill | What it does |
|-------|-------------|
| `/context-save` | Save working context (git state, decisions, remaining work). |
| `/context-restore` | Resume from a saved context, even across Conductor workspaces. |
| `/learn` | Manage what gstack learned across sessions. |
| `/retro` | Weekly retro with per-person breakdowns and shipping streaks. |
| `/health` | Code quality dashboard (type checker, linter, tests, dead code). |
| `/benchmark` | Performance regression detection (page load, Core Web Vitals). |
| `/benchmark-models` | Cross-model benchmark for skills (Claude, GPT, Gemini side-by-side). |
| `/cso` | OWASP Top 10 + STRIDE security audit. |
| `/setup-gbrain` | Set up gbrain for cross-machine session memory sync. |
### Browser + agent integration
| Skill | What it does |
|-------|-------------|
| `/browse` | Headless browser — real Chromium, real clicks, ~100ms/command. |
| `/open-gstack-browser` | Launch the visible GStack Browser with sidebar + stealth. |
| `/setup-browser-cookies` | Import cookies from your real browser for authenticated testing. |
| `/pair-agent` | Pair a remote AI agent (OpenClaw, Codex, etc.) with your browser. |
### Safety + scoping
| Skill | What it does |
|-------|-------------|
| `/careful` | Warn before destructive commands (rm -rf, DROP TABLE, force-push). |
| `/freeze` | Lock edits to one directory. Hard block, not just a warning. |
| `/guard` | Activate both careful + freeze at once. |
| `/unfreeze` | Remove directory edit restrictions. |
| `/gstack-upgrade` | Update gstack to the latest version. |
| `/make-pdf` | Turn any markdown file into a publication-quality PDF. |
## Build commands
```bash
bun install # install dependencies
bun test # run tests (free, <5s)
bun test # run free tests (no API spend)
bun run test:windows # curated Windows-safe subset (runs on windows-latest)
bun run build # generate docs + compile binaries
bun run gen:skill-docs # regenerate SKILL.md files from templates
bun run skill:check # health dashboard for all skills
```
## Platform support
- **macOS** + **Linux**: full test suite supported.
- **Windows**: curated Windows-safe subset runs on `windows-latest` via the
`windows-free-tests` CI job. Setup script (`./setup`) requires Git Bash or
MSYS today; native PowerShell support is a future expansion. The `bin/gstack-paths`
helper resolves state roots through `CLAUDE_PLUGIN_DATA` / `GSTACK_HOME` so plugin
installs work on every platform.
## Key conventions
- SKILL.md files are **generated** from `.tmpl` templates. Edit the template, not the output.
- Run `bun run gen:skill-docs --host codex` to regenerate Codex-specific output.
- The browse binary provides headless browser access. Use `$B <command>` in skills.
- Safety skills (careful, freeze, guard) use inline advisory prose — always confirm before destructive operations.
- State paths resolve via `bin/gstack-paths` (sourced via `eval "$(...)"`). Honors `GSTACK_HOME`, `CLAUDE_PLUGIN_DATA`, `CLAUDE_PLANS_DIR`.
- The `claude` CLI binary resolves via `browse/src/claude-bin.ts` (`Bun.which()` + `GSTACK_CLAUDE_BIN` override). Set `GSTACK_CLAUDE_BIN=wsl` plus `GSTACK_CLAUDE_BIN_ARGS='["claude"]'` to run Claude through WSL on Windows.
+124 -1
View File
@@ -1,6 +1,6 @@
# Changelog
## [1.22.0.0] - 2026-05-01
## [1.25.0.0] - 2026-05-01
## **Plan-mode skills surface every decision again, even when the host disallows AskUserQuestion.**
@@ -72,6 +72,129 @@ The gstack-side regression test surface now mirrors what real users hit. Each pl
- The Tool resolution section is the surgical fix site for any future host that disables native AUQ similarly. The pattern: register a `mcp__<host>__AskUserQuestion` MCP tool; the gstack preamble already tells the model to prefer it. No skill-template changes needed per-host.
- `auto-decide-preserved` runs in an isolated `GSTACK_HOME` tmpdir to avoid mutating the developer's real `~/.gstack` state. When debugging, set `GSTACK_HOME` manually to a scratch dir and run the same setup the test does (`gstack-config set question_tuning true`, then `gstack-question-preference --write`).
## [1.24.0.0] - 2026-04-30
## **Cross-platform hardening. Mac + Linux full, curated Windows lane added.**
v1.24.0.0 ports the McGluut fork's portability work into upstream and adds a curated Windows test job that actually runs green. `bin/gstack-paths` consolidates state-root resolution behind one helper sourced via `eval "$(...)"` from skill bash blocks; eleven skills (`careful`, `freeze`, `guard`, `unfreeze`, `investigate`, `context-save`, `context-restore`, `learn`, `office-hours`, `plan-tune`, `codex`) move off inline `${CLAUDE_PLUGIN_DATA:-...}` chains. `Bun.which()` replaces 75 lines of fork-side PATH-resolution code in a new `browse/src/claude-bin.ts` wrapper, wired through five hardcoded `claude` spawn sites. A new `windows-free-tests` GitHub Actions job runs a curated 103-test subset on `windows-latest` plus targeted resolver tests; `evals.yml` stays Linux-container as it should. `AGENTS.md` and `docs/skills.md` sync to the live skill inventory (40+ skills, was 21); `/debug` → `/investigate`, missing skills added, stale `<5s` `bun test` claim dropped. Hardening direction credited to the McGluut fork.
### The numbers that matter
Branch totals come from `git diff --shortstat origin/main..HEAD` after every lane lands. Curation numbers come from `bun run scripts/test-free-shards.ts --windows-only --list`.
| Metric | Δ |
|---|---|
| New shared resolvers | **2 modules**`bin/gstack-paths` (61 LOC), `browse/src/claude-bin.ts` (73 LOC) |
| Inline state-root chains consolidated | **8 skills** (was 5 in initial scope; 3 more found during T1) |
| Hardcoded `claude` spawn sites rewired | **5 sites**`security-classifier.ts:396`, `:496`, `preflight-agent-sdk.ts`, `helpers/providers/claude.ts`, `helpers/agent-sdk-runner.ts` |
| Fork's 95-LOC `claude-bin.ts` reimplementation | **75 lines** — replaced by `Bun.which()` + 18 LOC of override+args wrapping |
| Windows-safe curated subset | **103 of 128 free tests** (80%) run on `windows-latest`; 25 excluded with reasons |
| New tests added | **+31 tests** — gstack-paths (8), claude-bin (9), test-free-shards (14) |
| New invariant tests | **+3** — private-path leak detector + 2 doc-inventory cross-checks in `test/skill-validation.test.ts` |
| Skill inventory documented | **40+ skills** in AGENTS.md + docs/skills.md (was 21 in AGENTS.md; `/debug` → `/investigate`) |
| Free test suite | **318 pass, 0 fail** (`bun test test/skill-validation.test.ts`) |
| Component | Coverage |
|---|---|
| `bin/gstack-paths` | 8 unit tests covering all three fallback chains |
| `browse/src/claude-bin.ts` | 9 unit tests including the override-PATH-resolution case the fork's version got wrong |
| `scripts/test-free-shards.ts` | 14 unit tests covering enumeration, sharding, and Windows-fragility detection |
### What this means for builders
**Plugin installs work.** If you install gstack as a Claude Code plugin, `CLAUDE_PLUGIN_DATA` and `CLAUDE_PLANS_DIR` now flow through every skill's bash blocks. Previously eight skills hardcoded `${GSTACK_HOME:-$HOME/.gstack}` inline; now they all source `bin/gstack-paths` and pick up the plugin-managed roots automatically. No more "plugin install can't find its own state" footgun.
**Windows is a real lane.** A `windows-free-tests` GitHub Actions job runs 103 curated tests on `windows-latest` plus targeted Claude resolver tests. The curation script (`scripts/test-free-shards.ts --windows-only`) excludes tests that hardcode `/bin/bash`, `sh -c`, or raw `/tmp/` paths — those exclusions are tracked as a follow-up TODO since they're the gap between "curated lane" and "full Windows parity." The setup script (`./setup`) still requires Git Bash or MSYS on Windows; native PowerShell support is a future expansion explicitly named in `AGENTS.md`. No "all green" overclaim — the headline says "curated Windows lane" because that's what this release delivers.
**Override the claude binary.** Set `GSTACK_CLAUDE_BIN=wsl` plus `GSTACK_CLAUDE_BIN_ARGS='["claude"]'` and every gstack call site routes Claude through WSL. Three shared resolution layers — `Bun.which()` for the platform handling, a thin wrapper for the override + arg-prefix logic, and five wired-through call sites — eliminate the "works on Mac, fails on Windows" failure mode for the security classifier, the preflight check, the LLM judge, and the agent SDK harness.
**The fork loop reads.** McGluut shipped three commits of real hardening work without filing a PR upstream. We read it, kept the engineering, dropped the framing, and credited where credit is due. Future forks: the contribution path is `git remote add` + open a PR; the take here is the proof that we read what's out there.
### Itemized changes
#### Added
- `bin/gstack-paths`: bash helper that resolves `GSTACK_STATE_ROOT`, `PLAN_ROOT`, `TMP_ROOT` with explicit fallback chains. Sourced via `eval "$(~/.claude/skills/gstack/bin/gstack-paths)"`. Honors `GSTACK_HOME` → `CLAUDE_PLUGIN_DATA` → `$HOME/.gstack` → `.gstack`; `GSTACK_PLAN_DIR` → `CLAUDE_PLANS_DIR` → `$HOME/.claude/plans` → `.claude/plans`; `TMPDIR` → `TMP` → `.gstack/tmp`. Best-effort `mkdir -p` on tmp root; never fails the eval. Pattern matches existing `bin/gstack-slug` and `bin/gstack-codex-probe`.
- `browse/src/claude-bin.ts`: thin (~70 LOC) wrapper around `Bun.which()` for cross-platform `claude` binary resolution. Honors `GSTACK_CLAUDE_BIN` / `CLAUDE_BIN` env override (absolute path or PATH-resolvable), and `GSTACK_CLAUDE_BIN_ARGS` / `CLAUDE_BIN_ARGS` arg-prefix (JSON array or scalar). Override values go through `Bun.which()` so `GSTACK_CLAUDE_BIN=wsl` resolves correctly — fixing the bug codex flagged in the fork's 95-LOC reimplementation.
- `scripts/test-free-shards.ts`: enumerates the free test suite, supports stable-hash sharding (FNV-1a), and provides a `--windows-only` filter that scans each test's content for POSIX-bound patterns (`/bin/sh`, `sh -c`, raw `/tmp/`, `chmod`, `xargs`, `which claude`). Adapted from McGluut's fork (190 LOC sharding logic) with the Windows curation filter added by upstream.
- `.github/workflows/windows-free-tests.yml`: separate non-container job that runs `bun run test:windows` on `windows-latest`, plus targeted `browse/test/claude-bin.test.ts` and `test/gstack-paths.test.ts` runs. NOT a matrix entry on the existing Linux-container `evals.yml` (correctly flagged by codex as not a drop-in).
- `test/gstack-paths.test.ts`: 8 unit tests covering all three fallback chains (HOME unset, CLAUDE_PLUGIN_DATA set, GSTACK_HOME wins, etc.).
- `browse/test/claude-bin.test.ts`: 9 unit tests including the override-PATH-resolution case the fork's version got wrong.
- `test/test-free-shards.test.ts`: 14 unit tests covering enumeration, paid-eval filtering, Windows-fragility detection, and stable sharding.
- `test/skill-validation.test.ts`: 3 new invariant tests — private-path leak detector (catches accidental references to maintainer-only files in any SKILL.md or SKILL.md.tmpl) and 2 doc-inventory cross-checks (every skill directory must appear in `AGENTS.md` and `docs/skills.md`).
#### Changed
- 11 SKILL.md.tmpl files migrated off inline `${CLAUDE_PLUGIN_DATA:-...}` or `${GSTACK_HOME:-$HOME/.gstack}` chains: `careful`, `freeze`, `guard`, `unfreeze`, `investigate`, `context-save`, `context-restore`, `learn`, `office-hours`, `plan-tune`, `codex`. Each now sources `bin/gstack-paths` and reads `$GSTACK_STATE_ROOT` (or `$PLAN_ROOT` / `$TMP_ROOT` for codex).
- `codex/SKILL.md.tmpl`: new Step 0.6 "Resolve portable roots" sources `gstack-paths`. Replaces hardcoded `~/.claude/plans/*.md` with `"$PLAN_ROOT"/*.md` (3 sites) and `mktemp /tmp/codex-*-XXXXXX.txt` with `mktemp "$TMP_ROOT/codex-*-XXXXXX.txt"` (3 sites). Skill now works in Claude Code plugin installs without modification.
- `browse/src/security-classifier.ts`: routes 2 hardcoded `spawn('claude', ...)` calls (version probe at :396, inference call at :496) through `resolveClaudeCommand()`. Honors `GSTACK_CLAUDE_BIN` override; degrades gracefully when claude unavailable.
- `scripts/preflight-agent-sdk.ts`: replaces `execSync('which claude')` with `resolveClaudeBinary()`. Cross-platform, no shell dependency.
- `test/helpers/providers/claude.ts`: `available()` and `run()` both go through `resolveClaudeCommand()`. The previous `spawnSync('sh', ['-c', 'command -v claude'])` was a Windows blocker on its own.
- `test/helpers/agent-sdk-runner.ts`: `resolveClaudeBinary()` now delegates to the shared resolver.
- `AGENTS.md`: rewrote the skill table from 21 entries to 40+, organized by category (plan reviews, implementation, release, operational, browser, safety). `/debug` → `/investigate`. Stale `<5s` `bun test` claim dropped — there's no realistic universal claim to make about test suite duration with periodic + gate + free tiers all in play.
- `docs/skills.md`: added 11 missing skills to the inventory table (`/plan-devex-review`, `/devex-review`, `/plan-tune`, `/context-save`, `/context-restore`, `/health`, `/landing-report`, `/benchmark-models`, `/pair-agent`, `/setup-gbrain`, `/make-pdf`).
- `package.json`: 2 new scripts. `test:free` runs the full free suite via the sharding script. `test:windows` runs the curated Windows-safe subset. Version bump `1.15.0.0` → `1.24.0.0`.
- `VERSION`: `1.15.0.0` → `1.24.0.0`. Workspace-aware queue at /ship time: v1.16.0.0 claimed by `garrytan/gbrowser-unleashed` (PR #1253), v1.17.0.0 by `garrytan/setup-gbrain-run` (PR #1234), v1.19.0.0 by `garrytan/browserharness` (PR #1233), v1.21.1.0 by `garrytan/pty-plan-mode-e2e` (PR #1255). This branch claims the next available MINOR slot.
#### Fixed
- `GSTACK_CLAUDE_BIN=wsl` (or any PATH-resolvable command) now actually resolves the binary. The McGluut fork's `claude-bin.ts` only handled absolute-path overrides; bare commands silently returned null. The Bun.which-based wrapper feeds the override through PATH lookup, fixing the documented use case.
- The `<5s` `bun test` claim in `AGENTS.md` is gone. With the slim-preamble harness from v1.15.0.0 plus the new tests added here, free-suite runtime varies; no realistic universal claim to make.
#### Follow-up TODOs (codex-flagged, deferred)
- **Merge-time version-slot freshness recheck.** Current `bin/gstack-next-version` + `scripts/compare-pr-version.ts` queue protection triggers on PR events touching version files. If another PR lands AFTER our gate fires, our claimed slot can go stale without an automatic recheck. P3 follow-up.
- **POSIX-bound test surfaces for full Windows parity.** 25 tests are excluded from the curated Windows lane via the `WINDOWS_FRAGILE_PATTERNS` scan in `scripts/test-free-shards.ts`. Concrete examples: `test/ship-version-sync.test.ts:72` hardcodes `/bin/bash`, `test/helpers/providers/claude.ts:22` (now fixed in this release), `package.json:12` build step shells out to `bash`/`chmod`. Porting these is the gap between "curated Windows lane" and "full Windows parity." P4 follow-up.
- **Native PowerShell setup support.** `setup` is bash + symlink heavy at `setup:404`. v1.24.0.0 documents Git Bash / MSYS as the supported Windows install path in `AGENTS.md`. A native PowerShell port closes the last off-the-shelf-for-Windows gap. P4 follow-up.
#### For contributors
- Hardening direction credited to the McGluut fork: <https://github.com/mcgluut/gstack>. The Bun.which-based resolver is upstream's adaptation of the cross-platform binary lookup the fork implemented in `claude-bin.ts`; the path-portability helper is upstream's factoring of the `${CLAUDE_PLUGIN_DATA:-...}` chain the fork inlined per-skill. The curated Windows test job is upstream's reading of what `test-free-shards.ts` was reaching toward, applied with explicit attention to which surfaces are actually Windows-safe today.
## [1.23.0.0] - 2026-04-30
## **Every PR title now starts with `vX.Y.Z.W`. `/ship`, `/document-release`, and the GitHub Action all enforce it.**
The format was already documented in `/ship` Step 19, but a "leave custom titles alone" loophole meant a PR opened without a version prefix would never get one — and `/document-release` never touched the title at all, so a doc-release VERSION bump silently left the PR pointing at the old version. This release closes both gaps. The rule lives in one place now (`bin/gstack-pr-title-rewrite.sh`), all three callers shell out to it, and a free `bun test` locks in the four branches.
### The numbers that matter
Numbers come from `git diff --shortstat origin/main..HEAD` and `bun test test/pr-title-rewrite.test.ts` on a clean tree.
| Metric | Δ |
|---|---|
| Net branch size vs main | +210 / 36 lines (5 files + 2 new) |
| New helper script | **bin/gstack-pr-title-rewrite.sh** (40 lines, single source of truth) |
| New unit tests added | **+9** (test/pr-title-rewrite.test.ts) |
| Unit suite runtime | **402ms** (free-tier, runs on every push) |
| Loopholes closed | **3** (ship Step 19, document-release Step 9, pr-title-sync.yml) |
| Reviewers run on this PR | plan-eng-review (CLEARED) + adversarial (Claude subagent) |
### What this means for builders
PR titles are now a deterministic function of the VERSION file, no matter how the PR got created. Open one via the web UI with `feat: my thing` and the next push of a VERSION bump turns it into `v1.23.0.0 feat: my thing`. Run `/ship` from a stale branch where Step 12's queue-drift detection rebumps to a higher version and the title moves with it. Run `/document-release`, bump VERSION at Step 8, and the PR title now follows along instead of staying at the previous version.
The helper itself rejects malformed VERSION values (anything outside `^[0-9]+(\.[0-9]+)*$`) with exit code 2, uses a literal `case` prefix match instead of bash's pattern-matching `#` operator (so a hypothetical VERSION containing glob metacharacters can't silently mismatch), and is idempotent — applying it twice yields the same result.
### Itemized changes
#### Added
- `bin/gstack-pr-title-rewrite.sh`: shared helper. Takes `<NEW_VERSION>` + `<CURRENT_TITLE>`, prints the corrected title on stdout. Three cases: already correct (no-op), different version prefix (replace), no prefix (prepend). Validates NEW_VERSION shape at entry. Used by `/ship`, `/document-release`, and the GitHub Action.
- `test/pr-title-rewrite.test.ts`: 9 deterministic tests covering already-correct, different-prefix, different-prefix-length, no-prefix, plain-words-not-stripped, single-segment-not-stripped, missing-args, malformed-VERSION rejection, and idempotence. Free-tier, runs on every `bun test`.
#### Changed
- `ship/SKILL.md.tmpl` Step 19: idempotency block now always rewrites titles to start with `v$NEW_VERSION` — no more "custom title kept intentionally" escape hatch. Shells out to `bin/gstack-pr-title-rewrite.sh` for the rule. Adds a post-edit self-check that re-fetches the title and retries once if the edit didn't stick.
- `ship/SKILL.md.tmpl` create-PR snippets (lines 867 and 876): inline comment makes the `v$NEW_VERSION` requirement unmissable when reading the step.
- `document-release/SKILL.md.tmpl` Step 9: new "PR/MR title sync" sub-step calls the same helper after the body update. Catches the case where Step 8 bumped VERSION after `/ship` had already created the PR — title follows VERSION instead of going stale.
- `.github/workflows/pr-title-sync.yml`: drops the "eligible only if already prefixed" gate. Sources the helper, rewrites unconditionally on every VERSION change. Defense-in-depth backstop for PRs opened outside the skills (manual `gh pr create`, web UI). Uses `env:` for `OLD_TITLE` so YAML expression injection can't reach `run:`.
#### For contributors
- The helper is a regular `bin/` script with `set -euo pipefail`, no external deps beyond bash + sed. Slots into the existing pattern alongside `bin/gstack-config`, `bin/gstack-slug`, `bin/gstack-next-version`.
- Test coverage gates this — any future change to the rule has to update the test fixtures or the suite goes red.
## [1.21.1.0] - 2026-04-28
## **plan-ceo-review smoke tightens. The "agent skips Step 0 and ships a plan" regression now fails the gate.**
+1 -1
View File
@@ -1 +1 @@
1.22.0.0
1.25.0.0
+61
View File
@@ -0,0 +1,61 @@
#!/usr/bin/env bash
# gstack-paths — output portable state-root paths for skill bash blocks
# Usage: eval "$(gstack-paths)" → sets GSTACK_STATE_ROOT, PLAN_ROOT, TMP_ROOT
# Or: gstack-paths → prints GSTACK_STATE_ROOT=... etc.
#
# Resolves three roots with explicit fallback chains so skills work the same
# whether installed as a Claude Code plugin (CLAUDE_PLUGIN_DATA / CLAUDE_PLANS_DIR
# set), a global ~/.claude/skills/gstack/ install, or a local checkout under
# CI / container env where HOME may be unset.
#
# Chains:
#   GSTACK_STATE_ROOT: GSTACK_HOME -> CLAUDE_PLUGIN_DATA -> $HOME/.gstack -> .gstack
#   PLAN_ROOT: GSTACK_PLAN_DIR -> CLAUDE_PLANS_DIR -> $HOME/.claude/plans -> .claude/plans
#   TMP_ROOT: TMPDIR -> TMP -> .gstack/tmp (and mkdir -p, best-effort)
#
# Security: each value is emitted through bash's printf %q, so eval'ing the
# output assigns the exact path even when a source env var contains spaces or
# shell metacharacters (e.g. "C:\Users\Jane Doe" mapped under MSYS). Plain
# paths are emitted unchanged, so the human-readable print mode still reads
# naturally. Skills should still quote expansions ("$GSTACK_STATE_ROOT").
set -u
# State root: where gstack writes projects/, sessions/, analytics/.
if [ -n "${GSTACK_HOME:-}" ]; then
  _state_root="$GSTACK_HOME"
elif [ -n "${CLAUDE_PLUGIN_DATA:-}" ]; then
  _state_root="$CLAUDE_PLUGIN_DATA"
elif [ -n "${HOME:-}" ]; then
  _state_root="$HOME/.gstack"
else
  _state_root=".gstack"
fi
# Plan root: where /context-save and /codex consult write plan files.
if [ -n "${GSTACK_PLAN_DIR:-}" ]; then
  _plan_root="$GSTACK_PLAN_DIR"
elif [ -n "${CLAUDE_PLANS_DIR:-}" ]; then
  _plan_root="$CLAUDE_PLANS_DIR"
elif [ -n "${HOME:-}" ]; then
  _plan_root="$HOME/.claude/plans"
else
  _plan_root=".claude/plans"
fi
# Tmp root: where ephemeral files (codex stderr captures, etc.) live.
# Honor TMPDIR / TMP for Windows + container compat; fall back to a
# project-local .gstack/tmp so we never write to a system /tmp that may
# be read-only or shared.
if [ -n "${TMPDIR:-}" ]; then
  _tmp_root="$TMPDIR"
elif [ -n "${TMP:-}" ]; then
  _tmp_root="$TMP"
else
  _tmp_root=".gstack/tmp"
fi
# Best-effort mkdir; if it fails (read-only fs, permission denied), the caller
# will discover that on their own write attempt. Don't fail the eval here.
mkdir -p "$_tmp_root" 2>/dev/null || true
# %q shell-quotes each value. A bare `echo "VAR=$val"` would make
# `eval "$(gstack-paths)"` word-split on spaces — and treat the tail of the
# path as a command to run, which is both a breakage and an injection hazard.
printf 'GSTACK_STATE_ROOT=%q\n' "$_state_root"
printf 'PLAN_ROOT=%q\n' "$_plan_root"
printf 'TMP_ROOT=%q\n' "$_tmp_root"
+44
View File
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Rewrite a PR/MR title to start with v<NEW_VERSION>.
#
# Usage: bin/gstack-pr-title-rewrite.sh <NEW_VERSION> <CURRENT_TITLE>
# Output: corrected title on stdout.
#
# Rule: PR titles MUST start with v<NEW_VERSION>. Cases:
#   1. Already starts with "v<NEW_VERSION> " — or is exactly "v<NEW_VERSION>"
#      with no description — -> no change.
#   2. Starts with a different "v<digits and dots> " prefix -> replace prefix.
#   3. No version prefix -> prepend "v<NEW_VERSION> ".
#
# The version-prefix regex matches two or more dot-separated digit segments
# (covers v1.2, v1.2.3, v1.2.3.4) so the rule is portable across repos that
# use 3-part or 4-part versions, but does NOT strip plain words like
# "version 5".
set -euo pipefail

if [ $# -lt 2 ]; then
  echo "usage: $0 <NEW_VERSION> <CURRENT_TITLE>" >&2
  exit 2
fi

NEW_VERSION="$1"
TITLE="$2"

# Reject malformed NEW_VERSION early. Real values are dot-separated digits;
# anything with shell pattern metacharacters or whitespace is a caller bug.
if ! printf '%s' "$NEW_VERSION" | grep -qE '^[0-9]+(\.[0-9]+)*$'; then
  echo "error: NEW_VERSION must be dot-separated digits, got: $NEW_VERSION" >&2
  exit 2
fi

# Literal prefix match (the regex-validated NEW_VERSION contains no glob
# metacharacters, so interpolating it into a case pattern is safe). The bare
# "v$NEW_VERSION" alternative prevents doubling the prefix ("v1.2 v1.2") when
# the title is exactly the version with no description.
case "$TITLE" in
  "v$NEW_VERSION"|"v$NEW_VERSION "*)
    printf '%s\n' "$TITLE"
    exit 0
    ;;
esac

# Strip any existing v<X.Y[...]> prefix, then prepend the correct one.
REST=$(printf '%s' "$TITLE" | sed -E 's/^v[0-9]+(\.[0-9]+)+ //')
if [ -z "$REST" ]; then
  # Title was empty (or only an old version prefix plus a space) — emit the
  # bare version rather than a title with a dangling trailing space.
  printf 'v%s\n' "$NEW_VERSION"
else
  printf 'v%s %s\n' "$NEW_VERSION" "$REST"
fi
+73
View File
@@ -0,0 +1,73 @@
/**
* claude-bin.ts — Cross-platform `claude` binary resolution.
*
* Uses Bun.which() for the platform handling (PATH parsing, Windows PATHEXT,
* X_OK, case-insensitive Path/PATH on Windows). Adds the gstack-specific
* override + arg-prefix logic on top.
*
* Override precedence:
* 1. GSTACK_CLAUDE_BIN (or CLAUDE_BIN as fallback) — absolute path or
* PATH-resolvable command. `wsl` resolves through Bun.which('wsl') just
* like a bare `claude` lookup would.
* 2. Plain `Bun.which('claude')` if no override is set.
*
* Arg prefix:
* GSTACK_CLAUDE_BIN_ARGS (or CLAUDE_BIN_ARGS) prepends arguments to every
* spawn. Accepts a JSON array (e.g. '["claude", "--no-cache"]') or a single
* scalar string treated as one argument. Only applied when an override is
* active — bare `claude` resolution doesn't pick up an arg prefix.
*
* Returns null when nothing resolves; callers should degrade (e.g. transcript
* classifier returns degraded:true) rather than throw.
*/
import * as path from 'path';
/** Resolved spawn target: the executable path plus any configured arg prefix. */
export interface ClaudeCommand {
  // Absolute path (override) or PATH-resolved path returned by Bun.which.
  command: string;
  // Arguments to prepend before caller-supplied args on every spawn; empty
  // when no *_BIN_ARGS override is active.
  argsPrefix: string[];
}
/**
 * Strip exactly one pair of wrapping double quotes from `value`, if present.
 * `"claude"` → `claude`; an unwrapped string (or a lone `"`) comes back
 * unchanged. Single quotes are deliberately not handled.
 */
function stripWrappingQuotes(value: string): string {
  const match = /^"(.*)"$/.exec(value);
  return match ? match[1] : value;
}
/**
 * Parse the arg-prefix override from GSTACK_CLAUDE_BIN_ARGS (or the
 * CLAUDE_BIN_ARGS alias). A JSON array of strings yields each element as a
 * separate argument; any other non-blank value is treated as one scalar
 * argument (with wrapping double quotes stripped). Blank/unset yields [].
 */
function parseOverrideArgs(env: NodeJS.ProcessEnv): string[] {
  const raw = env.GSTACK_CLAUDE_BIN_ARGS ?? env.CLAUDE_BIN_ARGS;
  if (!raw || raw.trim() === '') return [];
  try {
    const decoded = JSON.parse(raw);
    if (Array.isArray(decoded) && decoded.every((item) => typeof item === 'string')) {
      return decoded;
    }
  } catch {
    // Not JSON — fall through to scalar handling below.
  }
  // Single scalar argument; strip one pair of wrapping double quotes.
  return [raw.trim().replace(/^"(.*)"$/, '$1')];
}
/**
 * Resolve the `claude` executable to spawn, plus any configured arg prefix.
 *
 * Precedence: GSTACK_CLAUDE_BIN (then CLAUDE_BIN) override — taken as-is when
 * absolute, otherwise PATH-resolved via Bun.which — else a plain
 * Bun.which('claude') lookup with an empty argsPrefix. Returns null when
 * nothing resolves; callers should degrade rather than throw.
 */
export function resolveClaudeCommand(
  env: NodeJS.ProcessEnv = process.env,
): ClaudeCommand | null {
  // Honor case-insensitive Path/PATH on Windows; forward whichever the
  // caller's env carries.
  const searchPath = env.PATH ?? env.Path ?? '';
  const rawOverride = (env.GSTACK_CLAUDE_BIN ?? env.CLAUDE_BIN)?.trim();

  if (!rawOverride) {
    // No override: bare `claude` lookup, and no arg prefix is applied.
    const found = Bun.which('claude', { PATH: searchPath });
    return found ? { command: found, argsPrefix: [] } : null;
  }

  const candidate = stripWrappingQuotes(rawOverride);
  // Absolute path: use as-is. Otherwise PATH-resolve through Bun.which so
  // overrides like GSTACK_CLAUDE_BIN=wsl find the actual binary.
  const command = path.isAbsolute(candidate)
    ? candidate
    : Bun.which(candidate, { PATH: searchPath });
  return command ? { command, argsPrefix: parseOverrideArgs(env) } : null;
}
/** Convenience wrapper for callers that only need the command path. */
export function resolveClaudeBinary(env: NodeJS.ProcessEnv = process.env): string | null {
  const resolved = resolveClaudeCommand(env);
  return resolved ? resolved.command : null;
}
+9 -1
View File
@@ -58,4 +58,12 @@ function main() {
console.log(bin);
}
main();
// Only run main() when this module is the entry point. Without this guard,
// any test that imports `locateBinary` from this file would have main() fire
// at module-load time, calling process.exit(1) when no compiled binary
// exists — killing the test process before any test runs. Surfaced on the
// windows-free-tests CI lane where the runner has no compiled browse
// binary (intentional — that lane only builds server-node.mjs).
if (import.meta.main) {
main();
}
+13 -2
View File
@@ -30,6 +30,7 @@ import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { THRESHOLDS, type LayerSignal } from './security';
import { resolveClaudeCommand } from './claude-bin';
/**
* Pinned Haiku model for the transcript classifier. Bumped deliberately when a
@@ -392,8 +393,13 @@ let haikuAvailableCache: boolean | null = null;
function checkHaikuAvailable(): Promise<boolean> {
if (haikuAvailableCache !== null) return Promise.resolve(haikuAvailableCache);
const claude = resolveClaudeCommand();
if (!claude) {
haikuAvailableCache = false;
return Promise.resolve(false);
}
return new Promise((resolve) => {
const p = spawn('claude', ['--version'], { stdio: ['ignore', 'pipe', 'pipe'] });
const p = spawn(claude.command, [...claude.argsPrefix, '--version'], { stdio: ['ignore', 'pipe', 'pipe'] });
let done = false;
const finish = (ok: boolean) => {
if (done) return;
@@ -493,7 +499,12 @@ export async function checkTranscript(params: {
// timeout rate in the v1.5.2.0 ensemble bench because of this, plus
// ~44k cache_creation tokens per call (massive cost inflation).
// Using os.tmpdir() gives Haiku a clean context for pure classification.
const p = spawn('claude', [
const claude = resolveClaudeCommand();
if (!claude) {
return finish({ layer: 'transcript_classifier', confidence: 0, meta: { degraded: true, reason: 'claude_cli_not_found' } });
}
const p = spawn(claude.command, [
...claude.argsPrefix,
'-p', prompt,
'--model', HAIKU_MODEL,
'--output-format', 'json',
+95
View File
@@ -0,0 +1,95 @@
import { describe, test, expect } from 'bun:test';
import * as path from 'path';
import * as fs from 'fs';
import * as os from 'os';
import { resolveClaudeCommand, resolveClaudeBinary } from '../src/claude-bin';
// Baseline environment with empty PATH/Path and no overrides, so a real
// claude binary installed on the host machine can never leak into a test.
const EMPTY_ENV = { PATH: '', Path: '' } as NodeJS.ProcessEnv;

describe('claude-bin', () => {
  test('no override, no PATH match → returns null', () => {
    expect(resolveClaudeCommand(EMPTY_ENV)).toBeNull();
    expect(resolveClaudeBinary(EMPTY_ENV)).toBeNull();
  });

  test('absolute-path override returned as-is', () => {
    const result = resolveClaudeCommand({
      ...EMPTY_ENV,
      GSTACK_CLAUDE_BIN: '/opt/custom/claude',
    });
    expect(result).toEqual({ command: '/opt/custom/claude', argsPrefix: [] });
  });

  test('CLAUDE_BIN works as fallback alias for GSTACK_CLAUDE_BIN', () => {
    const result = resolveClaudeCommand({ ...EMPTY_ENV, CLAUDE_BIN: '/opt/custom/claude' });
    expect(result?.command).toBe('/opt/custom/claude');
  });

  test('GSTACK_CLAUDE_BIN takes precedence over CLAUDE_BIN', () => {
    const result = resolveClaudeCommand({
      ...EMPTY_ENV,
      GSTACK_CLAUDE_BIN: '/explicit/path',
      CLAUDE_BIN: '/fallback/path',
    });
    expect(result?.command).toBe('/explicit/path');
  });

  test('PATH-resolvable override goes through Bun.which (the bug the fork shipped)', () => {
    // Drop a fake executable into a temp dir, point PATH at that dir, and
    // resolve by bare command name. On Windows the file needs a
    // PATHEXT-listed extension (.cmd) or Bun.which returns undefined.
    const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'claude-bin-test-'));
    const onWindows = process.platform === 'win32';
    const binName = onWindows ? 'fake-claude-cli.cmd' : 'fake-claude-cli';
    const binPath = path.join(scratchDir, binName);
    fs.writeFileSync(binPath, onWindows ? '@echo fake\r\n' : '#!/bin/sh\necho fake\n');
    if (!onWindows) fs.chmodSync(binPath, 0o755);
    try {
      const result = resolveClaudeCommand({
        PATH: scratchDir,
        GSTACK_CLAUDE_BIN: 'fake-claude-cli',
      });
      expect(result?.command).toBe(binPath);
    } finally {
      fs.rmSync(scratchDir, { recursive: true, force: true });
    }
  });

  test('override pointing at missing binary → null (no silent fallback to bare claude)', () => {
    const result = resolveClaudeCommand({
      ...EMPTY_ENV,
      GSTACK_CLAUDE_BIN: 'definitely-not-a-real-binary-xyz',
    });
    expect(result).toBeNull();
  });

  test('GSTACK_CLAUDE_BIN_ARGS as JSON array → parsed argsPrefix', () => {
    const result = resolveClaudeCommand({
      ...EMPTY_ENV,
      GSTACK_CLAUDE_BIN: '/opt/custom/claude',
      GSTACK_CLAUDE_BIN_ARGS: '["--no-cache", "--verbose"]',
    });
    expect(result?.argsPrefix).toEqual(['--no-cache', '--verbose']);
  });

  test('GSTACK_CLAUDE_BIN_ARGS as scalar string → treated as single argument', () => {
    const result = resolveClaudeCommand({
      ...EMPTY_ENV,
      GSTACK_CLAUDE_BIN: '/opt/custom/claude',
      GSTACK_CLAUDE_BIN_ARGS: 'claude',
    });
    expect(result?.argsPrefix).toEqual(['claude']);
  });

  test('argsPrefix empty when no override args set', () => {
    const result = resolveClaudeCommand({ ...EMPTY_ENV, GSTACK_CLAUDE_BIN: '/opt/custom/claude' });
    expect(result?.argsPrefix).toEqual([]);
  });
});
+25 -8
View File
@@ -791,6 +791,23 @@ deadlock fixed in #972.
---
## Step 0.6: Resolve portable roots
Before any mode runs, resolve `$PLAN_ROOT` (where plan files live) and `$TMP_ROOT`
(where ephemeral codex stderr / response captures land) via `bin/gstack-paths`.
This keeps the skill working whether installed as a Claude Code plugin
(`CLAUDE_PLANS_DIR` set), a global `~/.claude/skills/gstack/` install, or a CI
container where `HOME` may be unset and `/tmp` may be read-only.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
```
After this, every subsequent bash block in this skill uses `"$PLAN_ROOT"` and
`"$TMP_ROOT"` rather than hardcoded `~/.claude/plans` or `/tmp/codex-*`.
---
## Step 1: Detect mode
Parse the user's input to determine which mode to run:
@@ -808,8 +825,8 @@ Parse the user's input to determine which mode to run:
C) Something else — I'll provide a prompt
```
- If no diff, check for plan files scoped to the current project:
`ls -t ~/.claude/plans/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1`
If no project-scoped match, fall back to: `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1`
`ls -t "$PLAN_ROOT"/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1`
If no project-scoped match, fall back to: `ls -t "$PLAN_ROOT"/*.md 2>/dev/null | head -1`
but warn the user: "Note: this plan may be from a different project."
- If a plan file exists, offer to review it
- Otherwise, ask: "What would you like to ask Codex?"
@@ -842,7 +859,7 @@ Run Codex code review against the current branch diff.
1. Create temp files for output capture:
```bash
TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt)
TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")
```
2. Run the review (5-minute timeout). **Always** pass the filesystem boundary instruction
@@ -1025,7 +1042,7 @@ If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`.
_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
# Fix 1+2: wrap with timeout (gtimeout/timeout fallback chain via probe helper),
# capture stderr to $TMPERR for auth error detection (was: 2>/dev/null).
TMPERR=${TMPERR:-$(mktemp /tmp/codex-err-XXXXXX.txt)}
TMPERR=${TMPERR:-$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")}
_gstack_codex_timeout_wrapper 600 codex exec "<prompt>" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json < /dev/null 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c "
import sys, json
turn_completed_count = 0
@@ -1104,17 +1121,17 @@ B) Start a new conversation
2. Create temp files:
```bash
TMPRESP=$(mktemp /tmp/codex-resp-XXXXXX.txt)
TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt)
TMPRESP=$(mktemp "$TMP_ROOT/codex-resp-XXXXXX.txt")
TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")
```
3. **Plan review auto-detection:** If the user's prompt is about reviewing a plan,
or if plan files exist and the user said `/codex` with no arguments:
```bash
setopt +o nomatch 2>/dev/null || true # zsh compat
ls -t ~/.claude/plans/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1
ls -t "$PLAN_ROOT"/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1
```
If no project-scoped match, fall back to `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1`
If no project-scoped match, fall back to `ls -t "$PLAN_ROOT"/*.md 2>/dev/null | head -1`
but warn: "Note: this plan may be from a different project — verify before sending to Codex."
**IMPORTANT — embed content, don't reference path:** Codex runs sandboxed to the repo
+25 -8
View File
@@ -90,6 +90,23 @@ deadlock fixed in #972.
---
## Step 0.6: Resolve portable roots
Before any mode runs, resolve `$PLAN_ROOT` (where plan files live) and `$TMP_ROOT`
(where ephemeral codex stderr / response captures land) via `bin/gstack-paths`.
This keeps the skill working whether installed as a Claude Code plugin
(`CLAUDE_PLANS_DIR` set), a global `~/.claude/skills/gstack/` install, or a CI
container where `HOME` may be unset and `/tmp` may be read-only.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
```
After this, every subsequent bash block in this skill uses `"$PLAN_ROOT"` and
`"$TMP_ROOT"` rather than hardcoded `~/.claude/plans` or `/tmp/codex-*`.
---
## Step 1: Detect mode
Parse the user's input to determine which mode to run:
@@ -107,8 +124,8 @@ Parse the user's input to determine which mode to run:
C) Something else — I'll provide a prompt
```
- If no diff, check for plan files scoped to the current project:
`ls -t ~/.claude/plans/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1`
If no project-scoped match, fall back to: `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1`
`ls -t "$PLAN_ROOT"/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1`
If no project-scoped match, fall back to: `ls -t "$PLAN_ROOT"/*.md 2>/dev/null | head -1`
but warn the user: "Note: this plan may be from a different project."
- If a plan file exists, offer to review it
- Otherwise, ask: "What would you like to ask Codex?"
@@ -141,7 +158,7 @@ Run Codex code review against the current branch diff.
1. Create temp files for output capture:
```bash
TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt)
TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")
```
2. Run the review (5-minute timeout). **Always** pass the filesystem boundary instruction
@@ -254,7 +271,7 @@ If the user passed `--xhigh`, use `"xhigh"` instead of `"high"`.
_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
# Fix 1+2: wrap with timeout (gtimeout/timeout fallback chain via probe helper),
# capture stderr to $TMPERR for auth error detection (was: 2>/dev/null).
TMPERR=${TMPERR:-$(mktemp /tmp/codex-err-XXXXXX.txt)}
TMPERR=${TMPERR:-$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")}
_gstack_codex_timeout_wrapper 600 codex exec "<prompt>" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json < /dev/null 2>"$TMPERR" | PYTHONUNBUFFERED=1 python3 -u -c "
import sys, json
turn_completed_count = 0
@@ -333,17 +350,17 @@ B) Start a new conversation
2. Create temp files:
```bash
TMPRESP=$(mktemp /tmp/codex-resp-XXXXXX.txt)
TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt)
TMPRESP=$(mktemp "$TMP_ROOT/codex-resp-XXXXXX.txt")
TMPERR=$(mktemp "$TMP_ROOT/codex-err-XXXXXX.txt")
```
3. **Plan review auto-detection:** If the user's prompt is about reviewing a plan,
or if plan files exist and the user said `/codex` with no arguments:
```bash
setopt +o nomatch 2>/dev/null || true # zsh compat
ls -t ~/.claude/plans/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1
ls -t "$PLAN_ROOT"/*.md 2>/dev/null | xargs grep -l "$(basename $(pwd))" 2>/dev/null | head -1
```
If no project-scoped match, fall back to `ls -t ~/.claude/plans/*.md 2>/dev/null | head -1`
If no project-scoped match, fall back to `ls -t "$PLAN_ROOT"/*.md 2>/dev/null | head -1`
but warn: "Note: this plan may be from a different project — verify before sending to Codex."
**IMPORTANT — embed content, don't reference path:** Codex runs sandboxed to the repo
+2 -1
View File
@@ -711,7 +711,8 @@ Parse the user's input:
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" && mkdir -p ~/.gstack/projects/$SLUG
CHECKPOINT_DIR="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/checkpoints"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
CHECKPOINT_DIR="$GSTACK_STATE_ROOT/projects/$SLUG/checkpoints"
if [ ! -d "$CHECKPOINT_DIR" ]; then
echo "NO_CHECKPOINTS"
else
+2 -1
View File
@@ -62,7 +62,8 @@ Parse the user's input:
```bash
{{SLUG_SETUP}}
CHECKPOINT_DIR="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/checkpoints"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
CHECKPOINT_DIR="$GSTACK_STATE_ROOT/projects/$SLUG/checkpoints"
if [ ! -d "$CHECKPOINT_DIR" ]; then
echo "NO_CHECKPOINTS"
else
+4 -2
View File
@@ -767,7 +767,8 @@ allowlist: only `a-z 0-9 - .` survive.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" && mkdir -p ~/.gstack/projects/$SLUG
CHECKPOINT_DIR="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/checkpoints"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
CHECKPOINT_DIR="$GSTACK_STATE_ROOT/projects/$SLUG/checkpoints"
mkdir -p "$CHECKPOINT_DIR"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
# Bash-side title sanitize. Pass the raw title as $1 when running this block.
@@ -853,7 +854,8 @@ Restore later with /context-restore.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" && mkdir -p ~/.gstack/projects/$SLUG
CHECKPOINT_DIR="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/checkpoints"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
CHECKPOINT_DIR="$GSTACK_STATE_ROOT/projects/$SLUG/checkpoints"
if [ -d "$CHECKPOINT_DIR" ]; then
echo "CHECKPOINT_DIR=$CHECKPOINT_DIR"
# Use find + sort instead of ls -1t: filename YYYYMMDD-HHMMSS prefix is the
+4 -2
View File
@@ -118,7 +118,8 @@ allowlist: only `a-z 0-9 - .` survive.
```bash
{{SLUG_SETUP}}
CHECKPOINT_DIR="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/checkpoints"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
CHECKPOINT_DIR="$GSTACK_STATE_ROOT/projects/$SLUG/checkpoints"
mkdir -p "$CHECKPOINT_DIR"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
# Bash-side title sanitize. Pass the raw title as $1 when running this block.
@@ -204,7 +205,8 @@ Restore later with /context-restore.
```bash
{{SLUG_SETUP}}
CHECKPOINT_DIR="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/checkpoints"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
CHECKPOINT_DIR="$GSTACK_STATE_ROOT/projects/$SLUG/checkpoints"
if [ -d "$CHECKPOINT_DIR" ]; then
echo "CHECKPOINT_DIR=$CHECKPOINT_DIR"
# Use find + sort instead of ls -1t: filename YYYYMMDD-HHMMSS prefix is the
+14 -1
View File
@@ -16,6 +16,8 @@ Detailed guides for every gstack skill — philosophy, workflow, and examples.
| [`/design-html`](#design-html) | **Design Engineer** | Generates production-quality Pretext-native HTML. Works with approved mockups, CEO plans, design reviews, or from scratch. Text reflows on resize, heights adjust to content. Smart API routing per design type. Framework detection for React/Svelte/Vue. |
| [`/qa`](#qa) | **QA Lead** | Test your app, find bugs, fix them with atomic commits, re-verify. Auto-generates regression tests for every fix. |
| [`/qa-only`](#qa) | **QA Reporter** | Same methodology as /qa but report only. Use when you want a pure bug report without code changes. |
| [`/scrape`](#scrape) | **Browser Data Extractor** | Pull data from a web page. First call prototypes via `$B`; subsequent calls on a matching intent run a codified browser-skill in ~200ms. |
| [`/skillify`](#skillify) | **Skill Codifier** | Walks back through your conversation, finds the last `/scrape` prototype, synthesizes script + test + fixture, runs the test, asks before committing. |
| [`/ship`](#ship) | **Release Engineer** | Sync main, run tests, audit coverage, push, open PR. Bootstraps test frameworks if you don't have one. One command. |
| [`/land-and-deploy`](#land-and-deploy) | **Release Engineer** | Merge the PR, wait for CI and deploy, verify production health. One command from "approved" to "verified in production." |
| [`/canary`](#canary) | **SRE** | Post-deploy monitoring loop. Watches for console errors, performance regressions, and page failures using the browse daemon. |
@@ -25,11 +27,21 @@ Detailed guides for every gstack skill — philosophy, workflow, and examples.
| [`/retro`](#retro) | **Eng Manager** | Team-aware weekly retro. Per-person breakdowns, shipping streaks, test health trends, growth opportunities. |
| [`/browse`](#browse) | **QA Engineer** | Give the agent eyes. Real Chromium browser, real clicks, real screenshots. ~100ms per command. |
| [`/setup-browser-cookies`](#setup-browser-cookies) | **Session Manager** | Import cookies from your real browser (Chrome, Arc, Brave, Edge) into the headless session. Test authenticated pages. |
| [`/autoplan`](#autoplan) | **Review Pipeline** | One command, fully reviewed plan. Runs CEO → design → eng review automatically with encoded decision principles. Surfaces only taste decisions for your approval. |
| [`/autoplan`](#autoplan) | **Review Pipeline** | One command, fully reviewed plan. Runs CEO → design → eng → DX review automatically with encoded decision principles. Surfaces only taste decisions for your approval. |
| [`/plan-devex-review`](#plan-devex-review) | **DX Reviewer** | Plan-stage DX review. TTHW (time-to-hello-world), magical moments, friction points, persona traces. Three modes: Expansion, Polish, Triage. |
| [`/devex-review`](#devex-review) | **DX Reviewer (live)** | Live developer experience audit. Walks the actual onboarding flow, measures TTHW, catches the docs lies. |
| [`/plan-tune`](#plan-tune) | **Question Tuner** | Self-tune AskUserQuestion sensitivity per question. Mark questions as never-ask, always-ask, or only-for-one-way. |
| [`/learn`](#learn) | **Memory** | Manage what gstack learned across sessions. Review, search, prune, and export project-specific patterns and preferences. |
| [`/context-save`](#context-save) | **Save State** | Save working context (git state, decisions, remaining work) so any future session can resume. |
| [`/context-restore`](#context-restore) | **Restore State** | Resume from a saved context, even across Conductor workspace handoffs. |
| [`/health`](#health) | **Code Quality Dashboard** | Wraps type checker, linter, tests, dead code detection. Computes a weighted 0-10 score; tracks trends over time. |
| [`/landing-report`](#landing-report) | **Ship Queue Dashboard** | Read-only snapshot of the workspace-aware ship queue. Which version slots are claimed, which sibling workspaces have WIP. |
| [`/benchmark-models`](#benchmark-models) | **Model Benchmark** | Side-by-side cross-model benchmark for skills (Claude vs GPT vs Gemini). Latency, tokens, cost, optional LLM-judged quality. |
| | | |
| **Multi-AI** | | |
| [`/codex`](#codex) | **Second Opinion** | Independent review from OpenAI Codex CLI. Three modes: code review (pass/fail gate), adversarial challenge, and open consultation with session continuity. Cross-model analysis when both `/review` and `/codex` have run. |
| [`/pair-agent`](#pair-agent) | **Remote Agent Bridge** | Pair a remote AI agent (OpenClaw, Codex, Cursor, Hermes) with your browser. Scoped tunnel, locked allowlist, session token. |
| [`/setup-gbrain`](#setup-gbrain) | **Memory Sync** | Set up gbrain for cross-machine session memory sync. One command from zero to live. |
| | | |
| **Safety & Utility** | | |
| [`/careful`](#safety--guardrails) | **Safety Guardrails** | Warns before destructive commands (rm -rf, DROP TABLE, force-push, git reset --hard). Override any warning. Common build cleanups whitelisted. |
@@ -39,6 +51,7 @@ Detailed guides for every gstack skill — philosophy, workflow, and examples.
| [`/open-gstack-browser`](#open-gstack-browser) | **GStack Browser** | Launch GStack Browser with sidebar, anti-bot stealth, auto model routing, cookie import, and Claude Code integration. Watch every action live. |
| [`/setup-deploy`](#setup-deploy) | **Deploy Configurator** | One-time setup for `/land-and-deploy`. Detects your platform, production URL, and deploy commands. |
| [`/gstack-upgrade`](#gstack-upgrade) | **Self-Updater** | Upgrade gstack to the latest version. Detects global vs vendored install, syncs both, shows what changed. |
| [`/make-pdf`](#make-pdf) | **PDF Generator** | Turn any markdown file into a publication-quality PDF. Proper margins, page numbers, cover pages, clickable TOC. |
---
+48
View File
@@ -1028,6 +1028,54 @@ rm -f /tmp/gstack-pr-body-$$.md
7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the
commit." and continue.
**PR/MR title sync (idempotent, always-on):**
PR titles must always start with `v<VERSION>` — same rule as `/ship`. If Step 8 bumped VERSION after `/ship` had already created the PR, the title is now stale. This sub-step fixes it.
1. Read the current VERSION:
```bash
V=$(cat VERSION 2>/dev/null | tr -d '[:space:]')
```
If `VERSION` does not exist or is empty, skip this sub-step entirely.
2. Read the current PR/MR title:
**If GitHub:**
```bash
CURRENT_TITLE=$(gh pr view --json title -q .title 2>/dev/null || true)
```
**If GitLab:**
```bash
CURRENT_TITLE=$(glab mr view -F json 2>/dev/null | jq -r .title 2>/dev/null || true)
```
If `CURRENT_TITLE` is empty (no open PR/MR), skip with message "No PR/MR found — skipping title sync."
3. Compute the corrected title using the shared helper (single source of truth — same one `/ship` uses):
```bash
NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$V" "$CURRENT_TITLE")
```
The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
4. If `NEW_TITLE` differs from `CURRENT_TITLE`, update it:
**If GitHub:**
```bash
gh pr edit --title "$NEW_TITLE"
```
**If GitLab:**
```bash
glab mr update -t "$NEW_TITLE"
```
5. If the edit command fails: warn "Could not update PR/MR title — documentation changes are still in the commit." and continue. Do not block on title sync failure.
**Structured doc health summary (final output):**
Output a scannable summary showing every documentation file's status:
+48
View File
@@ -342,6 +342,54 @@ rm -f /tmp/gstack-pr-body-$$.md
7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the
commit." and continue.
**PR/MR title sync (idempotent, always-on):**
PR titles must always start with `v<VERSION>` — same rule as `/ship`. If Step 8 bumped VERSION after `/ship` had already created the PR, the title is now stale. This sub-step fixes it.
1. Read the current VERSION:
```bash
V=$(cat VERSION 2>/dev/null | tr -d '[:space:]')
```
If `VERSION` does not exist or is empty, skip this sub-step entirely.
2. Read the current PR/MR title:
**If GitHub:**
```bash
CURRENT_TITLE=$(gh pr view --json title -q .title 2>/dev/null || true)
```
**If GitLab:**
```bash
CURRENT_TITLE=$(glab mr view -F json 2>/dev/null | jq -r .title 2>/dev/null || true)
```
If `CURRENT_TITLE` is empty (no open PR/MR), skip with message "No PR/MR found — skipping title sync."
3. Compute the corrected title using the shared helper (single source of truth — same one `/ship` uses):
```bash
NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$V" "$CURRENT_TITLE")
```
The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
4. If `NEW_TITLE` differs from `CURRENT_TITLE`, update it:
**If GitHub:**
```bash
gh pr edit --title "$NEW_TITLE"
```
**If GitLab:**
```bash
glab mr update -t "$NEW_TITLE"
```
5. If the edit command fails: warn "Could not update PR/MR title — documentation changes are still in the commit." and continue. Do not block on title sync failure.
**Structured doc health summary (final output):**
Output a scannable summary showing every documentation file's status:
+2 -1
View File
@@ -59,7 +59,8 @@ echo "$FREEZE_DIR"
2. Ensure trailing slash and save to the freeze state file:
```bash
FREEZE_DIR="${FREEZE_DIR%/}/"
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
mkdir -p "$STATE_DIR"
echo "$FREEZE_DIR" > "$STATE_DIR/freeze-dir.txt"
echo "Freeze boundary set: $FREEZE_DIR"
+2 -1
View File
@@ -58,7 +58,8 @@ echo "$FREEZE_DIR"
2. Ensure trailing slash and save to the freeze state file:
```bash
FREEZE_DIR="${FREEZE_DIR%/}/"
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
mkdir -p "$STATE_DIR"
echo "$FREEZE_DIR" > "$STATE_DIR/freeze-dir.txt"
echo "Freeze boundary set: $FREEZE_DIR"
+2 -1
View File
@@ -68,7 +68,8 @@ echo "$FREEZE_DIR"
2. Ensure trailing slash and save to the freeze state file:
```bash
FREEZE_DIR="${FREEZE_DIR%/}/"
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
mkdir -p "$STATE_DIR"
echo "$FREEZE_DIR" > "$STATE_DIR/freeze-dir.txt"
echo "Freeze boundary set: $FREEZE_DIR"
+2 -1
View File
@@ -67,7 +67,8 @@ echo "$FREEZE_DIR"
2. Ensure trailing slash and save to the freeze state file:
```bash
FREEZE_DIR="${FREEZE_DIR%/}/"
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
mkdir -p "$STATE_DIR"
echo "$FREEZE_DIR" > "$STATE_DIR/freeze-dir.txt"
echo "Freeze boundary set: $FREEZE_DIR"
+2 -1
View File
@@ -773,7 +773,8 @@ After forming your root cause hypothesis, lock edits to the affected module to p
**If FREEZE_AVAILABLE:** Identify the narrowest directory containing the affected files. Write it to the freeze state file:
```bash
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
mkdir -p "$STATE_DIR"
echo "<detected-directory>/" > "$STATE_DIR/freeze-dir.txt"
echo "Debug scope locked to: <detected-directory>/"
+2 -1
View File
@@ -88,7 +88,8 @@ After forming your root cause hypothesis, lock edits to the affected module to p
**If FREEZE_AVAILABLE:** Identify the narrowest directory containing the affected files. Write it to the freeze state file:
```bash
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
mkdir -p "$STATE_DIR"
echo "<detected-directory>/" > "$STATE_DIR/freeze-dir.txt"
echo "Debug scope locked to: <detected-directory>/"
+2 -2
View File
@@ -790,8 +790,8 @@ Show summary statistics about the project's learnings.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
LEARN_FILE="$GSTACK_HOME/projects/$SLUG/learnings.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
LEARN_FILE="$GSTACK_STATE_ROOT/projects/$SLUG/learnings.jsonl"
if [ -f "$LEARN_FILE" ]; then
TOTAL=$(wc -l < "$LEARN_FILE" | tr -d ' ')
echo "TOTAL: $TOTAL entries"
+2 -2
View File
@@ -141,8 +141,8 @@ Show summary statistics about the project's learnings.
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
GSTACK_HOME="${GSTACK_HOME:-$HOME/.gstack}"
LEARN_FILE="$GSTACK_HOME/projects/$SLUG/learnings.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
LEARN_FILE="$GSTACK_STATE_ROOT/projects/$SLUG/learnings.jsonl"
if [ -f "$LEARN_FILE" ]; then
TOTAL=$(wc -l < "$LEARN_FILE" | tr -d ' ')
echo "TOTAL: $TOTAL entries"
+8 -4
View File
@@ -1440,7 +1440,8 @@ After counting signals, append a session entry to the builder profile. This is t
source of truth for all closing state (tier, resource dedup, journey tracking).
```bash
mkdir -p "${GSTACK_HOME:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
mkdir -p "$GSTACK_STATE_ROOT"
```
Append one JSON line with these fields (substitute actual values from this session):
@@ -1455,7 +1456,8 @@ Append one JSON line with these fields (substitute actual values from this sessi
- `topics`: array of 2-3 topic keywords that describe what this session was about
```bash
echo '{"date":"TIMESTAMP","mode":"MODE","project_slug":"SLUG","signal_count":N,"signals":SIGNALS_ARRAY,"design_doc":"DOC_PATH","assignment":"ASSIGNMENT_TEXT","resources_shown":[],"topics":TOPICS_ARRAY}' >> "${GSTACK_HOME:-$HOME/.gstack}/builder-profile.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
echo '{"date":"TIMESTAMP","mode":"MODE","project_slug":"SLUG","signal_count":N,"signals":SIGNALS_ARRAY,"design_doc":"DOC_PATH","assignment":"ASSIGNMENT_TEXT","resources_shown":[],"topics":TOPICS_ARRAY}' >> "$GSTACK_STATE_ROOT/builder-profile.jsonl"
```
This entry is append-only. The `resources_shown` field will be updated via a second append
@@ -1813,7 +1815,8 @@ This must feel earned, not broadcast. If the evidence doesn't support it, skip e
with a narrative arc (not a data table). The arc tells the STORY of their journey in
second person, referencing specific things they said across sessions. Then open it:
```bash
open "${GSTACK_HOME:-$HOME/.gstack}/builder-journey.md"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
open "$GSTACK_STATE_ROOT/builder-journey.md"
```
Then proceed to Founder Resources below.
@@ -1915,7 +1918,8 @@ PAUL GRAHAM ESSAYS:
1. Log the selected resource URLs to the builder profile (single source of truth).
Append a resource-tracking entry:
```bash
echo '{"date":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","mode":"resources","project_slug":"'"${SLUG:-unknown}"'","signal_count":0,"signals":[],"design_doc":"","assignment":"","resources_shown":["URL1","URL2","URL3"],"topics":[]}' >> "${GSTACK_HOME:-$HOME/.gstack}/builder-profile.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
echo '{"date":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","mode":"resources","project_slug":"'"${SLUG:-unknown}"'","signal_count":0,"signals":[],"design_doc":"","assignment":"","resources_shown":["URL1","URL2","URL3"],"topics":[]}' >> "$GSTACK_STATE_ROOT/builder-profile.jsonl"
```
2. Log the selection to analytics:
+8 -4
View File
@@ -445,7 +445,8 @@ After counting signals, append a session entry to the builder profile. This is t
source of truth for all closing state (tier, resource dedup, journey tracking).
```bash
mkdir -p "${GSTACK_HOME:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
mkdir -p "$GSTACK_STATE_ROOT"
```
Append one JSON line with these fields (substitute actual values from this session):
@@ -460,7 +461,8 @@ Append one JSON line with these fields (substitute actual values from this sessi
- `topics`: array of 2-3 topic keywords that describe what this session was about
```bash
echo '{"date":"TIMESTAMP","mode":"MODE","project_slug":"SLUG","signal_count":N,"signals":SIGNALS_ARRAY,"design_doc":"DOC_PATH","assignment":"ASSIGNMENT_TEXT","resources_shown":[],"topics":TOPICS_ARRAY}' >> "${GSTACK_HOME:-$HOME/.gstack}/builder-profile.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
echo '{"date":"TIMESTAMP","mode":"MODE","project_slug":"SLUG","signal_count":N,"signals":SIGNALS_ARRAY,"design_doc":"DOC_PATH","assignment":"ASSIGNMENT_TEXT","resources_shown":[],"topics":TOPICS_ARRAY}' >> "$GSTACK_STATE_ROOT/builder-profile.jsonl"
```
This entry is append-only. The `resources_shown` field will be updated via a second append
@@ -758,7 +760,8 @@ This must feel earned, not broadcast. If the evidence doesn't support it, skip e
with a narrative arc (not a data table). The arc tells the STORY of their journey in
second person, referencing specific things they said across sessions. Then open it:
```bash
open "${GSTACK_HOME:-$HOME/.gstack}/builder-journey.md"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
open "$GSTACK_STATE_ROOT/builder-journey.md"
```
Then proceed to Founder Resources below.
@@ -860,7 +863,8 @@ PAUL GRAHAM ESSAYS:
1. Log the selected resource URLs to the builder profile (single source of truth).
Append a resource-tracking entry:
```bash
echo '{"date":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","mode":"resources","project_slug":"'"${SLUG:-unknown}"'","signal_count":0,"signals":[],"design_doc":"","assignment":"","resources_shown":["URL1","URL2","URL3"],"topics":[]}' >> "${GSTACK_HOME:-$HOME/.gstack}/builder-profile.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
echo '{"date":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","mode":"resources","project_slug":"'"${SLUG:-unknown}"'","signal_count":0,"signals":[],"design_doc":"","assignment":"","resources_shown":["URL1","URL2","URL3"],"topics":[]}' >> "$GSTACK_STATE_ROOT/builder-profile.jsonl"
```
2. Log the selection to analytics:
+3 -1
View File
@@ -1,6 +1,6 @@
{
"name": "gstack",
"version": "1.22.0.0",
"version": "1.25.0.0",
"description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
"license": "MIT",
"type": "module",
@@ -17,6 +17,8 @@
"dev": "bun run browse/src/cli.ts",
"server": "bun run browse/src/server.ts",
"test": "bun test browse/test/ test/ make-pdf/test/ --ignore 'test/skill-e2e-*.test.ts' --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts --ignore test/gemini-e2e.test.ts && (bun run slop:diff 2>/dev/null || true)",
"test:free": "bun run scripts/test-free-shards.ts",
"test:windows": "bun run scripts/test-free-shards.ts --windows-only",
"test:evals": "EVALS=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-llm-eval.test.ts test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-llm-eval.test.ts test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
"test:e2e": "EVALS=1 bun test --retry 2 --concurrent --max-concurrency ${EVALS_CONCURRENCY:-15} test/skill-e2e-*.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
+8 -4
View File
@@ -793,7 +793,8 @@ Power-user shortcuts (one-word invocations) — handle these too:
# Ensure profile exists
~/.claude/skills/gstack/bin/gstack-developer-profile --read >/dev/null
# Update declared dimensions atomically
_PROFILE="${GSTACK_HOME:-$HOME/.gstack}/developer-profile.json"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_PROFILE="$GSTACK_STATE_ROOT/developer-profile.json"
bun -e "
const fs = require('fs');
const p = JSON.parse(fs.readFileSync('$_PROFILE','utf-8'));
@@ -854,7 +855,8 @@ Parse the JSON. Present in **plain English**, not raw floats:
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_LOG="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/question-log.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_LOG="$GSTACK_STATE_ROOT/projects/$SLUG/question-log.jsonl"
if [ ! -f "$_LOG" ]; then
echo "NO_LOG"
else
@@ -947,7 +949,8 @@ is a trust boundary (Codex #15 in the design doc).
3. After Y, write:
```bash
_PROFILE="${GSTACK_HOME:-$HOME/.gstack}/developer-profile.json"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_PROFILE="$GSTACK_STATE_ROOT/developer-profile.json"
bun -e "
const fs = require('fs');
const p = JSON.parse(fs.readFileSync('$_PROFILE','utf-8'));
@@ -988,7 +991,8 @@ the user decides whether declared is wrong or behavior is wrong.
```bash
~/.claude/skills/gstack/bin/gstack-question-preference --stats
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_LOG="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/question-log.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_LOG="$GSTACK_STATE_ROOT/projects/$SLUG/question-log.jsonl"
[ -f "$_LOG" ] && echo "TOTAL_LOGGED: $(wc -l < "$_LOG" | tr -d ' ')" || echo "TOTAL_LOGGED: 0"
~/.claude/skills/gstack/bin/gstack-developer-profile --profile | bun -e "
const p = JSON.parse(await Bun.stdin.text());
+8 -4
View File
@@ -144,7 +144,8 @@ Power-user shortcuts (one-word invocations) — handle these too:
# Ensure profile exists
~/.claude/skills/gstack/bin/gstack-developer-profile --read >/dev/null
# Update declared dimensions atomically
_PROFILE="${GSTACK_HOME:-$HOME/.gstack}/developer-profile.json"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_PROFILE="$GSTACK_STATE_ROOT/developer-profile.json"
bun -e "
const fs = require('fs');
const p = JSON.parse(fs.readFileSync('$_PROFILE','utf-8'));
@@ -205,7 +206,8 @@ Parse the JSON. Present in **plain English**, not raw floats:
```bash
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_LOG="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/question-log.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_LOG="$GSTACK_STATE_ROOT/projects/$SLUG/question-log.jsonl"
if [ ! -f "$_LOG" ]; then
echo "NO_LOG"
else
@@ -298,7 +300,8 @@ is a trust boundary (Codex #15 in the design doc).
3. After Y, write:
```bash
_PROFILE="${GSTACK_HOME:-$HOME/.gstack}/developer-profile.json"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_PROFILE="$GSTACK_STATE_ROOT/developer-profile.json"
bun -e "
const fs = require('fs');
const p = JSON.parse(fs.readFileSync('$_PROFILE','utf-8'));
@@ -339,7 +342,8 @@ the user decides whether declared is wrong or behavior is wrong.
```bash
~/.claude/skills/gstack/bin/gstack-question-preference --stats
eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)"
_LOG="${GSTACK_HOME:-$HOME/.gstack}/projects/$SLUG/question-log.jsonl"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
_LOG="$GSTACK_STATE_ROOT/projects/$SLUG/question-log.jsonl"
[ -f "$_LOG" ] && echo "TOTAL_LOGGED: $(wc -l < "$_LOG" | tr -d ' ')" || echo "TOTAL_LOGGED: 0"
~/.claude/skills/gstack/bin/gstack-developer-profile --profile | bun -e "
const p = JSON.parse(await Bun.stdin.text());
+5 -6
View File
@@ -18,7 +18,7 @@
import { query, type SDKMessage } from '@anthropic-ai/claude-agent-sdk';
import { readOverlay } from './resolvers/model-overlay';
import { execSync } from 'child_process';
import { resolveClaudeBinary } from '../browse/src/claude-bin';
async function main() {
const failures: string[] = [];
@@ -44,12 +44,11 @@ async function main() {
// 2. Local claude binary exists
console.log('\n2. Binary pinning');
let claudePath: string | null = null;
try {
claudePath = execSync('which claude', { encoding: 'utf-8' }).trim();
let claudePath: string | null = resolveClaudeBinary();
if (claudePath) {
pass(`local claude binary: ${claudePath}`);
} catch {
fail('`which claude` failed — cannot pin binary');
} else {
fail('`Bun.which("claude")` failed — cannot pin binary (set GSTACK_CLAUDE_BIN to override)');
}
// 3. SDK query end-to-end
+339
View File
@@ -0,0 +1,339 @@
#!/usr/bin/env bun
/**
 * test-free-shards enumerates, shards, and curates the free test suite.
*
* Three jobs:
* 1. Enumeration. Walk `browse/test/`, `test/`, `make-pdf/test/` and return
* every `*.test.{ts,tsx,js,jsx,mjs,cjs}` that isn't a paid-eval test.
* 2. Sharding. Stable-hash assign each test to one of N shards. Used by CI
* to parallelize the free suite when needed.
* 3. Curation (Windows-safe filter). Scan each test's content for POSIX-only
* patterns (`/bin/bash`, `sh -c`, raw `/tmp/`, `chmod`, `xargs`). Files
 *    that match are excluded from the Windows-safe subset — they would fail
 *    on `windows-latest` no matter how the runner shards them.
*
* Adapted from the McGluut/gstack fork's test-free-shards.ts (190 LOC). The
 * Windows-safe filter is upstream-original — codex flagged that sharding alone
* doesn't fix POSIX-bound tests, so we curate the subset that actually runs
* on the windows-latest CI job.
*
* Usage:
* bun run scripts/test-free-shards.ts --list # show all
* bun run scripts/test-free-shards.ts --windows-only --list # show curated
* bun run scripts/test-free-shards.ts --windows-only # run curated
* bun run scripts/test-free-shards.ts --shards 4 --shard 1 # one shard
*/
import * as fs from 'fs';
import * as path from 'path';
import { spawnSync } from 'child_process';
const ROOT = path.resolve(import.meta.dir, '..');
const TEST_ROOTS = ['browse/test', 'test', 'make-pdf/test'] as const;
const TEST_FILE_REGEX = /\.test\.(?:[cm]?[jt]s|tsx|jsx)$/;
// Tests that require API spend, external services, or e2e harnesses.
// These are filtered out before any sharding or curation.
// Matched against normalized (forward-slash) relative paths — see isFreeTestFile.
const PAID_EVAL_TESTS = [
  /^browse\/test\/security-review-fullstack\.test\.ts$/,
  /^test\/skill-e2e-.*\.test\.ts$/,
  /^test\/skill-llm-eval\.test\.ts$/,
  /^test\/skill-routing-e2e\.test\.ts$/,
  /^test\/codex-e2e\.test\.ts$/,
  /^test\/gemini-e2e\.test\.ts$/,
] as const;
// POSIX-only patterns that indicate a test will fail on windows-latest no
// matter how the runner shards. Codex's v1.18.0.0 review flagged the first
// three as concrete examples in the existing free suite (test/ship-version-sync.test.ts:72,
// test/helpers/providers/claude.ts:22, package.json:12). We scan the test's
// own content here so the filter stays automatic as new tests land. The
// "Windows-incompatible APIs" patterns at the bottom were added after the
// first windows-free-tests CI run surfaced concrete failure modes.
// Patterns are evaluated in order; the first match's reason is the one
// reported (see detectWindowsFragility).
const WINDOWS_FRAGILE_PATTERNS: Array<{ pattern: RegExp; reason: string }> = [
  // Hardcoded POSIX shells / commands.
  { pattern: /['"`]\/bin\/(?:ba)?sh/, reason: 'hardcoded /bin/sh or /bin/bash' },
  { pattern: /spawnSync\(['"]sh['"],|spawn\(['"]sh['"],|exec\(['"]sh /, reason: 'spawn("sh", ...)' },
  { pattern: /['"]bash -c['"]|['"]sh -c['"]/, reason: 'bash -c / sh -c' },
  { pattern: /['"`]\/tmp\//, reason: 'raw /tmp/ path (use os.tmpdir())' },
  { pattern: /['"]chmod\b/, reason: 'chmod shell command' },
  { pattern: /['"]xargs\b/, reason: 'xargs pipeline' },
  { pattern: /\bwhich claude\b/, reason: 'which claude (use Bun.which)' },
  // Windows-incompatible APIs.
  { pattern: /\.mode\s*&\s*0o[0-7]+/, reason: 'POSIX file mode bitmask (mode & 0o600 etc — Windows fakes mode bits)' },
  { pattern: /\.endsWith\(['"]\//, reason: 'hardcoded forward-slash path assertion (Windows uses \\\\)' },
  { pattern: /['"]\.\/[a-zA-Z][^"']*['"]\)\s*\.\s*toBe\(true\)/, reason: 'forward-slash path comparison' },
  // Tests that spawn a bash shebang script in bin/ via spawnSync. Git Bash on
  // Windows can run `bash /path/to/script` but spawnSync(scriptPath, ...)
  // tries to execute the file directly via CreateProcess, which fails on the
  // shebang. The pattern matches `, 'bin'` as a path-join argument (closing
  // OR followed by another segment), which catches:
  //   - path.join(ROOT, 'bin', 'script-name') — typical
  //   - join(import.meta.dir, '..', 'bin', 'name') — destructured (diff-scope)
  //   - path.join(ROOT, 'bin') — bare BIN constant (brain-sync)
  { pattern: /,\s*['"]bin['"]\s*[,)]|['"]\.?\/?bin\/[a-z][\w-]+['"]/, reason: 'spawns bin/ shebang script (Windows CreateProcess does not parse shebangs)' },
  // Tests that launch a real Playwright browser. The windows-free-tests CI job
  // runs a curated subset that intentionally does NOT install Chromium —
  // browser bring-up on Windows is a separate concern (see PR #1238). Tests
  // matching `await foo.launch(` need Chromium and fail with "Executable
  // doesn't exist" on the runner.
  { pattern: /await\s+\w+\.launch\(/, reason: 'launches Playwright browser (Chromium not installed in windows-free CI)' },
  // Tests that spawn the browse server as a subprocess via `bun run server.ts`.
  // The Bun → server.ts → Playwright path is the same one that doesn't work
  // on Windows (PR #1238 windows-pty-bun-pty-fix). Tests typically set
  // BROWSE_HEADLESS_SKIP=1 to skip the browser launch but still need a working
  // server, which they don't get on Windows.
  { pattern: /BROWSE_HEADLESS_SKIP|spawn\(\[['"]bun['"],\s*['"]run['"]/, reason: 'spawns the browse server subprocess (Bun-driven path is Windows-broken)' },
  // Tests that read browse/src/sidebar-agent.ts — deleted in v1.14.0.0
  // sidebar refactor (replaced by sidepanel-terminal.js). 10 security tests
  // still reference it and fail on import. They've been broken on every
  // platform since v1.14, but Bun on macOS/Linux reports the failure as a
  // module-load error (exit 0) while Bun on Windows treats it as a hard
  // fail (exit 1). Tracked as a follow-up: update or delete these tests.
  { pattern: /sidebar-agent\.ts/, reason: 'reads deleted browse/src/sidebar-agent.ts (pre-existing breakage from v1.14.0.0 sidebar refactor)' },
];
// Explicit known-Windows-incompatible test files that don't fit a regex
// pattern. Listed here with the precise reason. Prefer adding a pattern above
// when possible; this list is for environment-/runtime-specific tests where
// the failure mode is structural rather than detectable via source-file scan.
// Checked before the content scan in curateWindowsSafe.
const KNOWN_WINDOWS_INCOMPATIBLE: Array<{ file: string; reason: string }> = [
  {
    file: 'test/host-config.test.ts',
    reason: 'asserts "claude" binary on PATH (only true when running inside Claude Code, not on bare CI runner)',
  },
  {
    file: 'browse/test/findport.test.ts',
    reason: 'asserts Bun.serve.stop() is fire-and-forget — Bun behavior differs on Windows for this polyfill',
  },
];
// Default shard count when --shards is not given, and the per-test timeout
// passed to `bun test` via --timeout (see buildShardArgs).
export const DEFAULT_SHARD_COUNT = 20;
export const FREE_TEST_TIMEOUT_MS = 10_000;
// Convert Windows path separators to POSIX so path-based regexes match on every OS.
export function normalizeRelativePath(filePath: string): string {
  return filePath.split('\\').join('/');
}
// A "free" test is any file matching the test-file pattern that is NOT in the
// paid-eval list. Paths are normalized to forward slashes before matching.
export function isFreeTestFile(relativePath: string): boolean {
  const posixPath = normalizeRelativePath(relativePath);
  if (!TEST_FILE_REGEX.test(posixPath)) return false;
  for (const paidPattern of PAID_EVAL_TESTS) {
    if (paidPattern.test(posixPath)) return false;
  }
  return true;
}
/**
 * Scan one test file's source for POSIX-only constructs.
 *
 * Returns the reason of the first matching entry in WINDOWS_FRAGILE_PATTERNS,
 * or null when nothing matched. An unreadable file also yields null — it is
 * treated as Windows-safe rather than aborting curation (best-effort scan).
 */
export function detectWindowsFragility(absolutePath: string): { reason: string } | null {
  let source: string;
  try {
    source = fs.readFileSync(absolutePath, 'utf-8');
  } catch {
    return null;
  }
  const hit = WINDOWS_FRAGILE_PATTERNS.find(({ pattern }) => pattern.test(source));
  return hit ? { reason: hit.reason } : null;
}
// Depth-first recursion: collect every file under dirPath whose name matches
// the test-file pattern. Directory order follows fs.readdirSync.
function walkTestFiles(dirPath: string): string[] {
  const collected: string[] = [];
  for (const entry of fs.readdirSync(dirPath, { withFileTypes: true })) {
    const entryPath = path.join(dirPath, entry.name);
    if (entry.isDirectory()) {
      collected.push(...walkTestFiles(entryPath));
    } else if (TEST_FILE_REGEX.test(entry.name)) {
      collected.push(entryPath);
    }
  }
  return collected;
}
// Enumerate the free suite: walk every existing test root under rootDir,
// keep the files isFreeTestFile accepts, de-dupe via a Set, and return the
// relative (forward-slash) paths sorted for stable output.
export function collectFreeTestFiles(rootDir = ROOT): string[] {
  const found = new Set<string>();
  for (const testRoot of TEST_ROOTS) {
    const rootPath = path.join(rootDir, testRoot);
    if (!fs.existsSync(rootPath)) continue;
    walkTestFiles(rootPath)
      .map(absolute => normalizeRelativePath(path.relative(rootDir, absolute)))
      .filter(isFreeTestFile)
      .forEach(relative => found.add(relative));
  }
  return Array.from(found).sort();
}
/** Result of Windows curation: tests safe to run, plus exclusions with reasons. */
export interface CurationResult {
  safe: string[];
  excluded: Array<{ file: string; reason: string }>;
}
// Split `files` into Windows-safe and excluded. The explicit denylist
// (KNOWN_WINDOWS_INCOMPATIBLE) is consulted first; everything else goes
// through the content-based POSIX-pattern scan in detectWindowsFragility.
export function curateWindowsSafe(files: string[], rootDir = ROOT): CurationResult {
  const denylist = new Map(KNOWN_WINDOWS_INCOMPATIBLE.map(({ file, reason }) => [file, reason]));
  const result: CurationResult = { safe: [], excluded: [] };
  for (const file of files) {
    const listedReason = denylist.get(file);
    if (listedReason !== undefined) {
      result.excluded.push({ file, reason: listedReason });
      continue;
    }
    const hit = detectWindowsFragility(path.join(rootDir, file));
    if (hit) {
      result.excluded.push({ file, reason: hit.reason });
    } else {
      result.safe.push(file);
    }
  }
  return result;
}
// 32-bit FNV-1a over the string's UTF-16 code units (0x811c9dc5 offset basis,
// 0x01000193 prime). Deterministic across runs, which keeps shard assignment
// stable for a fixed file list. Returned as an unsigned 32-bit integer.
export function stableHash(input: string): number {
  let hash = 0x811c9dc5;
  for (let i = 0; i < input.length; i += 1) {
    hash = Math.imul(hash ^ input.charCodeAt(i), 0x01000193);
  }
  return hash >>> 0;
}
// Distribute files across shardCount buckets by stable hash, sort each bucket,
// and drop empty buckets.
// NOTE(review): dropping empty buckets renumbers the surviving shards, so a
// given --shard index is only stable while the set of non-empty buckets is —
// confirm CI always runs every returned shard rather than a fixed index range.
export function assignFilesToShards(files: string[], shardCount: number): string[][] {
  // Also rejects NaN from unparseable CLI values (Number.isInteger(NaN) is false).
  if (!Number.isInteger(shardCount) || shardCount <= 0) {
    throw new Error(`Shard count must be a positive integer. Received: ${shardCount}`);
  }
  const buckets: string[][] = [];
  for (let i = 0; i < shardCount; i += 1) buckets.push([]);
  for (const file of files) {
    buckets[stableHash(file) % shardCount].push(file);
  }
  const populated: string[][] = [];
  for (const bucket of buckets) {
    if (bucket.length > 0) populated.push(bucket.sort());
  }
  return populated;
}
// Argument vector for one `bun test` invocation: explicit file list, serial
// execution, and the free-suite per-test timeout.
export function buildShardArgs(files: string[]): string[] {
  const args: string[] = ['test'];
  args.push(...files);
  args.push('--max-concurrency=1');
  args.push(`--timeout=${FREE_TEST_TIMEOUT_MS}`);
  return args;
}
/** Parsed CLI flags; see the usage block in the file-header comment. */
type CliOptions = {
  dryRun: boolean;            // --dry-run: print the shard plan without running
  listOnly: boolean;          // --list: print discovered/curated files and exit
  windowsOnly: boolean;       // --windows-only: curate the Windows-safe subset
  shardCount: number;         // --shards N (defaults to DEFAULT_SHARD_COUNT)
  shardIndex: number | null;  // --shard N (1-based), or null to run all shards
};
// Minimal flag parser for this script's five options. Unknown arguments and
// value-flags missing their value throw; numeric values are parsed base-10
// and validated downstream (assignFilesToShards / main).
function parseCliOptions(argv: string[]): CliOptions {
  const options: CliOptions = {
    dryRun: false,
    listOnly: false,
    windowsOnly: false,
    shardCount: DEFAULT_SHARD_COUNT,
    shardIndex: null,
  };
  for (let i = 0; i < argv.length; i += 1) {
    switch (argv[i]) {
      case '--dry-run':
        options.dryRun = true;
        break;
      case '--list':
        options.listOnly = true;
        break;
      case '--windows-only':
        options.windowsOnly = true;
        break;
      case '--shards': {
        const raw = argv[i + 1];
        if (!raw) throw new Error('Missing value for --shards');
        options.shardCount = Number.parseInt(raw, 10);
        i += 1;
        break;
      }
      case '--shard': {
        const raw = argv[i + 1];
        if (!raw) throw new Error('Missing value for --shard');
        options.shardIndex = Number.parseInt(raw, 10);
        i += 1;
        break;
      }
      default:
        throw new Error(`Unknown argument: ${argv[i]}`);
    }
  }
  return options;
}
// One human-readable line per shard: file count plus up to three example
// files, with ", ..." when the shard holds more than three.
function formatShardSummary(shards: string[][]): string[] {
  const total = shards.length;
  return shards.map((files, shardIdx) => {
    const examples = files.slice(0, 3).join(', ');
    const overflow = files.length > 3 ? ', ...' : '';
    const detail = examples ? ` -> ${examples}${overflow}` : '';
    return `Shard ${shardIdx + 1}/${total}: ${files.length} files${detail}`;
  });
}
/**
 * Run one shard of the free suite via `bun test` (process.execPath).
 *
 * @param files        Relative test-file paths for this shard.
 * @param shardNumber  1-based shard number (display only).
 * @param totalShards  Total shard count (display only).
 * @returns The child's exit code, or 1 when the child produced none.
 */
function runShard(files: string[], shardNumber: number, totalShards: number): number {
  const header = `[test:free] shard ${shardNumber}/${totalShards} (${files.length} files)`;
  console.log(header);
  const result = spawnSync(process.execPath, buildShardArgs(files), {
    cwd: ROOT,
    stdio: 'inherit',
    env: process.env,
  });
  // spawnSync reports launch failures (e.g. ENOENT, EACCES) via `error` with a
  // null status. Surface the real cause instead of a misleading "exit code 1"
  // so CI logs are actionable.
  if (result.error) {
    console.error(`${header} failed to launch: ${result.error.message}`);
    return 1;
  }
  if (result.status !== 0) {
    console.error(`${header} failed with exit code ${result.status ?? 1}`);
  }
  return result.status ?? 1;
}
/**
 * CLI entry point: enumerate the free suite, optionally curate the
 * Windows-safe subset (--windows-only), then list (--list), plan (--dry-run),
 * run one shard (--shard N), or run every shard serially.
 *
 * @returns Process exit code: 0 on success, else the first failing shard's code.
 */
function main(): number {
  const options = parseCliOptions(process.argv.slice(2));
  const allFiles = collectFreeTestFiles();
  if (allFiles.length === 0) {
    // An empty suite means enumeration itself is broken — fail loudly.
    throw new Error('No free test files were discovered.');
  }
  let files = allFiles;
  let curationReport: CurationResult | null = null;
  if (options.windowsOnly) {
    // Narrow to the Windows-safe subset and report what was dropped and why.
    curationReport = curateWindowsSafe(allFiles);
    files = curationReport.safe;
    console.log(`[test:free] curated ${files.length} Windows-safe tests (${curationReport.excluded.length} excluded)`);
    if (options.listOnly && curationReport.excluded.length > 0) {
      console.log('\nExcluded (POSIX-fragile):');
      for (const { file, reason } of curationReport.excluded) {
        console.log(`  - ${file} [${reason}]`);
      }
    }
  }
  if (options.listOnly) {
    console.log(`\nDiscovered ${files.length} test files.`);
    for (const file of files) console.log(`  ${file}`);
    return 0;
  }
  const shards = assignFilesToShards(files, options.shardCount);
  if (options.dryRun) {
    // Show the plan without spawning anything.
    console.log(`\nWould run ${files.length} files across ${shards.length} shards.`);
    for (const line of formatShardSummary(shards)) console.log(line);
    return 0;
  }
  if (options.shardIndex !== null) {
    // CI matrix mode: run exactly one shard. The index is 1-based and
    // validated against the post-filter shard count (also rejects NaN).
    if (!Number.isInteger(options.shardIndex) || options.shardIndex < 1 || options.shardIndex > shards.length) {
      throw new Error(`--shard must be between 1 and ${shards.length}. Received: ${options.shardIndex}`);
    }
    return runShard(shards[options.shardIndex - 1], options.shardIndex, shards.length);
  }
  // Default: run every shard serially, stopping at the first failure.
  for (let index = 0; index < shards.length; index += 1) {
    const exitCode = runShard(shards[index], index + 1, shards.length);
    if (exitCode !== 0) return exitCode;
  }
  return 0;
}
// Only run the CLI when executed directly; the exported helpers above stay
// importable (e.g. by tests) without side effects.
if (import.meta.main) {
  process.exitCode = main();
}
+12 -1
View File
@@ -2770,7 +2770,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
2. Compute the corrected title: `NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
Print the existing URL and continue to Step 20.
@@ -2840,6 +2847,8 @@ you missed it.>
**If GitHub:**
```bash
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
<PR body from above>
EOF
@@ -2849,6 +2858,8 @@ EOF
**If GitLab:**
```bash
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
<MR body from above>
EOF
+12 -1
View File
@@ -794,7 +794,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
2. Compute the corrected title: `NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
Print the existing URL and continue to Step 20.
@@ -864,6 +871,8 @@ you missed it.>
**If GitHub:**
```bash
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
<PR body from above>
EOF
@@ -873,6 +882,8 @@ EOF
**If GitLab:**
```bash
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
<MR body from above>
EOF
+12 -1
View File
@@ -2770,7 +2770,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
2. Compute the corrected title: `NEW_TITLE=$(~/.claude/skills/gstack/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
Print the existing URL and continue to Step 20.
@@ -2840,6 +2847,8 @@ you missed it.>
**If GitHub:**
```bash
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
<PR body from above>
EOF
@@ -2849,6 +2858,8 @@ EOF
**If GitLab:**
```bash
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
<MR body from above>
EOF
+12 -1
View File
@@ -2385,7 +2385,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
2. Compute the corrected title: `NEW_TITLE=$($GSTACK_ROOT/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
Print the existing URL and continue to Step 20.
@@ -2455,6 +2462,8 @@ you missed it.>
**If GitHub:**
```bash
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
<PR body from above>
EOF
@@ -2464,6 +2473,8 @@ EOF
**If GitLab:**
```bash
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
<MR body from above>
EOF
+12 -1
View File
@@ -2761,7 +2761,14 @@ glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS"
If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary, documentation_section from Step 18). Never reuse stale PR body content from a prior run.
**Also update the PR title** if the version changed on rerun. PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first. If the current title's version prefix doesn't match `NEW_VERSION`, run `gh pr edit --title "v$NEW_VERSION <type>: <summary>"` (or the `glab mr update -t ...` equivalent). This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version. If the title has no `v<X.Y.Z.W>` prefix (a custom title kept intentionally), leave the title alone — only rewrite titles that already follow the format.
**Always update the PR title to start with `v$NEW_VERSION`.** PR titles use the workspace-aware format `v<NEW_VERSION> <type>: <summary>` — version ALWAYS first, no exceptions, no "custom title kept intentionally" escape hatch. The shared helper `bin/gstack-pr-title-rewrite.sh` is the single source of truth for the rule.
1. Read the current title: `CURRENT=$(gh pr view --json title -q .title)` (or `glab mr view -F json | jq -r .title`).
2. Compute the corrected title: `NEW_TITLE=$($GSTACK_ROOT/bin/gstack-pr-title-rewrite.sh "$NEW_VERSION" "$CURRENT")`. The helper handles three cases: title already correct (no-op), title has a different `v<X.Y.Z.W>` prefix (replace it), or title has no version prefix (prepend one).
3. If `NEW_TITLE` differs from `CURRENT`, run `gh pr edit --title "$NEW_TITLE"` (or `glab mr update -t "$NEW_TITLE"`).
4. **Self-check:** re-fetch the title and assert it starts with `v$NEW_VERSION `. If it does not, retry the edit once. If still wrong, surface the failure to the user.
This keeps the title truthful when Step 12's queue-drift detection rebumps a stale version, and forces the format on PRs that were created without it.
Print the existing URL and continue to Step 20.
@@ -2831,6 +2838,8 @@ you missed it.>
**If GitHub:**
```bash
# PR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
gh pr create --base <base> --title "v$NEW_VERSION <type>: <summary>" --body "$(cat <<'EOF'
<PR body from above>
EOF
@@ -2840,6 +2849,8 @@ EOF
**If GitLab:**
```bash
# MR title MUST start with v$NEW_VERSION — enforced on every run, no exceptions.
# (See Step 19 idempotency block + bin/gstack-pr-title-rewrite.sh for the rule.)
glab mr create -b <base> -t "v$NEW_VERSION <type>: <summary>" -d "$(cat <<'EOF'
<MR body from above>
EOF
+101
View File
@@ -0,0 +1,101 @@
import { describe, test, expect } from 'bun:test';
import { spawnSync } from 'child_process';
import * as path from 'path';
const ROOT = path.resolve(import.meta.dir, '..');
const BIN = path.join(ROOT, 'bin', 'gstack-paths');
// Execute the gstack-paths helper under bash and parse its KEY=VALUE output
// into a plain object.
//
// We go through `bash` explicitly instead of executing the shebang script
// directly: on Windows, spawnSync(scriptPath, ...) uses CreateProcess, which
// does not interpret `#!/usr/bin/env bash`. Production always sources the
// helper from inside a bash block (`eval "$(.../bin/gstack-paths)"`), so
// bash-as-executor matches the real contract.
//
// USERPROFILE is cleared alongside the caller-supplied env because Git Bash
// auto-populates HOME from USERPROFILE at shell startup when HOME is
// unset/empty — which would silently defeat the "HOME unset" scenarios on
// Windows runners.
function run(env: Record<string, string | undefined>): Record<string, string> {
  const proc = spawnSync('bash', [BIN], {
    env: { PATH: process.env.PATH, USERPROFILE: '', ...env } as Record<string, string>,
    encoding: 'utf-8',
  });
  if (proc.status !== 0) {
    throw new Error(`gstack-paths failed (status ${proc.status}): ${proc.stderr}`);
  }
  const parsed: Record<string, string> = {};
  for (const line of proc.stdout.split('\n')) {
    const sep = line.indexOf('=');
    if (sep > 0) {
      parsed[line.slice(0, sep)] = line.slice(sep + 1);
    }
  }
  return parsed;
}
// Contract tests for the gstack-paths resolver. Each test spawns the helper
// with a controlled environment and asserts which env var wins for each of
// the three emitted roots (GSTACK_STATE_ROOT, PLAN_ROOT, TMP_ROOT).
describe('gstack-paths', () => {
  test('GSTACK_HOME wins over CLAUDE_PLUGIN_DATA and HOME', () => {
    const got = run({
      GSTACK_HOME: '/tmp/explicit-state',
      CLAUDE_PLUGIN_DATA: '/tmp/plugin-data',
      HOME: '/tmp/home',
    });
    // Explicit override beats both fallbacks.
    expect(got.GSTACK_STATE_ROOT).toBe('/tmp/explicit-state');
  });

  test('CLAUDE_PLUGIN_DATA wins over HOME when GSTACK_HOME unset', () => {
    const got = run({
      CLAUDE_PLUGIN_DATA: '/tmp/plugin-data',
      HOME: '/tmp/home',
    });
    expect(got.GSTACK_STATE_ROOT).toBe('/tmp/plugin-data');
  });

  test('HOME-derived state root when GSTACK_HOME and CLAUDE_PLUGIN_DATA unset', () => {
    const got = run({ HOME: '/tmp/myhome' });
    // Default location is <HOME>/.gstack.
    expect(got.GSTACK_STATE_ROOT).toBe('/tmp/myhome/.gstack');
  });

  test('CWD fallback when HOME also unset (container env)', () => {
    // Skip on Windows: Git Bash auto-derives HOME from USERPROFILE,
    // HOMEDRIVE, and HOMEPATH at shell startup. Even with all three
    // cleared, bash falls back to /c/Users/<user>. The container env
    // (HOME genuinely unset) is unreachable on Windows runners. The bash
    // script's CWD fallback IS correct — exercised on Linux/Mac CI.
    if (process.platform === 'win32') return;
    const got = run({ HOME: '' });
    expect(got.GSTACK_STATE_ROOT).toBe('.gstack');
  });

  test('PLAN_ROOT chain: GSTACK_PLAN_DIR > CLAUDE_PLANS_DIR > HOME > CWD', () => {
    // Highest-priority override first, then each fallback in turn.
    expect(run({ GSTACK_PLAN_DIR: '/tmp/explicit', HOME: '/h' }).PLAN_ROOT).toBe('/tmp/explicit');
    expect(run({ CLAUDE_PLANS_DIR: '/tmp/claude', HOME: '/h' }).PLAN_ROOT).toBe('/tmp/claude');
    expect(run({ HOME: '/tmp/myhome' }).PLAN_ROOT).toBe('/tmp/myhome/.claude/plans');
    // CWD fallback only verifiable on POSIX — Git Bash auto-populates HOME.
    if (process.platform !== 'win32') {
      expect(run({ HOME: '' }).PLAN_ROOT).toBe('.claude/plans');
    }
  });

  test('TMP_ROOT chain: TMPDIR > TMP > .gstack/tmp', () => {
    // NOTE(review): the final case has no win32 guard — it presumably passes
    // because TMP_ROOT does not depend on HOME, but confirm TMPDIR/TMP are
    // not auto-populated by Git Bash on Windows runners.
    expect(run({ TMPDIR: '/tmp/x', HOME: '/h' }).TMP_ROOT).toBe('/tmp/x');
    expect(run({ TMP: '/tmp/y', HOME: '/h' }).TMP_ROOT).toBe('/tmp/y');
    expect(run({ HOME: '' }).TMP_ROOT).toBe('.gstack/tmp');
  });

  test('emits all three exports on every invocation', () => {
    const got = run({ HOME: '/tmp/h' });
    expect(got).toHaveProperty('GSTACK_STATE_ROOT');
    expect(got).toHaveProperty('PLAN_ROOT');
    expect(got).toHaveProperty('TMP_ROOT');
  });

  test('output is shell-evalable: only KEY=VALUE lines, no extra prose', () => {
    // Bypass run()'s parser on purpose: this test inspects the raw stdout
    // format, which run() would normalize away.
    const result = spawnSync('bash', [BIN], {
      env: { PATH: process.env.PATH, USERPROFILE: '', HOME: '/tmp/h' } as Record<string, string>,
      encoding: 'utf-8',
    });
    const lines = result.stdout.split('\n').filter(Boolean);
    for (const line of lines) {
      // Uppercase KEY=VALUE only — anything else would break `eval "$(...)"`.
      expect(line).toMatch(/^[A-Z_]+=.*/);
    }
  });
});
+2 -6
View File
@@ -35,7 +35,7 @@ import {
} from '@anthropic-ai/claude-agent-sdk';
import * as fs from 'fs';
import * as path from 'path';
import { execSync } from 'child_process';
import { resolveClaudeBinary as resolveClaudeBinaryShared } from '../../browse/src/claude-bin';
import type { SkillTestResult } from './session-runner';
// ---------------------------------------------------------------------------
@@ -278,11 +278,7 @@ function resolveSdkVersion(): string {
}
export function resolveClaudeBinary(): string | null {
try {
return execSync('which claude', { encoding: 'utf-8' }).trim() || null;
} catch {
return null;
}
return resolveClaudeBinaryShared();
}
// ---------------------------------------------------------------------------
+13 -7
View File
@@ -1,9 +1,10 @@
import type { ProviderAdapter, RunOpts, RunResult, AvailabilityCheck } from './types';
import { estimateCostUsd } from '../pricing';
import { execFileSync, spawnSync } from 'child_process';
import { execFileSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { resolveClaudeCommand } from '../../../browse/src/claude-bin';
/**
* Claude adapter wraps the `claude` CLI via claude -p.
@@ -18,10 +19,11 @@ export class ClaudeAdapter implements ProviderAdapter {
readonly family = 'claude' as const;
async available(): Promise<AvailabilityCheck> {
// Binary on PATH?
const res = spawnSync('sh', ['-c', 'command -v claude'], { timeout: 2000 });
if (res.status !== 0) {
return { ok: false, reason: 'claude CLI not found on PATH. Install from https://claude.ai/download or npm i -g @anthropic-ai/claude-code' };
// Binary on PATH (or GSTACK_CLAUDE_BIN override). Routes through the shared
// resolver so Windows + override paths behave the same as production sites.
const resolved = resolveClaudeCommand();
if (!resolved) {
return { ok: false, reason: 'claude CLI not found on PATH. Install from https://claude.ai/download or npm i -g @anthropic-ai/claude-code (or set GSTACK_CLAUDE_BIN)' };
}
// Auth sniff: ~/.claude/.credentials.json OR ANTHROPIC_API_KEY
const credsPath = path.join(os.homedir(), '.claude', '.credentials.json');
@@ -35,12 +37,16 @@ export class ClaudeAdapter implements ProviderAdapter {
async run(opts: RunOpts): Promise<RunResult> {
const start = Date.now();
const args = ['-p', '--output-format', 'json'];
const resolved = resolveClaudeCommand();
if (!resolved) {
throw new Error('claude CLI not resolvable (set GSTACK_CLAUDE_BIN or install)');
}
const args = [...resolved.argsPrefix, '-p', '--output-format', 'json'];
if (opts.model) args.push('--model', opts.model);
if (opts.extraArgs) args.push(...opts.extraArgs);
try {
const out = execFileSync('claude', args, {
const out = execFileSync(resolved.command, args, {
input: opts.prompt,
cwd: opts.workdir,
timeout: opts.timeoutMs,
+54
View File
@@ -0,0 +1,54 @@
import { describe, test, expect } from 'bun:test';
import { spawnSync } from 'child_process';
import * as path from 'path';
const HELPER = path.join(import.meta.dir, '..', 'bin', 'gstack-pr-title-rewrite.sh');
// Run the title-rewrite helper once and normalize its output for assertions.
//
// Invoked via `bash` rather than executing the .sh path directly: on Windows,
// spawnSync(scriptPath, ...) goes through CreateProcess, which does not
// interpret the `#!/usr/bin/env bash` shebang, so a direct spawn fails on
// Windows runners even though the script itself is portable. This matches
// how the sibling gstack-paths tests invoke their helper.
//
// Returns trimmed stdout, the exit status (-1 if the process never ran),
// and raw stderr for diagnostics.
function rewrite(version: string, title: string): { stdout: string; status: number; stderr: string } {
  const r = spawnSync('bash', [HELPER, version, title], { encoding: 'utf-8' });
  return { stdout: (r.stdout ?? '').trimEnd(), status: r.status ?? -1, stderr: r.stderr ?? '' };
}
// Behavior table for bin/gstack-pr-title-rewrite.sh — the single source of
// truth for the "PR title starts with v<VERSION>" rule. Covers the three
// documented cases (already correct / wrong prefix / no prefix), plus
// argument validation and idempotence.
describe('gstack-pr-title-rewrite', () => {
  test('already correct: no change', () => {
    const r = rewrite('1.2.3.4', 'v1.2.3.4 feat: foo');
    expect(r.status).toBe(0);
    expect(r.stdout).toBe('v1.2.3.4 feat: foo');
  });

  test('different version prefix: replaces it', () => {
    expect(rewrite('1.2.3.5', 'v1.2.3.4 feat: foo').stdout).toBe('v1.2.3.5 feat: foo');
  });

  test('different prefix length (3-part vs 4-part): replaces it', () => {
    expect(rewrite('1.2.3.4', 'v1.2.3 feat: foo').stdout).toBe('v1.2.3.4 feat: foo');
  });

  test('no version prefix: prepends', () => {
    expect(rewrite('1.2.3.4', 'feat: foo').stdout).toBe('v1.2.3.4 feat: foo');
  });

  test('does not mistake plain words for a prefix', () => {
    // "version 5" is prose, not a v<X.Y...> prefix — must be kept verbatim.
    expect(rewrite('1.2.3.4', 'version 5 feature').stdout).toBe('v1.2.3.4 version 5 feature');
  });

  test('does not strip a single-segment prefix like v1', () => {
    // A bare "v1" is not a recognized version prefix; it is treated as prose.
    expect(rewrite('1.2.3.4', 'v1 feat: foo').stdout).toBe('v1.2.3.4 v1 feat: foo');
  });

  test('errors on missing args', () => {
    // Invoke via bash (not the script path directly) so this check also runs
    // on Windows, where CreateProcess cannot interpret the shebang line.
    const r = spawnSync('bash', [HELPER, '1.2.3.4'], { encoding: 'utf-8' });
    expect(r.status).not.toBe(0);
  });

  test('rejects malformed VERSION with shell metacharacters', () => {
    // Exit code 2 is the helper's validation-failure status.
    expect(rewrite('1.*.*.*', 'feat: foo').status).toBe(2);
    expect(rewrite('1.2.3.4; rm -rf /', 'feat: foo').status).toBe(2);
  });

  test('idempotent: applying twice yields the same result', () => {
    const once = rewrite('1.2.3.4', 'feat: foo').stdout;
    const twice = rewrite('1.2.3.4', once).stdout;
    expect(twice).toBe(once);
  });
});
+101
View File
@@ -1458,6 +1458,107 @@ describe('Skill trigger phrases', () => {
}
});
// ─── Private-path leak detector ──────────────────────────────
//
// Catches accidental references to maintainer-private files in skill output.
// Adapted from the McGluut fork's skill-contract-audit.ts (we don't take the
// whole script — these are the unique checks not already covered by
// test/gen-skill-docs.test.ts:1668-2074 .claude/skills leakage tests).
// Scans every published skill surface (SKILL.md / SKILL.md.tmpl) for
// references to maintainer-private files that must never ship.
describe('Private-path leak detection', () => {
  // Patterns that must never appear in skill output. The Windows-path
  // pattern accepts one OR two backslashes per separator so it catches both
  // the prose form (C:\LLM Playground\go) and the string-escaped form
  // (C:\\LLM Playground\\go); the previous doubled-only pattern missed the
  // single-backslash form that plain markdown uses.
  const PRIVATE_PATTERNS: Array<{ pattern: RegExp; label: string }> = [
    { pattern: /coordination-board\.md/i, label: 'coordination-board.md' },
    { pattern: /SEEKING_LOG\.md/, label: 'SEEKING_LOG.md' },
    { pattern: /RATIONAL_SUBJECT\.md/, label: 'RATIONAL_SUBJECT.md' },
    { pattern: /VALUE_SIGNAL_LOOP\.md/, label: 'VALUE_SIGNAL_LOOP.md' },
    { pattern: /C:\\{1,2}LLM Playground\\{1,2}go/i, label: 'C:\\LLM Playground\\go' },
  ];

  // Recursively collect every SKILL.md and SKILL.md.tmpl under ROOT,
  // skipping dot-directories (except .agents), node_modules, and dist.
  function discoverSkillSurface(): string[] {
    const results: string[] = [];
    function walk(dir: string) {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        if (entry.name.startsWith('.') && entry.name !== '.agents') continue;
        if (entry.name === 'node_modules' || entry.name === 'dist') continue;
        const full = path.join(dir, entry.name);
        if (entry.isDirectory()) {
          walk(full);
        } else if (entry.name === 'SKILL.md' || entry.name === 'SKILL.md.tmpl') {
          results.push(full);
        }
      }
    }
    walk(ROOT);
    return results;
  }

  test('no SKILL.md or SKILL.md.tmpl references private maintainer files', () => {
    const files = discoverSkillSurface();
    // Guard against the walker silently finding nothing (e.g. a wrong ROOT
    // would make the leak check vacuously pass).
    expect(files.length).toBeGreaterThan(0);
    const leaks: string[] = [];
    for (const file of files) {
      const content = fs.readFileSync(file, 'utf-8');
      for (const { pattern, label } of PRIVATE_PATTERNS) {
        if (pattern.test(content)) {
          leaks.push(`${path.relative(ROOT, file)} mentions ${label}`);
        }
      }
    }
    // Collect every leak and assert on the full list so a failure reports
    // all offending files at once instead of stopping at the first.
    expect(leaks).toEqual([]);
  });
});
// ─── Doc-inventory cross-check ───────────────────────────────
//
// Every skill directory (with a SKILL.md.tmpl) must appear in both AGENTS.md
// and docs/skills.md. Catches the inventory drift codex flagged (/debug
// → /investigate; missing /autoplan, /context-save, /plan-devex-review, etc.).
// Cross-checks the skill inventory: every directory holding a SKILL.md.tmpl
// must be mentioned in both AGENTS.md and docs/skills.md, so the docs cannot
// silently drift from the actual skill set.
describe('Doc inventory cross-check', () => {
  // Skills that don't get user-invocation lines in agent-facing docs.
  // - 'qa-only' is a sub-mode of /qa with shared docs.
  // - The 5 listed below are infrastructure (model overlays, shipped binary,
  //   hosts) that don't show up in the user-facing skill table.
  const DOC_INVENTORY_EXCLUDE = new Set([
    // Infra / non-skills
    'agents', 'claude', 'connect-chrome', 'contrib', 'hosts',
    'lib', 'model-overlays', 'openclaw', 'supabase', 'scripts', 'test',
  ]);

  // A "skill dir" is any non-hidden, non-excluded top-level directory that
  // contains a SKILL.md.tmpl. Sorted for stable failure output.
  function discoverSkillDirs(): string[] {
    const dirs: string[] = [];
    for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue;
      if (entry.name.startsWith('.')) continue;
      if (DOC_INVENTORY_EXCLUDE.has(entry.name)) continue;
      const tmplPath = path.join(ROOT, entry.name, 'SKILL.md.tmpl');
      if (fs.existsSync(tmplPath)) dirs.push(entry.name);
    }
    return dirs.sort();
  }

  test('every skill is documented in AGENTS.md', () => {
    const agents = fs.readFileSync(path.join(ROOT, 'AGENTS.md'), 'utf-8');
    const missing: string[] = [];
    for (const skill of discoverSkillDirs()) {
      // Match `/skill-name` as a token boundary.
      // NOTE(review): `\b` allows a longer hyphenated mention to satisfy a
      // shorter skill's check (e.g. `/qa-only` matches the `/qa\b` probe,
      // since `-` is a word boundary) — confirm that's acceptable.
      if (!new RegExp(`/${skill}\\b`).test(agents)) missing.push(skill);
    }
    expect(missing).toEqual([]);
  });

  test('every skill is documented in docs/skills.md', () => {
    const docs = fs.readFileSync(path.join(ROOT, 'docs', 'skills.md'), 'utf-8');
    const missing: string[] = [];
    for (const skill of discoverSkillDirs()) {
      if (!new RegExp(`/${skill}\\b`).test(docs)) missing.push(skill);
    }
    expect(missing).toEqual([]);
  });
});
// ─── Codex Skill Validation ──────────────────────────────────
describe('Codex skill validation', () => {
+128
View File
@@ -0,0 +1,128 @@
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import {
isFreeTestFile,
collectFreeTestFiles,
detectWindowsFragility,
curateWindowsSafe,
stableHash,
assignFilesToShards,
normalizeRelativePath,
} from '../scripts/test-free-shards';
const ROOT = path.resolve(import.meta.dir, '..');
// Unit tests for the shard-enumeration helpers in scripts/test-free-shards.
// "Free" test files are the ones safe to run on every CI shard; paid eval
// suites are filtered out by filename convention.
describe('test-free-shards: enumeration', () => {
  test('isFreeTestFile rejects non-test files', () => {
    // Only *.test.{ts,tsx,mjs} count as test files.
    expect(isFreeTestFile('test/foo.ts')).toBe(false);
    expect(isFreeTestFile('test/foo.test.ts')).toBe(true);
    expect(isFreeTestFile('test/foo.test.tsx')).toBe(true);
    expect(isFreeTestFile('test/foo.test.mjs')).toBe(true);
  });

  test('isFreeTestFile rejects paid eval tests', () => {
    // These filename prefixes denote suites that call real model providers
    // and therefore cost money — excluded from the free shards.
    expect(isFreeTestFile('test/skill-e2e-foo.test.ts')).toBe(false);
    expect(isFreeTestFile('test/skill-llm-eval.test.ts')).toBe(false);
    expect(isFreeTestFile('test/codex-e2e.test.ts')).toBe(false);
    expect(isFreeTestFile('test/gemini-e2e.test.ts')).toBe(false);
  });

  test('collectFreeTestFiles returns sorted, deduped, only-free list', () => {
    // Runs against the real repository tree rooted at ROOT.
    const files = collectFreeTestFiles(ROOT);
    expect(files.length).toBeGreaterThan(10);
    expect(files).toEqual([...files].sort());
    expect(new Set(files).size).toBe(files.length);
    for (const f of files) {
      expect(isFreeTestFile(f)).toBe(true);
    }
  });

  test('normalizeRelativePath converts Windows backslashes to forward slashes', () => {
    expect(normalizeRelativePath('test\\foo\\bar.test.ts')).toBe('test/foo/bar.test.ts');
    // Already-normalized paths pass through unchanged.
    expect(normalizeRelativePath('test/foo/bar.test.ts')).toBe('test/foo/bar.test.ts');
  });
});
// Tests for the Windows-fragility detector: each case writes a tiny sample
// test file to a temp dir and asserts detectWindowsFragility flags (or
// passes) it with the expected reason string.
describe('test-free-shards: Windows curation', () => {
  // Write `content` to a fresh temp file, invoke `fn` with its path, and
  // always clean up the temp directory afterwards.
  function withTempFile(content: string, fn: (filePath: string) => void): void {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'curation-test-'));
    const file = path.join(dir, 'sample.test.ts');
    fs.writeFileSync(file, content);
    try {
      fn(file);
    } finally {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  }

  test('detects /bin/bash hardcode', () => {
    withTempFile(`spawn('/bin/bash', ['-c', 'echo hi']);`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('hardcoded /bin/sh or /bin/bash');
    });
  });

  test('detects spawn("sh", ...)', () => {
    withTempFile(`spawnSync('sh', ['-c', 'command -v claude']);`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('spawn("sh", ...)');
    });
  });

  test('detects raw /tmp/ paths', () => {
    withTempFile(`const TMPERR = '/tmp/codex-err.txt';`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('raw /tmp/ path (use os.tmpdir())');
    });
  });

  test('detects which claude shell command', () => {
    withTempFile(`execSync('which claude').trim();`, (f) => {
      expect(detectWindowsFragility(f)?.reason).toBe('which claude (use Bun.which)');
    });
  });

  test('Windows-safe code passes the filter', () => {
    // Portable spawn usage — the detector must return null (no fragility).
    withTempFile(`import { spawn } from 'child_process'; spawn(claude.command, args);`, (f) => {
      expect(detectWindowsFragility(f)).toBeNull();
    });
  });

  test('curateWindowsSafe partitions files into safe + excluded', () => {
    const files = collectFreeTestFiles(ROOT);
    const result = curateWindowsSafe(files, ROOT);
    // Partition: every input file lands in exactly one bucket.
    expect(result.safe.length + result.excluded.length).toBe(files.length);
    // Sanity: at least one excluded entry, since we know test/ship-version-sync.test.ts uses /bin/bash
    expect(result.excluded.length).toBeGreaterThan(0);
    // Every excluded entry has a non-empty reason
    for (const { reason } of result.excluded) {
      expect(reason.length).toBeGreaterThan(0);
    }
  });
});
// Tests for the shard-assignment helpers: hashing must be deterministic so
// the same file always lands in the same shard across CI runs.
describe('test-free-shards: sharding', () => {
  test('stableHash is deterministic', () => {
    // Same input → same hash; distinct inputs → distinct hashes (for these
    // two sample names — not a general collision-freedom claim).
    expect(stableHash('foo.test.ts')).toBe(stableHash('foo.test.ts'));
    expect(stableHash('foo.test.ts')).not.toBe(stableHash('bar.test.ts'));
  });

  test('assignFilesToShards distributes files into N non-empty shards', () => {
    const files = ['a.test.ts', 'b.test.ts', 'c.test.ts', 'd.test.ts', 'e.test.ts'];
    const shards = assignFilesToShards(files, 3);
    // Flattening the shards must recover exactly the input set.
    const flattened = shards.flat();
    expect(flattened.sort()).toEqual([...files].sort());
    expect(shards.every((s) => s.length > 0)).toBe(true);
  });

  test('assignFilesToShards rejects invalid shard counts', () => {
    expect(() => assignFilesToShards(['a.test.ts'], 0)).toThrow();
    expect(() => assignFilesToShards(['a.test.ts'], -1)).toThrow();
  });

  test('shards are stable across runs (same files always land in same shard)', () => {
    const files = ['x.test.ts', 'y.test.ts', 'z.test.ts'];
    const a = assignFilesToShards(files, 5);
    const b = assignFilesToShards(files, 5);
    expect(a).toEqual(b);
  });
});
+2 -1
View File
@@ -29,7 +29,8 @@ echo '{"skill":"unfreeze","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(bas
## Clear the boundary
```bash
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
if [ -f "$STATE_DIR/freeze-dir.txt" ]; then
PREV=$(cat "$STATE_DIR/freeze-dir.txt")
rm -f "$STATE_DIR/freeze-dir.txt"
+2 -1
View File
@@ -28,7 +28,8 @@ echo '{"skill":"unfreeze","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(bas
## Clear the boundary
```bash
STATE_DIR="${CLAUDE_PLUGIN_DATA:-$HOME/.gstack}"
eval "$(~/.claude/skills/gstack/bin/gstack-paths)"
STATE_DIR="$GSTACK_STATE_ROOT"
if [ -f "$STATE_DIR/freeze-dir.txt" ]; then
PREV=$(cat "$STATE_DIR/freeze-dir.txt")
rm -f "$STATE_DIR/freeze-dir.txt"