diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fc55131a..53063d9f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## [1.53.1.0] - 2026-05-30 + +## **Workspace and scripted setup never hang on a hidden prompt again. Installing the plan-tune hooks is now flag-driven with safe defaults.** + +`./setup` asked "Install both hooks now? [y/N]" with a blocking read. Run under a Conductor workspace or any forwarded terminal, that prompt had nobody to answer it, so setup hung forever. Now the decision comes from a flag, an env var, or saved config, and when nobody is there to answer it takes a safe default instead of waiting. A real terminal still gets the prompt, but it is time-bounded (auto-skips after 10s) so it can never stall a pipeline. + +### What this means for you + +- Spinning up a new workspace just works. `bin/dev-setup` runs fully non-interactively and never rewrites your global Claude settings behind your back. +- Want the plan-tune hooks installed without a prompt? `./setup --plan-tune-hooks` (or `GSTACK_PLAN_TUNE_HOOKS=yes`, or `gstack-config set plan_tune_hooks yes`). Don't want them? `--no-plan-tune-hooks`. Leave it unset and a real terminal still asks; an explicit y/n answer is remembered, while a timeout or empty reply just skips for now and asks again on the next run. + +### Added + +- `--plan-tune-hooks` / `--no-plan-tune-hooks` / `--plan-tune-hooks=yes|no|prompt` flags on `./setup`, plus the `GSTACK_PLAN_TUNE_HOOKS` env var and a `plan_tune_hooks` config key (default `prompt`). Precedence: flag > env > saved config > prompt on a real terminal. + +### Fixed + +- `./setup` no longer hangs in non-interactive or forwarded-TTY contexts (Conductor workspaces, CI). The plan-tune consent prompt is time-bounded and defaults to skip. +- `bin/dev-setup` runs setup non-interactively and can no longer silently rewrite your global `~/.claude/settings.json` to point at an ephemeral workspace path that breaks when the workspace is deleted. 
+- Opt-in values like `YES`, `Yes`, or ` yes` are honored instead of being silently downgraded to skip, and `gstack-config` now warns on out-of-domain `plan_tune_hooks` values and stores `prompt` instead of the invalid value. + +### For contributors + +- New regression suite `test/setup-plan-tune-hooks-noninteractive.test.ts` (flag wiring, no-blocking-read guard, decision normalization, config round-trip + domain rejection, dev-setup pin) with host-config isolation via a temp `GSTACK_HOME`. +- Rebaselined `test/parity-suite.test.ts` from the stale v1.44.1 anchor to v1.53.0.0. The 1.05 per-skill ratio is kept (only the anchor moved), absorbing legitimate v1.49–v1.53 planning-skill growth and clearing the 5 pre-existing parity failures noted in the v1.53.0.0 entry. Historical baselines retained for the v1→v2 audit trail. +- De-flaked `test/plan-tune.test.ts` "derive pushes scope_appetite up" (was ~25–50% flaky, worse on main): it now sets `GSTACK_QUESTION_LOG_NO_DERIVE=1` so gstack-question-log's fire-and-forget background `--derive` can't race the test's explicit one. + ## [1.53.0.0] - 2026-05-29 ## **Secrets, PII, and legal landmines get caught before they reach a public sink. One redaction engine now guards /spec, /ship, /cso, and the /document-* skills.** diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e6ee90c75..e67a307d1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -326,11 +326,13 @@ If you're using [Conductor](https://conductor.build) to run multiple Claude Code | Hook | Script | What it does | |------|--------|-------------| -| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills | +| `setup` | `bin/dev-setup` | Copies `.env` from main worktree, installs deps, symlinks skills, runs `./setup` non-interactively | | `archive` | `bin/dev-teardown` | Removes skill symlinks, cleans up `.claude/` directory | When Conductor creates a new workspace, `bin/dev-setup` runs automatically. 
It detects the main worktree (via `git worktree list`), copies your `.env` so API keys carry over, and sets up dev mode — no manual steps needed. +`bin/dev-setup` runs `./setup` fully non-interactively (it passes `--plan-tune-hooks=prompt` and closes stdin), so a forwarded Conductor TTY can never hang on a hidden setup prompt. It also never installs the plan-tune Claude Code hooks, which means a throwaway workspace can't rewrite your global `~/.claude/settings.json` to point at an ephemeral worktree path. To install the plan-tune hooks deliberately, run `./setup --plan-tune-hooks` outside dev-setup (or `gstack-config set plan_tune_hooks yes`). + **First-time setup:** Put your `ANTHROPIC_API_KEY` in `.env` in the main repo (see `.env.example`). Every Conductor workspace inherits it automatically. **`GSTACK_*` env prefix (Conductor-injected keys).** Conductor explicitly strips `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from every workspace's process env. The `.env` copy path doesn't restore them either — the strip happens after env inheritance. Users who want paid evals, `/sync-gbrain` embeddings, or `claude-agent-sdk` calls to work in a Conductor workspace must set `GSTACK_ANTHROPIC_API_KEY` and `GSTACK_OPENAI_API_KEY` in Conductor's workspace env config; Conductor passes those through untouched. On the gstack side, TS entry points import `lib/conductor-env-shim.ts` as a side effect, which promotes `GSTACK_FOO_API_KEY` to `FOO_API_KEY` when the canonical name is empty. If you add a new TS entry point that hits a paid API, add `import "../lib/conductor-env-shim";` to the top of the file. Today the shim is imported from `bin/gstack-gbrain-sync.ts`, `bin/gstack-model-benchmark`, `scripts/preflight-agent-sdk.ts`, and `test/helpers/e2e-helpers.ts`. 
diff --git a/TODOS.md b/TODOS.md index d3c32bc72..113223812 100644 --- a/TODOS.md +++ b/TODOS.md @@ -2,27 +2,22 @@ ## Test infrastructure -### P0: Rebaseline parity-suite (v1.44.1) — stale, 5 pre-existing failures +### ✅ DONE (v1.53.1.0): Rebaseline parity-suite (v1.44.1 → v1.53.0.0) -**What:** `test/parity-suite.test.ts` checks every skill's SKILL.md size against -the frozen `test/fixtures/parity-baseline-v1.44.1.json`. Five planning skills now -exceed the 1.05x ceiling: `plan-ceo-review` (1.052), `plan-eng-review` (1.062), -`plan-design-review` (1.068), `investigate` (1.053), `office-hours` (1.065). +**What:** `test/parity-suite.test.ts` checked every skill's SKILL.md size against +the frozen `test/fixtures/parity-baseline-v1.44.1.json`. Five planning skills had +crept past the 1.05x ceiling: `plan-ceo-review` (1.052), `plan-eng-review` (1.062), +`plan-design-review` (1.068), `investigate` (1.053), `office-hours` (1.065) — growth +from the brain-aware-planning releases (v1.49–v1.52) plus the v1.53 redaction guard. -**Why:** These grew during the brain-aware-planning releases (v1.49–v1.52) which -added the `BRAIN_PREFLIGHT`/`BRAIN_CACHE_REFRESH`/`BRAIN_WRITE_BACK` resolvers to -those skills. The v1.44.1 baseline was never regenerated, so it's four releases -stale. The failures are pre-existing on `origin/main` (proven: they fail with the -redaction branch absent). The active size gate (`skill-size-budget`, v1.47 baseline) -passes, and parity-suite is not in CI's `test:gate`, so nothing is blocked — but the -local `bun test` shows red until rebaselined. - -**How to start:** Either regenerate the fixture to a current baseline -(`bun run scripts/capture-baseline.ts ` and point the test at it), or bump the -per-skill ratio for the planning skills. Decide whether v1.44.1 should be retired in -favor of the v1.47 baseline the size-budget test already uses. - -**Depends on:** nothing. Standalone. 
+**Resolved:** Captured a fresh baseline at HEAD via +`bun run scripts/capture-baseline.ts --tag v1.53.0.0` and re-pointed the test at +`test/fixtures/parity-baseline-v1.53.0.0.json`. The per-skill 1.05 ratio is kept, so +future bloat is still caught — only the stale anchor moved. Mirrors the earlier +`skill-size-budget` rebase (v1.44.1 → v1.47.0.0). Historical v1.44.1 / v1.46.0.0 / +v1.47.0.0 baselines retained in `test/fixtures/` for the v1→v2 audit trail. The +captured skill bytes match `origin/main` exactly (the rebasing branch left every +SKILL.md untouched). `bun test` is green again. ## gbrowser memory follow-ups (filed via /plan-eng-review + /codex on the v1.49 leak-fix PR) diff --git a/VERSION b/VERSION index b8c5f21a9..69fadfb2d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.53.0.0 +1.53.1.0 diff --git a/bin/dev-setup b/bin/dev-setup index a5bd48275..0d8460f91 100755 --- a/bin/dev-setup +++ b/bin/dev-setup @@ -56,8 +56,23 @@ if [ ! -e "$AGENTS_LINK" ]; then ln -s "$REPO_ROOT" "$AGENTS_LINK" fi -# 6. Run setup via the symlink so it detects .claude/skills/ as its parent -"$GSTACK_LINK/setup" +# 6. Run setup via the symlink so it detects .claude/skills/ as its parent. +# +# Workspace/dev setup MUST be non-interactive: Conductor runs this under a +# forwarded pty, so any `read` in setup (skill-prefix prompt, plan-tune hook +# consent) would hang the workspace forever. Detaching stdin makes every setup +# prompt take its smart non-interactive default (flat skill names, etc.). +# +# `--plan-tune-hooks=prompt` is load-bearing, not redundant: stdin alone only +# suppresses the *prompt* branch. A saved `plan_tune_hooks: yes` or an exported +# GSTACK_PLAN_TUNE_HOOKS=yes would still resolve to "install" and rewrite the +# user's global ~/.claude/settings.json to point at THIS ephemeral worktree — +# which breaks once the workspace is deleted. 
The flag has highest precedence, +# so it pins resolution to "prompt", and closed stdin then makes prompt-mode a +# no-op skip (no install, no decline marker). A dev workspace must never mutate +# global settings.json. To install the hooks, run `./setup --plan-tune-hooks` +# directly (outside dev-setup). Saved prefix/other config preferences still apply. +"$GSTACK_LINK/setup" --plan-tune-hooks=prompt &2 VALUE="false" fi + if [ "$KEY" = "plan_tune_hooks" ] && [ "$VALUE" != "prompt" ] && [ "$VALUE" != "yes" ] && [ "$VALUE" != "no" ]; then + echo "Warning: plan_tune_hooks '$VALUE' not recognized. Valid values: prompt, yes, no. Using prompt." >&2 + VALUE="prompt" + fi mkdir -p "$STATE_DIR" # Write annotated header on first creation if [ ! -f "$CONFIG_FILE" ]; then @@ -315,7 +331,7 @@ case "${1:-}" in for KEY in proactive routing_declined telemetry auto_upgrade update_check \ skill_prefix checkpoint_mode checkpoint_push explain_level \ codex_reviews gstack_contributor skip_eng_review workspace_root \ - artifacts_sync_mode artifacts_sync_mode_prompted; do + artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do VALUE=$(grep -E "^${KEY}:" "$CONFIG_FILE" 2>/dev/null | tail -1 | awk '{print $2}' | tr -d '[:space:]' || true) SOURCE="default" if [ -n "$VALUE" ]; then @@ -331,7 +347,7 @@ case "${1:-}" in for KEY in proactive routing_declined telemetry auto_upgrade update_check \ skill_prefix checkpoint_mode checkpoint_push explain_level \ codex_reviews gstack_contributor skip_eng_review workspace_root \ - artifacts_sync_mode artifacts_sync_mode_prompted; do + artifacts_sync_mode artifacts_sync_mode_prompted plan_tune_hooks; do printf ' %-24s %s\n' "$KEY:" "$(lookup_default "$KEY")" done ;; diff --git a/package.json b/package.json index 75d05e770..65be6147f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "1.53.0.0", + "version": "1.53.1.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. 
One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", diff --git a/setup b/setup index 1fae915a9..9d6453882 100755 --- a/setup +++ b/setup @@ -82,6 +82,7 @@ SKILL_PREFIX=1 SKILL_PREFIX_FLAG=0 TEAM_MODE=0 NO_TEAM_MODE=0 +PLAN_TUNE_HOOKS_MODE="" # "" = resolve from env/config/prompt; "yes"/"no"/"prompt" = explicit flag value (outranks env + saved config) while [ $# -gt 0 ]; do case "$1" in --host) [ -z "$2" ] && echo "Missing value for --host (expected claude, codex, kiro, factory, opencode, openclaw, hermes, gbrain, or auto)" >&2 && exit 1; HOST="$2"; shift 2 ;; @@ -91,6 +92,9 @@ while [ $# -gt 0 ]; do --no-prefix) SKILL_PREFIX=0; SKILL_PREFIX_FLAG=1; shift ;; --team) TEAM_MODE=1; shift ;; --no-team) NO_TEAM_MODE=1; shift ;; + --plan-tune-hooks) PLAN_TUNE_HOOKS_MODE="yes"; shift ;; + --no-plan-tune-hooks) PLAN_TUNE_HOOKS_MODE="no"; shift ;; + --plan-tune-hooks=*) PLAN_TUNE_HOOKS_MODE="${1#--plan-tune-hooks=}"; shift ;; -q|--quiet) QUIET=1; shift ;; *) shift ;; esac @@ -1304,14 +1308,65 @@ if [ "$NO_TEAM_MODE" -ne 1 ] \ ALREADY_INSTALLED=1 fi + # Resolve the desired action without ever blocking. + # Priority: CLI flag (--plan-tune-hooks / --no-plan-tune-hooks) + # > env (GSTACK_PLAN_TUNE_HOOKS=yes|no) + # > saved config (plan_tune_hooks) + # > smart default ("prompt" → timed prompt on a real TTY, else skip). + # This guarantees scripted/workspace setups (conductor, CI) are never + # interactive: pass --no-plan-tune-hooks (or --plan-tune-hooks) and the + # block runs to completion with no `read`. + PT_DECISION="$PLAN_TUNE_HOOKS_MODE" + [ -z "$PT_DECISION" ] && PT_DECISION="${GSTACK_PLAN_TUNE_HOOKS:-}" + [ -z "$PT_DECISION" ] && PT_DECISION="$("$GSTACK_CONFIG" get plan_tune_hooks 2>/dev/null || true)" + # Normalize: strip whitespace + lowercase so "YES", "Yes", " yes" from a flag + # or env var all resolve correctly (an unrecognized opt-in must NOT silently + # downgrade to skip). Unknown values fall through to "prompt". 
+ PT_DECISION=$(printf '%s' "$PT_DECISION" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]') + case "$PT_DECISION" in + y|yes|true|install|on|1) PT_DECISION="yes" ;; + n|no|false|skip|off|0) PT_DECISION="no" ;; + *) PT_DECISION="prompt" ;; + esac + + _install_plan_tune_hooks() { + "$SETTINGS_HOOK" add-event \ + --event PostToolUse \ + --matcher '(AskUserQuestion|mcp__.*__AskUserQuestion)' \ + --command "$PLAN_TUNE_LOG_HOOK" \ + --source plan-tune-cathedral \ + --timeout 5 + "$SETTINGS_HOOK" add-event \ + --event PreToolUse \ + --matcher '(AskUserQuestion|mcp__.*__AskUserQuestion)' \ + --command "$PLAN_TUNE_PREF_HOOK" \ + --source plan-tune-cathedral \ + --timeout 5 + } + if [ "$ALREADY_INSTALLED" -eq 1 ]; then log "" log "Plan-tune hooks already installed. Run \`$SETTINGS_HOOK list-sources\` to inspect." + elif [ "$PT_DECISION" = "yes" ]; then + # Explicit opt-in (flag / env / config). Non-interactive. + _install_plan_tune_hooks + log "" + log "Plan-tune hooks installed. Run /plan-tune anytime to inspect." + touch "$PLAN_TUNE_INSTALL_MARKER" + elif [ "$PT_DECISION" = "no" ]; then + # Explicit opt-out (flag / env / config). Non-interactive. + log "" + log "Plan-tune cathedral hooks not installed (opted out)." + log "Install later with: ./setup --plan-tune-hooks (or /update-config)." + touch "$PLAN_TUNE_INSTALL_MARKER" elif [ -f "$PLAN_TUNE_INSTALL_MARKER" ]; then # Previously declined. Don't re-ask. User can re-enable via /update-config. : - elif [ -t 0 ] && [ -t 1 ]; then - # Interactive install with explicit consent + diff preview. + elif [ "$QUIET" -ne 1 ] && [ -t 0 ] && [ -t 1 ]; then + # Real interactive terminal with no recorded preference: ask, with explicit + # consent + diff preview. The read is time-bounded and defaults to "skip" so + # it can never hang an automated/forwarded TTY (the conductor failure mode). 
+ _PT_PROMPT_TIMEOUT=10 # single source of truth for the read + the countdown text log "" log "──────────────────────────────────────────────────────────" log "Plan-tune cathedral: install Claude Code hooks?" @@ -1336,33 +1391,32 @@ if [ "$NO_TEAM_MODE" -ne 1 ] \ log "Backup: settings.json.bak. written before any mutation." log "Rollback: $SETTINGS_HOOK rollback" log "" - printf "Install both hooks now? [y/N] " - read -r PLAN_TUNE_INSTALL_REPLY - if [ "$PLAN_TUNE_INSTALL_REPLY" = "y" ] || [ "$PLAN_TUNE_INSTALL_REPLY" = "Y" ]; then - "$SETTINGS_HOOK" add-event \ - --event PostToolUse \ - --matcher '(AskUserQuestion|mcp__.*__AskUserQuestion)' \ - --command "$PLAN_TUNE_LOG_HOOK" \ - --source plan-tune-cathedral \ - --timeout 5 - "$SETTINGS_HOOK" add-event \ - --event PreToolUse \ - --matcher '(AskUserQuestion|mcp__.*__AskUserQuestion)' \ - --command "$PLAN_TUNE_PREF_HOOK" \ - --source plan-tune-cathedral \ - --timeout 5 - log "" - log "Plan-tune hooks installed. Run /plan-tune anytime to inspect." - else - log "" - log "Skipped. Re-run ./setup or use /update-config to install later." - fi - touch "$PLAN_TUNE_INSTALL_MARKER" + printf "Install both hooks now? [y/N] (default: N, auto-skips in %ss): " "$_PT_PROMPT_TIMEOUT" + read -t "$_PT_PROMPT_TIMEOUT" -r PLAN_TUNE_INSTALL_REPLY /dev/null || PLAN_TUNE_INSTALL_REPLY="" + case "$PLAN_TUNE_INSTALL_REPLY" in + y|Y) + _install_plan_tune_hooks + log "" + log "Plan-tune hooks installed. Run /plan-tune anytime to inspect." + touch "$PLAN_TUNE_INSTALL_MARKER" + ;; + n|N) + log "" + log "Skipped. Re-run ./setup --plan-tune-hooks or use /update-config to install later." + touch "$PLAN_TUNE_INSTALL_MARKER" + ;; + *) + # Empty / timed out — treat as "ask me again" (don't persist a decline). + log "" + log "No response — skipped for now. Re-run ./setup --plan-tune-hooks to install." + ;; + esac else - # Non-interactive (CI, scripted setup). Don't prompt; print one-liner. + # Non-interactive (CI, scripted/workspace setup, quiet). 
Never prompt. log "" log "Plan-tune cathedral hooks not installed (non-interactive setup)." - log "Install with:" + log "Install with: ./setup --plan-tune-hooks" + log " (or set GSTACK_PLAN_TUNE_HOOKS=yes, or run the commands below)" log " $SETTINGS_HOOK add-event --event PostToolUse \\" log " --matcher '(AskUserQuestion|mcp__.*__AskUserQuestion)' \\" log " --command $PLAN_TUNE_LOG_HOOK --source plan-tune-cathedral --timeout 5" diff --git a/test/fixtures/parity-baseline-v1.53.0.0.json b/test/fixtures/parity-baseline-v1.53.0.0.json new file mode 100644 index 000000000..d3736bcc8 --- /dev/null +++ b/test/fixtures/parity-baseline-v1.53.0.0.json @@ -0,0 +1,633 @@ +{ + "tag": "v1.53.0.0", + "capturedAt": "2026-05-30T18:00:56.209Z", + "capturedFromCommit": "352f6a57", + "capturedFromBranch": "garrytan/setup-plan-tune-hooks-flags", + "totalSkills": 52, + "totalCorpusBytes": 3179282, + "estTotalCatalogTokens": 4116, + "topHeaviest": [ + { + "skill": "ship", + "skillMdBytes": 170491, + "skillMdLines": 3153, + "estTokens": 42623, + "tmplBytes": 53240, + "descriptionLen": 291, + "hasGateEval": true, + "hasPeriodicEval": true + }, + { + "skill": "plan-ceo-review", + "skillMdBytes": 137751, + "skillMdLines": 2290, + "estTokens": 34438, + "tmplBytes": 63461, + "descriptionLen": 794, + "hasGateEval": true, + "hasPeriodicEval": true + }, + { + "skill": "office-hours", + "skillMdBytes": 118280, + "skillMdLines": 2161, + "estTokens": 29570, + "tmplBytes": 55534, + "descriptionLen": 860, + "hasGateEval": true, + "hasPeriodicEval": false + }, + { + "skill": "plan-design-review", + "skillMdBytes": 112728, + "skillMdLines": 2019, + "estTokens": 28182, + "tmplBytes": 28717, + "descriptionLen": 218, + "hasGateEval": true, + "hasPeriodicEval": true + }, + { + "skill": "plan-devex-review", + "skillMdBytes": 111292, + "skillMdLines": 2212, + "estTokens": 27823, + "tmplBytes": 35773, + "descriptionLen": 250, + "hasGateEval": true, + "hasPeriodicEval": true + }, + { + "skill": "spec", + 
"skillMdBytes": 109688, + "skillMdLines": 2239, + "estTokens": 27422, + "tmplBytes": 30590, + "descriptionLen": 282, + "hasGateEval": true, + "hasPeriodicEval": false + }, + { + "skill": "plan-eng-review", + "skillMdBytes": 107655, + "skillMdLines": 1849, + "estTokens": 26914, + "tmplBytes": 26302, + "descriptionLen": 231, + "hasGateEval": true, + "hasPeriodicEval": true + }, + { + "skill": "design-review", + "skillMdBytes": 96618, + "skillMdLines": 1936, + "estTokens": 24155, + "tmplBytes": 11674, + "descriptionLen": 304, + "hasGateEval": true, + "hasPeriodicEval": false + }, + { + "skill": "review", + "skillMdBytes": 95012, + "skillMdLines": 1766, + "estTokens": 23753, + "tmplBytes": 14099, + "descriptionLen": 205, + "hasGateEval": true, + "hasPeriodicEval": false + }, + { + "skill": "land-and-deploy", + "skillMdBytes": 92850, + "skillMdLines": 1860, + "estTokens": 23213, + "tmplBytes": 48624, + "descriptionLen": 160, + "hasGateEval": true, + "hasPeriodicEval": false + } + ], + "skills": { + "autoplan": { + "skill": "autoplan", + "skillMdBytes": 91834, + "skillMdLines": 1788, + "estTokens": 22959, + "tmplBytes": 45271, + "descriptionLen": 366, + "hasGateEval": true, + "hasPeriodicEval": true + }, + "benchmark": { + "skill": "benchmark", + "skillMdBytes": 33266, + "skillMdLines": 747, + "estTokens": 8317, + "tmplBytes": 9378, + "descriptionLen": 213, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "benchmark-models": { + "skill": "benchmark-models", + "skillMdBytes": 29333, + "skillMdLines": 622, + "estTokens": 7333, + "tmplBytes": 6631, + "descriptionLen": 217, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "browse": { + "skill": "browse", + "skillMdBytes": 48151, + "skillMdLines": 930, + "estTokens": 12038, + "tmplBytes": 10805, + "descriptionLen": 181, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "canary": { + "skill": "canary", + "skillMdBytes": 48069, + "skillMdLines": 994, + "estTokens": 12017, + "tmplBytes": 8033, + 
"descriptionLen": 180, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "careful": { + "skill": "careful", + "skillMdBytes": 2551, + "skillMdLines": 68, + "estTokens": 638, + "tmplBytes": 2435, + "descriptionLen": 315, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "codex": { + "skill": "codex", + "skillMdBytes": 80584, + "skillMdLines": 1523, + "estTokens": 20146, + "tmplBytes": 34143, + "descriptionLen": 187, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "context-restore": { + "skill": "context-restore", + "skillMdBytes": 42457, + "skillMdLines": 852, + "estTokens": 10614, + "tmplBytes": 5255, + "descriptionLen": 238, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "context-save": { + "skill": "context-save", + "skillMdBytes": 46654, + "skillMdLines": 970, + "estTokens": 11664, + "tmplBytes": 9293, + "descriptionLen": 168, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "cso": { + "skill": "cso", + "skillMdBytes": 78849, + "skillMdLines": 1462, + "estTokens": 19712, + "tmplBytes": 35646, + "descriptionLen": 196, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "design-consultation": { + "skill": "design-consultation", + "skillMdBytes": 80186, + "skillMdLines": 1565, + "estTokens": 20047, + "tmplBytes": 25899, + "descriptionLen": 888, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "design-html": { + "skill": "design-html", + "skillMdBytes": 67511, + "skillMdLines": 1453, + "estTokens": 16878, + "tmplBytes": 22567, + "descriptionLen": 233, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "design-review": { + "skill": "design-review", + "skillMdBytes": 96618, + "skillMdLines": 1936, + "estTokens": 24155, + "tmplBytes": 11674, + "descriptionLen": 304, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "design-shotgun": { + "skill": "design-shotgun", + "skillMdBytes": 63800, + "skillMdLines": 1315, + "estTokens": 15950, + "tmplBytes": 13331, + "descriptionLen": 786, + "hasGateEval": 
false, + "hasPeriodicEval": false + }, + "devex-review": { + "skill": "devex-review", + "skillMdBytes": 65377, + "skillMdLines": 1237, + "estTokens": 16344, + "tmplBytes": 7984, + "descriptionLen": 201, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "document-generate": { + "skill": "document-generate", + "skillMdBytes": 54797, + "skillMdLines": 1194, + "estTokens": 13699, + "tmplBytes": 15939, + "descriptionLen": 334, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "document-release": { + "skill": "document-release", + "skillMdBytes": 59827, + "skillMdLines": 1248, + "estTokens": 14957, + "tmplBytes": 20974, + "descriptionLen": 192, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "freeze": { + "skill": "freeze", + "skillMdBytes": 3154, + "skillMdLines": 92, + "estTokens": 789, + "tmplBytes": 3038, + "descriptionLen": 503, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "gstack-upgrade": { + "skill": "gstack-upgrade", + "skillMdBytes": 10817, + "skillMdLines": 285, + "estTokens": 2704, + "tmplBytes": 10667, + "descriptionLen": 163, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "guard": { + "skill": "guard", + "skillMdBytes": 3297, + "skillMdLines": 91, + "estTokens": 824, + "tmplBytes": 3181, + "descriptionLen": 686, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "health": { + "skill": "health", + "skillMdBytes": 48880, + "skillMdLines": 1018, + "estTokens": 12220, + "tmplBytes": 11617, + "descriptionLen": 184, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "investigate": { + "skill": "investigate", + "skillMdBytes": 51373, + "skillMdLines": 1016, + "estTokens": 12843, + "tmplBytes": 11561, + "descriptionLen": 1379, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "ios-clean": { + "skill": "ios-clean", + "skillMdBytes": 42009, + "skillMdLines": 817, + "estTokens": 10502, + "tmplBytes": 3851, + "descriptionLen": 252, + "hasGateEval": false, + "hasPeriodicEval": false + }, + 
"ios-design-review": { + "skill": "ios-design-review", + "skillMdBytes": 42595, + "skillMdLines": 819, + "estTokens": 10649, + "tmplBytes": 4417, + "descriptionLen": 209, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "ios-fix": { + "skill": "ios-fix", + "skillMdBytes": 41724, + "skillMdLines": 815, + "estTokens": 10431, + "tmplBytes": 3574, + "descriptionLen": 187, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "ios-qa": { + "skill": "ios-qa", + "skillMdBytes": 48235, + "skillMdLines": 935, + "estTokens": 12059, + "tmplBytes": 10090, + "descriptionLen": 223, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "ios-sync": { + "skill": "ios-sync", + "skillMdBytes": 41701, + "skillMdLines": 808, + "estTokens": 10425, + "tmplBytes": 3544, + "descriptionLen": 269, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "land-and-deploy": { + "skill": "land-and-deploy", + "skillMdBytes": 92850, + "skillMdLines": 1860, + "estTokens": 23213, + "tmplBytes": 48624, + "descriptionLen": 160, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "landing-report": { + "skill": "landing-report", + "skillMdBytes": 44949, + "skillMdLines": 878, + "estTokens": 11237, + "tmplBytes": 6806, + "descriptionLen": 195, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "learn": { + "skill": "learn", + "skillMdBytes": 42686, + "skillMdLines": 895, + "estTokens": 10672, + "tmplBytes": 5594, + "descriptionLen": 178, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "make-pdf": { + "skill": "make-pdf", + "skillMdBytes": 29890, + "skillMdLines": 670, + "estTokens": 7473, + "tmplBytes": 5546, + "descriptionLen": 177, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "office-hours": { + "skill": "office-hours", + "skillMdBytes": 118280, + "skillMdLines": 2161, + "estTokens": 29570, + "tmplBytes": 55534, + "descriptionLen": 860, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "open-gstack-browser": { + "skill": "open-gstack-browser", 
+ "skillMdBytes": 47095, + "skillMdLines": 958, + "estTokens": 11774, + "tmplBytes": 7702, + "descriptionLen": 204, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "pair-agent": { + "skill": "pair-agent", + "skillMdBytes": 47903, + "skillMdLines": 1014, + "estTokens": 11976, + "tmplBytes": 8548, + "descriptionLen": 167, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "plan-ceo-review": { + "skill": "plan-ceo-review", + "skillMdBytes": 137751, + "skillMdLines": 2290, + "estTokens": 34438, + "tmplBytes": 63461, + "descriptionLen": 794, + "hasGateEval": true, + "hasPeriodicEval": true + }, + "plan-design-review": { + "skill": "plan-design-review", + "skillMdBytes": 112728, + "skillMdLines": 2019, + "estTokens": 28182, + "tmplBytes": 28717, + "descriptionLen": 218, + "hasGateEval": true, + "hasPeriodicEval": true + }, + "plan-devex-review": { + "skill": "plan-devex-review", + "skillMdBytes": 111292, + "skillMdLines": 2212, + "estTokens": 27823, + "tmplBytes": 35773, + "descriptionLen": 250, + "hasGateEval": true, + "hasPeriodicEval": true + }, + "plan-eng-review": { + "skill": "plan-eng-review", + "skillMdBytes": 107655, + "skillMdLines": 1849, + "estTokens": 26914, + "tmplBytes": 26302, + "descriptionLen": 231, + "hasGateEval": true, + "hasPeriodicEval": true + }, + "plan-tune": { + "skill": "plan-tune", + "skillMdBytes": 64017, + "skillMdLines": 1355, + "estTokens": 16004, + "tmplBytes": 26922, + "descriptionLen": 325, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "qa": { + "skill": "qa", + "skillMdBytes": 74827, + "skillMdLines": 1626, + "estTokens": 18707, + "tmplBytes": 12701, + "descriptionLen": 218, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "qa-only": { + "skill": "qa-only", + "skillMdBytes": 57385, + "skillMdLines": 1198, + "estTokens": 14346, + "tmplBytes": 3851, + "descriptionLen": 165, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "retro": { + "skill": "retro", + "skillMdBytes": 83853, + 
"skillMdLines": 1754, + "estTokens": 20963, + "tmplBytes": 42427, + "descriptionLen": 648, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "review": { + "skill": "review", + "skillMdBytes": 95012, + "skillMdLines": 1766, + "estTokens": 23753, + "tmplBytes": 14099, + "descriptionLen": 205, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "scrape": { + "skill": "scrape", + "skillMdBytes": 44605, + "skillMdLines": 891, + "estTokens": 11151, + "tmplBytes": 5220, + "descriptionLen": 167, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "setup-browser-cookies": { + "skill": "setup-browser-cookies", + "skillMdBytes": 26618, + "skillMdLines": 594, + "estTokens": 6655, + "tmplBytes": 2724, + "descriptionLen": 222, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "setup-deploy": { + "skill": "setup-deploy", + "skillMdBytes": 44891, + "skillMdLines": 923, + "estTokens": 11223, + "tmplBytes": 7780, + "descriptionLen": 197, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "setup-gbrain": { + "skill": "setup-gbrain", + "skillMdBytes": 81964, + "skillMdLines": 1777, + "estTokens": 20491, + "tmplBytes": 44851, + "descriptionLen": 323, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "ship": { + "skill": "ship", + "skillMdBytes": 170491, + "skillMdLines": 3153, + "estTokens": 42623, + "tmplBytes": 53240, + "descriptionLen": 291, + "hasGateEval": true, + "hasPeriodicEval": true + }, + "skillify": { + "skill": "skillify", + "skillMdBytes": 54498, + "skillMdLines": 1172, + "estTokens": 13625, + "tmplBytes": 15107, + "descriptionLen": 233, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "spec": { + "skill": "spec", + "skillMdBytes": 109688, + "skillMdLines": 2239, + "estTokens": 27422, + "tmplBytes": 30590, + "descriptionLen": 282, + "hasGateEval": true, + "hasPeriodicEval": false + }, + "sync-gbrain": { + "skill": "sync-gbrain", + "skillMdBytes": 53201, + "skillMdLines": 1070, + "estTokens": 13300, + "tmplBytes": 16077, + 
"descriptionLen": 299, + "hasGateEval": false, + "hasPeriodicEval": false + }, + "unfreeze": { + "skill": "unfreeze", + "skillMdBytes": 1504, + "skillMdLines": 49, + "estTokens": 376, + "tmplBytes": 1386, + "descriptionLen": 199, + "hasGateEval": false, + "hasPeriodicEval": false + } + } +} diff --git a/test/parity-suite.test.ts b/test/parity-suite.test.ts index 9d6da4868..32ce49f12 100644 --- a/test/parity-suite.test.ts +++ b/test/parity-suite.test.ts @@ -2,9 +2,16 @@ * Cathedral parity suite — gate-tier (free, structural + content checks). * * Runs every PARITY_INVARIANTS check against the current SKILL.md output - * vs the v1.44.1 baseline. Failures get an actionable, per-skill report + * vs the v1.53.0.0 baseline. Failures get an actionable, per-skill report * showing missing phrases, missing headings, and size ratios. * + * Baseline rebased v1.44.1 → v1.53.0.0: the brain-aware-planning releases + * (v1.49–v1.52) plus the v1.53 redaction guard pushed five planning skills + * past the 5% ratchet on the frozen v1.44.1 anchor. Rebasing absorbs that + * legitimate growth at HEAD while keeping the per-skill 1.05 ratio so future + * bloat is still caught. Historical v1.44.1 / v1.46.0.0 / v1.47.0.0 baselines + * are retained in test/fixtures/ for the v1→v2 audit trail. + * * Periodic-tier LLM-judge parity (paid) lands in Phase B (v2.0.0.0) * alongside the sections/ extraction. Plumbing is in parity-harness.ts. 
*/ @@ -16,9 +23,9 @@ import { runParityChecks, PARITY_INVARIANTS } from './helpers/parity-harness'; import type { ParityBaseline } from './helpers/capture-parity-baseline'; const REPO_ROOT = path.resolve(import.meta.dir, '..'); -const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.44.1.json'); +const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.53.0.0.json'); -describe('parity suite vs v1.44.1 baseline (gate, free)', () => { +describe('parity suite vs v1.53.0.0 baseline (gate, free)', () => { test('baseline exists', () => { expect(fs.existsSync(BASELINE_PATH)).toBe(true); }); @@ -43,7 +50,7 @@ describe('parity suite vs v1.44.1 baseline (gate, free)', () => { .map(d => ` ${d.skill}:\n - ${d.failures.join('\n - ')}`) .join('\n'); throw new Error( - `${report.failed} skill(s) failed parity checks vs v1.44.1:\n${failureMessages}`, + `${report.failed} skill(s) failed parity checks vs ${baseline.tag}:\n${failureMessages}`, ); }); }); diff --git a/test/plan-tune.test.ts b/test/plan-tune.test.ts index 9e83a0b4e..40a1465b6 100644 --- a/test/plan-tune.test.ts +++ b/test/plan-tune.test.ts @@ -535,7 +535,15 @@ describe('end-to-end pipeline (binaries working together)', () => { test('log many expand choices → derive pushes scope_appetite up', () => { const tmpHome = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-')); try { - const env = { ...process.env, GSTACK_HOME: tmpHome }; + // GSTACK_QUESTION_LOG_NO_DERIVE=1 suppresses gstack-question-log's + // fire-and-forget background `--derive` (it nohups one per write). Without + // it, the 5 rapid log writes spawn 5 racing background derives that collide + // with this test's explicit --derive below — a late background derive that + // only saw 3 entries can clobber developer-profile.json after the explicit + // one wrote sample_size=5, making the test flaky (~25-50% fail). The binary + // documents this flag for exactly this case. 
The explicit --derive still + // runs (it ignores the flag), so real derive behavior is still asserted. + const env = { ...process.env, GSTACK_HOME: tmpHome, GSTACK_QUESTION_LOG_NO_DERIVE: '1' }; const { spawnSync } = require('child_process'); const logBin = path.join(ROOT, 'bin', 'gstack-question-log'); const devBin = path.join(ROOT, 'bin', 'gstack-developer-profile'); diff --git a/test/setup-plan-tune-hooks-noninteractive.test.ts b/test/setup-plan-tune-hooks-noninteractive.test.ts new file mode 100644 index 000000000..9a0f03ded --- /dev/null +++ b/test/setup-plan-tune-hooks-noninteractive.test.ts @@ -0,0 +1,123 @@ +import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { execSync } from 'child_process'; + +// Regression guard for the conductor/workspace setup hang: +// `./setup` used a blocking `read -r` to ask "Install both hooks now? [y/N]". +// When setup runs under a forwarded/automated TTY (conductor workspace setup, +// CI with a pty) the read blocked forever. The fix moves the decision into +// flags + env + saved config with a non-blocking, time-bounded prompt fallback. +// +// These are static + binary-level assertions (free, <1s) — they lock in the +// contract without running the full (environment-mutating) setup script. 
+ +const ROOT = path.resolve(import.meta.dir, '..'); +const SETUP = path.join(ROOT, 'setup'); +const GSTACK_CONFIG = path.join(ROOT, 'bin', 'gstack-config'); + +const setupSrc = fs.readFileSync(SETUP, 'utf-8'); + +describe('setup: plan-tune hooks are non-interactive-safe', () => { + test('exposes --plan-tune-hooks / --no-plan-tune-hooks / =value flags', () => { + expect(setupSrc).toContain('--plan-tune-hooks)'); + expect(setupSrc).toContain('--no-plan-tune-hooks)'); + expect(setupSrc).toContain('--plan-tune-hooks=*)'); + }); + + test('resolution falls through env then saved config', () => { + expect(setupSrc).toContain('GSTACK_PLAN_TUNE_HOOKS'); + expect(setupSrc).toContain('get plan_tune_hooks'); + }); + + test('explicit yes/no decisions never reach a prompt', () => { + // The yes/no branches must short-circuit before the interactive branch. + const yesIdx = setupSrc.indexOf('PT_DECISION" = "yes"'); + const noIdx = setupSrc.indexOf('PT_DECISION" = "no"'); + const promptIdx = setupSrc.indexOf('Install both hooks now?'); + expect(yesIdx).toBeGreaterThan(-1); + expect(noIdx).toBeGreaterThan(-1); + expect(yesIdx).toBeLessThan(promptIdx); + expect(noIdx).toBeLessThan(promptIdx); + }); + + test('the interactive prompt is time-bounded (cannot hang)', () => { + // No bare blocking read for the plan-tune reply. + expect(setupSrc).not.toMatch(/read -r PLAN_TUNE_INSTALL_REPLY\b/); + // It must use a timed read from the controlling tty with an empty fallback. + // The timeout may be a literal or a named variable (e.g. "$_PT_PROMPT_TIMEOUT"). + expect(setupSrc).toMatch(/read -t (?:\d+|"?\$\{?\w+\}?"?) -r PLAN_TUNE_INSTALL_REPLY <\/dev\/tty/); + }); + + test('interactive prompt is gated on a real TTY and non-quiet', () => { + // The prompt branch requires both stdin+stdout TTYs and not --quiet. 
+ expect(setupSrc).toMatch(/\[ "\$QUIET" -ne 1 \] && \[ -t 0 \] && \[ -t 1 \]/); + }); + + test('decision input is normalized (lowercase + whitespace-stripped)', () => { + // "YES" / " yes" from a flag/env must not silently downgrade to skip. + expect(setupSrc).toMatch(/tr '\[:upper:\]' '\[:lower:\]'/); + expect(setupSrc).toMatch(/PT_DECISION=\$\(printf .* tr/); + }); +}); + +describe('dev-setup: never silently mutates global settings.json', () => { + const DEV_SETUP = path.join(ROOT, 'bin', 'dev-setup'); + const devSetupSrc = fs.readFileSync(DEV_SETUP, 'utf-8'); + + test('runs setup with stdin detached AND --plan-tune-hooks=prompt pin', () => { + // stdin alone only suppresses the prompt branch; the flag (highest + // precedence) is what stops a saved `plan_tune_hooks: yes` / env opt-in + // from rewriting global hooks to the ephemeral worktree path. + expect(devSetupSrc).toMatch(/setup" --plan-tune-hooks=prompt <\/dev\/null/); + }); +}); + +describe('gstack-config: plan_tune_hooks key', () => { + // Isolate state: gstack-config reads $GSTACK_HOME/config.yaml. Point it at a + // fresh temp dir so `get` returns the built-in default rather than whatever + // the host machine has in ~/.gstack/config.yaml (which would make the + // default-value assertion non-deterministic). 
+ let tmpHome: string; + let env: NodeJS.ProcessEnv; + + beforeAll(() => { + tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-cfg-test-')); + env = { ...process.env, GSTACK_HOME: tmpHome }; + }); + + afterAll(() => { + fs.rmSync(tmpHome, { recursive: true, force: true }); + }); + + test('default is "prompt"', () => { + const out = execSync(`${GSTACK_CONFIG} get plan_tune_hooks`, { + encoding: 'utf-8', + env, + }).trim(); + expect(out).toBe('prompt'); + }); + + test('appears in defaults and list output', () => { + const defaults = execSync(`${GSTACK_CONFIG} defaults`, { encoding: 'utf-8', env }); + expect(defaults).toContain('plan_tune_hooks'); + const list = execSync(`${GSTACK_CONFIG} list`, { encoding: 'utf-8', env }); + expect(list).toContain('plan_tune_hooks'); + }); + + test('accepts valid values (round-trips yes/no/prompt)', () => { + for (const v of ['yes', 'no', 'prompt']) { + execSync(`${GSTACK_CONFIG} set plan_tune_hooks ${v}`, { encoding: 'utf-8', env }); + const got = execSync(`${GSTACK_CONFIG} get plan_tune_hooks`, { encoding: 'utf-8', env }).trim(); + expect(got).toBe(v); + } + }); + + test('rejects out-of-domain values (warns + falls back to prompt)', () => { + const res = execSync(`${GSTACK_CONFIG} set plan_tune_hooks maybe 2>&1`, { encoding: 'utf-8', env }); + expect(res.toLowerCase()).toContain('not recognized'); + const got = execSync(`${GSTACK_CONFIG} get plan_tune_hooks`, { encoding: 'utf-8', env }).trim(); + expect(got).toBe('prompt'); + }); +});