diff --git a/CHANGELOG.md b/CHANGELOG.md index 56620db7..acbc55cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## [0.11.20.0] - 2026-03-26 + +### Added + +- **GitLab support for `/retro` and `/ship`.** You can now run `/ship` on GitLab repos — it creates merge requests via `glab mr create` instead of `gh pr create`. `/retro` detects default branches on both platforms. All 11 skills using `BASE_BRANCH_DETECT` automatically get GitHub, GitLab, and git-native fallback detection. +- **GitHub Enterprise and self-hosted GitLab detection.** If the remote URL doesn't match `github.com` or `gitlab`, gstack checks `gh auth status` / `glab auth status` to detect authenticated platforms — no manual config needed. +- **`/document-release` works on GitLab.** After `/ship` creates a merge request, the auto-invoked `/document-release` reads and updates the MR body via `glab` instead of failing silently. +- **GitLab safety gate for `/land-and-deploy`.** Instead of silently failing on GitLab repos, `/land-and-deploy` now stops early with a clear message that GitLab merge support is not yet implemented. + +### Fixed + +- **Deduplicated gen-skill-docs resolvers.** The template generator had duplicate inline resolver functions that shadowed the modular versions, causing generated SKILL.md files to miss recent resolver updates. + ## [0.11.19.0] - 2026-03-24 ### Fixed diff --git a/SKILL.md b/SKILL.md index f6d2831e..5f8d0f33 100644 --- a/SKILL.md +++ b/SKILL.md @@ -27,9 +27,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -47,8 +49,11 @@ echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -97,112 +102,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -Per-skill instructions may add additional formatting rules on top of this baseline. +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/TODOS.md b/TODOS.md index 1c4b88ed..3ee995b6 100644 --- a/TODOS.md +++ b/TODOS.md @@ -168,6 +168,18 @@ Linux cookie import shipped in v0.11.11.0 (Wave 3). Supports Chrome, Chromium, B ## Ship +### GitLab support for /land-and-deploy + +**What:** Add GitLab MR merge + CI polling support to `/land-and-deploy` skill. Currently uses `gh pr view`, `gh pr checks`, `gh pr merge`, and `gh run list/view` in 15+ places — each needs a GitLab conditional path using `glab ci status`, `glab mr merge`, etc. + +**Why:** Without this, GitLab users can `/ship` (create MR) but can't `/land-and-deploy` (merge + verify). Completes the GitLab story end-to-end. + +**Context:** `/retro`, `/ship`, and `/document-release` now support GitLab via the multi-platform `BASE_BRANCH_DETECT` resolver. `/land-and-deploy` has deeper GitHub-specific semantics (merge queues, required checks via `gh pr checks`, deploy workflow polling) that have different shapes on GitLab. The `glab` CLI (v1.90.0) supports `glab mr merge`, `glab ci status`, `glab ci view` but with different output formats and no merge queue concept. + +**Effort:** L +**Priority:** P2 +**Depends on:** None (BASE_BRANCH_DETECT multi-platform resolver is already done) + ### Ship log — persistent record of /ship runs **What:** Append structured JSON entry to `.gstack/ship-log.json` at end of every /ship run (version, date, branch, PR URL, review findings, Greptile stats, todos completed, test results). diff --git a/VERSION b/VERSION index d20322e5..508c698a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.11.19.0 +0.11.20.0 diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index 14874900..d69fc285 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -36,9 +36,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -56,8 +58,11 @@ echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(bas for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -106,6 +111,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -113,7 +139,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -121,97 +146,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -302,22 +284,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- diff --git a/benchmark/SKILL.md b/benchmark/SKILL.md index d9138a03..d6d65ae2 100644 --- a/benchmark/SKILL.md +++ b/benchmark/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(ba for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,112 +104,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -Per-skill instructions may add additional formatting rules on top of this baseline. +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/browse/SKILL.md b/browse/SKILL.md index 91845a99..c52dcaa5 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,112 +104,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -Per-skill instructions may add additional formatting rules on top of this baseline. +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/canary/SKILL.md b/canary/SKILL.md index fe889c74..08903c71 100644 --- a/canary/SKILL.md +++ b/canary/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"canary","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,6 +104,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -106,7 +132,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -114,97 +139,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -314,22 +278,42 @@ If `NEEDS_SETUP`: 2. Run: `cd && ./setup` 3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash` -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- diff --git a/codex/SKILL.md b/codex/SKILL.md index 8bce22e5..6e19cd04 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -30,9 +30,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -50,8 +52,11 @@ echo '{"skill":"codex","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basena for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -100,6 +105,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -107,7 +133,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -115,97 +140,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -296,22 +278,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- diff --git a/cso/SKILL.md b/cso/SKILL.md index c023e1eb..3f092fd6 100644 --- a/cso/SKILL.md +++ b/cso/SKILL.md @@ -33,9 +33,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -53,8 +55,11 @@ echo '{"skill":"cso","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -103,6 +108,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -110,7 +136,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -118,97 +143,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index e265f26d..68cdd346 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -34,9 +34,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -54,8 +56,11 @@ echo '{"skill":"design-consultation","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","re for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -104,6 +109,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -111,7 +137,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -119,97 +144,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -454,7 +436,7 @@ codex exec "Given this product context, propose a complete design direction: - Differentiation: 2 deliberate departures from category norms - Anti-slop: no purple gradients, no 3-column icon grids, no centered everything, no decorative blobs -Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it." -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_DESIGN" +Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_DESIGN" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash diff --git a/design-review/SKILL.md b/design-review/SKILL.md index 38341033..5ebc9d1f 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -34,9 +34,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -54,8 +56,11 @@ echo '{"skill":"design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"' for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -104,6 +109,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -111,7 +137,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -119,97 +144,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -733,7 +715,7 @@ The test: would a human designer at a respected studio ever ship this? **10. Performance as Design** (6 items) - LCP < 2.0s (web apps), < 1.5s (informational sites) - CLS < 0.1 (no visible layout shifts during load) -- Skeleton quality: shapes match real content, shimmer animation +- Skeleton quality: shapes match real content layout, shimmer animation - Images: `loading="lazy"`, width/height dimensions set, WebP/AVIF format - Fonts: `font-display: swap`, preconnect to CDN origins - No visible font swap flash (FOUT) — critical fonts preloaded @@ -994,7 +976,7 @@ HARD REJECTION — flag if ANY apply: 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout -Be specific. Reference file:line for every finding." -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" +Be specific. Reference file:line for every finding." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash diff --git a/document-release/SKILL.md b/document-release/SKILL.md index 1364e4d9..ee08867a 100644 --- a/document-release/SKILL.md +++ b/document-release/SKILL.md @@ -31,9 +31,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -51,8 +53,11 @@ echo '{"skill":"document-release","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo" for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -101,6 +106,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -108,7 +134,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -116,97 +141,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -297,22 +261,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- @@ -585,14 +569,20 @@ EOF git push ``` -**PR body update (idempotent, race-safe):** +**PR/MR body update (idempotent, race-safe):** -1. Read the existing PR body into a PID-unique tempfile: +1. Read the existing PR/MR body into a PID-unique tempfile (use the platform detected in Step 0): +**If GitHub:** ```bash gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md ``` +**If GitLab:** +```bash +glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('description',''))" > /tmp/gstack-pr-body-$$.md +``` + 2. If the tempfile already contains a `## Documentation` section, replace that section with the updated content. If it does not contain one, append a `## Documentation` section at the end. @@ -602,18 +592,28 @@ gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md 4. Write the updated body back: +**If GitHub:** ```bash gh pr edit --body-file /tmp/gstack-pr-body-$$.md ``` +**If GitLab:** +Read the contents of `/tmp/gstack-pr-body-$$.md` using the Read tool, then pass it to `glab mr update` using a heredoc to avoid shell metacharacter issues: +```bash +glab mr update -d "$(cat <<'MRBODY' + +MRBODY +)" +``` + 5. Clean up the tempfile: ```bash rm -f /tmp/gstack-pr-body-$$.md ``` -6. If `gh pr view` fails (no PR exists): skip with message "No PR found — skipping body update." -7. If `gh pr edit` fails: warn "Could not update PR body — documentation changes are in the +6. If `gh pr view` / `glab mr view` fails (no PR/MR exists): skip with message "No PR/MR found — skipping body update." +7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the commit." and continue. **Structured doc health summary (final output):** diff --git a/document-release/SKILL.md.tmpl b/document-release/SKILL.md.tmpl index 30cdee0c..5d236ae2 100644 --- a/document-release/SKILL.md.tmpl +++ b/document-release/SKILL.md.tmpl @@ -291,14 +291,20 @@ EOF git push ``` -**PR body update (idempotent, race-safe):** +**PR/MR body update (idempotent, race-safe):** -1. Read the existing PR body into a PID-unique tempfile: +1. Read the existing PR/MR body into a PID-unique tempfile (use the platform detected in Step 0): +**If GitHub:** ```bash gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md ``` +**If GitLab:** +```bash +glab mr view -F json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('description',''))" > /tmp/gstack-pr-body-$$.md +``` + 2. If the tempfile already contains a `## Documentation` section, replace that section with the updated content. If it does not contain one, append a `## Documentation` section at the end. @@ -308,18 +314,28 @@ gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md 4. Write the updated body back: +**If GitHub:** ```bash gh pr edit --body-file /tmp/gstack-pr-body-$$.md ``` +**If GitLab:** +Read the contents of `/tmp/gstack-pr-body-$$.md` using the Read tool, then pass it to `glab mr update` using a heredoc to avoid shell metacharacter issues: +```bash +glab mr update -d "$(cat <<'MRBODY' + +MRBODY +)" +``` + 5. Clean up the tempfile: ```bash rm -f /tmp/gstack-pr-body-$$.md ``` -6. If `gh pr view` fails (no PR exists): skip with message "No PR found — skipping body update." -7. If `gh pr edit` fails: warn "Could not update PR body — documentation changes are in the +6. If `gh pr view` / `glab mr view` fails (no PR/MR exists): skip with message "No PR/MR found — skipping body update." +7. If `gh pr edit` / `glab mr update` fails: warn "Could not update PR/MR body — documentation changes are in the commit." and continue. **Structured doc health summary (final output):** diff --git a/investigate/SKILL.md b/investigate/SKILL.md index b1df5ca2..4d1cb933 100644 --- a/investigate/SKILL.md +++ b/investigate/SKILL.md @@ -45,9 +45,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -65,8 +67,11 @@ echo '{"skill":"investigate","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$( for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -115,6 +120,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -122,7 +148,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -130,97 +155,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/land-and-deploy/SKILL.md b/land-and-deploy/SKILL.md index 85e52e4e..131c1f2d 100644 --- a/land-and-deploy/SKILL.md +++ b/land-and-deploy/SKILL.md @@ -28,9 +28,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -48,8 +50,11 @@ echo '{"skill":"land-and-deploy","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo": for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -98,6 +103,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -105,7 +131,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -113,97 +138,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -313,25 +295,47 @@ If `NEEDS_SETUP`: 2. Run: `cd && ./setup` 3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash` -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- +**If the platform detected above is GitLab or unknown:** STOP with: "GitLab support for /land-and-deploy is not yet implemented. Run `/ship` to create the MR, then merge manually via the GitLab web UI." Do not proceed. + # /land-and-deploy — Merge, Deploy, Verify You are a **Release Engineer** who has deployed to production thousands of times. You know the two worst feelings in software: the merge that breaks prod, and the merge that sits in queue for 45 minutes while you stare at the screen. Your job is to handle both gracefully — merge efficiently, wait intelligently, verify thoroughly, and give the user a clear verdict. diff --git a/land-and-deploy/SKILL.md.tmpl b/land-and-deploy/SKILL.md.tmpl index a82a75a2..7fcf6797 100644 --- a/land-and-deploy/SKILL.md.tmpl +++ b/land-and-deploy/SKILL.md.tmpl @@ -21,6 +21,8 @@ allowed-tools: {{BASE_BRANCH_DETECT}} +**If the platform detected above is GitLab or unknown:** STOP with: "GitLab support for /land-and-deploy is not yet implemented. Run `/ship` to create the MR, then merge manually via the GitLab web UI." Do not proceed. + # /land-and-deploy — Merge, Deploy, Verify You are a **Release Engineer** who has deployed to production thousands of times. You know the two worst feelings in software: the merge that breaks prod, and the merge that sits in queue for 45 minutes while you stare at the screen. Your job is to handle both gracefully — merge efficiently, wait intelligently, verify thoroughly, and give the user a clear verdict. diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index 8bf43efa..9e2debd4 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -36,9 +36,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -56,8 +58,11 @@ echo '{"skill":"office-hours","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -106,6 +111,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -113,7 +139,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -121,97 +146,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -688,7 +670,7 @@ Write the full prompt (context block + instructions) to this file. Use the mode- ```bash TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) -codex exec "$(cat "$CODEX_PROMPT_FILE")" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH" +codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -839,7 +821,7 @@ If user chooses A, launch both voices simultaneously: 1. **Codex** (via Bash, `model_reasoning_effort="medium"`): ```bash TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) -codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" +codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" ``` Use a 5-minute timeout (`timeout: 300000`). After completion: `cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"` diff --git a/package.json b/package.json index f666c9af..130af28f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "0.11.19.0", + "version": "0.11.20.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index e5d4af6a..d05be05f 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -34,9 +34,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -54,8 +56,11 @@ echo '{"skill":"plan-ceo-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo": for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -104,6 +109,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -111,7 +137,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -119,97 +144,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -300,22 +282,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- @@ -1045,7 +1047,7 @@ THE PLAN: ```bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV" +codex exec "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index 9b45e8c8..5960ea18 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -32,9 +32,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -52,8 +54,11 @@ echo '{"skill":"plan-design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","rep for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -102,6 +107,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -109,7 +135,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -117,97 +142,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -298,22 +280,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- @@ -468,7 +470,7 @@ HARD RULES — first classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, the - APP UI: Calm surface hierarchy, dense but readable, utility language, minimal chrome - UNIVERSAL: CSS variables for colors, no default font stacks, one job per section, cards earn existence -For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. Be opinionated. No hedging." -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" +For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. Be opinionated. No hedging." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index 53bf7112..0b61d5f6 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -33,9 +33,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -53,8 +55,11 @@ echo '{"skill":"plan-eng-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo": for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -103,6 +108,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -110,7 +136,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -118,97 +143,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -723,7 +705,7 @@ THE PLAN: ```bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV" +codex exec "" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md index 6736211e..1129d52a 100644 --- a/qa-only/SKILL.md +++ b/qa-only/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"qa-only","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(base for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,6 +104,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -106,7 +132,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -114,97 +139,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/qa/SKILL.md b/qa/SKILL.md index 290c89af..af9279c5 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -35,9 +35,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -55,8 +57,11 @@ echo '{"skill":"qa","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -105,6 +110,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -112,7 +138,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -120,97 +145,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -301,22 +283,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- diff --git a/retro/SKILL.md b/retro/SKILL.md index 806ffde3..8741fb30 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"retro","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basena for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,6 +104,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -106,7 +132,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -114,97 +139,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -295,13 +259,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Detect default branch +## Step 0: Detect platform and base branch -Before gathering data, detect the repo's default branch name: -`gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +First, detect the git hosting platform from the remote URL: -If this fails, fall back to `main`. Use the detected name wherever the instructions -say `origin/` below. +```bash +git remote get-url origin 2>/dev/null +``` + +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) + +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. + +Print the detected base branch name. In every subsequent `git diff`, `git log`, +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- @@ -372,8 +365,8 @@ git log origin/ --since="" --format="%at|%aN|%ai|%s" | sort -n # 4. Files most frequently changed (hotspot analysis) git log origin/ --since="" --format="" --name-only | grep -v '^$' | sort | uniq -c | sort -rn -# 5. PR numbers from commit messages (extract #NNN patterns) -git log origin/ --since="" --format="%s" | grep -oE '#[0-9]+' | sed 's/^#//' | sort -n | uniq | sed 's/^/#/' +# 5. PR/MR numbers from commit messages (GitHub #NNN, GitLab !NNN) +git log origin/ --since="" --format="%s" | grep -oE '[#!][0-9]+' | sort -t'#' -k1 | uniq # 6. Per-author file hotspots (who touches what) git log origin/ --since="" --format="AUTHOR:%aN" --name-only @@ -866,8 +859,8 @@ git -C log origin/$DEFAULT --since="T00:00:00" --format="%at| # Per-author commit counts git -C shortlog origin/$DEFAULT --since="T00:00:00" -sn --no-merges -# PR numbers from commit messages -git -C log origin/$DEFAULT --since="T00:00:00" --format="%s" | grep -oE '#[0-9]+' | sort -n | uniq +# PR/MR numbers from commit messages (GitHub #NNN, GitLab !NNN) +git -C log origin/$DEFAULT --since="T00:00:00" --format="%s" | grep -oE '[#!][0-9]+' | sort -t'#' -k1 | uniq ``` For repos that fail (deleted paths, network errors): skip and note "N repos could not be reached." @@ -945,7 +938,7 @@ align cleanly. Never truncate project names. ║ • [1-line description of second theme] ║ • [1-line description of third theme] ║ -║ Powered by gstack · github.com/garrytan/gstack +║ Powered by gstack ╚═══════════════════════════════════════════════════════════════ ``` @@ -1074,7 +1067,7 @@ Use the Write tool to save JSON to `~/.gstack/retros/global-${today}-${next}.jso "projects": [ { "name": "gstack", - "remote": "https://github.com/garrytan/gstack", + "remote": "", "commits": 47, "insertions": 3200, "deletions": 800, diff --git a/retro/SKILL.md.tmpl b/retro/SKILL.md.tmpl index dae967ef..cc4f53fa 100644 --- a/retro/SKILL.md.tmpl +++ b/retro/SKILL.md.tmpl @@ -18,15 +18,7 @@ allowed-tools: {{PREAMBLE}} -## Detect default branch - -Before gathering data, detect the repo's default branch name: -`gh repo view --json defaultBranchRef -q .defaultBranchRef.name` - -If this fails, fall back to `main`. Use the detected name wherever the instructions -say `origin/` below. - ---- +{{BASE_BRANCH_DETECT}} # /retro — Weekly Engineering Retrospective @@ -95,8 +87,8 @@ git log origin/ --since="" --format="%at|%aN|%ai|%s" | sort -n # 4. Files most frequently changed (hotspot analysis) git log origin/ --since="" --format="" --name-only | grep -v '^$' | sort | uniq -c | sort -rn -# 5. PR numbers from commit messages (extract #NNN patterns) -git log origin/ --since="" --format="%s" | grep -oE '#[0-9]+' | sed 's/^#//' | sort -n | uniq | sed 's/^/#/' +# 5. PR/MR numbers from commit messages (GitHub #NNN, GitLab !NNN) +git log origin/ --since="" --format="%s" | grep -oE '[#!][0-9]+' | sort -t'#' -k1 | uniq # 6. Per-author file hotspots (who touches what) git log origin/ --since="" --format="AUTHOR:%aN" --name-only @@ -589,8 +581,8 @@ git -C log origin/$DEFAULT --since="T00:00:00" --format="%at| # Per-author commit counts git -C shortlog origin/$DEFAULT --since="T00:00:00" -sn --no-merges -# PR numbers from commit messages -git -C log origin/$DEFAULT --since="T00:00:00" --format="%s" | grep -oE '#[0-9]+' | sort -n | uniq +# PR/MR numbers from commit messages (GitHub #NNN, GitLab !NNN) +git -C log origin/$DEFAULT --since="T00:00:00" --format="%s" | grep -oE '[#!][0-9]+' | sort -t'#' -k1 | uniq ``` For repos that fail (deleted paths, network errors): skip and note "N repos could not be reached." @@ -668,7 +660,7 @@ align cleanly. Never truncate project names. ║ • [1-line description of second theme] ║ • [1-line description of third theme] ║ -║ Powered by gstack · github.com/garrytan/gstack +║ Powered by gstack ╚═══════════════════════════════════════════════════════════════ ``` @@ -797,7 +789,7 @@ Use the Write tool to save JSON to `~/.gstack/retros/global-${today}-${next}.jso "projects": [ { "name": "gstack", - "remote": "https://github.com/garrytan/gstack", + "remote": "", "commits": 47, "insertions": 3200, "deletions": 800, diff --git a/review/SKILL.md b/review/SKILL.md index a58e8627..3c28ed6c 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -32,9 +32,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -52,8 +54,11 @@ echo '{"skill":"review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -102,6 +107,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -109,7 +135,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -117,97 +142,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -298,22 +280,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- @@ -573,7 +575,7 @@ If Codex is available, run a lightweight design check on the diff: ```bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" +codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -762,6 +764,21 @@ If no test framework detected → include gaps as INFORMATIONAL findings only, n **Diff is test-only changes:** Skip Step 4.75 entirely: "No new application code paths to audit." +### Coverage Warning + +After producing the coverage diagram, check the coverage percentage. Read CLAUDE.md for a `## Test Coverage` section with a `Minimum:` field. If not found, use default: 60%. + +If coverage is below the minimum threshold, output a prominent warning **before** the regular review findings: + +``` +⚠️ COVERAGE WARNING: AI-assessed coverage is {X}%. {N} code paths untested. +Consider writing tests before running /ship. +``` + +This is INFORMATIONAL — does not block /review. But it makes low coverage visible early so the developer can address it before reaching the /ship coverage gate. + +If coverage percentage cannot be determined, skip the warning silently. + This step subsumes the "Test Gaps" category from Pass 2 — do not duplicate findings between the checklist Test Gaps item and this coverage diagram. Include any coverage gaps alongside the findings from Step 4 and Step 4.5. They follow the same Fix-First flow — gaps are INFORMATIONAL findings. --- @@ -916,7 +933,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal ```bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV" +codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. After the command completes, read stderr: diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 8d483dad..81cd7476 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -34,35 +34,7 @@ const HOST: Host = (() => { throw new Error(`Unknown host: ${val}. Use claude, codex, or agents.`); })(); -interface HostPaths { - skillRoot: string; - localSkillRoot: string; - binDir: string; - browseDir: string; -} - -const HOST_PATHS: Record = { - claude: { - skillRoot: '~/.claude/skills/gstack', - localSkillRoot: '.claude/skills/gstack', - binDir: '~/.claude/skills/gstack/bin', - browseDir: '~/.claude/skills/gstack/browse/dist', - }, - codex: { - skillRoot: '$GSTACK_ROOT', - localSkillRoot: '.agents/skills/gstack', - binDir: '$GSTACK_BIN', - browseDir: '$GSTACK_BROWSE', - }, -}; - -interface TemplateContext { - skillName: string; - tmplPath: string; - benefitsFrom?: string[]; - host: Host; - paths: HostPaths; -} +// HostPaths, HOST_PATHS, and TemplateContext imported from ./resolvers/types (line 7-8) // ─── Shared Design Constants ──────────────────────────────── @@ -620,22 +592,42 @@ If \`NEEDS_SETUP\`: } function generateBaseBranchDetect(_ctx: TemplateContext): string { - return `## Step 0: Detect base branch + return `## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - \`gh pr view --json baseRefName -q .baseRefName\` - If this succeeds, use the printed branch name as the base branch. +\`\`\`bash +git remote get-url origin 2>/dev/null +\`\`\` -2. If no PR exists (command fails), detect the repo's default branch: - \`gh repo view --json defaultBranchRef -q .defaultBranchRef.name\` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - \`gh auth status 2>/dev/null\` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - \`glab auth status 2>/dev/null\` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to \`main\`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. \`gh pr view --json baseRefName -q .baseRefName\` — if succeeds, use it +2. \`gh repo view --json defaultBranchRef -q .defaultBranchRef.name\` — if succeeds, use it + +**If GitLab:** +1. \`glab mr view -F json 2>/dev/null\` and extract the \`target_branch\` field — if succeeds, use it +2. \`glab repo view -F json 2>/dev/null\` and extract the \`default_branch\` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. \`git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'\` +2. If that fails: \`git rev-parse --verify origin/main 2>/dev/null\` → use \`main\` +3. If that fails: \`git rev-parse --verify origin/master 2>/dev/null\` → use \`master\` + +If all fail, fall back to \`main\`. Print the detected base branch name. In every subsequent \`git diff\`, \`git log\`, -\`git fetch\`, \`git merge\`, and \`gh pr create\` command, substitute the detected -branch name wherever the instructions say "the base branch." +\`git fetch\`, \`git merge\`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or \`\`. ---`; } @@ -2793,46 +2785,7 @@ ${slopItems} Source: [OpenAI "Designing Delightful Frontends with GPT-5.4"](https://developers.openai.com/blog/designing-delightful-frontends-with-gpt-5-4) (Mar 2026) + gstack design methodology.`; } -function generateSlugEval(ctx: TemplateContext): string { - return `eval "$(${ctx.paths.binDir}/gstack-slug 2>/dev/null)"`; -} - -function generateSlugSetup(ctx: TemplateContext): string { - return `eval "$(${ctx.paths.binDir}/gstack-slug 2>/dev/null)" && mkdir -p ~/.gstack/projects/$SLUG`; -} - -const RESOLVERS: Record string> = { - SLUG_EVAL: generateSlugEval, - SLUG_SETUP: generateSlugSetup, - COMMAND_REFERENCE: generateCommandReference, - SNAPSHOT_FLAGS: generateSnapshotFlags, - PREAMBLE: generatePreamble, - BROWSE_SETUP: generateBrowseSetup, - BASE_BRANCH_DETECT: generateBaseBranchDetect, - QA_METHODOLOGY: generateQAMethodology, - DESIGN_METHODOLOGY: generateDesignMethodology, - DESIGN_HARD_RULES: generateDesignHardRules, - DESIGN_OUTSIDE_VOICES: generateDesignOutsideVoices, - DESIGN_REVIEW_LITE: generateDesignReviewLite, - REVIEW_DASHBOARD: generateReviewDashboard, - PLAN_FILE_REVIEW_REPORT: generatePlanFileReviewReport, - TEST_BOOTSTRAP: generateTestBootstrap, - TEST_COVERAGE_AUDIT_PLAN: generateTestCoverageAuditPlan, - TEST_COVERAGE_AUDIT_SHIP: generateTestCoverageAuditShip, - TEST_COVERAGE_AUDIT_REVIEW: generateTestCoverageAuditReview, - TEST_FAILURE_TRIAGE: generateTestFailureTriage, - SPEC_REVIEW_LOOP: generateSpecReviewLoop, - DESIGN_SKETCH: generateDesignSketch, - BENEFITS_FROM: generateBenefitsFrom, - CODEX_SECOND_OPINION: generateCodexSecondOpinion, - CODEX_REVIEW_STEP: generateAdversarialStep, - ADVERSARIAL_STEP: generateAdversarialStep, - DEPLOY_BOOTSTRAP: generateDeployBootstrap, - CODEX_PLAN_REVIEW: generateCodexPlanReview, - PLAN_COMPLETION_AUDIT_SHIP: generatePlanCompletionAuditShip, - PLAN_COMPLETION_AUDIT_REVIEW: generatePlanCompletionAuditReview, - PLAN_VERIFICATION_EXEC: generatePlanVerificationExec, -}; +// RESOLVERS imported from ./resolvers/index (line 19) — do not redeclare here // ─── Codex Helpers ─────────────────────────────────────────── diff --git a/scripts/resolvers/preamble.ts b/scripts/resolvers/preamble.ts index 76573422..44126771 100644 --- a/scripts/resolvers/preamble.ts +++ b/scripts/resolvers/preamble.ts @@ -250,14 +250,22 @@ Use AskUserQuestion: git log --format="%an (%ae)" -1 -- \`\`\` If these are different people, prefer the production code author — they likely introduced the regression. -- Create a GitHub issue assigned to that person: - \`\`\`bash - gh issue create \\ - --title "Pre-existing test failure: " \\ - --body "Found failing on branch . Failure is pre-existing.\\n\\n**Error:**\\n\`\`\`\\n\\n\`\`\`\\n\\n**Last modified by:** \\n**Noticed by:** gstack /ship on " \\ - --assignee "" - \`\`\` -- If \`gh\` is not available or \`--assignee\` fails (user not in org, etc.), create the issue without assignee and note who should look at it in the body. +- Create an issue assigned to that person (use the platform detected in Step 0): + - **If GitHub:** + \`\`\`bash + gh issue create \\ + --title "Pre-existing test failure: " \\ + --body "Found failing on branch . Failure is pre-existing.\\n\\n**Error:**\\n\`\`\`\\n\\n\`\`\`\\n\\n**Last modified by:** \\n**Noticed by:** gstack /ship on " \\ + --assignee "" + \`\`\` + - **If GitLab:** + \`\`\`bash + glab issue create \\ + -t "Pre-existing test failure: " \\ + -d "Found failing on branch . Failure is pre-existing.\\n\\n**Error:**\\n\`\`\`\\n\\n\`\`\`\\n\\n**Last modified by:** \\n**Noticed by:** gstack /ship on " \\ + -a "" + \`\`\` +- If neither CLI is available or \`--assignee\`/\`-a\` fails (user not in org, etc.), create the issue without assignee and note who should look at it in the body. - Continue with the workflow. **If "Skip":** diff --git a/scripts/resolvers/utility.ts b/scripts/resolvers/utility.ts index 03e72e21..6cd912f2 100644 --- a/scripts/resolvers/utility.ts +++ b/scripts/resolvers/utility.ts @@ -9,22 +9,42 @@ export function generateSlugSetup(ctx: TemplateContext): string { } export function generateBaseBranchDetect(_ctx: TemplateContext): string { - return `## Step 0: Detect base branch + return `## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - \`gh pr view --json baseRefName -q .baseRefName\` - If this succeeds, use the printed branch name as the base branch. +\`\`\`bash +git remote get-url origin 2>/dev/null +\`\`\` -2. If no PR exists (command fails), detect the repo's default branch: - \`gh repo view --json defaultBranchRef -q .defaultBranchRef.name\` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - \`gh auth status 2>/dev/null\` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - \`glab auth status 2>/dev/null\` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to \`main\`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. \`gh pr view --json baseRefName -q .baseRefName\` — if succeeds, use it +2. \`gh repo view --json defaultBranchRef -q .defaultBranchRef.name\` — if succeeds, use it + +**If GitLab:** +1. \`glab mr view -F json 2>/dev/null\` and extract the \`target_branch\` field — if succeeds, use it +2. \`glab repo view -F json 2>/dev/null\` and extract the \`default_branch\` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. \`git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'\` +2. If that fails: \`git rev-parse --verify origin/main 2>/dev/null\` → use \`main\` +3. If that fails: \`git rev-parse --verify origin/master 2>/dev/null\` → use \`master\` + +If all fail, fall back to \`main\`. Print the detected base branch name. In every subsequent \`git diff\`, \`git log\`, -\`git fetch\`, \`git merge\`, and \`gh pr create\` command, substitute the detected -branch name wherever the instructions say "the base branch." +\`git fetch\`, \`git merge\`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or \`\`. ---`; } diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index 85c1ce20..85815c91 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -26,9 +26,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -46,8 +48,11 @@ echo '{"skill":"setup-browser-cookies","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"," for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -96,112 +101,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -Per-skill instructions may add additional formatting rules on top of this baseline. +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/setup-deploy/SKILL.md b/setup-deploy/SKILL.md index 9eba4479..e5c94278 100644 --- a/setup-deploy/SKILL.md +++ b/setup-deploy/SKILL.md @@ -32,9 +32,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -52,8 +54,11 @@ echo '{"skill":"setup-deploy","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -102,6 +107,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -109,7 +135,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -117,97 +142,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/ship/SKILL.md b/ship/SKILL.md index af2ea565..8999bf84 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -30,9 +30,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -50,8 +52,11 @@ echo '{"skill":"ship","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basenam for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -100,6 +105,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -107,7 +133,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -115,97 +140,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +## Repo Ownership — See Something, Say Something -## Repo Ownership Mode — See Something, Say Something +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -296,22 +278,42 @@ Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: file you are allowed to edit in plan mode. The plan file review report is part of the plan's living status. -## Step 0: Detect base branch +## Step 0: Detect platform and base branch -Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps. +First, detect the git hosting platform from the remote URL: -1. Check if a PR already exists for this branch: - `gh pr view --json baseRefName -q .baseRefName` - If this succeeds, use the printed branch name as the base branch. +```bash +git remote get-url origin 2>/dev/null +``` -2. If no PR exists (command fails), detect the repo's default branch: - `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` +- If the URL contains "github.com" → platform is **GitHub** +- If the URL contains "gitlab" → platform is **GitLab** +- Otherwise, check CLI availability: + - `gh auth status 2>/dev/null` succeeds → platform is **GitHub** (covers GitHub Enterprise) + - `glab auth status 2>/dev/null` succeeds → platform is **GitLab** (covers self-hosted) + - Neither → **unknown** (use git-native commands only) -3. If both commands fail, fall back to `main`. +Determine which branch this PR/MR targets, or the repo's default branch if no +PR/MR exists. Use the result as "the base branch" in all subsequent steps. + +**If GitHub:** +1. `gh pr view --json baseRefName -q .baseRefName` — if succeeds, use it +2. `gh repo view --json defaultBranchRef -q .defaultBranchRef.name` — if succeeds, use it + +**If GitLab:** +1. `glab mr view -F json 2>/dev/null` and extract the `target_branch` field — if succeeds, use it +2. `glab repo view -F json 2>/dev/null` and extract the `default_branch` field — if succeeds, use it + +**Git-native fallback (if unknown platform, or CLI commands fail):** +1. `git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'` +2. If that fails: `git rev-parse --verify origin/main 2>/dev/null` → use `main` +3. If that fails: `git rev-parse --verify origin/master 2>/dev/null` → use `master` + +If all fail, fall back to `main`. Print the detected base branch name. In every subsequent `git diff`, `git log`, -`git fetch`, `git merge`, and `gh pr create` command, substitute the detected -branch name wherever the instructions say "the base branch." +`git fetch`, `git merge`, and PR/MR creation command, substitute the detected +branch name wherever the instructions say "the base branch" or ``. --- @@ -437,12 +439,13 @@ service with existing deployment — verify that a distribution pipeline exists. 2. If new artifact detected, check for a release workflow: ```bash ls .github/workflows/ 2>/dev/null | grep -iE 'release|publish|dist' + grep -qE 'release|publish|deploy' .gitlab-ci.yml 2>/dev/null && echo "GITLAB_CI_RELEASE" ``` 3. **If no release pipeline exists and a new artifact was added:** Use AskUserQuestion: - "This PR adds a new binary/tool but there's no CI/CD pipeline to build and publish it. Users won't be able to download the artifact after merge." - - A) Add a release workflow now (GitHub Actions cross-platform build + GitHub Releases) + - A) Add a release workflow now (CI/CD release pipeline — GitHub Actions or GitLab CI depending on platform) - B) Defer — add to TODOS.md - C) Not needed — this is internal/web-only, existing deployment covers it @@ -722,14 +725,22 @@ Use AskUserQuestion: git log --format="%an (%ae)" -1 -- ``` If these are different people, prefer the production code author — they likely introduced the regression. -- Create a GitHub issue assigned to that person: - ```bash - gh issue create \ - --title "Pre-existing test failure: " \ - --body "Found failing on branch . Failure is pre-existing.\n\n**Error:**\n```\n\n```\n\n**Last modified by:** \n**Noticed by:** gstack /ship on " \ - --assignee "" - ``` -- If `gh` is not available or `--assignee` fails (user not in org, etc.), create the issue without assignee and note who should look at it in the body. +- Create an issue assigned to that person (use the platform detected in Step 0): + - **If GitHub:** + ```bash + gh issue create \ + --title "Pre-existing test failure: " \ + --body "Found failing on branch . Failure is pre-existing.\n\n**Error:**\n```\n\n```\n\n**Last modified by:** \n**Noticed by:** gstack /ship on " \ + --assignee "" + ``` + - **If GitLab:** + ```bash + glab issue create \ + -t "Pre-existing test failure: " \ + -d "Found failing on branch . Failure is pre-existing.\n\n**Error:**\n```\n\n```\n\n**Last modified by:** \n**Noticed by:** gstack /ship on " \ + -a "" + ``` +- If neither CLI is available or `--assignee`/`-a` fails (user not in org, etc.), create the issue without assignee and note who should look at it in the body. - Continue with the workflow. **If "Skip":** @@ -999,6 +1010,39 @@ find . -name '*.test.*' -o -name '*.spec.*' -o -name '*_test.*' -o -name '*_spec For PR body: `Tests: {before} → {after} (+{delta} new)` Coverage line: `Test Coverage Audit: N new code paths. M covered (X%). K tests generated, J committed.` +**7. Coverage gate:** + +Before proceeding, check CLAUDE.md for a `## Test Coverage` section with `Minimum:` and `Target:` fields. If found, use those percentages. Otherwise use defaults: Minimum = 60%, Target = 80%. + +Using the coverage percentage from the diagram in substep 4 (the `COVERAGE: X/Y (Z%)` line): + +- **>= target:** Pass. "Coverage gate: PASS ({X}%)." Continue. +- **>= minimum, < target:** Use AskUserQuestion: + - "AI-assessed coverage is {X}%. {N} code paths are untested. Target is {target}%." + - RECOMMENDATION: Choose A because untested code paths are where production bugs hide. + - Options: + A) Generate more tests for remaining gaps (recommended) + B) Ship anyway — I accept the coverage risk + C) These paths don't need tests — mark as intentionally uncovered + - If A: Loop back to substep 5 (generate tests) targeting the remaining gaps. After second pass, if still below target, present AskUserQuestion again with updated numbers. Maximum 2 generation passes total. + - If B: Continue. Include in PR body: "Coverage gate: {X}% — user accepted risk." + - If C: Continue. Include in PR body: "Coverage gate: {X}% — {N} paths intentionally uncovered." + +- **< minimum:** Use AskUserQuestion: + - "AI-assessed coverage is critically low ({X}%). {N} of {M} code paths have no tests. Minimum threshold is {minimum}%." + - RECOMMENDATION: Choose A because less than {minimum}% means more code is untested than tested. + - Options: + A) Generate tests for remaining gaps (recommended) + B) Override — ship with low coverage (I understand the risk) + - If A: Loop back to substep 5. Maximum 2 passes. If still below minimum after 2 passes, present the override choice again. + - If B: Continue. Include in PR body: "Coverage gate: OVERRIDDEN at {X}%." + +**Coverage percentage undetermined:** If the coverage diagram doesn't produce a clear numeric percentage (ambiguous output, parse error), **skip the gate** with: "Coverage gate: could not determine percentage — skipping." Do not default to 0% or block. + +**Test-only diffs:** Skip the gate (same as the existing fast-path). + +**100% coverage:** "Coverage gate: PASS (100%)." Continue. + ### Test Plan Artifact After producing the coverage diagram, write a test plan artifact so `/qa` and `/qa-only` can consume it: @@ -1262,7 +1306,7 @@ If Codex is available, run a lightweight design check on the diff: ```bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) -codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" +codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: @@ -1377,7 +1421,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal ```bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV" +codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. After the command completes, read stderr: @@ -1641,12 +1685,13 @@ git push -u origin --- -## Step 8: Create PR +## Step 8: Create PR/MR -Create a pull request using `gh`: +Create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0. -```bash -gh pr create --base --title ": " --body "$(cat <<'EOF' +The PR/MR body should contain these sections: + +``` ## Summary @@ -1690,11 +1735,30 @@ gh pr create --base --title ": " --body "$(cat <<'EOF' - [x] All Vitest tests pass (N tests) 🤖 Generated with [Claude Code](https://claude.com/claude-code) +``` + +**If GitHub:** + +```bash +gh pr create --base --title ": " --body "$(cat <<'EOF' + EOF )" ``` -**Output the PR URL** — then proceed to Step 8.5. +**If GitLab:** + +```bash +glab mr create -b -t ": " -d "$(cat <<'EOF' + +EOF +)" +``` + +**If neither CLI is available:** +Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready. + +**Output the PR/MR URL** — then proceed to Step 8.5. --- diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl index 7f82c64d..d630e330 100644 --- a/ship/SKILL.md.tmpl +++ b/ship/SKILL.md.tmpl @@ -100,12 +100,13 @@ service with existing deployment — verify that a distribution pipeline exists. 2. If new artifact detected, check for a release workflow: ```bash ls .github/workflows/ 2>/dev/null | grep -iE 'release|publish|dist' + grep -qE 'release|publish|deploy' .gitlab-ci.yml 2>/dev/null && echo "GITLAB_CI_RELEASE" ``` 3. **If no release pipeline exists and a new artifact was added:** Use AskUserQuestion: - "This PR adds a new binary/tool but there's no CI/CD pipeline to build and publish it. Users won't be able to download the artifact after merge." - - A) Add a release workflow now (GitHub Actions cross-platform build + GitHub Releases) + - A) Add a release workflow now (CI/CD release pipeline — GitHub Actions or GitLab CI depending on platform) - B) Defer — add to TODOS.md - C) Not needed — this is internal/web-only, existing deployment covers it @@ -485,12 +486,13 @@ git push -u origin --- -## Step 8: Create PR +## Step 8: Create PR/MR -Create a pull request using `gh`: +Create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0. -```bash -gh pr create --base --title ": " --body "$(cat <<'EOF' +The PR/MR body should contain these sections: + +``` ## Summary @@ -534,11 +536,30 @@ gh pr create --base --title ": " --body "$(cat <<'EOF' - [x] All Vitest tests pass (N tests) 🤖 Generated with [Claude Code](https://claude.com/claude-code) +``` + +**If GitHub:** + +```bash +gh pr create --base --title ": " --body "$(cat <<'EOF' + EOF )" ``` -**Output the PR URL** — then proceed to Step 8.5. +**If GitLab:** + +```bash +glab mr create -b -t ": " -d "$(cat <<'EOF' + +EOF +)" +``` + +**If neither CLI is available:** +Print the branch name, remote URL, and instruct the user to create the PR/MR manually via the web UI. Do not stop — the code is pushed and ready. + +**Output the PR/MR URL** — then proceed to Step 8.5. --- diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index c26bb64b..cab12413 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -351,6 +351,39 @@ describe('BASE_BRANCH_DETECT resolver', () => { test('resolver output uses "the base branch" phrasing', () => { expect(shipContent).toContain('the base branch'); }); + + test('resolver output contains GitLab CLI commands', () => { + expect(shipContent).toContain('glab'); + }); + + test('resolver output contains git-native fallback', () => { + expect(shipContent).toContain('git symbolic-ref'); + }); + + test('resolver output mentions GitLab platform', () => { + expect(shipContent).toMatch(/gitlab/i); + }); +}); + +describe('GitLab support in generated skills', () => { + const retroContent = fs.readFileSync(path.join(ROOT, 'retro', 'SKILL.md'), 'utf-8'); + const shipSkillContent = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); + + test('retro contains GitLab MR number extraction', () => { + expect(retroContent).toContain('[#!]'); + }); + + test('retro uses BASE_BRANCH_DETECT (contains glab)', () => { + expect(retroContent).toContain('glab'); + }); + + test('ship contains glab mr create', () => { + expect(shipSkillContent).toContain('glab mr create'); + }); + + test('ship checks .gitlab-ci.yml', () => { + expect(shipSkillContent).toContain('.gitlab-ci.yml'); + }); }); /** diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index d61ae164..585e9dd3 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -79,6 +79,8 @@ export const E2E_TOUCHFILES: Record = { // Ship 'ship-base-branch': ['ship/**', 'bin/gstack-repo-mode'], 'ship-local-workflow': ['ship/**', 'scripts/gen-skill-docs.ts'], + 'ship-plan-completion': ['ship/**', 'scripts/gen-skill-docs.ts'], + 'ship-plan-verification': ['ship/**', 'scripts/gen-skill-docs.ts'], // Retro 'retro': ['retro/**'], @@ -184,6 +186,7 @@ export const E2E_TIERS: Record = { 'review-base-branch': 'gate', 'review-design-lite': 'periodic', // 4/7 threshold is subjective 'review-coverage-audit': 'gate', + 'review-plan-completion': 'gate', // Office Hours 'office-hours-spec-review': 'gate', @@ -208,6 +211,8 @@ export const E2E_TIERS: Record = { 'ship-local-workflow': 'gate', 'ship-coverage-audit': 'gate', 'ship-triage': 'gate', + 'ship-plan-completion': 'gate', + 'ship-plan-verification': 'gate', // Retro — gate for cheap branch detection, periodic for full Opus retro 'retro': 'periodic',