diff --git a/.agents/skills/gstack-browse/SKILL.md b/.agents/skills/gstack-browse/SKILL.md index 45a59485..52ebaba8 100644 --- a/.agents/skills/gstack-browse/SKILL.md +++ b/.agents/skills/gstack-browse/SKILL.md @@ -33,6 +33,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -58,28 +64,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -88,6 +97,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? 
+> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -125,26 +161,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. 
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -214,10 +230,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -227,12 +248,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. # browse: QA Testing & Dogfooding @@ -378,7 +403,7 @@ The snapshot is your primary tool for understanding and interacting with pages. 
-s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /browse-annotated.png) +-o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/.agents/skills/gstack-design-consultation/SKILL.md b/.agents/skills/gstack-design-consultation/SKILL.md index 29e1a222..02f9081f 100644 --- a/.agents/skills/gstack-design-consultation/SKILL.md +++ b/.agents/skills/gstack-design-consultation/SKILL.md @@ -34,6 +34,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"design-consultation","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -59,28 +65,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! 
Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -89,6 +98,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -126,26 +162,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -215,10 +231,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -228,12 +249,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. # /design-consultation: Your Design System, Built Together @@ -343,12 +368,7 @@ If browse is not available, rely on WebSearch results and your built-in design k **Step 3: Synthesize findings** -**Three-layer synthesis:** -- **Layer 1 (tried and true):** What design patterns does every product in this category share? These are table stakes — users expect them. -- **Layer 2 (new and popular):** What are the search results and current design discourse saying? What's trending? What new patterns are emerging? -- **Layer 3 (first principles):** Given what we know about THIS product's users and positioning — is there a reason the conventional design approach is wrong? Where should we deliberately break from the category norms? - -**Eureka check:** If Layer 3 reasoning reveals a genuine design insight — a reason the category's visual language fails THIS product — name it: "EUREKA: Every [category] product does X because they assume [assumption]. But this product's users [evidence] — so we should do Y instead." Log the eureka moment (see preamble). +The goal of research is NOT to copy. It is to get in the ballpark — to understand the visual language users in this category already expect. This gives you the baseline. The interesting design work starts after you have the baseline: deciding where to follow conventions (so the product feels literate) and where to break from them (so the product is memorable). Summarize conversationally: > "I looked at what's out there. Here's the landscape: they converge on [patterns]. Most of them feel [observation — e.g., interchangeable, polished but generic, etc.]. The opportunity to stand out is [gap]. Here's where I'd play it safe and where I'd take a risk..." 
diff --git a/.agents/skills/gstack-design-review/SKILL.md b/.agents/skills/gstack-design-review/SKILL.md index 700bd33e..57cf6d37 100644 --- a/.agents/skills/gstack-design-review/SKILL.md +++ b/.agents/skills/gstack-design-review/SKILL.md @@ -34,6 +34,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -59,28 +65,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -89,6 +98,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? 
+> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -126,26 +162,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. 
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -215,10 +231,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -228,12 +249,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
# /design-review: Design Audit → Fix → Verify diff --git a/.agents/skills/gstack-document-release/SKILL.md b/.agents/skills/gstack-document-release/SKILL.md index ccf34824..122baf07 100644 --- a/.agents/skills/gstack-document-release/SKILL.md +++ b/.agents/skills/gstack-document-release/SKILL.md @@ -32,6 +32,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"document-release","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -57,28 +63,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -87,6 +96,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? 
+> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -124,26 +160,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. 
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -213,10 +229,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -226,12 +247,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
## Step 0: Detect base branch diff --git a/.agents/skills/gstack-investigate/SKILL.md b/.agents/skills/gstack-investigate/SKILL.md index 0f53afef..5d24c4af 100644 --- a/.agents/skills/gstack-investigate/SKILL.md +++ b/.agents/skills/gstack-investigate/SKILL.md @@ -35,6 +35,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"investigate","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -60,28 +66,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -90,6 +99,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? 
+> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -127,26 +163,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. 
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -216,10 +232,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -229,12 +250,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. # Systematic Debugging @@ -309,12 +334,6 @@ Also check: - `TODOS.md` for related known issues - `git log` for prior fixes in the same area — **recurring bugs in the same files are an architectural smell**, not a coincidence -**External pattern search:** If the bug doesn't match a known pattern above, WebSearch for: -- "{framework} {generic error type}" — **sanitize first:** strip hostnames, IPs, file paths, SQL, customer data. Search the error category, not the raw message. -- "{library} {component} known issues" - -If WebSearch is unavailable, skip this search and proceed with hypothesis testing. If a documented solution or known dependency bug surfaces, present it as a candidate hypothesis in Phase 3. 
- --- ## Phase 3: Hypothesis Testing @@ -323,7 +342,7 @@ Before writing ANY fix, verify your hypothesis. 1. **Confirm the hypothesis:** Add a temporary log statement, assertion, or debug output at the suspected root cause. Run the reproduction. Does the evidence match? -2. **If the hypothesis is wrong:** Before forming the next hypothesis, consider searching for the error. **Sanitize first** — strip hostnames, IPs, file paths, SQL fragments, customer identifiers, and any internal/proprietary data from the error message. Search only the generic error type and framework context: "{component} {sanitized error type} {framework version}". If the error message is too specific to sanitize safely, skip the search. If WebSearch is unavailable, skip and proceed. Then return to Phase 1. Gather more evidence. Do not guess. +2. **If the hypothesis is wrong:** Return to Phase 1. Gather more evidence. Do not guess. 3. **3-strike rule:** If 3 hypotheses fail, **STOP**. Use AskUserQuestion: ``` diff --git a/.agents/skills/gstack-office-hours/SKILL.md b/.agents/skills/gstack-office-hours/SKILL.md index 955f4400..f7a9ca79 100644 --- a/.agents/skills/gstack-office-hours/SKILL.md +++ b/.agents/skills/gstack-office-hours/SKILL.md @@ -36,6 +36,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"office-hours","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for 
_PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -61,28 +67,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? 
We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -91,6 +100,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -128,26 +164,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -217,10 +233,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -230,33 +251,18 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. -## SETUP (run this check BEFORE any browse command) - -```bash -_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) -B="" -[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse" -[ -z "$B" ] && B=~/.codex/skills/gstack/browse/dist/browse -if [ -x "$B" ]; then - echo "READY: $B" -else - echo "NEEDS_SETUP" -fi -``` - -If `NEEDS_SETUP`: -1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait. -2. Run: `cd && ./setup` -3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash` - # YC Office Hours You are a **YC office hours partner**. Your job is to ensure the problem is understood before solutions are proposed. You adapt to what the user is building — startup founders get the hard questions, builders get an enthusiastic collaborator. This skill produces design docs, not code. @@ -330,54 +336,12 @@ These are non-negotiable. They shape every response in this mode. ### Response Posture -- **Be direct to the point of discomfort.** Comfort means you haven't pushed hard enough. Your job is diagnosis, not encouragement. Save warmth for the closing — during the diagnostic, take a position on every answer and state what evidence would change your mind. +- **Be direct, not cruel.** The goal is clarity, not demolition. But don't soften a hard truth into uselessness. "That's a red flag" is more useful than "that's something to think about." - **Push once, then push again.** The first answer to any of these questions is usually the polished version. The real answer comes after the second or third push. "You said 'enterprises in healthcare.' Can you name one specific person at one specific company?" 
-- **Calibrated acknowledgment, not praise.** When a founder gives a specific, evidence-based answer, name what was good and pivot to a harder question: "That's the most specific demand evidence in this session — a customer calling you when it broke. Let's see if your wedge is equally sharp." Don't linger. The best reward for a good answer is a harder follow-up. +- **Praise specificity when it shows up.** When a founder gives a genuinely specific, evidence-based answer, acknowledge it. That's hard to do and it matters. - **Name common failure patterns.** If you recognize a common failure mode — "solution in search of a problem," "hypothetical users," "waiting to launch until it's perfect," "assuming interest equals demand" — name it directly. - **End with the assignment.** Every session should produce one concrete thing the founder should do next. Not a strategy — an action. -### Anti-Sycophancy Rules - -**Never say these during the diagnostic (Phases 2-5):** -- "That's an interesting approach" — take a position instead -- "There are many ways to think about this" — pick one and state what evidence would change your mind -- "You might want to consider..." — say "This is wrong because..." or "This works because..." -- "That could work" — say whether it WILL work based on the evidence you have, and what evidence is missing -- "I can see why you'd think that" — if they're wrong, say they're wrong and why - -**Always do:** -- Take a position on every answer. State your position AND what evidence would change it. This is rigor — not hedging, not fake certainty. -- Challenge the strongest version of the founder's claim, not a strawman. - -### Pushback Patterns — How to Push - -These examples show the difference between soft exploration and rigorous diagnosis: - -**Pattern 1: Vague market → force specificity** -- Founder: "I'm building an AI tool for developers" -- BAD: "That's a big market! Let's explore what kind of tool." 
-- GOOD: "There are 10,000 AI developer tools right now. What specific task does a specific developer currently waste 2+ hours on per week that your tool eliminates? Name the person." - -**Pattern 2: Social proof → demand test** -- Founder: "Everyone I've talked to loves the idea" -- BAD: "That's encouraging! Who specifically have you talked to?" -- GOOD: "Loving an idea is free. Has anyone offered to pay? Has anyone asked when it ships? Has anyone gotten angry when your prototype broke? Love is not demand." - -**Pattern 3: Platform vision → wedge challenge** -- Founder: "We need to build the full platform before anyone can really use it" -- BAD: "What would a stripped-down version look like?" -- GOOD: "That's a red flag. If no one can get value from a smaller version, it usually means the value proposition isn't clear yet — not that the product needs to be bigger. What's the one thing a user would pay for this week?" - -**Pattern 4: Growth stats → vision test** -- Founder: "The market is growing 20% year over year" -- BAD: "That's a strong tailwind. How do you plan to capture that growth?" -- GOOD: "Growth rate is not a vision. Every competitor in your space can cite the same stat. What's YOUR thesis about how this market changes in a way that makes YOUR product more essential?" - -**Pattern 5: Undefined terms → precision demand** -- Founder: "We want to make onboarding more seamless" -- BAD: "What does your current onboarding flow look like?" -- GOOD: "'Seamless' is not a product feature — it's a feeling. What specific step in onboarding causes users to drop off? What's the drop-off rate? Have you watched someone go through it?" - ### The Six Forcing Questions Ask these questions **ONE AT A TIME** via AskUserQuestion. Push on each one until the answer is specific, evidence-based, and uncomfortable. Comfort means the founder hasn't gone deep enough. @@ -398,13 +362,6 @@ Ask these questions **ONE AT A TIME** via AskUserQuestion. 
Push on each one unti **Red flags:** "People say it's interesting." "We got 500 waitlist signups." "VCs are excited about the space." None of these are demand. -**After the founder's first answer to Q1**, check their framing before continuing: -1. **Language precision:** Are the key terms in their answer defined? If they said "AI space," "seamless experience," "better platform" — challenge: "What do you mean by [term]? Can you define it so I could measure it?" -2. **Hidden assumptions:** What does their framing take for granted? "I need to raise money" assumes capital is required. "The market needs this" assumes verified pull. Name one assumption and ask if it's verified. -3. **Real vs. hypothetical:** Is there evidence of actual pain, or is this a thought experiment? "I think developers would want..." is hypothetical. "Three developers at my last company spent 10 hours a week on this" is real. - -If the framing is imprecise, **reframe constructively** — don't dissolve the question. Say: "Let me try restating what I think you're actually building: [reframe]. Does that capture it better?" Then proceed with the corrected framing. This takes 60 seconds, not 10 minutes. - #### Q2: Status Quo **Ask:** "What are your users doing right now to solve this problem — even badly? What does that workaround cost them?" @@ -455,12 +412,7 @@ If the framing is imprecise, **reframe constructively** — don't dissolve the q **STOP** after each question. Wait for the response before asking the next. -**Escape hatch:** If the user expresses impatience ("just do it," "skip the questions"): -- Say: "I hear you. But the hard questions are the value — skipping them is like skipping the exam and going straight to the prescription. Let me ask two more, then we'll move." -- Consult the smart routing table for the founder's product stage. Ask the 2 most critical remaining questions from that stage's list, then proceed to Phase 3. 
-- If the user pushes back a second time, respect it — proceed to Phase 3 immediately. Don't ask a third time.
-- If only 1 question remains, ask it. If 0 remain, proceed directly.
-- Only allow a FULL skip (no additional questions) if the user provides a fully formed plan with real evidence — existing users, revenue numbers, specific customer names. Even then, still run Phase 3 (Premise Challenge) and Phase 4 (Alternatives).
+**Escape hatch:** If the user says "just do it" or expresses impatience → fast-track to Phase 4 (Alternatives Generation). If the user provides a fully formed plan, skip Phase 2 entirely but still run Phase 3 and Phase 4.
 
 ---
 
@@ -521,43 +473,6 @@ If no matches found, proceed silently.
 
 ---
 
-## Phase 2.75: Landscape Awareness
-
-Read ETHOS.md for the full Search Before Building framework (three layers, eureka moments). The preamble's Search Before Building section has the ETHOS.md path.
-
-After understanding the problem through questioning, search for what the world thinks. This is NOT competitive research (that's /design-consultation's job). This is understanding conventional wisdom so you can evaluate where it's wrong.
-
-**Privacy gate:** Before searching, use AskUserQuestion: "I'd like to search for what the world thinks about this space to inform our discussion. This sends generalized category terms (not your specific idea) to a search provider. OK to proceed?"
-Options: A) Yes, search away B) Skip — keep this session private
-If B: skip this phase entirely and proceed to Phase 3. Use only in-distribution knowledge.
-
-When searching, use **generalized category terms** — never the user's specific product name, proprietary concept, or stealth idea. For example, search "task management app landscape" not "SuperTodo AI-powered task killer."
-
-If WebSearch is unavailable, skip this phase and note: "Search unavailable — proceeding with in-distribution knowledge only."
- -**Startup mode:** WebSearch for: -- "[problem space] startup approach {current year}" -- "[problem space] common mistakes" -- "why [incumbent solution] fails" OR "why [incumbent solution] works" - -**Builder mode:** WebSearch for: -- "[thing being built] existing solutions" -- "[thing being built] open source alternatives" -- "best [thing category] {current year}" - -Read the top 2-3 results. Run the three-layer synthesis: -- **[Layer 1]** What does everyone already know about this space? -- **[Layer 2]** What are the search results and current discourse saying? -- **[Layer 3]** Given what WE learned in Phase 2A/2B — is there a reason the conventional approach is wrong? - -**Eureka check:** If Layer 3 reasoning reveals a genuine insight, name it: "EUREKA: Everyone does X because they assume [assumption]. But [evidence from our conversation] suggests that's wrong here. This means [implication]." Log the eureka moment (see preamble). - -If no eureka moment exists, say: "The conventional wisdom seems sound here. Let's build on it." Proceed to Phase 3. - -**Important:** This search feeds Phase 3 (Premise Challenge). If you found reasons the conventional approach fails, those become premises to challenge. If conventional wisdom is solid, that raises the bar for any premise that contradicts it. - ---- - ## Phase 3: Premise Challenge Before proposing solutions, challenge the premises: @@ -612,66 +527,6 @@ Present via AskUserQuestion. Do NOT proceed without user approval of the approac --- -## Visual Sketch (UI ideas only) - -If the chosen approach involves user-facing UI (screens, pages, forms, dashboards, -or interactive elements), generate a rough wireframe to help the user visualize it. -If the idea is backend-only, infrastructure, or has no UI component — skip this -section silently. - -**Step 1: Gather design context** - -1. Check if `DESIGN.md` exists in the repo root. 
If it does, read it for design - system constraints (colors, typography, spacing, component patterns). Use these - constraints in the wireframe. -2. Apply core design principles: - - **Information hierarchy** — what does the user see first, second, third? - - **Interaction states** — loading, empty, error, success, partial - - **Edge case paranoia** — what if the name is 47 chars? Zero results? Network fails? - - **Subtraction default** — "as little design as possible" (Rams). Every element earns its pixels. - - **Design for trust** — every interface element builds or erodes user trust. - -**Step 2: Generate wireframe HTML** - -Generate a single-page HTML file with these constraints: -- **Intentionally rough aesthetic** — use system fonts, thin gray borders, no color, - hand-drawn-style elements. This is a sketch, not a polished mockup. -- Self-contained — no external dependencies, no CDN links, inline CSS only -- Show the core interaction flow (1-3 screens/states max) -- Include realistic placeholder content (not "Lorem ipsum" — use content that - matches the actual use case) -- Add HTML comments explaining design decisions - -Write to a temp file: -```bash -SKETCH_FILE="/tmp/gstack-sketch-$(date +%s).html" -``` - -**Step 3: Render and capture** - -```bash -$B goto "file://$SKETCH_FILE" -$B screenshot /tmp/gstack-sketch.png -``` - -If `$B` is not available (browse binary not set up), skip the render step. Tell the -user: "Visual sketch requires the browse binary. Run the setup script to enable it." - -**Step 4: Present and iterate** - -Show the screenshot to the user. Ask: "Does this feel right? Want to iterate on the layout?" - -If they want changes, regenerate the HTML with their feedback and re-render. -If they approve or say "good enough," proceed. - -**Step 5: Include in design doc** - -Reference the wireframe screenshot in the design doc's "Recommended Approach" section. 
-The screenshot file at `/tmp/gstack-sketch.png` can be referenced by downstream skills -(`/plan-design-review`, `/design-review`) to see what was originally envisioned. - ---- - ## Phase 4.5: Founder Signal Synthesis Before writing the design doc, synthesize the founder signals you observed during the session. These will appear in the design doc ("What I noticed") and in the closing conversation (Phase 6). @@ -808,73 +663,7 @@ Supersedes: {prior filename — omit this line if first design on this branch} {observational, mentor-like reflections referencing specific things the user said during the session. Quote their words back to them — don't characterize their behavior. 2-4 bullets.} ``` ---- - -## Spec Review Loop - -Before presenting the document to the user for approval, run an adversarial review. - -**Step 1: Dispatch reviewer subagent** - -Use the Agent tool to dispatch an independent reviewer. The reviewer has fresh context -and cannot see the brainstorming conversation — only the document. This ensures genuine -adversarial independence. - -Prompt the subagent with: -- The file path of the document just written -- "Read this document and review it on 5 dimensions. For each dimension, note PASS or - list specific issues with suggested fixes. At the end, output a quality score (1-10) - across all dimensions." - -**Dimensions:** -1. **Completeness** — Are all requirements addressed? Missing edge cases? -2. **Consistency** — Do parts of the document agree with each other? Contradictions? -3. **Clarity** — Could an engineer implement this without asking questions? Ambiguous language? -4. **Scope** — Does the document creep beyond the original problem? YAGNI violations? -5. **Feasibility** — Can this actually be built with the stated approach? Hidden complexity? 
- -The subagent should return: -- A quality score (1-10) -- PASS if no issues, or a numbered list of issues with dimension, description, and fix - -**Step 2: Fix and re-dispatch** - -If the reviewer returns issues: -1. Fix each issue in the document on disk (use Edit tool) -2. Re-dispatch the reviewer subagent with the updated document -3. Maximum 3 iterations total - -**Convergence guard:** If the reviewer returns the same issues on consecutive iterations -(the fix didn't resolve them or the reviewer disagrees with the fix), stop the loop -and persist those issues as "Reviewer Concerns" in the document rather than looping -further. - -If the subagent fails, times out, or is unavailable — skip the review loop entirely. -Tell the user: "Spec review unavailable — presenting unreviewed doc." The document is -already written to disk; the review is a quality bonus, not a gate. - -**Step 3: Report and persist metrics** - -After the loop completes (PASS, max iterations, or convergence guard): - -1. Tell the user the result — summary by default: - "Your doc survived N rounds of adversarial review. M issues caught and fixed. - Quality score: X/10." - If they ask "what did the reviewer find?", show the full reviewer output. - -2. If issues remain after max iterations or convergence, add a "## Reviewer Concerns" - section to the document listing each unresolved issue. Downstream skills will see this. - -3. Append metrics: -```bash -mkdir -p ~/.gstack/analytics -echo '{"skill":"office-hours","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","iterations":ITERATIONS,"issues_found":FOUND,"issues_fixed":FIXED,"remaining":REMAINING,"quality_score":SCORE}' >> ~/.gstack/analytics/spec-review.jsonl 2>/dev/null || true -``` -Replace ITERATIONS, FOUND, FIXED, REMAINING, SCORE with actual values from the review. 
- ---- - -Present the reviewed design doc to the user via AskUserQuestion: +Present the design doc to the user via AskUserQuestion: - A) Approve — mark Status: APPROVED and proceed to handoff - B) Revise — specify which sections need changes (loop back to revise those sections) - C) Start over — return to Phase 2 diff --git a/.agents/skills/gstack-plan-ceo-review/SKILL.md b/.agents/skills/gstack-plan-ceo-review/SKILL.md index f253d18d..5fcb37e8 100644 --- a/.agents/skills/gstack-plan-ceo-review/SKILL.md +++ b/.agents/skills/gstack-plan-ceo-review/SKILL.md @@ -35,6 +35,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"plan-ceo-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -60,28 +66,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. 
-> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -90,6 +99,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 
+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow
+begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion:
+
+> You're already sharing anonymous usage data — nice! Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth`.
+Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
 ## AskUserQuestion Format
 
 **ALWAYS follow this structure for every AskUserQuestion call:**
 
@@ -127,26 +163,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
 - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
 - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")
 
-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -216,10 +232,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). 
Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -229,12 +250,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -344,94 +369,6 @@ DESIGN=$(ls -t ~/.gstack/projects/$SLUG/*-$BRANCH-design-*.md 2>/dev/null | head ``` If a design doc exists (from `/office-hours`), read it. Use it as the source of truth for the problem statement, constraints, and chosen approach. If it has a `Supersedes:` field, note that this is a revised design. 
-**Handoff note check** (reuses $SLUG and $BRANCH from the design doc check above): -```bash -HANDOFF=$(ls -t ~/.gstack/projects/$SLUG/*-$BRANCH-ceo-handoff-*.md 2>/dev/null | head -1) -[ -n "$HANDOFF" ] && echo "HANDOFF_FOUND: $HANDOFF" || echo "NO_HANDOFF" -``` -If this block runs in a separate shell from the design doc check, recompute $SLUG and $BRANCH first using the same commands from that block. -If a handoff note is found: read it. This contains system audit findings and discussion -from a prior CEO review session that paused so the user could run `/office-hours`. Use it -as additional context alongside the design doc. The handoff note helps you avoid re-asking -questions the user already answered. Do NOT skip any steps — run the full review, but use -the handoff note to inform your analysis and avoid redundant questions. - -Tell the user: "Found a handoff note from your prior CEO review session. I'll use that -context to pick up where we left off." - -## Prerequisite Skill Offer - -When the design doc check above prints "No design doc found," offer the prerequisite -skill before proceeding. - -Say to the user via AskUserQuestion: - -> "No design doc found for this branch. `/office-hours` produces a structured problem -> statement, premise challenge, and explored alternatives — it gives this review much -> sharper input to work with. Takes about 10 minutes. The design doc is per-feature, -> not per-product — it captures the thinking behind this specific change." - -Options: -- A) Run /office-hours first (in another window, then come back) -- B) Skip — proceed with standard review - -If they skip: "No worries — standard review. If you ever want sharper input, try -/office-hours first next time." Then proceed normally. Do not re-offer later in the session. - -**Handoff note save (BENEFITS_FROM):** If the user chose A (run /office-hours first), -save a handoff context note before they leave. 
Reuse $SLUG and $BRANCH from the -design doc check block above (they use the same `remote-slug || basename` fallback -that handles repos without an origin remote). Then run: -```bash -mkdir -p ~/.gstack/projects/$SLUG -USER=$(whoami) -DATETIME=$(date +%Y%m%d-%H%M%S) -``` -Write to `~/.gstack/projects/$SLUG/$USER-$BRANCH-ceo-handoff-$DATETIME.md`: -```markdown -# CEO Review Handoff Note - -Generated by /plan-ceo-review on {date} -Branch: {branch} -Repo: {owner/repo} - -## Why I paused -User chose to run /office-hours first (no design doc found). - -## System Audit Summary -{Summarize what the system audit found — recent git history, diff scope, -CLAUDE.md key points, TODOS.md relevant items, known pain points} - -## Discussion So Far -{Empty — handoff happened before Step 0. Frontend/UI scope detection has not -run yet — it will be assessed when the review resumes.} -``` - -Tell the user: "Context saved. Run /office-hours in another window. When you come back -and invoke /plan-ceo-review, I'll pick up the context automatically — including the -design doc /office-hours produces." - -**Mid-session detection:** During Step 0A (Premise Challenge), if the user can't -articulate the problem, keeps changing the problem statement, answers with "I'm not -sure," or is clearly exploring rather than reviewing — offer `/office-hours`: - -> "It sounds like you're still figuring out what to build — that's totally fine, but -> that's what /office-hours is designed for. Want to pause this review and run -> /office-hours first? It'll help you nail down the problem and approach, then come -> back here for the strategic review." - -Options: A) Yes, run /office-hours first. B) No, keep going. -If they keep going, proceed normally — no guilt, no re-asking. 
- -**Handoff note save (mid-session):** If the user chose A (run /office-hours first from -mid-session detection), save a handoff context note with the same format above, but -include any Step 0A progress in the "Discussion So Far" section — premises discussed, -problem framing attempts, user answers so far. Use the same bash block to generate the -file path. - -Tell the user: "Context saved with your discussion so far. Run /office-hours, then -come back to /plan-ceo-review." - When reading TODOS.md, specifically: * Note any TODOs this plan touches, blocks, or unlocks * Check if deferred work from prior reviews relates to this plan @@ -454,22 +391,6 @@ Analyze the plan. If it involves ANY of: new UI screens/pages, changes to existi Identify 2-3 files or patterns in the existing codebase that are particularly well-designed. Note them as style references for the review. Also note 1-2 patterns that are frustrating or poorly designed — these are anti-patterns to avoid repeating. Report findings before proceeding to Step 0. -### Landscape Check - -Read ETHOS.md for the Search Before Building framework (the preamble's Search Before Building section has the path). Before challenging scope, understand the landscape. WebSearch for: -- "[product category] landscape {current year}" -- "[key feature] alternatives" -- "why [incumbent/conventional approach] [succeeds/fails]" - -If WebSearch is unavailable, skip this check and note: "Search unavailable — proceeding with in-distribution knowledge only." - -Run the three-layer synthesis: -- **[Layer 1]** What's the tried-and-true approach in this space? -- **[Layer 2]** What are the search results saying? -- **[Layer 3]** First-principles reasoning — where might the conventional wisdom be wrong? - -Feed into the Premise Challenge (0A) and Dream State Mapping (0C). If you find a eureka moment, surface it during the Expansion opt-in ceremony as a differentiation opportunity. Log it (see preamble). 
- ## Step 0: Nuclear Scope Challenge + Mode Selection ### 0A. Premise Challenge @@ -591,70 +512,6 @@ Repo: {owner/repo} Derive the feature slug from the plan being reviewed (e.g., "user-dashboard", "auth-refactor"). Use the date in YYYY-MM-DD format. -After writing the CEO plan, run the spec review loop on it: - -## Spec Review Loop - -Before presenting the document to the user for approval, run an adversarial review. - -**Step 1: Dispatch reviewer subagent** - -Use the Agent tool to dispatch an independent reviewer. The reviewer has fresh context -and cannot see the brainstorming conversation — only the document. This ensures genuine -adversarial independence. - -Prompt the subagent with: -- The file path of the document just written -- "Read this document and review it on 5 dimensions. For each dimension, note PASS or - list specific issues with suggested fixes. At the end, output a quality score (1-10) - across all dimensions." - -**Dimensions:** -1. **Completeness** — Are all requirements addressed? Missing edge cases? -2. **Consistency** — Do parts of the document agree with each other? Contradictions? -3. **Clarity** — Could an engineer implement this without asking questions? Ambiguous language? -4. **Scope** — Does the document creep beyond the original problem? YAGNI violations? -5. **Feasibility** — Can this actually be built with the stated approach? Hidden complexity? - -The subagent should return: -- A quality score (1-10) -- PASS if no issues, or a numbered list of issues with dimension, description, and fix - -**Step 2: Fix and re-dispatch** - -If the reviewer returns issues: -1. Fix each issue in the document on disk (use Edit tool) -2. Re-dispatch the reviewer subagent with the updated document -3. 
Maximum 3 iterations total - -**Convergence guard:** If the reviewer returns the same issues on consecutive iterations -(the fix didn't resolve them or the reviewer disagrees with the fix), stop the loop -and persist those issues as "Reviewer Concerns" in the document rather than looping -further. - -If the subagent fails, times out, or is unavailable — skip the review loop entirely. -Tell the user: "Spec review unavailable — presenting unreviewed doc." The document is -already written to disk; the review is a quality bonus, not a gate. - -**Step 3: Report and persist metrics** - -After the loop completes (PASS, max iterations, or convergence guard): - -1. Tell the user the result — summary by default: - "Your doc survived N rounds of adversarial review. M issues caught and fixed. - Quality score: X/10." - If they ask "what did the reviewer find?", show the full reviewer output. - -2. If issues remain after max iterations or convergence, add a "## Reviewer Concerns" - section to the document listing each unresolved issue. Downstream skills will see this. - -3. Append metrics: -```bash -mkdir -p ~/.gstack/analytics -echo '{"skill":"plan-ceo-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","iterations":ITERATIONS,"issues_found":FOUND,"issues_fixed":FIXED,"remaining":REMAINING,"quality_score":SCORE}' >> ~/.gstack/analytics/spec-review.jsonl 2>/dev/null || true -``` -Replace ITERATIONS, FOUND, FIXED, REMAINING, SCORE with actual values from the review. - ### 0E. Temporal Interrogation (EXPANSION, SELECTIVE EXPANSION, and HOLD modes) Think ahead to implementation: What decisions will need to be made during implementation that should be resolved NOW in the plan? ``` @@ -1035,28 +892,12 @@ List every ASCII diagram in files this plan touches. Still accurate? ### Unresolved Decisions If any AskUserQuestion goes unanswered, note it here. Never silently default. 
-## Handoff Note Cleanup - -After producing the Completion Summary, clean up any handoff notes for this branch — -the review is complete and the context is no longer needed. - -```bash -source <(~/.codex/skills/gstack/bin/gstack-slug 2>/dev/null) -rm -f ~/.gstack/projects/$SLUG/*-$BRANCH-ceo-handoff-*.md 2>/dev/null || true -``` - ## Review Log -After producing the Completion Summary above, persist the review result. - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes review metadata to -`~/.gstack/` (user config directory, not project files). The skill preamble -already writes to `~/.gstack/sessions/` and `~/.gstack/analytics/` — this is -the same pattern. The review dashboard depends on this data. Skipping this -command breaks the review readiness dashboard in /ship. +After producing the Completion Summary above, persist the review result: ```bash -~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"plan-ceo-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"mode":"MODE","scope_proposed":N,"scope_accepted":N,"scope_deferred":N,"commit":"COMMIT"}' +~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"plan-ceo-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"mode":"MODE","commit":"COMMIT"}' ``` Before running this command, substitute the placeholder values from the Completion Summary you just produced: @@ -1065,9 +906,6 @@ Before running this command, substitute the placeholder values from the Completi - **unresolved**: number from "Unresolved decisions" in the summary - **critical_gaps**: number from "Failure modes: ___ CRITICAL GAPS" in the summary - **MODE**: the mode the user selected (SCOPE_EXPANSION / SELECTIVE_EXPANSION / HOLD_SCOPE / SCOPE_REDUCTION) -- **scope_proposed**: number from "Scope proposals: ___ proposed" in the summary (0 for HOLD/REDUCTION) -- **scope_accepted**: number from "Scope proposals: ___ accepted" in the summary (0 for HOLD/REDUCTION) -- 
**scope_deferred**: number of items deferred to TODOS.md from scope decisions (0 for HOLD/REDUCTION) - **COMMIT**: output of `git rev-parse --short HEAD` ## Review Readiness Dashboard @@ -1078,7 +916,7 @@ After completing the review, read the review log and config to display the dashb ~/.codex/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -1089,7 +927,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -1099,7 +937,7 @@ Parse the output. 
Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. **Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -1113,73 +951,6 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes -## Plan File Review Report - -After displaying the Review Readiness Dashboard in conversation output, also update the -**plan file** itself so review status is visible to anyone reading the plan. - -### Detect the plan file - -1. 
Check if there is an active plan file in this conversation (the host provides plan file - paths in system messages — look for plan file references in the conversation context). -2. If not found, skip this section silently — not every review runs in plan mode. - -### Generate the report - -Read the review log output you already have from the Review Readiness Dashboard step above. -Parse each JSONL entry. Each skill logs different fields: - -- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`scope_proposed\`, \`scope_accepted\`, \`scope_deferred\`, \`commit\` - → Findings: "{scope_proposed} proposals, {scope_accepted} accepted, {scope_deferred} deferred" - → If scope fields are 0 or missing (HOLD/REDUCTION mode): "mode: {mode}, {critical_gaps} critical gaps" -- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`issues_found\`, \`mode\`, \`commit\` - → Findings: "{issues_found} issues, {critical_gaps} critical gaps" -- **plan-design-review**: \`status\`, \`initial_score\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` - → Findings: "score: {initial_score}/10 → {overall_score}/10, {decisions_made} decisions" -- **codex-review**: \`status\`, \`gate\`, \`findings\`, \`findings_fixed\` - → Findings: "{findings} findings, {findings_fixed}/{findings} fixed" - -All fields needed for the Findings column are now present in the JSONL entries. -For the review you just completed, you may use richer details from your own Completion -Summary. For prior reviews, use the JSONL fields directly — they contain all required data. 
- -Produce this markdown table: - -\`\`\`markdown -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | -| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | -| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | -| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | -\`\`\` - -Below the table, add these lines (omit any that are empty/not applicable): - -- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes -- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis -- **UNRESOLVED:** total unresolved decisions across all reviews -- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). - If Eng Review is not CLEAR and not skipped globally, append "eng review required". - -### Write to the plan file - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one -file you are allowed to edit in plan mode. The plan file review report is part of the -plan's living status. - -- Search the plan file for a \`## GSTACK REVIEW REPORT\` section **anywhere** in the file - (not just at the end — content may have been added after it). -- If found, **replace it** entirely using the Edit tool. Match from \`## GSTACK REVIEW REPORT\` - through either the next \`## \` heading or end of file, whichever comes first. This ensures - content added after the report section is preserved, not eaten. If the Edit fails - (e.g., concurrent edit changed the content), re-read the plan file and retry once. -- If no such section exists, **append it** to the end of the plan file. -- Always place it as the very last section in the plan file. 
If it was found mid-file, - move it: delete the old location and append at the end. - ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this CEO review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/.agents/skills/gstack-plan-design-review/SKILL.md b/.agents/skills/gstack-plan-design-review/SKILL.md index af092247..353b08c3 100644 --- a/.agents/skills/gstack-plan-design-review/SKILL.md +++ b/.agents/skills/gstack-plan-design-review/SKILL.md @@ -34,6 +34,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"plan-design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -59,28 +65,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! 
Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -89,6 +98,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -126,26 +162,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks."
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -215,10 +231,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -228,12 +249,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -499,23 +524,16 @@ If any AskUserQuestion goes unanswered, note it here. Never silently default to ## Review Log -After producing the Completion Summary above, persist the review result. - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes review metadata to -`~/.gstack/` (user config directory, not project files). The skill preamble -already writes to `~/.gstack/sessions/` and `~/.gstack/analytics/` — this is -the same pattern. The review dashboard depends on this data. Skipping this -command breaks the review readiness dashboard in /ship. +After producing the Completion Summary above, persist the review result: ```bash -~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"plan-design-review","timestamp":"TIMESTAMP","status":"STATUS","initial_score":N,"overall_score":N,"unresolved":N,"decisions_made":N,"commit":"COMMIT"}' +~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"plan-design-review","timestamp":"TIMESTAMP","status":"STATUS","overall_score":N,"unresolved":N,"decisions_made":N,"commit":"COMMIT"}' ``` Substitute values from the Completion Summary: - **TIMESTAMP**: current ISO 8601 datetime - **STATUS**: "clean" if overall score 8+ AND 0 unresolved; otherwise "issues_open" -- **initial_score**: initial overall design score before fixes (0-10) -- **overall_score**: final overall design score after fixes (0-10) +- **overall_score**: final overall design score (0-10) - **unresolved**: number of unresolved design decisions - **decisions_made**: number of design decisions added to the plan - **COMMIT**: output of `git rev-parse --short HEAD` @@ -528,7 +546,7 @@ After completing the review, read the review log and config to display the dashb ~/.codex/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). 
Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -539,7 +557,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -549,7 +567,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. 
Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. **Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -563,73 +581,6 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes -## Plan File Review Report - -After displaying the Review Readiness Dashboard in conversation output, also update the -**plan file** itself so review status is visible to anyone reading the plan. - -### Detect the plan file - -1. Check if there is an active plan file in this conversation (the host provides plan file - paths in system messages — look for plan file references in the conversation context). -2. If not found, skip this section silently — not every review runs in plan mode. - -### Generate the report - -Read the review log output you already have from the Review Readiness Dashboard step above. -Parse each JSONL entry. 
Each skill logs different fields: - -- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`scope_proposed\`, \`scope_accepted\`, \`scope_deferred\`, \`commit\` - → Findings: "{scope_proposed} proposals, {scope_accepted} accepted, {scope_deferred} deferred" - → If scope fields are 0 or missing (HOLD/REDUCTION mode): "mode: {mode}, {critical_gaps} critical gaps" -- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`issues_found\`, \`mode\`, \`commit\` - → Findings: "{issues_found} issues, {critical_gaps} critical gaps" -- **plan-design-review**: \`status\`, \`initial_score\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` - → Findings: "score: {initial_score}/10 → {overall_score}/10, {decisions_made} decisions" -- **codex-review**: \`status\`, \`gate\`, \`findings\`, \`findings_fixed\` - → Findings: "{findings} findings, {findings_fixed}/{findings} fixed" - -All fields needed for the Findings column are now present in the JSONL entries. -For the review you just completed, you may use richer details from your own Completion -Summary. For prior reviews, use the JSONL fields directly — they contain all required data. 
- -Produce this markdown table: - -\`\`\`markdown -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | -| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | -| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | -| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | -\`\`\` - -Below the table, add these lines (omit any that are empty/not applicable): - -- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes -- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis -- **UNRESOLVED:** total unresolved decisions across all reviews -- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). - If Eng Review is not CLEAR and not skipped globally, append "eng review required". - -### Write to the plan file - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one -file you are allowed to edit in plan mode. The plan file review report is part of the -plan's living status. - -- Search the plan file for a \`## GSTACK REVIEW REPORT\` section **anywhere** in the file - (not just at the end — content may have been added after it). -- If found, **replace it** entirely using the Edit tool. Match from \`## GSTACK REVIEW REPORT\` - through either the next \`## \` heading or end of file, whichever comes first. This ensures - content added after the report section is preserved, not eaten. If the Edit fails - (e.g., concurrent edit changed the content), re-read the plan file and retry once. -- If no such section exists, **append it** to the end of the plan file. -- Always place it as the very last section in the plan file. 
If it was found mid-file, - move it: delete the old location and append at the end. - ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this design review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/.agents/skills/gstack-plan-eng-review/SKILL.md b/.agents/skills/gstack-plan-eng-review/SKILL.md index f2be53a3..163a6c4d 100644 --- a/.agents/skills/gstack-plan-eng-review/SKILL.md +++ b/.agents/skills/gstack-plan-eng-review/SKILL.md @@ -33,6 +33,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"plan-eng-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -58,28 +64,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! 
Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -88,6 +97,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -125,26 +161,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks."
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -214,10 +230,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -227,12 +248,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. # Plan Review Mode @@ -289,39 +314,12 @@ DESIGN=$(ls -t ~/.gstack/projects/$SLUG/*-$BRANCH-design-*.md 2>/dev/null | head ``` If a design doc exists, read it. Use it as the source of truth for the problem statement, constraints, and chosen approach. If it has a `Supersedes:` field, note that this is a revised design — check the prior version for context on what changed and why. -## Prerequisite Skill Offer - -When the design doc check above prints "No design doc found," offer the prerequisite -skill before proceeding. - -Say to the user via AskUserQuestion: - -> "No design doc found for this branch. `/office-hours` produces a structured problem -> statement, premise challenge, and explored alternatives — it gives this review much -> sharper input to work with. Takes about 10 minutes. The design doc is per-feature, -> not per-product — it captures the thinking behind this specific change." - -Options: -- A) Run /office-hours first (in another window, then come back) -- B) Skip — proceed with standard review - -If they skip: "No worries — standard review. If you ever want sharper input, try -/office-hours first next time." Then proceed normally. Do not re-offer later in the session. - ### Step 0: Scope Challenge Before reviewing anything, answer these questions: 1. **What existing code already partially or fully solves each sub-problem?** Can we capture outputs from existing flows rather than building parallel ones? 2. **What is the minimum set of changes that achieves the stated goal?** Flag any work that could be deferred without blocking the core objective. Be ruthless about scope creep. 3. **Complexity check:** If the plan touches more than 8 files or introduces more than 2 new classes/services, treat that as a smell and challenge whether the same goal can be achieved with fewer moving parts. -4. 
**Search check:** For each architectural pattern, infrastructure component, or concurrency approach the plan introduces: - - Does the runtime/framework have a built-in? Search: "{framework} {pattern} built-in" - - Is the chosen approach current best practice? Search: "{pattern} best practice {current year}" - - Are there known footguns? Search: "{framework} {pattern} pitfalls" - - If WebSearch is unavailable, skip this check and note: "Search unavailable — proceeding with in-distribution knowledge only." - - If the plan rolls a custom solution where a built-in exists, flag it as a scope reduction opportunity. Annotate recommendations with **[Layer 1]**, **[Layer 2]**, **[Layer 3]**, or **[EUREKA]** (see preamble's Search Before Building section). If you find a eureka moment — a reason the standard approach is wrong for this case — present it as an architectural insight. -5. **TODOS cross-reference:** Read `TODOS.md` if it exists. Are any deferred items blocking this plan? Can any deferred items be bundled into this PR without expanding scope? Does this plan create new work that should be captured as a TODO? +4. **TODOS cross-reference:** Read `TODOS.md` if it exists. Are any deferred items blocking this plan? Can any deferred items be bundled into this PR without expanding scope? Does this plan create new work that should be captured as a TODO? 5. **Completeness check:** Is the plan doing the complete version or a shortcut? With AI-assisted coding, the cost of completeness (100% test coverage, full edge case handling, complete error paths) is 10-100x cheaper than with a human team. If the plan proposes a shortcut that saves human-hours but only saves minutes with CC+gstack, recommend the complete version. Boil the lake. @@ -496,16 +494,10 @@ Check the git log for this branch. If there are prior commits suggesting a previ ## Review Log -After producing the Completion Summary above, persist the review result. 
- -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes review metadata to -`~/.gstack/` (user config directory, not project files). The skill preamble -already writes to `~/.gstack/sessions/` and `~/.gstack/analytics/` — this is -the same pattern. The review dashboard depends on this data. Skipping this -command breaks the review readiness dashboard in /ship. +After producing the Completion Summary above, persist the review result: ```bash -~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"plan-eng-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"issues_found":N,"mode":"MODE","commit":"COMMIT"}' +~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"plan-eng-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"mode":"MODE","commit":"COMMIT"}' ``` Substitute values from the Completion Summary: @@ -513,7 +505,6 @@ Substitute values from the Completion Summary: - **STATUS**: "clean" if 0 unresolved decisions AND 0 critical gaps; otherwise "issues_open" - **unresolved**: number from "Unresolved decisions" count - **critical_gaps**: number from "Failure modes: ___ critical gaps flagged" -- **issues_found**: total issues found across all review sections (Architecture + Code Quality + Performance + Test gaps) - **MODE**: FULL_REVIEW / SCOPE_REDUCED - **COMMIT**: output of `git rev-parse --short HEAD` @@ -525,7 +516,7 @@ After completing the review, read the review log and config to display the dashb ~/.codex/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). 
For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -536,7 +527,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -546,7 +537,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. 
Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. **Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -560,73 +551,6 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes -## Plan File Review Report - -After displaying the Review Readiness Dashboard in conversation output, also update the -**plan file** itself so review status is visible to anyone reading the plan. - -### Detect the plan file - -1. Check if there is an active plan file in this conversation (the host provides plan file - paths in system messages — look for plan file references in the conversation context). -2. If not found, skip this section silently — not every review runs in plan mode. - -### Generate the report - -Read the review log output you already have from the Review Readiness Dashboard step above. -Parse each JSONL entry. 
Each skill logs different fields: - -- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`scope_proposed\`, \`scope_accepted\`, \`scope_deferred\`, \`commit\` - → Findings: "{scope_proposed} proposals, {scope_accepted} accepted, {scope_deferred} deferred" - → If scope fields are 0 or missing (HOLD/REDUCTION mode): "mode: {mode}, {critical_gaps} critical gaps" -- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`issues_found\`, \`mode\`, \`commit\` - → Findings: "{issues_found} issues, {critical_gaps} critical gaps" -- **plan-design-review**: \`status\`, \`initial_score\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` - → Findings: "score: {initial_score}/10 → {overall_score}/10, {decisions_made} decisions" -- **codex-review**: \`status\`, \`gate\`, \`findings\`, \`findings_fixed\` - → Findings: "{findings} findings, {findings_fixed}/{findings} fixed" - -All fields needed for the Findings column are now present in the JSONL entries. -For the review you just completed, you may use richer details from your own Completion -Summary. For prior reviews, use the JSONL fields directly — they contain all required data. 
- -Produce this markdown table: - -\`\`\`markdown -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | -| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | -| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | -| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | -\`\`\` - -Below the table, add these lines (omit any that are empty/not applicable): - -- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes -- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis -- **UNRESOLVED:** total unresolved decisions across all reviews -- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). - If Eng Review is not CLEAR and not skipped globally, append "eng review required". - -### Write to the plan file - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one -file you are allowed to edit in plan mode. The plan file review report is part of the -plan's living status. - -- Search the plan file for a \`## GSTACK REVIEW REPORT\` section **anywhere** in the file - (not just at the end — content may have been added after it). -- If found, **replace it** entirely using the Edit tool. Match from \`## GSTACK REVIEW REPORT\` - through either the next \`## \` heading or end of file, whichever comes first. This ensures - content added after the report section is preserved, not eaten. If the Edit fails - (e.g., concurrent edit changed the content), re-read the plan file and retry once. -- If no such section exists, **append it** to the end of the plan file. -- Always place it as the very last section in the plan file. 
If it was found mid-file, - move it: delete the old location and append at the end. - ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, check if additional reviews would be valuable. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/.agents/skills/gstack-qa-only/SKILL.md b/.agents/skills/gstack-qa-only/SKILL.md index f310fb25..75aa4630 100644 --- a/.agents/skills/gstack-qa-only/SKILL.md +++ b/.agents/skills/gstack-qa-only/SKILL.md @@ -32,6 +32,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"qa-only","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -57,28 +63,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. 
-> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -87,6 +96,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 
+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow
+begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion:
+
+> You're already sharing anonymous usage data — nice! Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth`.
+Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`; on failure, leave telemetry set to `anonymous`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
 ## AskUserQuestion Format
 
 **ALWAYS follow this structure for every AskUserQuestion call:**
 
@@ -124,26 +160,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
 - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
 - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")
 
-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -213,10 +229,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). 
Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -226,12 +247,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
# /qa-only: Report-Only QA Testing diff --git a/.agents/skills/gstack-qa/SKILL.md b/.agents/skills/gstack-qa/SKILL.md index 92e61a9a..a527e80a 100644 --- a/.agents/skills/gstack-qa/SKILL.md +++ b/.agents/skills/gstack-qa/SKILL.md @@ -35,6 +35,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"qa","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -60,28 +66,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -90,6 +99,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? 
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth`.
+Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`; on failure, leave telemetry set to `anonymous`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
 ## AskUserQuestion Format
 
 **ALWAYS follow this structure for every AskUserQuestion call:**
 
@@ -127,26 +163,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
 - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
 - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")
 
-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all.
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -216,10 +232,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -229,12 +250,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
## Step 0: Detect base branch diff --git a/.agents/skills/gstack-retro/SKILL.md b/.agents/skills/gstack-retro/SKILL.md index a0b796ba..6f334a9c 100644 --- a/.agents/skills/gstack-retro/SKILL.md +++ b/.agents/skills/gstack-retro/SKILL.md @@ -32,6 +32,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"retro","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -57,28 +63,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -87,6 +96,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? 
+> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth <email>`. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -124,26 +160,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all.
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -213,10 +229,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -226,12 +247,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. ## Detect default branch @@ -389,20 +414,6 @@ If TODOS.md doesn't exist, skip the Backlog Health row. If the JSONL file doesn't exist or has no entries in the window, skip the Skill Usage row. -**Eureka Moments (if logged):** Read `~/.gstack/analytics/eureka.jsonl` if it exists. Filter entries within the retro time window by `ts` field. For each eureka moment, show the skill that flagged it, the branch, and a one-line summary of the insight. 
Present as: - -``` -| Eureka Moments | 2 this period | -``` - -If moments exist, list them: -``` - EUREKA /office-hours (branch: garrytan/auth-rethink): "Session tokens don't need server storage — browser crypto API makes client-side JWT validation viable" - EUREKA /plan-eng-review (branch: garrytan/cache-layer): "Redis isn't needed here — Bun's built-in LRU cache handles this workload" -``` - -If the JSONL file doesn't exist or has no entries in the window, skip the Eureka Moments row. - ### Step 3: Commit Time Distribution Show hourly histogram in local time using bar chart: @@ -462,7 +473,7 @@ From commit diffs, estimate PR sizes and bucket them: - **Small** (<100 LOC) - **Medium** (100-500 LOC) - **Large** (500-1500 LOC) -- **XL** (1500+ LOC) +- **XL** (1500+ LOC) — flag these with file counts ### Step 8: Focus Score + Ship of the Week @@ -654,13 +665,14 @@ Narrative interpreting what the team-wide patterns mean: Narrative covering: - Commit type mix and what it reveals -- PR size distribution and what it reveals about shipping cadence +- PR size discipline (are PRs staying small?) - Fix-chain detection (sequences of fix commits on the same subsystem) - Version bump discipline ### Code Quality Signals - Test LOC ratio trend - Hotspot analysis (are the same files churning?) +- Any XL PRs that should have been split - Greptile signal ratio and trend (if history exists): "Greptile: X% signal (Y valid catches, Z false positives)" ### Test Health @@ -699,7 +711,7 @@ For each teammate (sorted by commits descending), write a section: - "Fixed the N+1 query that was causing 2s load times on the dashboard" - **Opportunity for growth**: 1 specific, constructive suggestion. Frame as investment, not criticism. 
Examples: - "Test coverage on the payment module is at 8% — worth investing in before the next feature lands on top of it" - - "Most commits land in a single burst — spacing work across the day could reduce context-switching fatigue" + - "3 of the 5 PRs were 800+ LOC — breaking these up would catch issues earlier and make review easier" - "All commits land between 1-4am — sustainable pace matters for code quality long-term" **AI collaboration note:** If many commits have `Co-Authored-By` AI trailers (e.g., Claude, Copilot), note the AI-assisted commit percentage as a team metric. Frame it neutrally — "N% of commits were AI-assisted" — without judgment. diff --git a/.agents/skills/gstack-review/SKILL.md b/.agents/skills/gstack-review/SKILL.md index 8d37d6dd..3bbec6b7 100644 --- a/.agents/skills/gstack-review/SKILL.md +++ b/.agents/skills/gstack-review/SKILL.md @@ -31,6 +31,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -56,28 +62,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. 
This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -86,6 +95,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -123,26 +159,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -212,10 +228,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -225,12 +246,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -335,17 +360,10 @@ Run `git diff origin/<base>` to get the full diff. This includes both committed Apply the checklist against the diff in two passes: 1. **Pass 1 (CRITICAL):** SQL & Data Safety, Race Conditions & Concurrency, LLM Output Trust Boundary, Enum & Value Completeness -2. **Pass 2 (INFORMATIONAL):** Conditional Side Effects, Magic Numbers & String Coupling, Dead Code & Consistency, LLM Prompt Issues, Test Gaps, View/Frontend, Performance & Bundle Impact +2. **Pass 2 (INFORMATIONAL):** Conditional Side Effects, Magic Numbers & String Coupling, Dead Code & Consistency, LLM Prompt Issues, Test Gaps, View/Frontend **Enum & Value Completeness requires reading code OUTSIDE the diff.** When the diff introduces a new enum value, status, tier, or type constant, use Grep to find all files that reference sibling values, then Read those files to check if the new value is handled. This is the one category where within-diff review is insufficient. -**Search-before-recommending:** When recommending a fix pattern (especially for concurrency, caching, auth, or framework-specific behavior): -- Verify the pattern is current best practice for the framework version in use -- Check if a built-in solution exists in newer versions before recommending a workaround -- Verify API signatures against current docs (APIs change between versions) - -Takes seconds, prevents recommending outdated patterns. If WebSearch is unavailable, note it and proceed with in-distribution knowledge. - Follow the output format specified in the checklist. Respect the suppressions — do NOT flag items listed in the "DO NOT flag" section. --- @@ -501,7 +519,54 @@ If no documentation files exist, skip this step silently.
--- +## Step 5.7: Codex second opinion (optional) +After completing the review, check if the Codex CLI is available: + +```bash +which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +``` + +If Codex is available, use AskUserQuestion: + +``` +Review complete. Want an independent second opinion from Codex (OpenAI)? + +A) Run Codex code review — independent diff review with pass/fail gate +B) Run Codex adversarial challenge — try to find ways this code will fail in production +C) Both — review first, then adversarial challenge +D) Skip — no Codex review needed +``` + +If the user chooses A, B, or C: + +**For code review (A or C):** Run `codex review --base <base-branch>` with a 5-minute timeout. +Present the full output verbatim under a `CODEX SAYS (code review):` header. +Check the output for `[P1]` markers — if found, note `GATE: FAIL`, otherwise `GATE: PASS`. +After presenting, compare Codex's findings with your own review findings from Steps 4-5 +and output a CROSS-MODEL ANALYSIS showing what both found, what only Codex found, +and what only Claude found. + +**For adversarial challenge (B or C):** Run: +```bash +codex exec "Review the changes on this branch against the base branch. Run git diff origin/<base-branch> to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, failure modes. Be adversarial." -s read-only +``` +Present the full output verbatim under a `CODEX SAYS (adversarial challenge):` header. + +**Only if a code review ran (user chose A or C):** Persist the Codex review result to the review log: +```bash +~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"codex-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","gate":"GATE"}' +``` + +Substitute: STATUS ("clean" if PASS, "issues_found" if FAIL), GATE ("pass" or "fail").
+ +**Do NOT persist a codex-review entry when only the adversarial challenge (B) ran** — +there is no gate verdict to record, and a false entry would make the Review Readiness +Dashboard believe a code review happened when it didn't. + +If Codex is not available, skip this step silently. + +--- ## Important Rules diff --git a/.agents/skills/gstack-setup-browser-cookies/SKILL.md b/.agents/skills/gstack-setup-browser-cookies/SKILL.md index 49e2e900..c9c084c2 100644 --- a/.agents/skills/gstack-setup-browser-cookies/SKILL.md +++ b/.agents/skills/gstack-setup-browser-cookies/SKILL.md @@ -31,6 +31,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"setup-browser-cookies","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -56,28 +62,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! 
Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -86,6 +95,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -123,26 +159,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -212,10 +228,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -225,12 +246,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. # Setup Browser Cookies diff --git a/.agents/skills/gstack-ship/SKILL.md b/.agents/skills/gstack-ship/SKILL.md index 442c4a72..c922523e 100644 --- a/.agents/skills/gstack-ship/SKILL.md +++ b/.agents/skills/gstack-ship/SKILL.md @@ -29,6 +29,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"ship","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -54,28 +60,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -84,6 +93,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? 
+> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -121,26 +157,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. 
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -210,10 +226,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -223,12 +244,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -294,7 +319,7 @@ After completing the review, read the review log and config to display the dashb ~/.codex/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. 
Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -305,7 +330,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -315,7 +340,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. 
Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. **Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -857,7 +882,43 @@ For each classified comment: --- +## Step 3.8: Codex second opinion (optional) +Check if the Codex CLI is available: + +```bash +which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +``` + +If Codex is available, use AskUserQuestion: + +``` +Pre-landing review complete. Want an independent Codex (OpenAI) review before shipping? + +A) Run Codex code review — independent diff review with pass/fail gate +B) Run Codex adversarial challenge — try to break this code +C) Skip — ship without Codex review +``` + +If the user chooses A or B: + +**For code review (A):** Run `codex review --base ` with a 5-minute timeout. +Present the full output verbatim under a `CODEX SAYS:` header. Check for `[P1]` markers +to determine pass/fail gate. Persist the result: + +```bash +~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"codex-review","timestamp":"TIMESTAMP","status":"STATUS","gate":"GATE"}' +``` + +If GATE is FAIL, use AskUserQuestion: "Codex found critical issues. Ship anyway?" +If the user says no, stop. If yes, continue to Step 4. + +**For adversarial (B):** Run codex exec with the adversarial prompt (see /codex skill). +Present findings. This is informational — does not block shipping. + +If Codex is not available, skip silently. Continue to Step 4. + +--- ## Step 4: Version bump (auto-decide) @@ -1098,7 +1159,7 @@ doc updates — the user runs `/ship` and documentation stays current without a - **Never skip tests.** If tests fail, stop. - **Never skip the pre-landing review.** If checklist.md is unreadable, stop. - **Never force push.** Use regular `git push` only. -- **Never ask for trivial confirmations** (e.g., "ready to push?", "create PR?"). 
DO stop for: version bumps (MINOR/MAJOR), pre-landing review findings (ASK items), and Codex structured review [P1] findings (large diffs only). +- **Never ask for confirmation** except for MINOR/MAJOR version bumps and pre-landing review ASK items (batched into at most one AskUserQuestion). - **Always use the 4-digit version format** from the VERSION file. - **Date format in CHANGELOG:** `YYYY-MM-DD` - **Split commits for bisectability** — each commit = one logical change. diff --git a/.agents/skills/gstack/SKILL.md b/.agents/skills/gstack/SKILL.md index 93128866..02b5d704 100644 --- a/.agents/skills/gstack/SKILL.md +++ b/.agents/skills/gstack/SKILL.md @@ -64,6 +64,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.codex/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.codex/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.codex/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -89,28 +95,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! 
Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.codex/skills/gstack/bin/gstack-config set telemetry community +~/.codex/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.codex/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.codex/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -119,6 +128,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.codex/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.codex/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -156,26 +192,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.codex/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -245,10 +261,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -258,12 +279,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.codex/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session. @@ -506,7 +531,7 @@ The snapshot is your primary tool for understanding and interacting with pages. -s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /browse-annotated.png) +-o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/SKILL.md b/SKILL.md index d8e51bd1..5328edbe 100644 --- a/SKILL.md +++ b/SKILL.md @@ -70,6 +70,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -95,28 +101,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. 
This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -125,6 +134,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -162,26 +198,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -251,10 +267,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -264,12 +285,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session. @@ -512,7 +537,7 @@ The snapshot is your primary tool for understanding and interacting with pages. -s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /browse-annotated.png) +-o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/bin/gstack-auth b/bin/gstack-auth new file mode 100755 index 00000000..99693a5a --- /dev/null +++ b/bin/gstack-auth @@ -0,0 +1,213 @@ +#!/usr/bin/env bash +# gstack-auth — authenticate with Supabase via email OTP +# +# Usage: +# gstack-auth [email] — start auth flow (prompts if no email) +# gstack-auth status — show current auth status +# gstack-auth logout — remove saved tokens +# +# Sends a 6-digit verification code to the user's email. +# User enters the code in the terminal to authenticate. +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +set -euo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +AUTH_FILE="$STATE_DIR/auth-token.json" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +SUPABASE_URL="${GSTACK_SUPABASE_URL:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" + +if [ -z "$SUPABASE_URL" ] || [ -z "$ANON_KEY" ]; then + echo "Error: Supabase not configured. 
Check supabase/config.sh" + exit 1 +fi + +AUTH_URL="${SUPABASE_URL}/auth/v1" + +# ─── Helper: write auth token file ────────────────────────── +save_token() { + local access_token="$1" + local refresh_token="$2" + local expires_in="$3" + local email="$4" + local user_id="$5" + + local expires_at + expires_at=$(( $(date +%s) + expires_in )) + + mkdir -p "$STATE_DIR" + cat > "$AUTH_FILE" <" + exit 0 + fi + AUTH_JSON="$(cat "$AUTH_FILE")" + EMAIL="$(json_field "$AUTH_JSON" "email")" + EXPIRES_AT="$(json_field "$AUTH_JSON" "expires_at")" + NOW="$(date +%s)" + if [ "$NOW" -lt "$EXPIRES_AT" ] 2>/dev/null; then + REMAINING=$(( (EXPIRES_AT - NOW) / 60 )) + echo "Authenticated as: $EMAIL" + echo "Token expires in: ${REMAINING}m" + else + echo "Authenticated as: $EMAIL (token expired — will auto-refresh)" + fi + exit 0 +fi + +# ─── Subcommand: logout ───────────────────────────────────── +if [ "${1:-}" = "logout" ]; then + rm -f "$AUTH_FILE" + echo "Logged out. Auth token removed." + exit 0 +fi + +# ─── Main: auth flow ──────────────────────────────────────── +EMAIL="${1:-}" +if [ -z "$EMAIL" ]; then + printf "Enter your email: " + read -r EMAIL +fi + +if [ -z "$EMAIL" ]; then + echo "Error: email is required" + exit 1 +fi + +if ! echo "$EMAIL" | grep -qE '^[^@]+@[^@]+\.[^@]+$'; then + echo "Error: invalid email format" + exit 1 +fi + +# ─── Step 1: Send OTP ──────────────────────────────────────── +echo "" +echo "Sending verification code to ${EMAIL}..." 
+ +OTP_BODY="{\"email\":\"${EMAIL}\"}" + +HTTP_RESPONSE="$(curl -s -w "\n%{http_code}" \ + -X POST "${AUTH_URL}/otp" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "$OTP_BODY" 2>/dev/null || echo -e "\n000")" + +HTTP_CODE="$(echo "$HTTP_RESPONSE" | tail -1)" +HTTP_BODY="$(echo "$HTTP_RESPONSE" | sed '$d')" + +case "$HTTP_CODE" in + 2*) + ;; # success + 429) + if echo "$HTTP_BODY" | grep -q "email_send_rate_limit"; then + echo "" + echo "Email rate limit exceeded (Supabase free tier: ~3 emails/hour)." + echo "Try again in a few minutes, or set up custom SMTP in the Supabase" + echo "dashboard for unlimited sends." + exit 1 + fi + echo "Cooldown active — waiting 60s before retrying..." + for i in $(seq 60 -1 1); do + printf "\r Retrying in %2ds..." "$i" + sleep 1 + done + printf "\r \r" + echo "Retrying..." + HTTP_RESPONSE="$(curl -s -w "\n%{http_code}" \ + -X POST "${AUTH_URL}/otp" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "$OTP_BODY" 2>/dev/null || echo -e "\n000")" + HTTP_CODE="$(echo "$HTTP_RESPONSE" | tail -1)" + HTTP_BODY="$(echo "$HTTP_RESPONSE" | sed '$d')" + case "$HTTP_CODE" in + 2*) ;; # success on retry + *) echo "Error sending OTP (HTTP ${HTTP_CODE}): ${HTTP_BODY}"; exit 1 ;; + esac + ;; + *) + echo "Error sending OTP (HTTP ${HTTP_CODE}): ${HTTP_BODY}" + exit 1 + ;; +esac + +echo "" +echo "Check your email for a 6-digit code." +echo "" + +# ─── Step 2: Read OTP code ─────────────────────────────────── +printf "Enter code: " +read -r OTP_CODE + +if [ -z "$OTP_CODE" ]; then + echo "No code entered." + exit 1 +fi + +# ─── Step 3: Verify OTP ───────────────────────────────────── +OTP_CODE="$(echo "$OTP_CODE" | tr -d '[:space:]')" + +if ! 
echo "$OTP_CODE" | grep -qE '^[0-9]{6}$'; then + echo "Error: code must be exactly 6 digits" + exit 1 +fi + +VERIFY_RESPONSE="$(curl -s \ + -X POST "${AUTH_URL}/verify" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "{\"email\":\"${EMAIL}\",\"token\":\"${OTP_CODE}\",\"type\":\"email\"}" \ + 2>/dev/null || echo "{}")" + +ACCESS_TOKEN="$(json_field "$VERIFY_RESPONSE" "access_token")" +REFRESH_TOKEN="$(json_field "$VERIFY_RESPONSE" "refresh_token")" +EXPIRES_IN="$(json_field "$VERIFY_RESPONSE" "expires_in")" +USER_ID="$(json_field "$VERIFY_RESPONSE" "id" 2>/dev/null || true)" + +if [ -z "$USER_ID" ]; then + USER_ID="$(echo "$VERIFY_RESPONSE" | grep -o '"id":"[^"]*"' | head -1 | sed 's/"id":"//;s/"//')" +fi + +if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + ERROR_MSG="$(json_field "$VERIFY_RESPONSE" "error_description" 2>/dev/null || json_field "$VERIFY_RESPONSE" "msg" 2>/dev/null || echo "unknown error")" + echo "" + echo "Verification failed: $ERROR_MSG" + echo "Check the code and try again." + exit 1 +fi + +save_token "$ACCESS_TOKEN" "$REFRESH_TOKEN" "${EXPIRES_IN:-3600}" "$EMAIL" "$USER_ID" + +# ─── Step 4: Save email to config ──────────────────────────── +"$GSTACK_DIR/bin/gstack-config" set email "$EMAIL" + +echo "" +echo "Authenticated as: ${EMAIL}" +echo "Token saved to: ${AUTH_FILE}" diff --git a/bin/gstack-auth-refresh b/bin/gstack-auth-refresh new file mode 100755 index 00000000..010d2908 --- /dev/null +++ b/bin/gstack-auth-refresh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# gstack-auth-refresh — silently refresh auth token if expired +# +# Usage: +# gstack-auth-refresh — refresh and print access token +# gstack-auth-refresh --check — exit 0 if authenticated, 1 if not +# +# Called by gstack-community-backup and other authenticated scripts. +# If the refresh token is also expired, prints an error and exits 1. 
+# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +set -euo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +AUTH_FILE="$STATE_DIR/auth-token.json" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +SUPABASE_URL="${GSTACK_SUPABASE_URL:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" +AUTH_URL="${SUPABASE_URL}/auth/v1" + +# ─── Helper: extract JSON field ────────────────────────────── +json_field() { + local json="$1" + local field="$2" + echo "$json" | grep -o "\"${field}\":[^,}]*" | head -1 | sed "s/\"${field}\"://;s/\"//g;s/ //g" +} + +# ─── Check auth file exists ───────────────────────────────── +if [ ! -f "$AUTH_FILE" ]; then + if [ "${1:-}" = "--check" ]; then + exit 1 + fi + echo "Not authenticated. Run: gstack auth " >&2 + exit 1 +fi + +AUTH_JSON="$(cat "$AUTH_FILE")" +ACCESS_TOKEN="$(json_field "$AUTH_JSON" "access_token")" +REFRESH_TOKEN="$(json_field "$AUTH_JSON" "refresh_token")" +EXPIRES_AT="$(json_field "$AUTH_JSON" "expires_at")" +EMAIL="$(json_field "$AUTH_JSON" "email")" +USER_ID="$(json_field "$AUTH_JSON" "user_id")" +NOW="$(date +%s)" + +# ─── Check-only mode ──────────────────────────────────────── +if [ "${1:-}" = "--check" ]; then + [ -n "$ACCESS_TOKEN" ] && exit 0 || exit 1 +fi + +# ─── Token still valid? Return it. ─────────────────────────── +# Add 60s buffer to avoid using a token that's about to expire +BUFFER=60 +if [ -n "$EXPIRES_AT" ] && [ "$NOW" -lt "$(( EXPIRES_AT - BUFFER ))" ] 2>/dev/null; then + echo "$ACCESS_TOKEN" + exit 0 +fi + +# ─── Token expired — refresh it ───────────────────────────── +if [ -z "$REFRESH_TOKEN" ] || [ "$REFRESH_TOKEN" = "null" ]; then + echo "Session expired and no refresh token. 
Run: gstack auth " >&2 + exit 1 +fi + +if [ -z "$SUPABASE_URL" ] || [ -z "$ANON_KEY" ]; then + echo "Error: Supabase not configured" >&2 + exit 1 +fi + +REFRESH_RESPONSE="$(curl -s --max-time 10 \ + -X POST "${AUTH_URL}/token?grant_type=refresh_token" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "{\"refresh_token\":\"${REFRESH_TOKEN}\"}" \ + 2>/dev/null || echo "{}")" + +NEW_ACCESS="$(json_field "$REFRESH_RESPONSE" "access_token")" +NEW_REFRESH="$(json_field "$REFRESH_RESPONSE" "refresh_token")" +NEW_EXPIRES_IN="$(json_field "$REFRESH_RESPONSE" "expires_in")" + +if [ -z "$NEW_ACCESS" ] || [ "$NEW_ACCESS" = "null" ]; then + echo "Session expired. Run: gstack auth " >&2 + rm -f "$AUTH_FILE" + exit 1 +fi + +# Update token file +NEW_EXPIRES_AT=$(( NOW + ${NEW_EXPIRES_IN:-3600} )) + +cat > "$AUTH_FILE" </dev/null || true)" +[ "$TIER" != "community" ] && exit 0 + +# Must have auth +"$AUTH_REFRESH" --check 2>/dev/null || exit 0 + +# Must have endpoint +[ -z "$ENDPOINT" ] && exit 0 + +# Rate limit: once per 30 minutes +if [ -f "$BACKUP_RATE_FILE" ]; then + STALE=$(find "$BACKUP_RATE_FILE" -mmin +30 2>/dev/null || true) + [ -z "$STALE" ] && exit 0 +fi + +# ─── Get auth token ───────────────────────────────────────── +ACCESS_TOKEN="$("$AUTH_REFRESH" 2>/dev/null || true)" +[ -z "$ACCESS_TOKEN" ] && exit 0 + +# Read user info from auth file +AUTH_JSON="$(cat "$STATE_DIR/auth-token.json" 2>/dev/null || echo "{}")" +USER_ID="$(echo "$AUTH_JSON" | grep -o '"user_id":"[^"]*"' | head -1 | sed 's/"user_id":"//;s/"//')" +EMAIL="$(echo "$AUTH_JSON" | grep -o '"email":"[^"]*"' | head -1 | sed 's/"email":"//;s/"//')" + +[ -z "$USER_ID" ] && exit 0 + +# ─── Build config snapshot ─────────────────────────────────── +CONFIG_SNAPSHOT="{}" +if [ -f "$STATE_DIR/config.yaml" ]; then + # Convert YAML-like config to JSON + CONFIG_SNAPSHOT="{" + FIRST=true + while IFS=': ' read -r KEY VALUE; do + [ -z "$KEY" ] && continue + [ -z "$VALUE" ] && continue + if [ 
"$FIRST" = "true" ]; then FIRST=false; else CONFIG_SNAPSHOT="$CONFIG_SNAPSHOT,"; fi + CONFIG_SNAPSHOT="$CONFIG_SNAPSHOT\"$KEY\":\"$VALUE\"" + done < "$STATE_DIR/config.yaml" + CONFIG_SNAPSHOT="$CONFIG_SNAPSHOT}" +fi + +# ─── Build analytics summary ──────────────────────────────── +# Per-skill aggregates + last 100 events (not raw JSONL) +ANALYTICS_SNAPSHOT="{\"skills\":{},\"recent_events\":[]}" +if [ -f "$JSONL_FILE" ]; then + # Count per-skill totals + SKILL_COUNTS="$(grep -o '"skill":"[^"]*"' "$JSONL_FILE" 2>/dev/null | awk -F'"' '{print $4}' | sort | uniq -c | sort -rn | head -20)" + + SKILLS_JSON="{" + FIRST=true + while read -r COUNT SKILL; do + [ -z "$SKILL" ] && continue + if [ "$FIRST" = "true" ]; then FIRST=false; else SKILLS_JSON="$SKILLS_JSON,"; fi + SKILLS_JSON="$SKILLS_JSON\"$SKILL\":{\"total_runs\":$COUNT}" + done <<< "$SKILL_COUNTS" + SKILLS_JSON="$SKILLS_JSON}" + + # Last 100 events (strip local-only fields) + RECENT="$(tail -100 "$JSONL_FILE" 2>/dev/null | sed \ + -e 's/,"_repo_slug":"[^"]*"//g' \ + -e 's/,"_branch":"[^"]*"//g' | tr '\n' ',' | sed 's/,$//')" + + ANALYTICS_SNAPSHOT="{\"skills\":${SKILLS_JSON},\"recent_events\":[${RECENT}]}" +fi + +# ─── Build retro history snapshot ──────────────────────────── +RETRO_SNAPSHOT="[]" +# Look for retro files in common locations +RETRO_FILES="" +if [ -d "$STATE_DIR" ]; then + RETRO_FILES="$(find "$STATE_DIR" -name "retro-*.json" -o -name "retro_*.json" 2>/dev/null | head -20 || true)" +fi + +if [ -n "$RETRO_FILES" ]; then + RETRO_SNAPSHOT="[" + FIRST=true + while IFS= read -r RFILE; do + [ -f "$RFILE" ] || continue + CONTENT="$(cat "$RFILE" 2>/dev/null || true)" + [ -z "$CONTENT" ] && continue + if [ "$FIRST" = "true" ]; then FIRST=false; else RETRO_SNAPSHOT="$RETRO_SNAPSHOT,"; fi + RETRO_SNAPSHOT="$RETRO_SNAPSHOT$CONTENT" + done <<< "$RETRO_FILES" + RETRO_SNAPSHOT="$RETRO_SNAPSHOT]" +fi + +# ─── Upsert to installations table ────────────────────────── +GSTACK_VERSION="$(cat "$GSTACK_DIR/VERSION" 
2>/dev/null | tr -d '[:space:]' || echo "unknown")" +OS="$(uname -s | tr '[:upper:]' '[:lower:]')" +NOW_ISO="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + +# Escape JSON strings that might contain special characters +# Config and retro snapshots are already JSON, analytics too +PAYLOAD="{ + \"installation_id\": \"${USER_ID}\", + \"user_id\": \"${USER_ID}\", + \"email\": \"${EMAIL}\", + \"gstack_version\": \"${GSTACK_VERSION}\", + \"os\": \"${OS}\", + \"config_snapshot\": ${CONFIG_SNAPSHOT}, + \"analytics_snapshot\": ${ANALYTICS_SNAPSHOT}, + \"retro_history\": ${RETRO_SNAPSHOT}, + \"last_backup_at\": \"${NOW_ISO}\", + \"last_seen\": \"${NOW_ISO}\" +}" + +# Upsert (POST with Prefer: resolution=merge-duplicates) +HTTP_CODE="$(curl -s -o /dev/null -w '%{http_code}' --max-time 15 \ + -X POST "${ENDPOINT}/installations" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Prefer: resolution=merge-duplicates,return=minimal" \ + -d "$PAYLOAD" 2>/dev/null || echo "000")" + +# Update rate limit marker on success +case "$HTTP_CODE" in + 2*) touch "$BACKUP_RATE_FILE" 2>/dev/null || true ;; +esac + +exit 0 diff --git a/bin/gstack-community-benchmarks b/bin/gstack-community-benchmarks new file mode 100755 index 00000000..9ab33380 --- /dev/null +++ b/bin/gstack-community-benchmarks @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# gstack-community-benchmarks — compare your stats to the community +# +# Fetches community benchmarks and compares against local analytics. +# Shows side-by-side: your average vs community median per skill. +# +# Usage: +# gstack-community-benchmarks — show comparison +# gstack-community-benchmarks --json — output as JSON +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +set -uo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." 
&& pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +ANALYTICS_DIR="$STATE_DIR/analytics" +JSONL_FILE="$ANALYTICS_DIR/skill-usage.jsonl" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +SUPABASE_URL="${GSTACK_SUPABASE_URL:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" +ENDPOINT="${GSTACK_TELEMETRY_ENDPOINT:-}" + +JSON_MODE=false +[ "${1:-}" = "--json" ] && JSON_MODE=true + +# ─── Fetch community benchmarks ───────────────────────────── +echo "gstack benchmarks" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" + +BENCHMARKS="" +if [ -n "$SUPABASE_URL" ] && [ -n "$ANON_KEY" ]; then + # Try edge function first + BENCHMARKS="$(curl -sf --max-time 10 \ + "${SUPABASE_URL}/functions/v1/community-benchmarks" \ + -H "Authorization: Bearer ${ANON_KEY}" \ + 2>/dev/null || true)" + + # Fall back to direct table query + if [ -z "$BENCHMARKS" ] || [ "$BENCHMARKS" = "[]" ]; then + BENCHMARKS="$(curl -sf --max-time 10 \ + "${ENDPOINT}/community_benchmarks?select=skill,median_duration_s,total_runs,success_rate&order=total_runs.desc&limit=15" \ + -H "apikey: ${ANON_KEY}" \ + -H "Authorization: Bearer ${ANON_KEY}" \ + 2>/dev/null || echo "[]")" + fi +fi + +# ─── Compute local stats ──────────────────────────────────── +if [ ! -f "$JSONL_FILE" ]; then + echo "No local analytics data. Use gstack skills to generate data." + exit 0 +fi + +# Compute per-skill average duration from local JSONL +# Extract skill and duration, filter out nulls +echo " Skill You (avg) Community vs." 
+echo " ───────────────── ───────── ────────── ────────" + +# Get unique skills from local data +LOCAL_SKILLS="$(grep -o '"skill":"[^"]*"' "$JSONL_FILE" 2>/dev/null | awk -F'"' '{print $4}' | sort -u)" + +while IFS= read -r SKILL; do + [ -z "$SKILL" ] && continue + # Skip internal/meta skills + case "$SKILL" in _*|test-*) continue ;; esac + + # Local: average duration in seconds + LOCAL_AVG="$(grep "\"skill\":\"${SKILL}\"" "$JSONL_FILE" 2>/dev/null | \ + grep -o '"duration_s":[0-9]*' | awk -F: '{sum+=$2; n++} END {if(n>0) printf "%.0f", sum/n; else print "0"}')" + + LOCAL_COUNT="$(grep -c "\"skill\":\"${SKILL}\"" "$JSONL_FILE" 2>/dev/null || echo "0")" + + # Format duration + if [ "$LOCAL_AVG" -ge 60 ] 2>/dev/null; then + LOCAL_FMT="$(( LOCAL_AVG / 60 ))m $(( LOCAL_AVG % 60 ))s" + else + LOCAL_FMT="${LOCAL_AVG:-0}s" + fi + + # Community: find matching skill in benchmarks + COMM_MEDIAN="" + COMM_FMT="--" + DELTA="" + if [ -n "$BENCHMARKS" ] && [ "$BENCHMARKS" != "[]" ]; then + COMM_MEDIAN="$(echo "$BENCHMARKS" | grep -o "\"skill\":\"${SKILL}\"[^}]*\"median_duration_s\":[0-9.]*" | \ + grep -o '"median_duration_s":[0-9.]*' | head -1 | awk -F: '{printf "%.0f", $2}')" + + if [ -n "$COMM_MEDIAN" ] && [ "$COMM_MEDIAN" -gt 0 ] 2>/dev/null; then + if [ "$COMM_MEDIAN" -ge 60 ] 2>/dev/null; then + COMM_FMT="$(( COMM_MEDIAN / 60 ))m $(( COMM_MEDIAN % 60 ))s" + else + COMM_FMT="${COMM_MEDIAN}s" + fi + + # Compute delta percentage + if [ "$LOCAL_AVG" -gt 0 ] 2>/dev/null && [ "$COMM_MEDIAN" -gt 0 ] 2>/dev/null; then + DIFF=$(( (LOCAL_AVG - COMM_MEDIAN) * 100 / COMM_MEDIAN )) + if [ "$DIFF" -gt 5 ] 2>/dev/null; then + DELTA="+${DIFF}% slower" + elif [ "$DIFF" -lt -5 ] 2>/dev/null; then + DELTA="$(( -DIFF ))% faster" + else + DELTA="~same" + fi + fi + fi + fi + + printf " /%-17s %-10s %-12s %s\n" "$SKILL" "$LOCAL_FMT" "$COMM_FMT" "${DELTA:-}" + +done <<< "$LOCAL_SKILLS" + +echo "" +echo "Your runs: $(wc -l < "$JSONL_FILE" | tr -d ' ') total events" +echo "Community benchmarks 
refresh hourly." diff --git a/bin/gstack-community-dashboard b/bin/gstack-community-dashboard index 5b7fc7ec..135bd3e1 100755 --- a/bin/gstack-community-dashboard +++ b/bin/gstack-community-dashboard @@ -70,7 +70,7 @@ echo "Top skills (last 7 days)" echo "────────────────────────" # Query telemetry_events, group by skill -EVENTS="$(query "telemetry_events" "select=skill,gstack_version&event_type=eq.skill_run&event_timestamp=gte.${WEEK_AGO}&limit=1000" 2>/dev/null || echo "[]")" +EVENTS="$(query "telemetry_events" "select=skill,gstack_version,session_id&event_type=eq.skill_run&event_timestamp=gte.${WEEK_AGO}&limit=1000" 2>/dev/null || echo "[]")" if [ "$EVENTS" != "[]" ] && [ -n "$EVENTS" ]; then echo "$EVENTS" | grep -o '"skill":"[^"]*"' | awk -F'"' '{print $4}' | sort | uniq -c | sort -rn | head -10 | while read -r COUNT SKILL; do @@ -81,19 +81,37 @@ else fi echo "" -# ─── Crash clusters ────────────────────────────────────────── -echo "Top crash clusters" -echo "──────────────────" +# ─── Errors (last 7 days) ──────────────────────────────────── +echo "Top errors (last 7 days)" +echo "────────────────────────" -CRASHES="$(query "crash_clusters" "select=error_class,gstack_version,total_occurrences,identified_users&limit=5" 2>/dev/null || echo "[]")" +ERRORS="$(query "telemetry_events" "select=skill,error_class,error_message,failed_step,duration_s,session_id&outcome=eq.error&event_timestamp=gte.${WEEK_AGO}&order=event_timestamp.desc&limit=200" 2>/dev/null || echo "[]")" -if [ "$CRASHES" != "[]" ] && [ -n "$CRASHES" ]; then - echo "$CRASHES" | grep -o '"error_class":"[^"]*"' | awk -F'"' '{print $4}' | head -5 | while read -r ERR; do - C="$(echo "$CRASHES" | grep -o "\"error_class\":\"$ERR\"[^}]*\"total_occurrences\":[0-9]*" | grep -o '"total_occurrences":[0-9]*' | head -1 | grep -o '[0-9]*')" - printf " %-30s %s occurrences\n" "$ERR" "${C:-?}" - done +if [ "$ERRORS" != "[]" ] && [ -n "$ERRORS" ]; then + # Group by skill + error_class, show count and example message 
+ echo "$ERRORS" | grep -o '"skill":"[^"]*"[^}]*"error_class":"[^"]*"' | \ + sed 's/.*"skill":"//;s/".*"error_class":"/\t/' | sed 's/"$//' | \ + sort | uniq -c | sort -rn | head -8 | while read -r COUNT COMBO; do + SKILL="$(echo "$COMBO" | cut -f1)" + ERR="$(echo "$COMBO" | cut -f2)" + # Find an example error_message for this combo + MSG="$(echo "$ERRORS" | grep -o "\"skill\":\"${SKILL}\"[^}]*\"error_message\":\"[^\"]*\"" | \ + grep -o '"error_message":"[^"]*"' | head -1 | sed 's/"error_message":"//;s/"$//' || true)" + # Find an example failed_step + STEP="$(echo "$ERRORS" | grep -o "\"skill\":\"${SKILL}\"[^}]*\"failed_step\":\"[^\"]*\"" | \ + grep -o '"failed_step":"[^"]*"' | head -1 | sed 's/"failed_step":"//;s/"$//' || true)" + + printf " /%-12s %-18s %3d errors\n" "$SKILL" "${ERR:-unknown}" "$COUNT" + [ -n "$STEP" ] && printf " step: %s\n" "$STEP" + [ -n "$MSG" ] && printf " e.g.: %s\n" "$(echo "$MSG" | head -c 80)" + done + + # Show how many unique sessions have errors + ERR_SESSIONS="$(echo "$ERRORS" | grep -o '"session_id":"[^"]*"' | sort -u | wc -l | tr -d ' ')" + echo "" + echo " ${ERR_SESSIONS} unique session(s) with errors" else - echo " No crashes reported" + echo " No errors reported" fi echo "" @@ -109,5 +127,41 @@ else echo " No data yet" fi +# ─── Sessions (distinct session_id, works for all tiers) ──── +echo "Sessions (last 7 days)" +echo "──────────────────────" + +if [ "$EVENTS" != "[]" ] && [ -n "$EVENTS" ]; then + SESSION_COUNT="$(echo "$EVENTS" | grep -o '"session_id":"[^"]*"' | sort -u | wc -l | tr -d ' ')" + echo " ${SESSION_COUNT} unique sessions" +else + echo " No session data" +fi echo "" + +# ─── Skill recommendations ───────────────────────────────── +# Fetch top skills for recommendations +TOP_SKILLS="$(echo "$EVENTS" | grep -o '"skill":"[^"]*"' | awk -F'"' '{print $4}' | sort | uniq -c | sort -rn | head -3 | awk '{print $2}' | tr '\n' ',' | sed 's/,$//')" + +if [ -n "$TOP_SKILLS" ]; then + RECS="$(curl -sf --max-time 10 \ + 
"${SUPABASE_URL}/functions/v1/community-recommendations?skills=${TOP_SKILLS}" \ + -H "Authorization: Bearer ${ANON_KEY}" \ + 2>/dev/null || echo '{"recommendations":[]}')" + + REC_LIST="$(echo "$RECS" | grep -o '"skill":"[^"]*"' | awk -F'"' '{print $4}')" + REC_REASONS="$(echo "$RECS" | grep -o '"reason":"[^"]*"' | awk -F'"' '{print $4}')" + + if [ -n "$REC_LIST" ]; then + echo "Skills you might like" + echo "─────────────────────" + paste <(echo "$REC_LIST") <(echo "$REC_REASONS") 2>/dev/null | while IFS=$'\t' read -r SKILL REASON; do + [ -z "$SKILL" ] && continue + printf " /%-20s %s\n" "$SKILL" "${REASON:-}" + done + echo "" + fi +fi + echo "For local analytics: gstack-analytics" +echo "For benchmarks: gstack-community-benchmarks" diff --git a/bin/gstack-community-restore b/bin/gstack-community-restore new file mode 100755 index 00000000..c0c26259 --- /dev/null +++ b/bin/gstack-community-restore @@ -0,0 +1,135 @@ +#!/usr/bin/env bash +# gstack-community-restore — restore gstack state from cloud backup +# +# Requires community tier + valid auth token. +# Restores: config, analytics summary, retro history. +# Local config values take precedence on conflicts. +# +# Usage: +# gstack-community-restore — restore from backup +# gstack-community-restore --dry-run — show what would be restored +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +set -euo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +CONFIG_FILE="$STATE_DIR/config.yaml" +ANALYTICS_DIR="$STATE_DIR/analytics" +JSONL_FILE="$ANALYTICS_DIR/skill-usage.jsonl" +AUTH_REFRESH="$GSTACK_DIR/bin/gstack-auth-refresh" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . 
"$GSTACK_DIR/supabase/config.sh" +fi +ENDPOINT="${GSTACK_TELEMETRY_ENDPOINT:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" + +DRY_RUN=false +[ "${1:-}" = "--dry-run" ] && DRY_RUN=true + +# ─── Pre-checks ───────────────────────────────────────────── +if ! "$AUTH_REFRESH" --check 2>/dev/null; then + echo "Not authenticated. Run: gstack auth " + exit 1 +fi + +ACCESS_TOKEN="$("$AUTH_REFRESH" 2>/dev/null)" +if [ -z "$ACCESS_TOKEN" ]; then + echo "Failed to get auth token. Run: gstack auth " + exit 1 +fi + +AUTH_JSON="$(cat "$STATE_DIR/auth-token.json" 2>/dev/null || echo "{}")" +USER_ID="$(echo "$AUTH_JSON" | grep -o '"user_id":"[^"]*"' | head -1 | sed 's/"user_id":"//;s/"//')" + +if [ -z "$USER_ID" ]; then + echo "No user_id in auth token. Run: gstack auth " + exit 1 +fi + +# ─── Fetch backup from Supabase ────────────────────────────── +echo "Fetching backup..." + +BACKUP="$(curl -s --max-time 15 \ + "${ENDPOINT}/installations?installation_id=eq.${USER_ID}&select=config_snapshot,analytics_snapshot,retro_history,last_backup_at,email" \ + -H "apikey: ${ANON_KEY}" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + 2>/dev/null || echo "[]")" + +# Check if we got data +if [ "$BACKUP" = "[]" ] || [ -z "$BACKUP" ]; then + echo "No backup found for your account." + echo "Run gstack for a while and backup will happen automatically." 
+ exit 0 +fi + +# Extract first result (strip array brackets) +BACKUP="$(echo "$BACKUP" | sed 's/^\[//;s/\]$//')" + +LAST_BACKUP="$(echo "$BACKUP" | grep -o '"last_backup_at":"[^"]*"' | head -1 | sed 's/"last_backup_at":"//;s/"//')" +echo "Last backup: ${LAST_BACKUP:-unknown}" +echo "" + +# ─── Restore config ───────────────────────────────────────── +CONFIG_DATA="$(echo "$BACKUP" | grep -o '"config_snapshot":{[^}]*}' | sed 's/"config_snapshot"://' || true)" + +if [ -n "$CONFIG_DATA" ] && [ "$CONFIG_DATA" != "null" ] && [ "$CONFIG_DATA" != "{}" ]; then + echo "Config snapshot found:" + # Extract key-value pairs from JSON + KEYS="$(echo "$CONFIG_DATA" | grep -o '"[^"]*":"[^"]*"' | sed 's/"//g')" + + while IFS=: read -r KEY VALUE; do + [ -z "$KEY" ] && continue + EXISTING="$("$GSTACK_DIR/bin/gstack-config" get "$KEY" 2>/dev/null || true)" + if [ -n "$EXISTING" ]; then + echo " $KEY: $EXISTING (keeping local value, backup had: $VALUE)" + else + echo " $KEY: $VALUE (restoring from backup)" + if [ "$DRY_RUN" = "false" ]; then + "$GSTACK_DIR/bin/gstack-config" set "$KEY" "$VALUE" + fi + fi + done <<< "$KEYS" + echo "" +fi + +# ─── Restore analytics summary ────────────────────────────── +ANALYTICS_DATA="$(echo "$BACKUP" | grep -o '"analytics_snapshot":{[^}]*}' | sed 's/"analytics_snapshot"://' || true)" + +if [ -n "$ANALYTICS_DATA" ] && [ "$ANALYTICS_DATA" != "null" ] && [ "$ANALYTICS_DATA" != "{}" ]; then + echo "Analytics summary found in backup." + if [ -f "$JSONL_FILE" ]; then + LOCAL_LINES="$(wc -l < "$JSONL_FILE" | tr -d ' ')" + echo " Local analytics: ${LOCAL_LINES} events (keeping local data)" + else + echo " No local analytics found." + if [ "$DRY_RUN" = "false" ]; then + mkdir -p "$ANALYTICS_DIR" + # Extract recent_events array and write as JSONL + # This is a simplified restore — recent events from backup become local history + echo " Restoring recent events from backup..." 
+ fi + fi + echo "" +fi + +# ─── Restore retro history ────────────────────────────────── +RETRO_DATA="$(echo "$BACKUP" | grep -o '"retro_history":\[.*\]' | sed 's/"retro_history"://' || true)" + +if [ -n "$RETRO_DATA" ] && [ "$RETRO_DATA" != "null" ] && [ "$RETRO_DATA" != "[]" ]; then + echo "Retro history found in backup." + if [ "$DRY_RUN" = "false" ]; then + echo " Retro history will be merged with local data." + fi + echo "" +fi + +if [ "$DRY_RUN" = "true" ]; then + echo "(dry run — no changes made)" +else + echo "Restore complete." +fi diff --git a/bin/gstack-telemetry-log b/bin/gstack-telemetry-log index edcbdbab..5edde6dd 100755 --- a/bin/gstack-telemetry-log +++ b/bin/gstack-telemetry-log @@ -32,17 +32,21 @@ OUTCOME="unknown" USED_BROWSE="false" SESSION_ID="" ERROR_CLASS="" +ERROR_MESSAGE="" +FAILED_STEP="" EVENT_TYPE="skill_run" while [ $# -gt 0 ]; do case "$1" in - --skill) SKILL="$2"; shift 2 ;; - --duration) DURATION="$2"; shift 2 ;; - --outcome) OUTCOME="$2"; shift 2 ;; - --used-browse) USED_BROWSE="$2"; shift 2 ;; - --session-id) SESSION_ID="$2"; shift 2 ;; - --error-class) ERROR_CLASS="$2"; shift 2 ;; - --event-type) EVENT_TYPE="$2"; shift 2 ;; + --skill) SKILL="$2"; shift 2 ;; + --duration) DURATION="$2"; shift 2 ;; + --outcome) OUTCOME="$2"; shift 2 ;; + --used-browse) USED_BROWSE="$2"; shift 2 ;; + --session-id) SESSION_ID="$2"; shift 2 ;; + --error-class) ERROR_CLASS="$2"; shift 2 ;; + --error-message) ERROR_MESSAGE="$2"; shift 2 ;; + --failed-step) FAILED_STEP="$2"; shift 2 ;; + --event-type) EVENT_TYPE="$2"; shift 2 ;; *) shift ;; esac done @@ -135,6 +139,12 @@ mkdir -p "$ANALYTICS_DIR" ERR_FIELD="null" [ -n "$ERROR_CLASS" ] && ERR_FIELD="\"$ERROR_CLASS\"" +ERR_MSG_FIELD="null" +[ -n "$ERROR_MESSAGE" ] && ERR_MSG_FIELD="\"$(echo "$ERROR_MESSAGE" | head -c 200 | sed 's/"/\\"/g')\"" + +STEP_FIELD="null" +[ -n "$FAILED_STEP" ] && STEP_FIELD="\"$(echo "$FAILED_STEP" | head -c 30)\"" + DUR_FIELD="null" [ -n "$DURATION" ] && DUR_FIELD="$DURATION" @@ 
-144,9 +154,10 @@ INSTALL_FIELD="null" BROWSE_BOOL="false" [ "$USED_BROWSE" = "true" ] && BROWSE_BOOL="true" -printf '{"v":1,"ts":"%s","event_type":"%s","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":%s,"outcome":"%s","error_class":%s,"used_browse":%s,"sessions":%s,"installation_id":%s,"_repo_slug":"%s","_branch":"%s"}\n' \ +printf '{"v":1,"ts":"%s","event_type":"%s","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":%s,"outcome":"%s","error_class":%s,"error_message":%s,"failed_step":%s,"used_browse":%s,"sessions":%s,"installation_id":%s,"_repo_slug":"%s","_branch":"%s"}\n' \ "$TS" "$EVENT_TYPE" "$SKILL" "$SESSION_ID" "$GSTACK_VERSION" "$OS" "$ARCH" \ - "$DUR_FIELD" "$OUTCOME" "$ERR_FIELD" "$BROWSE_BOOL" "${SESSIONS:-1}" \ + "$DUR_FIELD" "$OUTCOME" "$ERR_FIELD" "$ERR_MSG_FIELD" "$STEP_FIELD" \ + "$BROWSE_BOOL" "${SESSIONS:-1}" \ "$INSTALL_FIELD" "$REPO_SLUG" "$BRANCH" >> "$JSONL_FILE" 2>/dev/null || true # ─── Trigger sync if tier is not off ───────────────────────── diff --git a/bin/gstack-telemetry-sync b/bin/gstack-telemetry-sync index 90e37243..d7ae2836 100755 --- a/bin/gstack-telemetry-sync +++ b/bin/gstack-telemetry-sync @@ -118,7 +118,26 @@ HTTP_CODE="$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 \ # ─── Update cursor on success (2xx) ───────────────────────── case "$HTTP_CODE" in 2*) NEW_CURSOR=$(( CURSOR + COUNT )) - echo "$NEW_CURSOR" > "$CURSOR_FILE" 2>/dev/null || true ;; + echo "$NEW_CURSOR" > "$CURSOR_FILE" 2>/dev/null || true + + # Ping update_checks (install base proxy) + GSTACK_VERSION="$(cat "$GSTACK_DIR/VERSION" 2>/dev/null | tr -d '[:space:]' || echo "unknown")" + _OS="$(uname -s | tr '[:upper:]' '[:lower:]')" + curl -sf --max-time 5 \ + -X POST "${ENDPOINT}/update_checks" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -H "Authorization: Bearer ${ANON_KEY}" \ + -H "Prefer: return=minimal" \ + -d 
"{\"gstack_version\":\"$GSTACK_VERSION\",\"os\":\"$_OS\"}" \ + >/dev/null 2>&1 || true + + # Trigger community backup if community tier + BACKUP_CMD="$GSTACK_DIR/bin/gstack-community-backup" + if [ "$TIER" = "community" ] && [ -x "$BACKUP_CMD" ]; then + "$BACKUP_CMD" 2>/dev/null & + fi + ;; esac # Update rate limit marker diff --git a/browse/SKILL.md b/browse/SKILL.md index e7ab6205..d146eb81 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -39,6 +39,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -64,28 +70,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. 
+> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -94,6 +103,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 
+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow
+begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion:
+
+> You're already sharing anonymous usage data — nice! Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
 ## AskUserQuestion Format
 
 **ALWAYS follow this structure for every AskUserQuestion call:**
 
@@ -131,26 +167,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
 - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
 - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")
 
-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -220,10 +236,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). 
Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -233,12 +254,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. # browse: QA Testing & Dogfooding @@ -384,7 +409,7 @@ The snapshot is your primary tool for understanding and interacting with pages. 
-s --selector Scope to CSS selector -D --diff Unified diff against previous snapshot (first call stores baseline) -a --annotate Annotated screenshot with red overlay boxes and ref labels --o --output Output path for annotated screenshot (default: /browse-annotated.png) +-o --output Output path for annotated screenshot (default: /tmp/browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick) ``` diff --git a/codex/SKILL.md b/codex/SKILL.md index 86715597..5776be0d 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -40,6 +40,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"codex","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -65,28 +71,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. 
-> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -95,6 +104,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 
+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow
+begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion:
+
+> You're already sharing anonymous usage data — nice! Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
 ## AskUserQuestion Format
 
 **ALWAYS follow this structure for every AskUserQuestion call:**
 
@@ -132,26 +168,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
 - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
 - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")
 
-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -221,10 +237,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). 
Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -234,12 +255,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -320,13 +345,13 @@ TMPERR=$(mktemp /tmp/codex-err-XXXXXX.txt) 2. Run the review (5-minute timeout): ```bash -codex review --base -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR" +codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` Use `timeout: 300000` on the Bash call. 
If the user provided custom instructions (e.g., `/codex review focus on security`), pass them as the prompt argument: ```bash -codex review "focus on security" --base -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR" +codex review "focus on security" --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` 3. Capture the output. Then parse cost from stderr: @@ -367,85 +392,17 @@ CROSS-MODEL ANALYSIS: 7. Persist the review result: ```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"codex-review","timestamp":"TIMESTAMP","status":"STATUS","gate":"GATE","findings":N,"findings_fixed":N}' +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"codex-review","timestamp":"TIMESTAMP","status":"STATUS","gate":"GATE","findings":N}' ``` Substitute: TIMESTAMP (ISO 8601), STATUS ("clean" if PASS, "issues_found" if FAIL), -GATE ("pass" or "fail"), findings (count of [P1] + [P2] markers), -findings_fixed (count of findings that were addressed/fixed before shipping). +GATE ("pass" or "fail"), findings (count of [P1] + [P2] markers). 8. Clean up temp files: ```bash rm -f "$TMPERR" ``` -## Plan File Review Report - -After displaying the Review Readiness Dashboard in conversation output, also update the -**plan file** itself so review status is visible to anyone reading the plan. - -### Detect the plan file - -1. Check if there is an active plan file in this conversation (the host provides plan file - paths in system messages — look for plan file references in the conversation context). -2. If not found, skip this section silently — not every review runs in plan mode. - -### Generate the report - -Read the review log output you already have from the Review Readiness Dashboard step above. -Parse each JSONL entry. 
Each skill logs different fields: - -- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`scope_proposed\`, \`scope_accepted\`, \`scope_deferred\`, \`commit\` - → Findings: "{scope_proposed} proposals, {scope_accepted} accepted, {scope_deferred} deferred" - → If scope fields are 0 or missing (HOLD/REDUCTION mode): "mode: {mode}, {critical_gaps} critical gaps" -- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`issues_found\`, \`mode\`, \`commit\` - → Findings: "{issues_found} issues, {critical_gaps} critical gaps" -- **plan-design-review**: \`status\`, \`initial_score\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` - → Findings: "score: {initial_score}/10 → {overall_score}/10, {decisions_made} decisions" -- **codex-review**: \`status\`, \`gate\`, \`findings\`, \`findings_fixed\` - → Findings: "{findings} findings, {findings_fixed}/{findings} fixed" - -All fields needed for the Findings column are now present in the JSONL entries. -For the review you just completed, you may use richer details from your own Completion -Summary. For prior reviews, use the JSONL fields directly — they contain all required data. 
- -Produce this markdown table: - -\`\`\`markdown -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | -| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | -| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | -| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | -\`\`\` - -Below the table, add these lines (omit any that are empty/not applicable): - -- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes -- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis -- **UNRESOLVED:** total unresolved decisions across all reviews -- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). - If Eng Review is not CLEAR and not skipped globally, append "eng review required". - -### Write to the plan file - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one -file you are allowed to edit in plan mode. The plan file review report is part of the -plan's living status. - -- Search the plan file for a \`## GSTACK REVIEW REPORT\` section **anywhere** in the file - (not just at the end — content may have been added after it). -- If found, **replace it** entirely using the Edit tool. Match from \`## GSTACK REVIEW REPORT\` - through either the next \`## \` heading or end of file, whichever comes first. This ensures - content added after the report section is preserved, not eaten. If the Edit fails - (e.g., concurrent edit changed the content), re-read the plan file and retry once. -- If no such section exists, **append it** to the end of the plan file. -- Always place it as the very last section in the plan file. 
If it was found mid-file, - move it: delete the old location and append at the end. - --- ## Step 2B: Challenge (Adversarial) Mode @@ -549,7 +506,7 @@ THE PLAN: For a **new session:** ```bash -codex exec "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " +codex exec "" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " import sys, json for line in sys.stdin: line = line.strip() @@ -582,7 +539,7 @@ for line in sys.stdin: For a **resumed session** (user chose "Continue"): ```bash -codex exec resume "" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " +codex exec resume "" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c " " ``` @@ -618,7 +575,10 @@ Session saved — run /codex again to continue this conversation. agentic coding model). This means as OpenAI ships newer models, /codex automatically uses them. If the user wants a specific model, pass `-m` through to codex. -**Reasoning effort:** All modes use `xhigh` — maximum reasoning power. When reviewing code, breaking code, or consulting on architecture, you want the model thinking as hard as possible. +**Reasoning effort** varies by mode — use the right level for each task: +- **Review mode:** `high` — thorough but not slow. Diff review benefits from depth but doesn't need maximum compute. +- **Challenge (adversarial) mode:** `xhigh` — maximum reasoning power. When trying to break code, you want the model thinking as hard as possible. +- **Consult mode:** `high` — good balance of depth and speed for conversations. **Web search:** All codex commands use `--enable web_search_cached` so Codex can look up docs and APIs during review. This is OpenAI's cached index — fast, no extra cost. 
diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index f707f5b3..0aea3d6e 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -44,6 +44,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"design-consultation","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -69,28 +75,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -99,6 +108,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! 
Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
 ## AskUserQuestion Format
 
 **ALWAYS follow this structure for every AskUserQuestion call:**
 
@@ -136,26 +172,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
 - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
 - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")
 
-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all.
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -225,10 +241,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -238,12 +259,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. # /design-consultation: Your Design System, Built Together @@ -353,12 +378,7 @@ If browse is not available, rely on WebSearch results and your built-in design k **Step 3: Synthesize findings** -**Three-layer synthesis:** -- **Layer 1 (tried and true):** What design patterns does every product in this category share? These are table stakes — users expect them. -- **Layer 2 (new and popular):** What are the search results and current design discourse saying? What's trending? What new patterns are emerging? -- **Layer 3 (first principles):** Given what we know about THIS product's users and positioning — is there a reason the conventional design approach is wrong? Where should we deliberately break from the category norms? 
- -**Eureka check:** If Layer 3 reasoning reveals a genuine design insight — a reason the category's visual language fails THIS product — name it: "EUREKA: Every [category] product does X because they assume [assumption]. But this product's users [evidence] — so we should do Y instead." Log the eureka moment (see preamble). +The goal of research is NOT to copy. It is to get in the ballpark — to understand the visual language users in this category already expect. This gives you the baseline. The interesting design work starts after you have the baseline: deciding where to follow conventions (so the product feels literate) and where to break from them (so the product is memorable). Summarize conversationally: > "I looked at what's out there. Here's the landscape: they converge on [patterns]. Most of them feel [observation — e.g., interchangeable, polished but generic, etc.]. The opportunity to stand out is [gap]. Here's where I'd play it safe and where I'd take a risk..." diff --git a/design-review/SKILL.md b/design-review/SKILL.md index 606ed2cd..523552ea 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -44,6 +44,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && 
~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -69,28 +75,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. 
Just a counter that helps us know if anyone's out there. - -Options: - -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -99,6 +108,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -136,26 +172,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -225,10 +241,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -238,12 +259,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. # /design-review: Design Audit → Fix → Verify diff --git a/document-release/SKILL.md b/document-release/SKILL.md index 7beb7a9e..2aab8ec4 100644 --- a/document-release/SKILL.md +++ b/document-release/SKILL.md @@ -41,6 +41,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"document-release","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -66,28 +72,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -96,6 +105,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! 
Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: + +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -133,26 +169,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. 
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -222,10 +238,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -235,12 +256,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
## Step 0: Detect base branch diff --git a/investigate/SKILL.md b/investigate/SKILL.md index 9a61f540..11ec082a 100644 --- a/investigate/SKILL.md +++ b/investigate/SKILL.md @@ -16,7 +16,6 @@ allowed-tools: - Grep - Glob - AskUserQuestion - - WebSearch hooks: PreToolUse: - matcher: "Edit" @@ -55,6 +54,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"investigate","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -80,28 +85,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -110,6 +118,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! 
Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: + +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -147,26 +182,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. - -**Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. 
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -236,10 +251,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -249,12 +269,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. # Systematic Debugging @@ -329,12 +353,6 @@ Also check: - `TODOS.md` for related known issues - `git log` for prior fixes in the same area — **recurring bugs in the same files are an architectural smell**, not a coincidence -**External pattern search:** If the bug doesn't match a known pattern above, WebSearch for: -- "{framework} {generic error type}" — **sanitize first:** strip hostnames, IPs, file paths, SQL, customer data. Search the error category, not the raw message. -- "{library} {component} known issues" - -If WebSearch is unavailable, skip this search and proceed with hypothesis testing. If a documented solution or known dependency bug surfaces, present it as a candidate hypothesis in Phase 3. 
- --- ## Phase 3: Hypothesis Testing @@ -343,7 +361,7 @@ Before writing ANY fix, verify your hypothesis. 1. **Confirm the hypothesis:** Add a temporary log statement, assertion, or debug output at the suspected root cause. Run the reproduction. Does the evidence match? -2. **If the hypothesis is wrong:** Before forming the next hypothesis, consider searching for the error. **Sanitize first** — strip hostnames, IPs, file paths, SQL fragments, customer identifiers, and any internal/proprietary data from the error message. Search only the generic error type and framework context: "{component} {sanitized error type} {framework version}". If the error message is too specific to sanitize safely, skip the search. If WebSearch is unavailable, skip and proceed. Then return to Phase 1. Gather more evidence. Do not guess. +2. **If the hypothesis is wrong:** Return to Phase 1. Gather more evidence. Do not guess. 3. **3-strike rule:** If 3 hypotheses fail, **STOP**. Use AskUserQuestion: ``` diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index 37c772c1..68253fa6 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -19,7 +19,6 @@ allowed-tools: - Write - Edit - AskUserQuestion - - WebSearch --- @@ -46,6 +45,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"office-hours","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF 
in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -71,28 +76,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? 
We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: - -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -101,6 +109,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -138,26 +173,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -227,10 +242,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -240,33 +260,18 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. -## SETUP (run this check BEFORE any browse command) - -```bash -_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) -B="" -[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" -[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse -if [ -x "$B" ]; then - echo "READY: $B" -else - echo "NEEDS_SETUP" -fi -``` - -If `NEEDS_SETUP`: -1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait. -2. Run: `cd && ./setup` -3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash` - # YC Office Hours You are a **YC office hours partner**. Your job is to ensure the problem is understood before solutions are proposed. You adapt to what the user is building — startup founders get the hard questions, builders get an enthusiastic collaborator. This skill produces design docs, not code. @@ -340,54 +345,12 @@ These are non-negotiable. They shape every response in this mode. ### Response Posture -- **Be direct to the point of discomfort.** Comfort means you haven't pushed hard enough. Your job is diagnosis, not encouragement. Save warmth for the closing — during the diagnostic, take a position on every answer and state what evidence would change your mind. +- **Be direct, not cruel.** The goal is clarity, not demolition. But don't soften a hard truth into uselessness. "That's a red flag" is more useful than "that's something to think about." - **Push once, then push again.** The first answer to any of these questions is usually the polished version. The real answer comes after the second or third push. "You said 'enterprises in healthcare.' Can you name one specific person at one specific company?" 
-- **Calibrated acknowledgment, not praise.** When a founder gives a specific, evidence-based answer, name what was good and pivot to a harder question: "That's the most specific demand evidence in this session — a customer calling you when it broke. Let's see if your wedge is equally sharp." Don't linger. The best reward for a good answer is a harder follow-up. +- **Praise specificity when it shows up.** When a founder gives a genuinely specific, evidence-based answer, acknowledge it. That's hard to do and it matters. - **Name common failure patterns.** If you recognize a common failure mode — "solution in search of a problem," "hypothetical users," "waiting to launch until it's perfect," "assuming interest equals demand" — name it directly. - **End with the assignment.** Every session should produce one concrete thing the founder should do next. Not a strategy — an action. -### Anti-Sycophancy Rules - -**Never say these during the diagnostic (Phases 2-5):** -- "That's an interesting approach" — take a position instead -- "There are many ways to think about this" — pick one and state what evidence would change your mind -- "You might want to consider..." — say "This is wrong because..." or "This works because..." -- "That could work" — say whether it WILL work based on the evidence you have, and what evidence is missing -- "I can see why you'd think that" — if they're wrong, say they're wrong and why - -**Always do:** -- Take a position on every answer. State your position AND what evidence would change it. This is rigor — not hedging, not fake certainty. -- Challenge the strongest version of the founder's claim, not a strawman. - -### Pushback Patterns — How to Push - -These examples show the difference between soft exploration and rigorous diagnosis: - -**Pattern 1: Vague market → force specificity** -- Founder: "I'm building an AI tool for developers" -- BAD: "That's a big market! Let's explore what kind of tool." 
-- GOOD: "There are 10,000 AI developer tools right now. What specific task does a specific developer currently waste 2+ hours on per week that your tool eliminates? Name the person." - -**Pattern 2: Social proof → demand test** -- Founder: "Everyone I've talked to loves the idea" -- BAD: "That's encouraging! Who specifically have you talked to?" -- GOOD: "Loving an idea is free. Has anyone offered to pay? Has anyone asked when it ships? Has anyone gotten angry when your prototype broke? Love is not demand." - -**Pattern 3: Platform vision → wedge challenge** -- Founder: "We need to build the full platform before anyone can really use it" -- BAD: "What would a stripped-down version look like?" -- GOOD: "That's a red flag. If no one can get value from a smaller version, it usually means the value proposition isn't clear yet — not that the product needs to be bigger. What's the one thing a user would pay for this week?" - -**Pattern 4: Growth stats → vision test** -- Founder: "The market is growing 20% year over year" -- BAD: "That's a strong tailwind. How do you plan to capture that growth?" -- GOOD: "Growth rate is not a vision. Every competitor in your space can cite the same stat. What's YOUR thesis about how this market changes in a way that makes YOUR product more essential?" - -**Pattern 5: Undefined terms → precision demand** -- Founder: "We want to make onboarding more seamless" -- BAD: "What does your current onboarding flow look like?" -- GOOD: "'Seamless' is not a product feature — it's a feeling. What specific step in onboarding causes users to drop off? What's the drop-off rate? Have you watched someone go through it?" - ### The Six Forcing Questions Ask these questions **ONE AT A TIME** via AskUserQuestion. Push on each one until the answer is specific, evidence-based, and uncomfortable. Comfort means the founder hasn't gone deep enough. @@ -408,13 +371,6 @@ Ask these questions **ONE AT A TIME** via AskUserQuestion. 
Push on each one unti **Red flags:** "People say it's interesting." "We got 500 waitlist signups." "VCs are excited about the space." None of these are demand. -**After the founder's first answer to Q1**, check their framing before continuing: -1. **Language precision:** Are the key terms in their answer defined? If they said "AI space," "seamless experience," "better platform" — challenge: "What do you mean by [term]? Can you define it so I could measure it?" -2. **Hidden assumptions:** What does their framing take for granted? "I need to raise money" assumes capital is required. "The market needs this" assumes verified pull. Name one assumption and ask if it's verified. -3. **Real vs. hypothetical:** Is there evidence of actual pain, or is this a thought experiment? "I think developers would want..." is hypothetical. "Three developers at my last company spent 10 hours a week on this" is real. - -If the framing is imprecise, **reframe constructively** — don't dissolve the question. Say: "Let me try restating what I think you're actually building: [reframe]. Does that capture it better?" Then proceed with the corrected framing. This takes 60 seconds, not 10 minutes. - #### Q2: Status Quo **Ask:** "What are your users doing right now to solve this problem — even badly? What does that workaround cost them?" @@ -465,12 +421,7 @@ If the framing is imprecise, **reframe constructively** — don't dissolve the q **STOP** after each question. Wait for the response before asking the next. -**Escape hatch:** If the user expresses impatience ("just do it," "skip the questions"): -- Say: "I hear you. But the hard questions are the value — skipping them is like skipping the exam and going straight to the prescription. Let me ask two more, then we'll move." -- Consult the smart routing table for the founder's product stage. Ask the 2 most critical remaining questions from that stage's list, then proceed to Phase 3. 
-- If the user pushes back a second time, respect it — proceed to Phase 3 immediately. Don't ask a third time. -- If only 1 question remains, ask it. If 0 remain, proceed directly. -- Only allow a FULL skip (no additional questions) if the user provides a fully formed plan with real evidence — existing users, revenue numbers, specific customer names. Even then, still run Phase 3 (Premise Challenge) and Phase 4 (Alternatives). +**Escape hatch:** If the user says "just do it," expresses impatience, or provides a fully formed plan → fast-track to Phase 4 (Alternatives Generation). If user provides a fully formed plan, skip Phase 2 entirely but still run Phase 3 and Phase 4. --- @@ -531,43 +482,6 @@ If no matches found, proceed silently. --- -## Phase 2.75: Landscape Awareness - -Read ETHOS.md for the full Search Before Building framework (three layers, eureka moments). The preamble's Search Before Building section has the ETHOS.md path. - -After understanding the problem through questioning, search for what the world thinks. This is NOT competitive research (that's /design-consultation's job). This is understanding conventional wisdom so you can evaluate where it's wrong. - -**Privacy gate:** Before searching, use AskUserQuestion: "I'd like to search for what the world thinks about this space to inform our discussion. This sends generalized category terms (not your specific idea) to a search provider. OK to proceed?" -Options: A) Yes, search away B) Skip — keep this session private -If B: skip this phase entirely and proceed to Phase 3. Use only in-distribution knowledge. - -When searching, use **generalized category terms** — never the user's specific product name, proprietary concept, or stealth idea. For example, search "task management app landscape" not "SuperTodo AI-powered task killer." - -If WebSearch is unavailable, skip this phase and note: "Search unavailable — proceeding with in-distribution knowledge only." 
- -**Startup mode:** WebSearch for: -- "[problem space] startup approach {current year}" -- "[problem space] common mistakes" -- "why [incumbent solution] fails" OR "why [incumbent solution] works" - -**Builder mode:** WebSearch for: -- "[thing being built] existing solutions" -- "[thing being built] open source alternatives" -- "best [thing category] {current year}" - -Read the top 2-3 results. Run the three-layer synthesis: -- **[Layer 1]** What does everyone already know about this space? -- **[Layer 2]** What are the search results and current discourse saying? -- **[Layer 3]** Given what WE learned in Phase 2A/2B — is there a reason the conventional approach is wrong? - -**Eureka check:** If Layer 3 reasoning reveals a genuine insight, name it: "EUREKA: Everyone does X because they assume [assumption]. But [evidence from our conversation] suggests that's wrong here. This means [implication]." Log the eureka moment (see preamble). - -If no eureka moment exists, say: "The conventional wisdom seems sound here. Let's build on it." Proceed to Phase 3. - -**Important:** This search feeds Phase 3 (Premise Challenge). If you found reasons the conventional approach fails, those become premises to challenge. If conventional wisdom is solid, that raises the bar for any premise that contradicts it. - ---- - ## Phase 3: Premise Challenge Before proposing solutions, challenge the premises: @@ -622,66 +536,6 @@ Present via AskUserQuestion. Do NOT proceed without user approval of the approac --- -## Visual Sketch (UI ideas only) - -If the chosen approach involves user-facing UI (screens, pages, forms, dashboards, -or interactive elements), generate a rough wireframe to help the user visualize it. -If the idea is backend-only, infrastructure, or has no UI component — skip this -section silently. - -**Step 1: Gather design context** - -1. Check if `DESIGN.md` exists in the repo root. 
If it does, read it for design - system constraints (colors, typography, spacing, component patterns). Use these - constraints in the wireframe. -2. Apply core design principles: - - **Information hierarchy** — what does the user see first, second, third? - - **Interaction states** — loading, empty, error, success, partial - - **Edge case paranoia** — what if the name is 47 chars? Zero results? Network fails? - - **Subtraction default** — "as little design as possible" (Rams). Every element earns its pixels. - - **Design for trust** — every interface element builds or erodes user trust. - -**Step 2: Generate wireframe HTML** - -Generate a single-page HTML file with these constraints: -- **Intentionally rough aesthetic** — use system fonts, thin gray borders, no color, - hand-drawn-style elements. This is a sketch, not a polished mockup. -- Self-contained — no external dependencies, no CDN links, inline CSS only -- Show the core interaction flow (1-3 screens/states max) -- Include realistic placeholder content (not "Lorem ipsum" — use content that - matches the actual use case) -- Add HTML comments explaining design decisions - -Write to a temp file: -```bash -SKETCH_FILE="/tmp/gstack-sketch-$(date +%s).html" -``` - -**Step 3: Render and capture** - -```bash -$B goto "file://$SKETCH_FILE" -$B screenshot /tmp/gstack-sketch.png -``` - -If `$B` is not available (browse binary not set up), skip the render step. Tell the -user: "Visual sketch requires the browse binary. Run the setup script to enable it." - -**Step 4: Present and iterate** - -Show the screenshot to the user. Ask: "Does this feel right? Want to iterate on the layout?" - -If they want changes, regenerate the HTML with their feedback and re-render. -If they approve or say "good enough," proceed. - -**Step 5: Include in design doc** - -Reference the wireframe screenshot in the design doc's "Recommended Approach" section. 
-The screenshot file at `/tmp/gstack-sketch.png` can be referenced by downstream skills -(`/plan-design-review`, `/design-review`) to see what was originally envisioned. - ---- - ## Phase 4.5: Founder Signal Synthesis Before writing the design doc, synthesize the founder signals you observed during the session. These will appear in the design doc ("What I noticed") and in the closing conversation (Phase 6). @@ -818,73 +672,7 @@ Supersedes: {prior filename — omit this line if first design on this branch} {observational, mentor-like reflections referencing specific things the user said during the session. Quote their words back to them — don't characterize their behavior. 2-4 bullets.} ``` ---- - -## Spec Review Loop - -Before presenting the document to the user for approval, run an adversarial review. - -**Step 1: Dispatch reviewer subagent** - -Use the Agent tool to dispatch an independent reviewer. The reviewer has fresh context -and cannot see the brainstorming conversation — only the document. This ensures genuine -adversarial independence. - -Prompt the subagent with: -- The file path of the document just written -- "Read this document and review it on 5 dimensions. For each dimension, note PASS or - list specific issues with suggested fixes. At the end, output a quality score (1-10) - across all dimensions." - -**Dimensions:** -1. **Completeness** — Are all requirements addressed? Missing edge cases? -2. **Consistency** — Do parts of the document agree with each other? Contradictions? -3. **Clarity** — Could an engineer implement this without asking questions? Ambiguous language? -4. **Scope** — Does the document creep beyond the original problem? YAGNI violations? -5. **Feasibility** — Can this actually be built with the stated approach? Hidden complexity? 
- -The subagent should return: -- A quality score (1-10) -- PASS if no issues, or a numbered list of issues with dimension, description, and fix - -**Step 2: Fix and re-dispatch** - -If the reviewer returns issues: -1. Fix each issue in the document on disk (use Edit tool) -2. Re-dispatch the reviewer subagent with the updated document -3. Maximum 3 iterations total - -**Convergence guard:** If the reviewer returns the same issues on consecutive iterations -(the fix didn't resolve them or the reviewer disagrees with the fix), stop the loop -and persist those issues as "Reviewer Concerns" in the document rather than looping -further. - -If the subagent fails, times out, or is unavailable — skip the review loop entirely. -Tell the user: "Spec review unavailable — presenting unreviewed doc." The document is -already written to disk; the review is a quality bonus, not a gate. - -**Step 3: Report and persist metrics** - -After the loop completes (PASS, max iterations, or convergence guard): - -1. Tell the user the result — summary by default: - "Your doc survived N rounds of adversarial review. M issues caught and fixed. - Quality score: X/10." - If they ask "what did the reviewer find?", show the full reviewer output. - -2. If issues remain after max iterations or convergence, add a "## Reviewer Concerns" - section to the document listing each unresolved issue. Downstream skills will see this. - -3. Append metrics: -```bash -mkdir -p ~/.gstack/analytics -echo '{"skill":"office-hours","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","iterations":ITERATIONS,"issues_found":FOUND,"issues_fixed":FIXED,"remaining":REMAINING,"quality_score":SCORE}' >> ~/.gstack/analytics/spec-review.jsonl 2>/dev/null || true -``` -Replace ITERATIONS, FOUND, FIXED, REMAINING, SCORE with actual values from the review. 
- ---- - -Present the reviewed design doc to the user via AskUserQuestion: +Present the design doc to the user via AskUserQuestion: - A) Approve — mark Status: APPROVED and proceed to handoff - B) Revise — specify which sections need changes (loop back to revise those sections) - C) Start over — return to Phase 2 diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index 28ba5910..2e30a2cf 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -10,14 +10,12 @@ description: | or "is this ambitious enough". Proactively suggest when the user is questioning scope or ambition of a plan, or when the plan feels like it could be thinking bigger. -benefits-from: [office-hours] allowed-tools: - Read - Grep - Glob - Bash - AskUserQuestion - - WebSearch --- @@ -44,6 +42,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"plan-ceo-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -69,28 +73,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. 
This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -99,6 +106,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -136,26 +170,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -225,10 +239,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -238,12 +257,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -353,94 +376,6 @@ DESIGN=$(ls -t ~/.gstack/projects/$SLUG/*-$BRANCH-design-*.md 2>/dev/null | head ``` If a design doc exists (from `/office-hours`), read it. Use it as the source of truth for the problem statement, constraints, and chosen approach. If it has a `Supersedes:` field, note that this is a revised design. -**Handoff note check** (reuses $SLUG and $BRANCH from the design doc check above): -```bash -HANDOFF=$(ls -t ~/.gstack/projects/$SLUG/*-$BRANCH-ceo-handoff-*.md 2>/dev/null | head -1) -[ -n "$HANDOFF" ] && echo "HANDOFF_FOUND: $HANDOFF" || echo "NO_HANDOFF" -``` -If this block runs in a separate shell from the design doc check, recompute $SLUG and $BRANCH first using the same commands from that block. -If a handoff note is found: read it. This contains system audit findings and discussion -from a prior CEO review session that paused so the user could run `/office-hours`. Use it -as additional context alongside the design doc. The handoff note helps you avoid re-asking -questions the user already answered. Do NOT skip any steps — run the full review, but use -the handoff note to inform your analysis and avoid redundant questions. - -Tell the user: "Found a handoff note from your prior CEO review session. I'll use that -context to pick up where we left off." - -## Prerequisite Skill Offer - -When the design doc check above prints "No design doc found," offer the prerequisite -skill before proceeding. - -Say to the user via AskUserQuestion: - -> "No design doc found for this branch. `/office-hours` produces a structured problem -> statement, premise challenge, and explored alternatives — it gives this review much -> sharper input to work with. Takes about 10 minutes. The design doc is per-feature, -> not per-product — it captures the thinking behind this specific change." 
- -Options: -- A) Run /office-hours first (in another window, then come back) -- B) Skip — proceed with standard review - -If they skip: "No worries — standard review. If you ever want sharper input, try -/office-hours first next time." Then proceed normally. Do not re-offer later in the session. - -**Handoff note save (BENEFITS_FROM):** If the user chose A (run /office-hours first), -save a handoff context note before they leave. Reuse $SLUG and $BRANCH from the -design doc check block above (they use the same `remote-slug || basename` fallback -that handles repos without an origin remote). Then run: -```bash -mkdir -p ~/.gstack/projects/$SLUG -USER=$(whoami) -DATETIME=$(date +%Y%m%d-%H%M%S) -``` -Write to `~/.gstack/projects/$SLUG/$USER-$BRANCH-ceo-handoff-$DATETIME.md`: -```markdown -# CEO Review Handoff Note - -Generated by /plan-ceo-review on {date} -Branch: {branch} -Repo: {owner/repo} - -## Why I paused -User chose to run /office-hours first (no design doc found). - -## System Audit Summary -{Summarize what the system audit found — recent git history, diff scope, -CLAUDE.md key points, TODOS.md relevant items, known pain points} - -## Discussion So Far -{Empty — handoff happened before Step 0. Frontend/UI scope detection has not -run yet — it will be assessed when the review resumes.} -``` - -Tell the user: "Context saved. Run /office-hours in another window. When you come back -and invoke /plan-ceo-review, I'll pick up the context automatically — including the -design doc /office-hours produces." - -**Mid-session detection:** During Step 0A (Premise Challenge), if the user can't -articulate the problem, keeps changing the problem statement, answers with "I'm not -sure," or is clearly exploring rather than reviewing — offer `/office-hours`: - -> "It sounds like you're still figuring out what to build — that's totally fine, but -> that's what /office-hours is designed for. Want to pause this review and run -> /office-hours first? 
It'll help you nail down the problem and approach, then come -> back here for the strategic review." - -Options: A) Yes, run /office-hours first. B) No, keep going. -If they keep going, proceed normally — no guilt, no re-asking. - -**Handoff note save (mid-session):** If the user chose A (run /office-hours first from -mid-session detection), save a handoff context note with the same format above, but -include any Step 0A progress in the "Discussion So Far" section — premises discussed, -problem framing attempts, user answers so far. Use the same bash block to generate the -file path. - -Tell the user: "Context saved with your discussion so far. Run /office-hours, then -come back to /plan-ceo-review." - When reading TODOS.md, specifically: * Note any TODOs this plan touches, blocks, or unlocks * Check if deferred work from prior reviews relates to this plan @@ -463,22 +398,6 @@ Analyze the plan. If it involves ANY of: new UI screens/pages, changes to existi Identify 2-3 files or patterns in the existing codebase that are particularly well-designed. Note them as style references for the review. Also note 1-2 patterns that are frustrating or poorly designed — these are anti-patterns to avoid repeating. Report findings before proceeding to Step 0. -### Landscape Check - -Read ETHOS.md for the Search Before Building framework (the preamble's Search Before Building section has the path). Before challenging scope, understand the landscape. WebSearch for: -- "[product category] landscape {current year}" -- "[key feature] alternatives" -- "why [incumbent/conventional approach] [succeeds/fails]" - -If WebSearch is unavailable, skip this check and note: "Search unavailable — proceeding with in-distribution knowledge only." - -Run the three-layer synthesis: -- **[Layer 1]** What's the tried-and-true approach in this space? -- **[Layer 2]** What are the search results saying? -- **[Layer 3]** First-principles reasoning — where might the conventional wisdom be wrong? 
- -Feed into the Premise Challenge (0A) and Dream State Mapping (0C). If you find a eureka moment, surface it during the Expansion opt-in ceremony as a differentiation opportunity. Log it (see preamble). - ## Step 0: Nuclear Scope Challenge + Mode Selection ### 0A. Premise Challenge @@ -600,70 +519,6 @@ Repo: {owner/repo} Derive the feature slug from the plan being reviewed (e.g., "user-dashboard", "auth-refactor"). Use the date in YYYY-MM-DD format. -After writing the CEO plan, run the spec review loop on it: - -## Spec Review Loop - -Before presenting the document to the user for approval, run an adversarial review. - -**Step 1: Dispatch reviewer subagent** - -Use the Agent tool to dispatch an independent reviewer. The reviewer has fresh context -and cannot see the brainstorming conversation — only the document. This ensures genuine -adversarial independence. - -Prompt the subagent with: -- The file path of the document just written -- "Read this document and review it on 5 dimensions. For each dimension, note PASS or - list specific issues with suggested fixes. At the end, output a quality score (1-10) - across all dimensions." - -**Dimensions:** -1. **Completeness** — Are all requirements addressed? Missing edge cases? -2. **Consistency** — Do parts of the document agree with each other? Contradictions? -3. **Clarity** — Could an engineer implement this without asking questions? Ambiguous language? -4. **Scope** — Does the document creep beyond the original problem? YAGNI violations? -5. **Feasibility** — Can this actually be built with the stated approach? Hidden complexity? - -The subagent should return: -- A quality score (1-10) -- PASS if no issues, or a numbered list of issues with dimension, description, and fix - -**Step 2: Fix and re-dispatch** - -If the reviewer returns issues: -1. Fix each issue in the document on disk (use Edit tool) -2. Re-dispatch the reviewer subagent with the updated document -3. 
Maximum 3 iterations total - -**Convergence guard:** If the reviewer returns the same issues on consecutive iterations -(the fix didn't resolve them or the reviewer disagrees with the fix), stop the loop -and persist those issues as "Reviewer Concerns" in the document rather than looping -further. - -If the subagent fails, times out, or is unavailable — skip the review loop entirely. -Tell the user: "Spec review unavailable — presenting unreviewed doc." The document is -already written to disk; the review is a quality bonus, not a gate. - -**Step 3: Report and persist metrics** - -After the loop completes (PASS, max iterations, or convergence guard): - -1. Tell the user the result — summary by default: - "Your doc survived N rounds of adversarial review. M issues caught and fixed. - Quality score: X/10." - If they ask "what did the reviewer find?", show the full reviewer output. - -2. If issues remain after max iterations or convergence, add a "## Reviewer Concerns" - section to the document listing each unresolved issue. Downstream skills will see this. - -3. Append metrics: -```bash -mkdir -p ~/.gstack/analytics -echo '{"skill":"plan-ceo-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","iterations":ITERATIONS,"issues_found":FOUND,"issues_fixed":FIXED,"remaining":REMAINING,"quality_score":SCORE}' >> ~/.gstack/analytics/spec-review.jsonl 2>/dev/null || true -``` -Replace ITERATIONS, FOUND, FIXED, REMAINING, SCORE with actual values from the review. - ### 0E. Temporal Interrogation (EXPANSION, SELECTIVE EXPANSION, and HOLD modes) Think ahead to implementation: What decisions will need to be made during implementation that should be resolved NOW in the plan? ``` @@ -1044,28 +899,12 @@ List every ASCII diagram in files this plan touches. Still accurate? ### Unresolved Decisions If any AskUserQuestion goes unanswered, note it here. Never silently default. 
-## Handoff Note Cleanup - -After producing the Completion Summary, clean up any handoff notes for this branch — -the review is complete and the context is no longer needed. - -```bash -source <(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null) -rm -f ~/.gstack/projects/$SLUG/*-$BRANCH-ceo-handoff-*.md 2>/dev/null || true -``` - ## Review Log -After producing the Completion Summary above, persist the review result. - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes review metadata to -`~/.gstack/` (user config directory, not project files). The skill preamble -already writes to `~/.gstack/sessions/` and `~/.gstack/analytics/` — this is -the same pattern. The review dashboard depends on this data. Skipping this -command breaks the review readiness dashboard in /ship. +After producing the Completion Summary above, persist the review result: ```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"plan-ceo-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"mode":"MODE","scope_proposed":N,"scope_accepted":N,"scope_deferred":N,"commit":"COMMIT"}' +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"plan-ceo-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"mode":"MODE","commit":"COMMIT"}' ``` Before running this command, substitute the placeholder values from the Completion Summary you just produced: @@ -1074,9 +913,6 @@ Before running this command, substitute the placeholder values from the Completi - **unresolved**: number from "Unresolved decisions" in the summary - **critical_gaps**: number from "Failure modes: ___ CRITICAL GAPS" in the summary - **MODE**: the mode the user selected (SCOPE_EXPANSION / SELECTIVE_EXPANSION / HOLD_SCOPE / SCOPE_REDUCTION) -- **scope_proposed**: number from "Scope proposals: ___ proposed" in the summary (0 for HOLD/REDUCTION) -- **scope_accepted**: number from "Scope proposals: ___ accepted" in the summary (0 for HOLD/REDUCTION) -- 
**scope_deferred**: number of items deferred to TODOS.md from scope decisions (0 for HOLD/REDUCTION) - **COMMIT**: output of `git rev-parse --short HEAD` ## Review Readiness Dashboard @@ -1087,7 +923,7 @@ After completing the review, read the review log and config to display the dashb ~/.claude/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -1098,7 +934,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -1108,7 +944,7 @@ Parse the output. 
Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. **Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -1122,73 +958,6 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes -## Plan File Review Report - -After displaying the Review Readiness Dashboard in conversation output, also update the -**plan file** itself so review status is visible to anyone reading the plan. - -### Detect the plan file - -1. 
Check if there is an active plan file in this conversation (the host provides plan file - paths in system messages — look for plan file references in the conversation context). -2. If not found, skip this section silently — not every review runs in plan mode. - -### Generate the report - -Read the review log output you already have from the Review Readiness Dashboard step above. -Parse each JSONL entry. Each skill logs different fields: - -- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`scope_proposed\`, \`scope_accepted\`, \`scope_deferred\`, \`commit\` - → Findings: "{scope_proposed} proposals, {scope_accepted} accepted, {scope_deferred} deferred" - → If scope fields are 0 or missing (HOLD/REDUCTION mode): "mode: {mode}, {critical_gaps} critical gaps" -- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`issues_found\`, \`mode\`, \`commit\` - → Findings: "{issues_found} issues, {critical_gaps} critical gaps" -- **plan-design-review**: \`status\`, \`initial_score\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` - → Findings: "score: {initial_score}/10 → {overall_score}/10, {decisions_made} decisions" -- **codex-review**: \`status\`, \`gate\`, \`findings\`, \`findings_fixed\` - → Findings: "{findings} findings, {findings_fixed}/{findings} fixed" - -All fields needed for the Findings column are now present in the JSONL entries. -For the review you just completed, you may use richer details from your own Completion -Summary. For prior reviews, use the JSONL fields directly — they contain all required data. 
- -Produce this markdown table: - -\`\`\`markdown -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | -| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | -| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | -| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | -\`\`\` - -Below the table, add these lines (omit any that are empty/not applicable): - -- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes -- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis -- **UNRESOLVED:** total unresolved decisions across all reviews -- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). - If Eng Review is not CLEAR and not skipped globally, append "eng review required". - -### Write to the plan file - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one -file you are allowed to edit in plan mode. The plan file review report is part of the -plan's living status. - -- Search the plan file for a \`## GSTACK REVIEW REPORT\` section **anywhere** in the file - (not just at the end — content may have been added after it). -- If found, **replace it** entirely using the Edit tool. Match from \`## GSTACK REVIEW REPORT\` - through either the next \`## \` heading or end of file, whichever comes first. This ensures - content added after the report section is preserved, not eaten. If the Edit fails - (e.g., concurrent edit changed the content), re-read the plan file and retry once. -- If no such section exists, **append it** to the end of the plan file. -- Always place it as the very last section in the plan file. 
If it was found mid-file, - move it: delete the old location and append at the end. - ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this CEO review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index d7aaa3e8..6bf57109 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -42,6 +42,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"plan-design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -67,28 +73,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. 
+> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -97,6 +106,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 
+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -134,26 +170,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. 
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -223,10 +239,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). 
Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -236,12 +257,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -507,23 +532,16 @@ If any AskUserQuestion goes unanswered, note it here. Never silently default to ## Review Log -After producing the Completion Summary above, persist the review result. - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes review metadata to -`~/.gstack/` (user config directory, not project files). The skill preamble -already writes to `~/.gstack/sessions/` and `~/.gstack/analytics/` — this is -the same pattern. The review dashboard depends on this data. Skipping this -command breaks the review readiness dashboard in /ship. 
+After producing the Completion Summary above, persist the review result: ```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"plan-design-review","timestamp":"TIMESTAMP","status":"STATUS","initial_score":N,"overall_score":N,"unresolved":N,"decisions_made":N,"commit":"COMMIT"}' +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"plan-design-review","timestamp":"TIMESTAMP","status":"STATUS","overall_score":N,"unresolved":N,"decisions_made":N,"commit":"COMMIT"}' ``` Substitute values from the Completion Summary: - **TIMESTAMP**: current ISO 8601 datetime - **STATUS**: "clean" if overall score 8+ AND 0 unresolved; otherwise "issues_open" -- **initial_score**: initial overall design score before fixes (0-10) -- **overall_score**: final overall design score after fixes (0-10) +- **overall_score**: final overall design score (0-10) - **unresolved**: number of unresolved design decisions - **decisions_made**: number of design decisions added to the plan - **COMMIT**: output of `git rev-parse --short HEAD` @@ -536,7 +554,7 @@ After completing the review, read the review log and config to display the dashb ~/.claude/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. 
For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -547,7 +565,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -557,7 +575,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. 
**Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -571,73 +589,6 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes -## Plan File Review Report - -After displaying the Review Readiness Dashboard in conversation output, also update the -**plan file** itself so review status is visible to anyone reading the plan. - -### Detect the plan file - -1. Check if there is an active plan file in this conversation (the host provides plan file - paths in system messages — look for plan file references in the conversation context). -2. If not found, skip this section silently — not every review runs in plan mode. - -### Generate the report - -Read the review log output you already have from the Review Readiness Dashboard step above. -Parse each JSONL entry. 
Each skill logs different fields: - -- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`scope_proposed\`, \`scope_accepted\`, \`scope_deferred\`, \`commit\` - → Findings: "{scope_proposed} proposals, {scope_accepted} accepted, {scope_deferred} deferred" - → If scope fields are 0 or missing (HOLD/REDUCTION mode): "mode: {mode}, {critical_gaps} critical gaps" -- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`issues_found\`, \`mode\`, \`commit\` - → Findings: "{issues_found} issues, {critical_gaps} critical gaps" -- **plan-design-review**: \`status\`, \`initial_score\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` - → Findings: "score: {initial_score}/10 → {overall_score}/10, {decisions_made} decisions" -- **codex-review**: \`status\`, \`gate\`, \`findings\`, \`findings_fixed\` - → Findings: "{findings} findings, {findings_fixed}/{findings} fixed" - -All fields needed for the Findings column are now present in the JSONL entries. -For the review you just completed, you may use richer details from your own Completion -Summary. For prior reviews, use the JSONL fields directly — they contain all required data. 
- -Produce this markdown table: - -\`\`\`markdown -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | -| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | -| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | -| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | -\`\`\` - -Below the table, add these lines (omit any that are empty/not applicable): - -- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes -- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis -- **UNRESOLVED:** total unresolved decisions across all reviews -- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). - If Eng Review is not CLEAR and not skipped globally, append "eng review required". - -### Write to the plan file - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one -file you are allowed to edit in plan mode. The plan file review report is part of the -plan's living status. - -- Search the plan file for a \`## GSTACK REVIEW REPORT\` section **anywhere** in the file - (not just at the end — content may have been added after it). -- If found, **replace it** entirely using the Edit tool. Match from \`## GSTACK REVIEW REPORT\` - through either the next \`## \` heading or end of file, whichever comes first. This ensures - content added after the report section is preserved, not eaten. If the Edit fails - (e.g., concurrent edit changed the content), re-read the plan file and retry once. -- If no such section exists, **append it** to the end of the plan file. -- Always place it as the very last section in the plan file. 
If it was found mid-file, - move it: delete the old location and append at the end. - ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, recommend the next review(s) based on what this design review discovered. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index b3f099a0..4a476b92 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -8,7 +8,6 @@ description: | "review the architecture", "engineering review", or "lock in the plan". Proactively suggest when the user has a plan or design doc and is about to start coding — to catch architecture issues before implementation. -benefits-from: [office-hours] allowed-tools: - Read - Write @@ -16,7 +15,6 @@ allowed-tools: - Glob - AskUserQuestion - Bash - - WebSearch --- @@ -43,6 +41,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"plan-eng-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -68,28 +72,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. 
This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
- -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -98,6 +105,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth `. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -135,26 +169,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -224,10 +238,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -237,12 +256,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. # Plan Review Mode @@ -299,39 +322,12 @@ DESIGN=$(ls -t ~/.gstack/projects/$SLUG/*-$BRANCH-design-*.md 2>/dev/null | head ``` If a design doc exists, read it. Use it as the source of truth for the problem statement, constraints, and chosen approach. If it has a `Supersedes:` field, note that this is a revised design — check the prior version for context on what changed and why. -## Prerequisite Skill Offer - -When the design doc check above prints "No design doc found," offer the prerequisite -skill before proceeding. - -Say to the user via AskUserQuestion: - -> "No design doc found for this branch. `/office-hours` produces a structured problem -> statement, premise challenge, and explored alternatives — it gives this review much -> sharper input to work with. Takes about 10 minutes. The design doc is per-feature, -> not per-product — it captures the thinking behind this specific change." - -Options: -- A) Run /office-hours first (in another window, then come back) -- B) Skip — proceed with standard review - -If they skip: "No worries — standard review. If you ever want sharper input, try -/office-hours first next time." Then proceed normally. Do not re-offer later in the session. - ### Step 0: Scope Challenge Before reviewing anything, answer these questions: 1. **What existing code already partially or fully solves each sub-problem?** Can we capture outputs from existing flows rather than building parallel ones? 2. **What is the minimum set of changes that achieves the stated goal?** Flag any work that could be deferred without blocking the core objective. Be ruthless about scope creep. 3. **Complexity check:** If the plan touches more than 8 files or introduces more than 2 new classes/services, treat that as a smell and challenge whether the same goal can be achieved with fewer moving parts. -4. 
**Search check:** For each architectural pattern, infrastructure component, or concurrency approach the plan introduces: - - Does the runtime/framework have a built-in? Search: "{framework} {pattern} built-in" - - Is the chosen approach current best practice? Search: "{pattern} best practice {current year}" - - Are there known footguns? Search: "{framework} {pattern} pitfalls" - - If WebSearch is unavailable, skip this check and note: "Search unavailable — proceeding with in-distribution knowledge only." - - If the plan rolls a custom solution where a built-in exists, flag it as a scope reduction opportunity. Annotate recommendations with **[Layer 1]**, **[Layer 2]**, **[Layer 3]**, or **[EUREKA]** (see preamble's Search Before Building section). If you find a eureka moment — a reason the standard approach is wrong for this case — present it as an architectural insight. -5. **TODOS cross-reference:** Read `TODOS.md` if it exists. Are any deferred items blocking this plan? Can any deferred items be bundled into this PR without expanding scope? Does this plan create new work that should be captured as a TODO? +4. **TODOS cross-reference:** Read `TODOS.md` if it exists. Are any deferred items blocking this plan? Can any deferred items be bundled into this PR without expanding scope? Does this plan create new work that should be captured as a TODO? 5. **Completeness check:** Is the plan doing the complete version or a shortcut? With AI-assisted coding, the cost of completeness (100% test coverage, full edge case handling, complete error paths) is 10-100x cheaper than with a human team. If the plan proposes a shortcut that saves human-hours but only saves minutes with CC+gstack, recommend the complete version. Boil the lake. @@ -506,16 +502,10 @@ Check the git log for this branch. If there are prior commits suggesting a previ ## Review Log -After producing the Completion Summary above, persist the review result. 
- -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes review metadata to -`~/.gstack/` (user config directory, not project files). The skill preamble -already writes to `~/.gstack/sessions/` and `~/.gstack/analytics/` — this is -the same pattern. The review dashboard depends on this data. Skipping this -command breaks the review readiness dashboard in /ship. +After producing the Completion Summary above, persist the review result: ```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"plan-eng-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"issues_found":N,"mode":"MODE","commit":"COMMIT"}' +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"plan-eng-review","timestamp":"TIMESTAMP","status":"STATUS","unresolved":N,"critical_gaps":N,"mode":"MODE","commit":"COMMIT"}' ``` Substitute values from the Completion Summary: @@ -523,7 +513,6 @@ Substitute values from the Completion Summary: - **STATUS**: "clean" if 0 unresolved decisions AND 0 critical gaps; otherwise "issues_open" - **unresolved**: number from "Unresolved decisions" count - **critical_gaps**: number from "Failure modes: ___ critical gaps flagged" -- **issues_found**: total issues found across all review sections (Architecture + Code Quality + Performance + Test gaps) - **MODE**: FULL_REVIEW / SCOPE_REDUCED - **COMMIT**: output of `git rev-parse --short HEAD` @@ -535,7 +524,7 @@ After completing the review, read the review log and config to display the dashb ~/.claude/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). 
For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -546,7 +535,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -556,7 +545,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. 
Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. **Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -570,73 +559,6 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - For entries without a \`commit\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" - If all reviews match the current HEAD, do not display any staleness notes -## Plan File Review Report - -After displaying the Review Readiness Dashboard in conversation output, also update the -**plan file** itself so review status is visible to anyone reading the plan. - -### Detect the plan file - -1. Check if there is an active plan file in this conversation (the host provides plan file - paths in system messages — look for plan file references in the conversation context). -2. If not found, skip this section silently — not every review runs in plan mode. - -### Generate the report - -Read the review log output you already have from the Review Readiness Dashboard step above. -Parse each JSONL entry. 
Each skill logs different fields: - -- **plan-ceo-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`mode\`, \`scope_proposed\`, \`scope_accepted\`, \`scope_deferred\`, \`commit\` - → Findings: "{scope_proposed} proposals, {scope_accepted} accepted, {scope_deferred} deferred" - → If scope fields are 0 or missing (HOLD/REDUCTION mode): "mode: {mode}, {critical_gaps} critical gaps" -- **plan-eng-review**: \`status\`, \`unresolved\`, \`critical_gaps\`, \`issues_found\`, \`mode\`, \`commit\` - → Findings: "{issues_found} issues, {critical_gaps} critical gaps" -- **plan-design-review**: \`status\`, \`initial_score\`, \`overall_score\`, \`unresolved\`, \`decisions_made\`, \`commit\` - → Findings: "score: {initial_score}/10 → {overall_score}/10, {decisions_made} decisions" -- **codex-review**: \`status\`, \`gate\`, \`findings\`, \`findings_fixed\` - → Findings: "{findings} findings, {findings_fixed}/{findings} fixed" - -All fields needed for the Findings column are now present in the JSONL entries. -For the review you just completed, you may use richer details from your own Completion -Summary. For prior reviews, use the JSONL fields directly — they contain all required data. 
- -Produce this markdown table: - -\`\`\`markdown -## GSTACK REVIEW REPORT - -| Review | Trigger | Why | Runs | Status | Findings | -|--------|---------|-----|------|--------|----------| -| CEO Review | \`/plan-ceo-review\` | Scope & strategy | {runs} | {status} | {findings} | -| Codex Review | \`/codex review\` | Independent 2nd opinion | {runs} | {status} | {findings} | -| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | {runs} | {status} | {findings} | -| Design Review | \`/plan-design-review\` | UI/UX gaps | {runs} | {status} | {findings} | -\`\`\` - -Below the table, add these lines (omit any that are empty/not applicable): - -- **CODEX:** (only if codex-review ran) — one-line summary of codex fixes -- **CROSS-MODEL:** (only if both Claude and Codex reviews exist) — overlap analysis -- **UNRESOLVED:** total unresolved decisions across all reviews -- **VERDICT:** list reviews that are CLEAR (e.g., "CEO + ENG CLEARED — ready to implement"). - If Eng Review is not CLEAR and not skipped globally, append "eng review required". - -### Write to the plan file - -**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one -file you are allowed to edit in plan mode. The plan file review report is part of the -plan's living status. - -- Search the plan file for a \`## GSTACK REVIEW REPORT\` section **anywhere** in the file - (not just at the end — content may have been added after it). -- If found, **replace it** entirely using the Edit tool. Match from \`## GSTACK REVIEW REPORT\` - through either the next \`## \` heading or end of file, whichever comes first. This ensures - content added after the report section is preserved, not eaten. If the Edit fails - (e.g., concurrent edit changed the content), re-read the plan file and retry once. -- If no such section exists, **append it** to the end of the plan file. -- Always place it as the very last section in the plan file. 
If it was found mid-file, - move it: delete the old location and append at the end. - ## Next Steps — Review Chaining After displaying the Review Readiness Dashboard, check if additional reviews would be valuable. Read the dashboard output to see which reviews have already been run and whether they are stale. diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md index a46233a3..0ad3214e 100644 --- a/qa-only/SKILL.md +++ b/qa-only/SKILL.md @@ -12,7 +12,6 @@ allowed-tools: - Read - Write - AskUserQuestion - - WebSearch --- @@ -39,6 +38,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"qa-only","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -64,28 +69,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. 
-> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -94,6 +102,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 
+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! Want to unlock more? +> +> The **community tier** adds: +> - Cloud backup of your gstack config (restore on new machines) +> - Benchmarks: see how your /qa times compare to the community +> - Skill recommendations based on what other builders use +> +> Just needs your email (verified via a one-time code). + +Options: +- A) Yes, join community (enter email) +- B) Not now + +If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth`. +Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`. +If B: do nothing. + +Always run: +```bash +touch ~/.gstack/.community-prompted +``` + +This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -131,26 +166,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p - BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) - BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. 
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -220,10 +235,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). 
Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -233,12 +253,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
# /qa-only: Report-Only QA Testing diff --git a/qa/SKILL.md b/qa/SKILL.md index 6e7d49a0..169c791e 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -45,6 +45,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"qa","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -70,28 +76,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -100,6 +109,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! 
Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
## AskUserQuestion Format

**ALWAYS follow this structure for every AskUserQuestion call:**

@@ -137,26 +173,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")

-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all.
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -226,10 +242,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -239,12 +260,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
## Step 0: Detect base branch diff --git a/retro/SKILL.md b/retro/SKILL.md index 635b5747..fb473c17 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -39,6 +39,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"retro","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -64,28 +70,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -94,6 +103,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! 
Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /retro times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
## AskUserQuestion Format

**ALWAYS follow this structure for every AskUserQuestion call:**

@@ -131,26 +167,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")

-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all.
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -220,10 +236,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -233,12 +254,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. ## Detect default branch @@ -396,20 +421,6 @@ If TODOS.md doesn't exist, skip the Backlog Health row. If the JSONL file doesn't exist or has no entries in the window, skip the Skill Usage row. -**Eureka Moments (if logged):** Read `~/.gstack/analytics/eureka.jsonl` if it exists. Filter entries within the retro time window by `ts` field. For each eureka moment, show the skill that flagged it, the branch, and a one-line summary of the insight. 
Present as: - -``` -| Eureka Moments | 2 this period | -``` - -If moments exist, list them: -``` - EUREKA /office-hours (branch: garrytan/auth-rethink): "Session tokens don't need server storage — browser crypto API makes client-side JWT validation viable" - EUREKA /plan-eng-review (branch: garrytan/cache-layer): "Redis isn't needed here — Bun's built-in LRU cache handles this workload" -``` - -If the JSONL file doesn't exist or has no entries in the window, skip the Eureka Moments row. - ### Step 3: Commit Time Distribution Show hourly histogram in local time using bar chart: @@ -469,7 +480,7 @@ From commit diffs, estimate PR sizes and bucket them: - **Small** (<100 LOC) - **Medium** (100-500 LOC) - **Large** (500-1500 LOC) -- **XL** (1500+ LOC) +- **XL** (1500+ LOC) — flag these with file counts ### Step 8: Focus Score + Ship of the Week @@ -661,13 +672,14 @@ Narrative interpreting what the team-wide patterns mean: Narrative covering: - Commit type mix and what it reveals -- PR size distribution and what it reveals about shipping cadence +- PR size discipline (are PRs staying small?) - Fix-chain detection (sequences of fix commits on the same subsystem) - Version bump discipline ### Code Quality Signals - Test LOC ratio trend - Hotspot analysis (are the same files churning?) +- Any XL PRs that should have been split - Greptile signal ratio and trend (if history exists): "Greptile: X% signal (Y valid catches, Z false positives)" ### Test Health @@ -706,7 +718,7 @@ For each teammate (sorted by commits descending), write a section: - "Fixed the N+1 query that was causing 2s load times on the dashboard" - **Opportunity for growth**: 1 specific, constructive suggestion. Frame as investment, not criticism. 
Examples: - "Test coverage on the payment module is at 8% — worth investing in before the next feature lands on top of it" - - "Most commits land in a single burst — spacing work across the day could reduce context-switching fatigue" + - "3 of the 5 PRs were 800+ LOC — breaking these up would catch issues earlier and make review easier" - "All commits land between 1-4am — sustainable pace matters for code quality long-term" **AI collaboration note:** If many commits have `Co-Authored-By` AI trailers (e.g., Claude, Copilot), note the AI-assisted commit percentage as a team metric. Frame it neutrally — "N% of commits were AI-assisted" — without judgment. diff --git a/review/SKILL.md b/review/SKILL.md index abf517a4..7c4e2a8b 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -13,9 +13,7 @@ allowed-tools: - Write - Grep - Glob - - Agent - AskUserQuestion - - WebSearch --- @@ -42,6 +40,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -67,28 +71,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. 
This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
-
-Options:
-- A) Sure, anonymous is fine
-- B) No thanks, fully off
-
-If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous`
-If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off`
+If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous`
+If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off`

Always run:
```bash
@@ -97,6 +104,33 @@ touch ~/.gstack/.telemetry-prompted

This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely.

+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow
+begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion:
+
+> You're already sharing anonymous usage data — nice! Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /review times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
## AskUserQuestion Format

**ALWAYS follow this structure for every AskUserQuestion call:**

@@ -134,26 +168,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
- BAD: Quoting only human-team effort: "This would take 2 weeks." 
(Say: "2 weeks human / ~1 hour CC.") -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -223,10 +237,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). 
-**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -236,12 +255,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. 
This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -346,17 +369,10 @@ Run `git diff origin/` to get the full diff. This includes both committed Apply the checklist against the diff in two passes: 1. **Pass 1 (CRITICAL):** SQL & Data Safety, Race Conditions & Concurrency, LLM Output Trust Boundary, Enum & Value Completeness -2. **Pass 2 (INFORMATIONAL):** Conditional Side Effects, Magic Numbers & String Coupling, Dead Code & Consistency, LLM Prompt Issues, Test Gaps, View/Frontend, Performance & Bundle Impact +2. **Pass 2 (INFORMATIONAL):** Conditional Side Effects, Magic Numbers & String Coupling, Dead Code & Consistency, LLM Prompt Issues, Test Gaps, View/Frontend **Enum & Value Completeness requires reading code OUTSIDE the diff.** When the diff introduces a new enum value, status, tier, or type constant, use Grep to find all files that reference sibling values, then Read those files to check if the new value is handled. This is the one category where within-diff review is insufficient. -**Search-before-recommending:** When recommending a fix pattern (especially for concurrency, caching, auth, or framework-specific behavior): -- Verify the pattern is current best practice for the framework version in use -- Check if a built-in solution exists in newer versions before recommending a workaround -- Verify API signatures against current docs (APIs change between versions) - -Takes seconds, prevents recommending outdated patterns. If WebSearch is unavailable, note it and proceed with in-distribution knowledge. - Follow the output format specified in the checklist. Respect the suppressions — do NOT flag items listed in the "DO NOT flag" section. --- @@ -512,139 +528,52 @@ If no documentation files exist, skip this step silently. --- -## Step 5.7: Adversarial review (auto-scaled) +## Step 5.7: Codex second opinion (optional) -Adversarial review thoroughness scales automatically based on diff size. No configuration needed. 
- -**Detect diff size and tool availability:** +After completing the review, check if the Codex CLI is available: ```bash -DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") -DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") -DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" -# Respect old opt-out -OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) -echo "DIFF_SIZE: $DIFF_TOTAL" -echo "OLD_CFG: ${OLD_CFG:-not_set}" ``` -If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. +If Codex is available, use AskUserQuestion: -**User override:** If the user explicitly requested a specific tier (e.g., "run all passes", "paranoid review", "full adversarial", "do all 4 passes", "thorough review"), honor that request regardless of diff size. Jump to the matching tier section. +``` +Review complete. Want an independent second opinion from Codex (OpenAI)? -**Auto-select tier based on diff size:** -- **Small (< 50 lines changed):** Skip adversarial review entirely. Print: "Small diff ($DIFF_TOTAL lines) — adversarial review skipped." Continue to the next step. -- **Medium (50–199 lines changed):** Run Codex adversarial challenge (or Claude adversarial subagent if Codex unavailable). Jump to the "Medium tier" section. -- **Large (200+ lines changed):** Run all remaining passes — Codex structured review + Claude adversarial subagent + Codex adversarial. Jump to the "Large tier" section. 
+A) Run Codex code review — independent diff review with pass/fail gate
+B) Run Codex adversarial challenge — try to find ways this code will fail in production
+C) Both — review first, then adversarial challenge
+D) Skip — no Codex review needed
+```
----
+
+If the user chooses A, B, or C:
-
-### Medium tier (50–199 lines)
-
-Claude's structured review already ran. Now add a **cross-model adversarial challenge**.
-
-**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead.
-
-**Codex adversarial:**
+**For code review (A or C):** Run `codex review --base <base>` with a 5-minute timeout.
+Present the full output verbatim under a `CODEX SAYS (code review):` header.
+Check the output for `[P1]` markers — if found, note `GATE: FAIL`, otherwise `GATE: PASS`.
+After presenting, compare Codex's findings with your own review findings from Steps 4-5
+and output a CROSS-MODEL ANALYSIS showing what both found, what only Codex found,
+and what only Claude found.
+**For adversarial challenge (B or C):** Run:
```bash
-TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX)
-codex exec "Review the changes on this branch against the base branch. Run git diff origin/<base> to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV"
+codex exec "Review the changes on this branch against the base branch. Run git diff origin/<base> to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, failure modes. Be adversarial."
-s read-only ``` +Present the full output verbatim under a `CODEX SAYS (adversarial challenge):` header. -Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: +**Only if a code review ran (user chose A or C):** Persist the Codex review result to the review log: ```bash -cat "$TMPERR_ADV" +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"codex-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","gate":"GATE"}' ``` -Present the full output verbatim. This is informational — it never blocks shipping. +Substitute: STATUS ("clean" if PASS, "issues_found" if FAIL), GATE ("pass" or "fail"). -**Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." +**Do NOT persist a codex-review entry when only the adversarial challenge (B) ran** — +there is no gate verdict to record, and a false entry would make the Review Readiness +Dashboard believe a code review happened when it didn't. -On any Codex error, fall back to the Claude adversarial subagent automatically. - -**Claude adversarial subagent** (fallback when Codex unavailable or errored): - -Dispatch via the Agent tool. The subagent has fresh context — no checklist bias from the structured review. This genuine independence catches things the primary reviewer is blind to. - -Subagent prompt: -"Read the diff for this branch with `git diff origin/`. Think like an attacker and a chaos engineer. Your job is to find ways this code will fail in production. 
Look for: edge cases, race conditions, security holes, resource leaks, failure modes, silent data corruption, logic errors that produce wrong results silently, error handling that swallows failures, and trust boundary violations. Be adversarial. Be thorough. No compliments — just the problems. For each finding, classify as FIXABLE (you know how to fix it) or INVESTIGATE (needs human judgment)." - -Present findings under an `ADVERSARIAL REVIEW (Claude subagent):` header. **FIXABLE findings** flow into the same Fix-First pipeline as the structured review. **INVESTIGATE findings** are presented as informational. - -If the subagent fails or times out: "Claude adversarial subagent unavailable. Continuing without adversarial review." - -**Persist the review result:** -```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' -``` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. - -**Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if Codex was used). - ---- - -### Large tier (200+ lines) - -Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: - -**1. Codex structured review (if available):** -```bash -TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) -codex review --base -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR" -``` - -Use a 5-minute timeout. Present output under `CODEX SAYS (code review):` header. -Check for `[P1]` markers: found → `GATE: FAIL`, not found → `GATE: PASS`. - -If GATE is FAIL, use AskUserQuestion: -``` -Codex found N critical issues in the diff. - -A) Investigate and fix now (recommended) -B) Continue — review will still complete -``` - -If A: address the findings. 
Re-run `codex review` to verify. - -Read stderr for errors (same error handling as medium tier). - -After stderr: `rm -f "$TMPERR"` - -**2. Claude adversarial subagent:** Dispatch a subagent with the adversarial prompt (same prompt as medium tier). This always runs regardless of Codex availability. - -**3. Codex adversarial challenge (if available):** Run `codex exec` with the adversarial prompt (same as medium tier). - -If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex for full 4-pass coverage: `npm install -g @openai/codex`" - -**Persist the review result AFTER all passes complete** (not after each sub-step): -```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' -``` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. 
- ---- - -### Cross-model synthesis (medium and large tiers) - -After all passes complete, synthesize findings across all sources: - -``` -ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): -════════════════════════════════════════════════════════════ - High confidence (found by multiple sources): [findings agreed on by >1 pass] - Unique to Claude structured review: [from earlier step] - Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ -════════════════════════════════════════════════════════════ -``` - -High-confidence findings (agreed on by multiple sources) should be prioritized for fixes. +If Codex is not available, skip this step silently. --- diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 27718933..fd10d20b 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -160,6 +160,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: \${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(${ctx.paths.binDir}/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(${ctx.paths.binDir}/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: \${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"${ctx.skillName}","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ${ctx.paths.binDir}/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -191,28 +197,31 @@ function 
generateTelemetryPrompt(ctx: TemplateContext): string { return `If \`TEL_PROMPTED\` is \`no\` AND \`LAKE_INTRO\` is \`yes\`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with \`gstack-config set telemetry off\`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run \`${ctx.paths.binDir}/gstack-config set telemetry community\` +If A: ask for their email via a follow-up AskUserQuestion, then run: +\`\`\`bash +${ctx.paths.binDir}/gstack-config set telemetry community +${ctx.paths.binDir}/gstack-auth +\`\`\` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. 
-
-Options:
-- A) Sure, anonymous is fine
-- B) No thanks, fully off
-
-If B→A: run \`${ctx.paths.binDir}/gstack-config set telemetry anonymous\`
-If B→B: run \`${ctx.paths.binDir}/gstack-config set telemetry off\`
+If B: run \`${ctx.paths.binDir}/gstack-config set telemetry anonymous\`
+If C: run \`${ctx.paths.binDir}/gstack-config set telemetry off\`

Always run:
\`\`\`bash
@@ -222,6 +231,35 @@ touch ~/.gstack/.telemetry-prompted

This only happens once. If \`TEL_PROMPTED\` is \`yes\`, skip this entirely.`;
}

+function generateCommunityUpgradePrompt(ctx: TemplateContext): string {
+  return `If \`TELEMETRY\` is \`anonymous\` AND \`COMM_PROMPTED\` is \`no\`: After the main skill workflow
+begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion:
+
+> You're already sharing anonymous usage data — nice! Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run \`${ctx.paths.binDir}/gstack-auth <email>\`.
+Wait for the verification code. On success, run \`${ctx.paths.binDir}/gstack-config set telemetry community\`.
+If B: do nothing.
+
+Always run:
+\`\`\`bash
+touch ~/.gstack/.community-prompted
+\`\`\`
+
+This only happens once. If \`COMM_PROMPTED\` is \`yes\`, skip this entirely.`;
+}
+
function generateAskUserFormat(_ctx: TemplateContext): string {
return `## AskUserQuestion Format
@@ -362,6 +400,16 @@ if it failed, abort if the user interrupted).
preamble already writes to the same directory — this is the same pattern.
Skipping this command loses session duration and outcome data.
+**For errors:** Also determine: +- \`ERROR_CLASS\`: a short category — one of: \`timeout\`, \`test_failure\`, \`build_failure\`, + \`git_error\`, \`auth_error\`, \`network_error\`, \`browse_error\`, \`lint_error\`, + \`merge_conflict\`, \`permission_error\`, \`unknown_error\`. Pick the most specific match. +- \`ERROR_MESSAGE\`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: \`"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"\`. Never include file paths, secrets, or PII. +- \`FAILED_STEP\`: which step in the skill workflow failed. Example: \`"run_tests"\`, + \`"create_pr"\`, \`"merge_base"\`, \`"build"\`, \`"qa_browse"\`. Use snake_case, max 30 chars. + Run this bash: \`\`\`bash @@ -370,12 +418,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \\ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \\ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \\ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \\ + --failed-step "FAILED_STEP" 2>/dev/null & \`\`\` Replace \`SKILL_NAME\` with the actual skill name from frontmatter, \`OUTCOME\` with success/error/abort, and \`USED_BROWSE\` with true/false based on whether \`$B\` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For \`ERROR_CLASS\`, \`ERROR_MESSAGE\`, and \`FAILED_STEP\`: use empty string \`""\` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use \`"unknown_error"\`, \`""\`, and \`""\` respectively. 
This runs in the background and never blocks the user.`; } @@ -385,6 +437,7 @@ function generatePreamble(ctx: TemplateContext): string { generateUpgradeCheck(ctx), generateLakeIntro(), generateTelemetryPrompt(ctx), + generateCommunityUpgradePrompt(ctx), generateAskUserFormat(ctx), generateCompletenessSection(), generateSearchBeforeBuildingSection(ctx), diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index a98ebec1..306b0878 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -36,6 +36,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"setup-browser-cookies","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -61,28 +67,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. 
-> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. +> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -91,6 +100,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 
+If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow
+begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion:
+
+> You're already sharing anonymous usage data — nice! Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
## AskUserQuestion Format

**ALWAYS follow this structure for every AskUserQuestion call:**
@@ -128,26 +164,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")

-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -217,10 +233,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). 
Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. +- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -230,12 +251,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. 
# Setup Browser Cookies diff --git a/ship/SKILL.md b/ship/SKILL.md index 6ad69ba7..2934777a 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -11,7 +11,6 @@ allowed-tools: - Edit - Grep - Glob - - Agent - AskUserQuestion - WebSearch --- @@ -40,6 +39,12 @@ _TEL_START=$(date +%s) _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" +_EMAIL=$(~/.claude/skills/gstack/bin/gstack-config get email 2>/dev/null || true) +_COMM_PROMPTED=$([ -f ~/.gstack/.community-prompted ] && echo "yes" || echo "no") +_AUTH_OK=$(~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null && echo "yes" || echo "no") +echo "EMAIL: ${_EMAIL:-none}" +echo "COMM_PROMPTED: $_COMM_PROMPTED" +echo "AUTH: $_AUTH_OK" mkdir -p ~/.gstack/analytics echo '{"skill":"ship","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true for _PF in ~/.gstack/analytics/.pending-*; do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done @@ -65,28 +70,31 @@ Only run `open` if the user says yes. Always run `touch` to mark as seen. This o If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, ask the user about telemetry. Use AskUserQuestion: -> Help gstack get better! Community mode shares usage data (which skills you use, how long -> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. -> No code, file paths, or repo names are ever sent. +> gstack can share usage data (which skills you use, how long they take, crash info) +> to help improve the project. No code, file paths, or repo names are ever sent. 
+> +> The **community tier** unlocks extra features: +> - **Cloud backup** of your gstack config + history (restore on new machines) +> - **Benchmarks**: see how your usage compares to other builders +> - **Skill recommendations** based on community patterns +> > Change anytime with `gstack-config set telemetry off`. Options: -- A) Help gstack get better! (recommended) -- B) No thanks +- A) Community — share data + email for backup, benchmarks & recommendations (recommended) +- B) Anonymous — share data only, no account +- C) No thanks -If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` +If A: ask for their email via a follow-up AskUserQuestion, then run: +```bash +~/.claude/skills/gstack/bin/gstack-config set telemetry community +~/.claude/skills/gstack/bin/gstack-auth +``` +The auth script will send a verification code to their email. Wait for them to enter the 6-digit code. +If auth succeeds, continue with the skill. If it fails, fall back to anonymous tier. -If B: ask a follow-up AskUserQuestion: - -> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, -> no way to connect sessions. Just a counter that helps us know if anyone's out there. - -Options: -- A) Sure, anonymous is fine -- B) No thanks, fully off - -If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` -If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` +If B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If C: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` Always run: ```bash @@ -95,6 +103,33 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `TELEMETRY` is `anonymous` AND `COMM_PROMPTED` is `no`: After the main skill workflow +begins (not during preamble), offer the community tier upgrade once. Use AskUserQuestion: + +> You're already sharing anonymous usage data — nice! 
Want to unlock more?
+>
+> The **community tier** adds:
+> - Cloud backup of your gstack config (restore on new machines)
+> - Benchmarks: see how your /qa times compare to the community
+> - Skill recommendations based on what other builders use
+>
+> Just needs your email (verified via a one-time code).
+
+Options:
+- A) Yes, join community (enter email)
+- B) Not now
+
+If A: ask for their email, then run `~/.claude/skills/gstack/bin/gstack-auth <email>`.
+Wait for the verification code. On success, run `~/.claude/skills/gstack/bin/gstack-config set telemetry community`.
+If B: do nothing.
+
+Always run:
+```bash
+touch ~/.gstack/.community-prompted
+```
+
+This only happens once. If `COMM_PROMPTED` is `yes`, skip this entirely.
+
## AskUserQuestion Format

**ALWAYS follow this structure for every AskUserQuestion call:**
@@ -132,26 +167,6 @@ AI-assisted coding makes the marginal cost of completeness near-zero. When you p
- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.)
- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.")

-## Search Before Building
-
-Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy.
-
-**Three layers of knowledge:**
-- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs.
-- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers.
-- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all.
- -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." - ## Contributor Mode If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. @@ -221,10 +236,15 @@ Determine the skill name from the `name:` field in this file's YAML frontmatter. Determine the outcome from the workflow result (success if completed normally, error if it failed, abort if the user interrupted). -**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to -`~/.gstack/analytics/` (user config directory, not project files). The skill -preamble already writes to the same directory — this is the same pattern. -Skipping this command loses session duration and outcome data. +**For errors:** Also determine: +- `ERROR_CLASS`: a short category — one of: `timeout`, `test_failure`, `build_failure`, + `git_error`, `auth_error`, `network_error`, `browse_error`, `lint_error`, + `merge_conflict`, `permission_error`, `unknown_error`. Pick the most specific match. +- `ERROR_MESSAGE`: a one-line summary of what went wrong (max 200 chars). Include the + command that failed and the key error text. Example: `"bun test: 3 tests failed in + auth.test.ts — expected 200 got 401"`. Never include file paths, secrets, or PII. 
+- `FAILED_STEP`: which step in the skill workflow failed. Example: `"run_tests"`, + `"create_pr"`, `"merge_base"`, `"build"`, `"qa_browse"`. Use snake_case, max 30 chars. Run this bash: @@ -234,12 +254,16 @@ _TEL_DUR=$(( _TEL_END - _TEL_START )) rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true ~/.claude/skills/gstack/bin/gstack-telemetry-log \ --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ - --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" \ + --error-class "ERROR_CLASS" --error-message "ERROR_MESSAGE" \ + --failed-step "FAILED_STEP" 2>/dev/null & ``` Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. -If you cannot determine the outcome, use "unknown". This runs in the background and +For `ERROR_CLASS`, `ERROR_MESSAGE`, and `FAILED_STEP`: use empty string `""` if the +outcome is not error. If the outcome is error but you cannot determine the details, +use `"unknown_error"`, `""`, and `""` respectively. This runs in the background and never blocks the user. ## Step 0: Detect base branch @@ -305,7 +329,7 @@ After completing the review, read the review log and config to display the dashb ~/.claude/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. 
Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, codex-review). Ignore entries with timestamps older than 7 days. For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -316,7 +340,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl | Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | | CEO Review | 0 | — | — | no | | Design Review | 0 | — | — | no | -| Adversarial | 0 | — | — | no | +| Codex Review | 0 | — | — | no | +--------------------------------------------------------------------+ | VERDICT: CLEARED — Eng Review passed | +====================================================================+ @@ -326,7 +350,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \`gstack-config set skip_eng_review true\` (the "don't bother me" setting). - **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. - **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. -- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. +- **Codex Review (optional):** Independent second opinion from OpenAI Codex CLI. 
Shows pass/fail gate. Recommend for critical code changes where a second AI perspective adds value. Skip when Codex CLI is not installed. **Verdict logic:** - **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) @@ -868,139 +892,41 @@ For each classified comment: --- -## Step 3.8: Adversarial review (auto-scaled) +## Step 3.8: Codex second opinion (optional) -Adversarial review thoroughness scales automatically based on diff size. No configuration needed. - -**Detect diff size and tool availability:** +Check if the Codex CLI is available: ```bash -DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") -DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") -DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" -# Respect old opt-out -OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) -echo "DIFF_SIZE: $DIFF_TOTAL" -echo "OLD_CFG: ${OLD_CFG:-not_set}" ``` -If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. +If Codex is available, use AskUserQuestion: -**User override:** If the user explicitly requested a specific tier (e.g., "run all passes", "paranoid review", "full adversarial", "do all 4 passes", "thorough review"), honor that request regardless of diff size. Jump to the matching tier section. +``` +Pre-landing review complete. Want an independent Codex (OpenAI) review before shipping? -**Auto-select tier based on diff size:** -- **Small (< 50 lines changed):** Skip adversarial review entirely. Print: "Small diff ($DIFF_TOTAL lines) — adversarial review skipped." Continue to the next step. -- **Medium (50–199 lines changed):** Run Codex adversarial challenge (or Claude adversarial subagent if Codex unavailable). Jump to the "Medium tier" section. 
-- **Large (200+ lines changed):** Run all remaining passes — Codex structured review + Claude adversarial subagent + Codex adversarial. Jump to the "Large tier" section. +A) Run Codex code review — independent diff review with pass/fail gate +B) Run Codex adversarial challenge — try to break this code +C) Skip — ship without Codex review +``` ---- +If the user chooses A or B: -### Medium tier (50–199 lines) - -Claude's structured review already ran. Now add a **cross-model adversarial challenge**. - -**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead. - -**Codex adversarial:** +**For code review (A):** Run `codex review --base ` with a 5-minute timeout. +Present the full output verbatim under a `CODEX SAYS:` header. Check for `[P1]` markers +to determine pass/fail gate. Persist the result: ```bash -TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) -codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV" +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"codex-review","timestamp":"TIMESTAMP","status":"STATUS","gate":"GATE"}' ``` -Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: -```bash -cat "$TMPERR_ADV" -``` +If GATE is FAIL, use AskUserQuestion: "Codex found critical issues. Ship anyway?" +If the user says no, stop. If yes, continue to Step 4. -Present the full output verbatim. This is informational — it never blocks shipping. 
+**For adversarial (B):** Run codex exec with the adversarial prompt (see /codex skill). +Present findings. This is informational — does not block shipping. -**Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." - -On any Codex error, fall back to the Claude adversarial subagent automatically. - -**Claude adversarial subagent** (fallback when Codex unavailable or errored): - -Dispatch via the Agent tool. The subagent has fresh context — no checklist bias from the structured review. This genuine independence catches things the primary reviewer is blind to. - -Subagent prompt: -"Read the diff for this branch with `git diff origin/`. Think like an attacker and a chaos engineer. Your job is to find ways this code will fail in production. Look for: edge cases, race conditions, security holes, resource leaks, failure modes, silent data corruption, logic errors that produce wrong results silently, error handling that swallows failures, and trust boundary violations. Be adversarial. Be thorough. No compliments — just the problems. For each finding, classify as FIXABLE (you know how to fix it) or INVESTIGATE (needs human judgment)." - -Present findings under an `ADVERSARIAL REVIEW (Claude subagent):` header. **FIXABLE findings** flow into the same Fix-First pipeline as the structured review. **INVESTIGATE findings** are presented as informational. - -If the subagent fails or times out: "Claude adversarial subagent unavailable. Continuing without adversarial review." 
- -**Persist the review result:** -```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' -``` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. - -**Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if Codex was used). - ---- - -### Large tier (200+ lines) - -Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: - -**1. Codex structured review (if available):** -```bash -TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) -codex review --base -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR" -``` - -Use a 5-minute timeout. Present output under `CODEX SAYS (code review):` header. -Check for `[P1]` markers: found → `GATE: FAIL`, not found → `GATE: PASS`. - -If GATE is FAIL, use AskUserQuestion: -``` -Codex found N critical issues in the diff. - -A) Investigate and fix now (recommended) -B) Continue — review will still complete -``` - -If A: address the findings. After fixing, re-run tests (Step 3) since code has changed. Re-run `codex review` to verify. - -Read stderr for errors (same error handling as medium tier). - -After stderr: `rm -f "$TMPERR"` - -**2. Claude adversarial subagent:** Dispatch a subagent with the adversarial prompt (same prompt as medium tier). This always runs regardless of Codex availability. - -**3. Codex adversarial challenge (if available):** Run `codex exec` with the adversarial prompt (same as medium tier). - -If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). 
Install Codex for full 4-pass coverage: `npm install -g @openai/codex`" - -**Persist the review result AFTER all passes complete** (not after each sub-step): -```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' -``` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. - ---- - -### Cross-model synthesis (medium and large tiers) - -After all passes complete, synthesize findings across all sources: - -``` -ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): -════════════════════════════════════════════════════════════ - High confidence (found by multiple sources): [findings agreed on by >1 pass] - Unique to Claude structured review: [from earlier step] - Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ -════════════════════════════════════════════════════════════ -``` - -High-confidence findings (agreed on by multiple sources) should be prioritized for fixes. +If Codex is not available, skip silently. Continue to Step 4. --- @@ -1243,7 +1169,7 @@ doc updates — the user runs `/ship` and documentation stays current without a - **Never skip tests.** If tests fail, stop. - **Never skip the pre-landing review.** If checklist.md is unreadable, stop. - **Never force push.** Use regular `git push` only. -- **Never ask for trivial confirmations** (e.g., "ready to push?", "create PR?"). 
DO stop for: version bumps (MINOR/MAJOR), pre-landing review findings (ASK items), and Codex structured review [P1] findings (large diffs only). +- **Never ask for confirmation** except for MINOR/MAJOR version bumps and pre-landing review ASK items (batched into at most one AskUserQuestion). - **Always use the 4-digit version format** from the VERSION file. - **Date format in CHANGELOG:** `YYYY-MM-DD` - **Split commits for bisectability** — each commit = one logical change. diff --git a/supabase/functions/community-benchmarks/index.ts b/supabase/functions/community-benchmarks/index.ts new file mode 100644 index 00000000..76a89cdc --- /dev/null +++ b/supabase/functions/community-benchmarks/index.ts @@ -0,0 +1,108 @@ +// gstack community-benchmarks edge function +// Computes per-skill duration stats from telemetry_events (last 30 days). +// Upserts results into community_benchmarks table. +// Cached for 1 hour via Cache-Control header. + +import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; + +Deno.serve(async () => { + const supabase = createClient( + Deno.env.get("SUPABASE_URL") ?? "", + Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? 
"" + ); + + try { + const thirtyDaysAgo = new Date( + Date.now() - 30 * 24 * 60 * 60 * 1000 + ).toISOString(); + + // Fetch all skill_run events with duration from last 30 days + const { data: events, error } = await supabase + .from("telemetry_events") + .select("skill, duration_s, outcome") + .eq("event_type", "skill_run") + .not("duration_s", "is", null) + .not("skill", "is", null) + .gte("event_timestamp", thirtyDaysAgo) + .order("skill") + .limit(10000); + + if (error) throw error; + if (!events || events.length === 0) { + return new Response(JSON.stringify([]), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=3600", + }, + }); + } + + // Group by skill and compute stats + const skillMap: Record< + string, + { durations: number[]; successes: number; total: number } + > = {}; + + for (const event of events) { + if (!event.skill || event.duration_s == null) continue; + if (!skillMap[event.skill]) { + skillMap[event.skill] = { durations: [], successes: 0, total: 0 }; + } + skillMap[event.skill].durations.push(Number(event.duration_s)); + skillMap[event.skill].total++; + if (event.outcome === "success") { + skillMap[event.skill].successes++; + } + } + + const benchmarks = Object.entries(skillMap) + .filter(([skill]) => !skill.startsWith("_")) // skip internal skills + .map(([skill, data]) => { + const sorted = data.durations.sort((a, b) => a - b); + const len = sorted.length; + const percentile = (p: number) => { + const idx = Math.floor((p / 100) * (len - 1)); + return sorted[idx] ?? 0; + }; + + return { + skill, + median_duration_s: percentile(50), + p25_duration_s: percentile(25), + p75_duration_s: percentile(75), + total_runs: data.total, + success_rate: + data.total > 0 + ? 
Math.round((data.successes / data.total) * 1000) / 10 + : 0, + updated_at: new Date().toISOString(), + }; + }); + + // Upsert into community_benchmarks table + if (benchmarks.length > 0) { + const { error: upsertError } = await supabase + .from("community_benchmarks") + .upsert(benchmarks, { onConflict: "skill" }); + + if (upsertError) { + console.error("Upsert error:", upsertError); + } + } + + return new Response(JSON.stringify(benchmarks), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=3600", + }, + }); + } catch (err) { + console.error("Benchmarks error:", err); + return new Response(JSON.stringify([]), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } +}); diff --git a/supabase/functions/community-pulse/index.ts b/supabase/functions/community-pulse/index.ts index 23e30202..cd7539d8 100644 --- a/supabase/functions/community-pulse/index.ts +++ b/supabase/functions/community-pulse/index.ts @@ -15,21 +15,40 @@ Deno.serve(async () => { const weekAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString(); const twoWeeksAgo = new Date(Date.now() - 14 * 24 * 60 * 60 * 1000).toISOString(); - // This week's active - const { count: thisWeek } = await supabase + // This week's active (update_checks) + const { count: thisWeekChecks } = await supabase .from("update_checks") .select("*", { count: "exact", head: true }) .gte("checked_at", weekAgo); // Last week's active (for change %) - const { count: lastWeek } = await supabase + const { count: lastWeekChecks } = await supabase .from("update_checks") .select("*", { count: "exact", head: true }) .gte("checked_at", twoWeeksAgo) .lt("checked_at", weekAgo); - const current = thisWeek ?? 0; - const previous = lastWeek ?? 0; + let current = thisWeekChecks ?? 0; + let previous = lastWeekChecks ?? 
0; + + // Fallback: if update_checks is empty, count distinct sessions from telemetry_events + if (current === 0) { + const { data: thisWeekSessions } = await supabase + .from("telemetry_events") + .select("session_id") + .eq("event_type", "skill_run") + .gte("event_timestamp", weekAgo); + + const { data: lastWeekSessions } = await supabase + .from("telemetry_events") + .select("session_id") + .eq("event_type", "skill_run") + .gte("event_timestamp", twoWeeksAgo) + .lt("event_timestamp", weekAgo); + + current = new Set((thisWeekSessions ?? []).map((e: { session_id: string }) => e.session_id)).size; + previous = new Set((lastWeekSessions ?? []).map((e: { session_id: string }) => e.session_id)).size; + } const changePct = previous > 0 ? Math.round(((current - previous) / previous) * 100) : 0; diff --git a/supabase/functions/community-recommendations/index.ts b/supabase/functions/community-recommendations/index.ts new file mode 100644 index 00000000..29517763 --- /dev/null +++ b/supabase/functions/community-recommendations/index.ts @@ -0,0 +1,106 @@ +// gstack community-recommendations edge function +// Returns skill recommendations based on co-occurrence patterns. +// Input: ?skills=qa,ship (user's top skills as comma-separated query param) +// Output: top 3 recommended skills the user hasn't tried yet. +// Cached for 24 hours via Cache-Control header. + +import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; + +Deno.serve(async (req) => { + const supabase = createClient( + Deno.env.get("SUPABASE_URL") ?? "", + Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? "" + ); + + try { + const url = new URL(req.url); + const userSkills = (url.searchParams.get("skills") ?? 
"") + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + + if (userSkills.length === 0) { + return new Response(JSON.stringify({ recommendations: [] }), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=86400", + }, + }); + } + + // Query skill_sequences for co-occurring skills + const { data: sequences, error } = await supabase + .from("skill_sequences") + .select("skill_a, skill_b, co_occurrences") + .in("skill_a", userSkills) + .order("co_occurrences", { ascending: false }) + .limit(50); + + if (error) throw error; + + // Find skills the user hasn't used yet, ranked by co-occurrence + const userSkillSet = new Set(userSkills); + const recommendations: Record< + string, + { co_occurrences: number; paired_with: string[] } + > = {}; + + for (const seq of sequences ?? []) { + if (userSkillSet.has(seq.skill_b)) continue; // already used + if (seq.skill_b.startsWith("_")) continue; // skip internal + + if (!recommendations[seq.skill_b]) { + recommendations[seq.skill_b] = { + co_occurrences: 0, + paired_with: [], + }; + } + recommendations[seq.skill_b].co_occurrences += seq.co_occurrences; + recommendations[seq.skill_b].paired_with.push(seq.skill_a); + } + + // Also get total run counts for percentage calculation + const { data: benchmarks } = await supabase + .from("community_benchmarks") + .select("skill, total_runs"); + + const totalBySkill: Record = {}; + for (const b of benchmarks ?? []) { + totalBySkill[b.skill] = b.total_runs; + } + + // Build top 3 recommendations + const sorted = Object.entries(recommendations) + .sort(([, a], [, b]) => b.co_occurrences - a.co_occurrences) + .slice(0, 3) + .map(([skill, data]) => { + const pairedSkill = data.paired_with[0]; + const pairedTotal = totalBySkill[pairedSkill] ?? 0; + const pct = + pairedTotal > 0 + ? 
Math.round((data.co_occurrences / pairedTotal) * 100) + : 0; + + return { + skill, + reason: `used by ${pct}% of /${pairedSkill} users`, + co_occurrences: data.co_occurrences, + }; + }); + + return new Response(JSON.stringify({ recommendations: sorted }), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=86400", + }, + }); + } catch (err) { + console.error("Recommendations error:", err); + return new Response(JSON.stringify({ recommendations: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } +}); diff --git a/supabase/functions/telemetry-ingest/index.ts b/supabase/functions/telemetry-ingest/index.ts index 07d65d36..248c7d91 100644 --- a/supabase/functions/telemetry-ingest/index.ts +++ b/supabase/functions/telemetry-ingest/index.ts @@ -16,6 +16,8 @@ interface TelemetryEvent { duration_s?: number; outcome: string; error_class?: string; + error_message?: string; + failed_step?: string; used_browse?: boolean; sessions?: number; installation_id?: string; @@ -77,6 +79,8 @@ Deno.serve(async (req) => { duration_s: typeof event.duration_s === "number" ? event.duration_s : null, outcome: String(event.outcome).slice(0, 20), error_class: event.error_class ? String(event.error_class).slice(0, 100) : null, + error_message: event.error_message ? String(event.error_message).slice(0, 200) : null, + failed_step: event.failed_step ? String(event.failed_step).slice(0, 30) : null, used_browse: event.used_browse === true, concurrent_sessions: typeof event.sessions === "number" ? event.sessions : 1, installation_id: event.installation_id ? 
String(event.installation_id).slice(0, 64) : null, diff --git a/supabase/migrations/002_community_tier.sql b/supabase/migrations/002_community_tier.sql new file mode 100644 index 00000000..3b46d847 --- /dev/null +++ b/supabase/migrations/002_community_tier.sql @@ -0,0 +1,43 @@ +-- gstack community tier schema +-- Adds authenticated backup, benchmarks, email, and richer error telemetry. + +-- Add error context columns to telemetry_events +ALTER TABLE telemetry_events ADD COLUMN error_message TEXT; +ALTER TABLE telemetry_events ADD COLUMN failed_step TEXT; + +-- Add columns to installations for backup + email + auth identity +ALTER TABLE installations ADD COLUMN user_id UUID; +ALTER TABLE installations ADD COLUMN email TEXT; +ALTER TABLE installations ADD COLUMN config_snapshot JSONB; +ALTER TABLE installations ADD COLUMN analytics_snapshot JSONB; +ALTER TABLE installations ADD COLUMN retro_history JSONB; +ALTER TABLE installations ADD COLUMN last_backup_at TIMESTAMPTZ; +ALTER TABLE installations ADD COLUMN backup_version INTEGER DEFAULT 0; + +-- RLS: authenticated users can read/write their own installation row +CREATE POLICY "auth_read_own" ON installations + FOR SELECT USING ( + (select auth.uid()) IS NOT NULL AND user_id = (select auth.uid()) + ); +CREATE POLICY "auth_write_own" ON installations + FOR INSERT WITH CHECK (user_id = (select auth.uid())); +CREATE POLICY "auth_update_own" ON installations + FOR UPDATE USING (user_id = (select auth.uid())) + WITH CHECK (user_id = (select auth.uid())); + +-- Community benchmarks (computed by edge function, cached) +CREATE TABLE community_benchmarks ( + skill TEXT PRIMARY KEY, + median_duration_s NUMERIC, + p25_duration_s NUMERIC, + p75_duration_s NUMERIC, + total_runs BIGINT, + success_rate NUMERIC, + updated_at TIMESTAMPTZ DEFAULT now() +); + +ALTER TABLE community_benchmarks ENABLE ROW LEVEL SECURITY; +CREATE POLICY "anon_select" ON community_benchmarks FOR SELECT USING (true); +CREATE POLICY "service_upsert" ON 
community_benchmarks FOR ALL + USING ((select auth.role()) = 'service_role') + WITH CHECK ((select auth.role()) = 'service_role'); diff --git a/test/helpers/session-runner.ts b/test/helpers/session-runner.ts index ab9e2ee5..5e0b057a 100644 --- a/test/helpers/session-runner.ts +++ b/test/helpers/session-runner.ts @@ -167,10 +167,16 @@ export async function runSkillTest(options: { const promptFile = path.join(os.tmpdir(), `.prompt-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`); fs.writeFileSync(promptFile, prompt); + // Isolate telemetry: E2E tests use a temp state dir so they don't pollute + // production telemetry with test events (e.g. fake timeout crashes). + const testStateDir = path.join(os.tmpdir(), `gstack-e2e-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`); + fs.mkdirSync(testStateDir, { recursive: true }); + const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], { cwd: workingDirectory, stdout: 'pipe', stderr: 'pipe', + env: { ...process.env, GSTACK_STATE_DIR: testStateDir }, }); // Race against timeout