diff --git a/SKILL.md b/SKILL.md index fa272905..ec0ed5aa 100644 --- a/SKILL.md +++ b/SKILL.md @@ -57,6 +57,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index 50c2b30c..338a1af8 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -66,6 +66,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/benchmark/SKILL.md b/benchmark/SKILL.md index 51e39a10..10d71673 100644 --- a/benchmark/SKILL.md +++ b/benchmark/SKILL.md @@ -59,6 +59,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is 
`"false"`, do not proactively suggest gstack skills AND do not diff --git a/browse/SKILL.md b/browse/SKILL.md index a9f95ec2..440871c8 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -59,6 +59,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/canary/SKILL.md b/canary/SKILL.md index ed814098..c91bf15d 100644 --- a/canary/SKILL.md +++ b/canary/SKILL.md @@ -59,6 +59,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/codex/SKILL.md b/codex/SKILL.md index 380382ff..a9f409cf 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -60,6 +60,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT 
entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/connect-chrome/SKILL.md b/connect-chrome/SKILL.md index 57826bbd..58df84f9 100644 --- a/connect-chrome/SKILL.md +++ b/connect-chrome/SKILL.md @@ -57,6 +57,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/cso/SKILL.md b/cso/SKILL.md index 5e448639..14b712f4 100644 --- a/cso/SKILL.md +++ b/cso/SKILL.md @@ -63,6 +63,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not @@ -794,6 +803,31 @@ SECURITY FINDINGS 4 HIGH 9/10 UNVERIFIED Integrations Webhook w/o signature verify P6 api/webhooks.ts:24 ``` +## Confidence Calibration + +Every finding MUST include a confidence score (1-10): + +| Score | Meaning | Display rule | +|-------|---------|-------------| +| 9-10 | Verified by reading specific code. Concrete bug or exploit demonstrated. | Show normally | +| 7-8 | High confidence pattern match. Very likely correct. 
| Show normally | +| 5-6 | Moderate. Could be a false positive. | Show with caveat: "Medium confidence, verify this is actually an issue" | +| 3-4 | Low confidence. Pattern is suspicious but may be fine. | Suppress from main report. Include in appendix only. | +| 1-2 | Speculation. | Only report if severity would be P0. | + +**Finding format:** + +`[SEVERITY] (confidence: N/10) file:line — description` + +Example: +`[P1] (confidence: 9/10) app/models/user.rb:42 — SQL injection via string interpolation in where clause` +`[P2] (confidence: 5/10) app/controllers/api/v1/users_controller.rb:18 — Possible N+1 query, verify with production logs` + +**Calibration learning:** If you report a finding with confidence < 7 and the user +confirms it IS a real issue, that is a calibration event. Your initial confidence was +too low. Log the corrected pattern as a learning so future reviews catch it with +higher confidence. + For each finding: ``` ## Finding N: [Title] — [File:Line] diff --git a/cso/SKILL.md.tmpl b/cso/SKILL.md.tmpl index 676c1bd9..5bd86a9c 100644 --- a/cso/SKILL.md.tmpl +++ b/cso/SKILL.md.tmpl @@ -487,6 +487,8 @@ SECURITY FINDINGS 4 HIGH 9/10 UNVERIFIED Integrations Webhook w/o signature verify P6 api/webhooks.ts:24 ``` +{{CONFIDENCE_CALIBRATION}} + For each finding: ``` ## Finding N: [Title] — [File:Line] diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index 86971887..dc5c9ec7 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -64,6 +64,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If 
`PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/design-review/SKILL.md b/design-review/SKILL.md index fb082442..fa1f104f 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -64,6 +64,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/design-shotgun/SKILL.md b/design-shotgun/SKILL.md index 080754e6..e1e4d02d 100644 --- a/design-shotgun/SKILL.md +++ b/design-shotgun/SKILL.md @@ -61,6 +61,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/document-release/SKILL.md b/document-release/SKILL.md index 2758f0cd..f75962d7 100644 --- a/document-release/SKILL.md +++ b/document-release/SKILL.md @@ -61,6 +61,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f 
"$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/investigate/SKILL.md b/investigate/SKILL.md index 8e307dc0..365a9ca9 100644 --- a/investigate/SKILL.md +++ b/investigate/SKILL.md @@ -75,6 +75,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not @@ -367,6 +376,44 @@ Gather context before forming any hypothesis. 4. **Reproduce:** Can you trigger the bug deterministically? If not, gather more evidence before proceeding. +## Prior Learnings + +Search for relevant learnings from previous sessions: + +```bash +_CROSS_PROJ=$(~/.claude/skills/gstack/bin/gstack-config get cross_project_learnings 2>/dev/null || echo "unset") +echo "CROSS_PROJECT: $_CROSS_PROJ" +if [ "$_CROSS_PROJ" = "true" ]; then + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true +else + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 2>/dev/null || true +fi +``` + +If `CROSS_PROJECT` is `unset` (first time): Use AskUserQuestion: + +> gstack can search learnings from your other projects on this machine to find +> patterns that might apply here. This stays local (no data leaves your machine). +> Recommended for solo developers. Skip if you work on multiple client codebases +> where cross-contamination would be a concern. 
+ +Options: +- A) Enable cross-project learnings (recommended) +- B) Keep learnings project-scoped only + +If A: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings false` + +Then re-run the search with the appropriate flag. + +If learnings are found, incorporate them into your analysis. When a review finding +matches a past learning, display: + +**"Prior learning applied: [key] (confidence N/10, from [date])"** + +This makes the compounding visible. The user should see that gstack is getting +smarter on their codebase over time. + Output: **"Root cause hypothesis: ..."** — a specific, testable claim about what is wrong and why. --- @@ -490,6 +537,30 @@ Status: DONE | DONE_WITH_CONCERNS | BLOCKED ════════════════════════════════════════ ``` +## Capture Learnings + +If you discovered a non-obvious pattern, pitfall, or architectural insight during +this session, log it for future sessions: + +```bash +~/.claude/skills/gstack/bin/gstack-learnings-log '{"skill":"investigate","type":"TYPE","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"SOURCE","files":["path/to/relevant/file"]}' +``` + +**Types:** `pattern` (reusable approach), `pitfall` (what NOT to do), `preference` +(user stated), `architecture` (structural decision), `tool` (library/framework insight). + +**Sources:** `observed` (you found this in the code), `user-stated` (user told you), +`inferred` (AI deduction), `cross-model` (both Claude and Codex agree). + +**Confidence:** 1-10. Be honest. An observed pattern you verified in the code is 8-9. +An inference you're not sure about is 4-5. A user preference they explicitly stated is 10. + +**files:** Include the specific file paths this learning references. This enables +staleness detection: if those files are later deleted, the learning can be flagged. + +**Only log genuine discoveries.** Don't log obvious things. 
Don't log things the user +already knows. A good test: would this insight save time in a future session? If yes, log it. + --- ## Important Rules diff --git a/investigate/SKILL.md.tmpl b/investigate/SKILL.md.tmpl index d2eee63f..8ba17fb8 100644 --- a/investigate/SKILL.md.tmpl +++ b/investigate/SKILL.md.tmpl @@ -60,6 +60,8 @@ Gather context before forming any hypothesis. 4. **Reproduce:** Can you trigger the bug deterministically? If not, gather more evidence before proceeding. +{{LEARNINGS_SEARCH}} + Output: **"Root cause hypothesis: ..."** — a specific, testable claim about what is wrong and why. --- @@ -183,6 +185,8 @@ Status: DONE | DONE_WITH_CONCERNS | BLOCKED ════════════════════════════════════════ ``` +{{LEARNINGS_LOG}} + --- ## Important Rules diff --git a/land-and-deploy/SKILL.md b/land-and-deploy/SKILL.md index e54bb159..e36426f0 100644 --- a/land-and-deploy/SKILL.md +++ b/land-and-deploy/SKILL.md @@ -58,6 +58,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index 34aa9070..d624dc37 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -66,6 +66,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < 
"$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not @@ -400,6 +409,44 @@ eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" ``` If design docs exist, list them: "Prior designs for this project: [titles + dates]" +## Prior Learnings + +Search for relevant learnings from previous sessions: + +```bash +_CROSS_PROJ=$(~/.claude/skills/gstack/bin/gstack-config get cross_project_learnings 2>/dev/null || echo "unset") +echo "CROSS_PROJECT: $_CROSS_PROJ" +if [ "$_CROSS_PROJ" = "true" ]; then + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true +else + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 2>/dev/null || true +fi +``` + +If `CROSS_PROJECT` is `unset` (first time): Use AskUserQuestion: + +> gstack can search learnings from your other projects on this machine to find +> patterns that might apply here. This stays local (no data leaves your machine). +> Recommended for solo developers. Skip if you work on multiple client codebases +> where cross-contamination would be a concern. + +Options: +- A) Enable cross-project learnings (recommended) +- B) Keep learnings project-scoped only + +If A: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings false` + +Then re-run the search with the appropriate flag. + +If learnings are found, incorporate them into your analysis. When a review finding +matches a past learning, display: + +**"Prior learning applied: [key] (confidence N/10, from [date])"** + +This makes the compounding visible. The user should see that gstack is getting +smarter on their codebase over time. + 5. **Ask: what's your goal with this?** This is a real question, not a formality. 
The answer determines everything about how the session runs. Via AskUserQuestion, ask: diff --git a/office-hours/SKILL.md.tmpl b/office-hours/SKILL.md.tmpl index 4b5a5e19..358f8290 100644 --- a/office-hours/SKILL.md.tmpl +++ b/office-hours/SKILL.md.tmpl @@ -53,6 +53,8 @@ Understand the project and the area the user wants to change. ``` If design docs exist, list them: "Prior designs for this project: [titles + dates]" +{{LEARNINGS_SEARCH}} + 5. **Ask: what's your goal with this?** This is a real question, not a formality. The answer determines everything about how the session runs. Via AskUserQuestion, ask: diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index f208894c..0090752c 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -64,6 +64,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not @@ -603,6 +612,44 @@ Run the three-layer synthesis: Feed into the Premise Challenge (0A) and Dream State Mapping (0C). If you find a eureka moment, surface it during the Expansion opt-in ceremony as a differentiation opportunity. Log it (see preamble). 
+## Prior Learnings + +Search for relevant learnings from previous sessions: + +```bash +_CROSS_PROJ=$(~/.claude/skills/gstack/bin/gstack-config get cross_project_learnings 2>/dev/null || echo "unset") +echo "CROSS_PROJECT: $_CROSS_PROJ" +if [ "$_CROSS_PROJ" = "true" ]; then + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true +else + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 2>/dev/null || true +fi +``` + +If `CROSS_PROJECT` is `unset` (first time): Use AskUserQuestion: + +> gstack can search learnings from your other projects on this machine to find +> patterns that might apply here. This stays local (no data leaves your machine). +> Recommended for solo developers. Skip if you work on multiple client codebases +> where cross-contamination would be a concern. + +Options: +- A) Enable cross-project learnings (recommended) +- B) Keep learnings project-scoped only + +If A: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings false` + +Then re-run the search with the appropriate flag. + +If learnings are found, incorporate them into your analysis. When a review finding +matches a past learning, display: + +**"Prior learning applied: [key] (confidence N/10, from [date])"** + +This makes the compounding visible. The user should see that gstack is getting +smarter on their codebase over time. + ## Step 0: Nuclear Scope Challenge + Mode Selection ### 0A. Premise Challenge diff --git a/plan-ceo-review/SKILL.md.tmpl b/plan-ceo-review/SKILL.md.tmpl index 8f6aebe3..d32a076e 100644 --- a/plan-ceo-review/SKILL.md.tmpl +++ b/plan-ceo-review/SKILL.md.tmpl @@ -191,6 +191,8 @@ Run the three-layer synthesis: Feed into the Premise Challenge (0A) and Dream State Mapping (0C). If you find a eureka moment, surface it during the Expansion opt-in ceremony as a differentiation opportunity. Log it (see preamble). 
+{{LEARNINGS_SEARCH}} + ## Step 0: Nuclear Scope Challenge + Mode Selection ### 0A. Premise Challenge diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index 902055a0..a6be9782 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -62,6 +62,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index c0086931..52b6993d 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -63,6 +63,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not @@ -485,6 +494,44 @@ Always work through the full interactive review: one section at a time (Architec ## Review Sections (after scope is agreed) +## Prior Learnings + +Search for relevant learnings from previous sessions: + +```bash +_CROSS_PROJ=$(~/.claude/skills/gstack/bin/gstack-config get cross_project_learnings 2>/dev/null || echo "unset") +echo "CROSS_PROJECT: $_CROSS_PROJ" +if [ "$_CROSS_PROJ" = "true" ]; then + 
~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true +else + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 2>/dev/null || true +fi +``` + +If `CROSS_PROJECT` is `unset` (first time): Use AskUserQuestion: + +> gstack can search learnings from your other projects on this machine to find +> patterns that might apply here. This stays local (no data leaves your machine). +> Recommended for solo developers. Skip if you work on multiple client codebases +> where cross-contamination would be a concern. + +Options: +- A) Enable cross-project learnings (recommended) +- B) Keep learnings project-scoped only + +If A: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings false` + +Then re-run the search with the appropriate flag. + +If learnings are found, incorporate them into your analysis. When a review finding +matches a past learning, display: + +**"Prior learning applied: [key] (confidence N/10, from [date])"** + +This makes the compounding visible. The user should see that gstack is getting +smarter on their codebase over time. + ### 1. Architecture review Evaluate: * Overall system design and component boundaries. @@ -498,6 +545,31 @@ Evaluate: **STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved. +## Confidence Calibration + +Every finding MUST include a confidence score (1-10): + +| Score | Meaning | Display rule | +|-------|---------|-------------| +| 9-10 | Verified by reading specific code. Concrete bug or exploit demonstrated. | Show normally | +| 7-8 | High confidence pattern match. Very likely correct. | Show normally | +| 5-6 | Moderate. 
Could be a false positive. | Show with caveat: "Medium confidence, verify this is actually an issue" | +| 3-4 | Low confidence. Pattern is suspicious but may be fine. | Suppress from main report. Include in appendix only. | +| 1-2 | Speculation. | Only report if severity would be P0. | + +**Finding format:** + +`[SEVERITY] (confidence: N/10) file:line — description` + +Example: +`[P1] (confidence: 9/10) app/models/user.rb:42 — SQL injection via string interpolation in where clause` +`[P2] (confidence: 5/10) app/controllers/api/v1/users_controller.rb:18 — Possible N+1 query, verify with production logs` + +**Calibration learning:** If you report a finding with confidence < 7 and the user +confirms it IS a real issue, that is a calibration event. Your initial confidence was +too low. Log the corrected pattern as a learning so future reviews catch it with +higher confidence. + ### 2. Code quality review Evaluate: * Code organization and module structure. diff --git a/plan-eng-review/SKILL.md.tmpl b/plan-eng-review/SKILL.md.tmpl index c91e96d7..415315a8 100644 --- a/plan-eng-review/SKILL.md.tmpl +++ b/plan-eng-review/SKILL.md.tmpl @@ -110,6 +110,8 @@ Always work through the full interactive review: one section at a time (Architec ## Review Sections (after scope is agreed) +{{LEARNINGS_SEARCH}} + ### 1. Architecture review Evaluate: * Overall system design and component boundaries. @@ -123,6 +125,8 @@ Evaluate: **STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved. +{{CONFIDENCE_CALIBRATION}} + ### 2. Code quality review Evaluate: * Code organization and module structure. 
diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md index 6161dc31..3aa96042 100644 --- a/qa-only/SKILL.md +++ b/qa-only/SKILL.md @@ -59,6 +59,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/qa/SKILL.md b/qa/SKILL.md index bf532784..89d281e4 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -65,6 +65,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/retro/SKILL.md b/retro/SKILL.md index 3ebc40fe..41058c8c 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -59,6 +59,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, 
do not proactively suggest gstack skills AND do not @@ -621,6 +630,30 @@ For each contributor (including the current user), compute: **If there are Co-Authored-By trailers:** Parse `Co-Authored-By:` lines in commit messages. Credit those authors for the commit alongside the primary author. Note AI co-authors (e.g., `noreply@anthropic.com`) but do not include them as team members — instead, track "AI-assisted commits" as a separate metric. +## Capture Learnings + +If you discovered a non-obvious pattern, pitfall, or architectural insight during +this session, log it for future sessions: + +```bash +~/.claude/skills/gstack/bin/gstack-learnings-log '{"skill":"retro","type":"TYPE","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"SOURCE","files":["path/to/relevant/file"]}' +``` + +**Types:** `pattern` (reusable approach), `pitfall` (what NOT to do), `preference` +(user stated), `architecture` (structural decision), `tool` (library/framework insight). + +**Sources:** `observed` (you found this in the code), `user-stated` (user told you), +`inferred` (AI deduction), `cross-model` (both Claude and Codex agree). + +**Confidence:** 1-10. Be honest. An observed pattern you verified in the code is 8-9. +An inference you're not sure about is 4-5. A user preference they explicitly stated is 10. + +**files:** Include the specific file paths this learning references. This enables +staleness detection: if those files are later deleted, the learning can be flagged. + +**Only log genuine discoveries.** Don't log obvious things. Don't log things the user +already knows. A good test: would this insight save time in a future session? If yes, log it. 
+ ### Step 10: Week-over-Week Trends (if window >= 14d) If the time window is 14 days or more, split into weekly buckets and show trends: diff --git a/retro/SKILL.md.tmpl b/retro/SKILL.md.tmpl index 5463d07a..b1b44ee1 100644 --- a/retro/SKILL.md.tmpl +++ b/retro/SKILL.md.tmpl @@ -277,6 +277,8 @@ For each contributor (including the current user), compute: **If there are Co-Authored-By trailers:** Parse `Co-Authored-By:` lines in commit messages. Credit those authors for the commit alongside the primary author. Note AI co-authors (e.g., `noreply@anthropic.com`) but do not include them as team members — instead, track "AI-assisted commits" as a separate metric. +{{LEARNINGS_LOG}} + ### Step 10: Week-over-Week Trends (if window >= 14d) If the time window is 14 days or more, split into weekly buckets and show trends: diff --git a/review/SKILL.md b/review/SKILL.md index 9b47b690..52560d77 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -62,6 +62,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not @@ -582,6 +591,44 @@ Run `git diff origin/` to get the full diff. 
This includes both committed --- +## Prior Learnings + +Search for relevant learnings from previous sessions: + +```bash +_CROSS_PROJ=$(~/.claude/skills/gstack/bin/gstack-config get cross_project_learnings 2>/dev/null || echo "unset") +echo "CROSS_PROJECT: $_CROSS_PROJ" +if [ "$_CROSS_PROJ" = "true" ]; then + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true +else + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 2>/dev/null || true +fi +``` + +If `CROSS_PROJECT` is `unset` (first time): Use AskUserQuestion: + +> gstack can search learnings from your other projects on this machine to find +> patterns that might apply here. This stays local (no data leaves your machine). +> Recommended for solo developers. Skip if you work on multiple client codebases +> where cross-contamination would be a concern. + +Options: +- A) Enable cross-project learnings (recommended) +- B) Keep learnings project-scoped only + +If A: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings false` + +Then re-run the search with the appropriate flag. + +If learnings are found, incorporate them into your analysis. When a review finding +matches a past learning, display: + +**"Prior learning applied: [key] (confidence N/10, from [date])"** + +This makes the compounding visible. The user should see that gstack is getting +smarter on their codebase over time. + ## Step 4: Two-pass review Apply the checklist against the diff in two passes: @@ -600,6 +647,31 @@ Takes seconds, prevents recommending outdated patterns. If WebSearch is unavaila Follow the output format specified in the checklist. Respect the suppressions — do NOT flag items listed in the "DO NOT flag" section. 
+## Confidence Calibration + +Every finding MUST include a confidence score (1-10): + +| Score | Meaning | Display rule | +|-------|---------|-------------| +| 9-10 | Verified by reading specific code. Concrete bug or exploit demonstrated. | Show normally | +| 7-8 | High confidence pattern match. Very likely correct. | Show normally | +| 5-6 | Moderate. Could be a false positive. | Show with caveat: "Medium confidence, verify this is actually an issue" | +| 3-4 | Low confidence. Pattern is suspicious but may be fine. | Suppress from main report. Include in appendix only. | +| 1-2 | Speculation. | Only report if severity would be P0. | + +**Finding format:** + +`[SEVERITY] (confidence: N/10) file:line — description` + +Example: +`[P1] (confidence: 9/10) app/models/user.rb:42 — SQL injection via string interpolation in where clause` +`[P2] (confidence: 5/10) app/controllers/api/v1/users_controller.rb:18 — Possible N+1 query, verify with production logs` + +**Calibration learning:** If you report a finding with confidence < 7 and the user +confirms it IS a real issue, that is a calibration event. Your initial confidence was +too low. Log the corrected pattern as a learning so future reviews catch it with +higher confidence. + --- ## Step 4.5: Design Review (conditional) @@ -1127,6 +1199,30 @@ Substitute: - `informational` = remaining unresolved informational findings - `COMMIT` = output of `git rev-parse --short HEAD` +## Capture Learnings + +If you discovered a non-obvious pattern, pitfall, or architectural insight during +this session, log it for future sessions: + +```bash +~/.claude/skills/gstack/bin/gstack-learnings-log '{"skill":"review","type":"TYPE","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"SOURCE","files":["path/to/relevant/file"]}' +``` + +**Types:** `pattern` (reusable approach), `pitfall` (what NOT to do), `preference` +(user stated), `architecture` (structural decision), `tool` (library/framework insight).
+ +**Sources:** `observed` (you found this in the code), `user-stated` (user told you), +`inferred` (AI deduction), `cross-model` (both Claude and Codex agree). + +**Confidence:** 1-10. Be honest. An observed pattern you verified in the code is 8-9. +An inference you're not sure about is 4-5. A user preference they explicitly stated is 10. + +**files:** Include the specific file paths this learning references. This enables +staleness detection: if those files are later deleted, the learning can be flagged. + +**Only log genuine discoveries.** Don't log obvious things. Don't log things the user +already knows. A good test: would this insight save time in a future session? If yes, log it. + If the review exits early before a real review completes (for example, no diff against the base branch), do **not** write this entry. ## Important Rules diff --git a/review/SKILL.md.tmpl b/review/SKILL.md.tmpl index bb9a3bc7..fa14f26a 100644 --- a/review/SKILL.md.tmpl +++ b/review/SKILL.md.tmpl @@ -104,6 +104,8 @@ Run `git diff origin/` to get the full diff. This includes both committed --- +{{LEARNINGS_SEARCH}} + ## Step 4: Two-pass review Apply the checklist against the diff in two passes: @@ -122,6 +124,8 @@ Takes seconds, prevents recommending outdated patterns. If WebSearch is unavaila Follow the output format specified in the checklist. Respect the suppressions — do NOT flag items listed in the "DO NOT flag" section. +{{CONFIDENCE_CALIBRATION}} + --- ## Step 4.5: Design Review (conditional) @@ -273,6 +277,8 @@ Substitute: - `informational` = remaining unresolved informational findings - `COMMIT` = output of `git rev-parse --short HEAD` +{{LEARNINGS_LOG}} + If the review exits early before a real review completes (for example, no diff against the base branch), do **not** write this entry. 
## Important Rules diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index 69617692..3272d613 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -56,6 +56,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/setup-deploy/SKILL.md b/setup-deploy/SKILL.md index a0ff129c..3c353e91 100644 --- a/setup-deploy/SKILL.md +++ b/setup-deploy/SKILL.md @@ -62,6 +62,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not diff --git a/ship/SKILL.md b/ship/SKILL.md index de2743f8..a4ff1bd2 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -60,6 +60,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null fi break done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + 
echo "LEARNINGS: $_LEARN_COUNT entries loaded" +else + echo "LEARNINGS: 0" +fi ``` If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not @@ -1318,6 +1327,44 @@ Add a `## Verification Results` section to the PR body (Step 8): - If verification ran: summary of results (N PASS, M FAIL, K SKIPPED) - If skipped: reason for skipping (no plan, no server, no verification section) +## Prior Learnings + +Search for relevant learnings from previous sessions: + +```bash +_CROSS_PROJ=$(~/.claude/skills/gstack/bin/gstack-config get cross_project_learnings 2>/dev/null || echo "unset") +echo "CROSS_PROJECT: $_CROSS_PROJ" +if [ "$_CROSS_PROJ" = "true" ]; then + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true +else + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 10 2>/dev/null || true +fi +``` + +If `CROSS_PROJECT` is `unset` (first time): Use AskUserQuestion: + +> gstack can search learnings from your other projects on this machine to find +> patterns that might apply here. This stays local (no data leaves your machine). +> Recommended for solo developers. Skip if you work on multiple client codebases +> where cross-contamination would be a concern. + +Options: +- A) Enable cross-project learnings (recommended) +- B) Keep learnings project-scoped only + +If A: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set cross_project_learnings false` + +Then re-run the search with the appropriate flag. + +If learnings are found, incorporate them into your analysis. When a review finding +matches a past learning, display: + +**"Prior learning applied: [key] (confidence N/10, from [date])"** + +This makes the compounding visible. The user should see that gstack is getting +smarter on their codebase over time. 
+ --- ## Step 3.5: Pre-Landing Review @@ -1332,6 +1379,31 @@ Review the diff for structural issues that tests don't catch. - **Pass 1 (CRITICAL):** SQL & Data Safety, LLM Output Trust Boundary - **Pass 2 (INFORMATIONAL):** All remaining categories +## Confidence Calibration + +Every finding MUST include a confidence score (1-10): + +| Score | Meaning | Display rule | +|-------|---------|-------------| +| 9-10 | Verified by reading specific code. Concrete bug or exploit demonstrated. | Show normally | +| 7-8 | High confidence pattern match. Very likely correct. | Show normally | +| 5-6 | Moderate. Could be a false positive. | Show with caveat: "Medium confidence, verify this is actually an issue" | +| 3-4 | Low confidence. Pattern is suspicious but may be fine. | Suppress from main report. Include in appendix only. | +| 1-2 | Speculation. | Only report if severity would be P0. | + +**Finding format:** + +`[SEVERITY] (confidence: N/10) file:line — description` + +Example: +`[P1] (confidence: 9/10) app/models/user.rb:42 — SQL injection via string interpolation in where clause` +`[P2] (confidence: 5/10) app/controllers/api/v1/users_controller.rb:18 — Possible N+1 query, verify with production logs` + +**Calibration learning:** If you report a finding with confidence < 7 and the user +confirms it IS a real issue, that is a calibration event. Your initial confidence was +too low. Log the corrected pattern as a learning so future reviews catch it with +higher confidence.
+ ## Design Review (conditional, diff-scoped) Check if the diff touches frontend files using `gstack-diff-scope`: @@ -1599,6 +1671,30 @@ High-confidence findings (agreed on by multiple sources) should be prioritized f --- +## Capture Learnings + +If you discovered a non-obvious pattern, pitfall, or architectural insight during +this session, log it for future sessions: + +```bash +~/.claude/skills/gstack/bin/gstack-learnings-log '{"skill":"ship","type":"TYPE","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"SOURCE","files":["path/to/relevant/file"]}' +``` + +**Types:** `pattern` (reusable approach), `pitfall` (what NOT to do), `preference` +(user stated), `architecture` (structural decision), `tool` (library/framework insight). + +**Sources:** `observed` (you found this in the code), `user-stated` (user told you), +`inferred` (AI deduction), `cross-model` (both Claude and Codex agree). + +**Confidence:** 1-10. Be honest. An observed pattern you verified in the code is 8-9. +An inference you're not sure about is 4-5. A user preference they explicitly stated is 10. + +**files:** Include the specific file paths this learning references. This enables +staleness detection: if those files are later deleted, the learning can be flagged. + +**Only log genuine discoveries.** Don't log obvious things. Don't log things the user +already knows. A good test: would this insight save time in a future session? If yes, log it. + ## Step 4: Version bump (auto-decide) 1. Read the current `VERSION` file (4-digit format: `MAJOR.MINOR.PATCH.MICRO`) diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl index 62842fc5..09f3c3e2 100644 --- a/ship/SKILL.md.tmpl +++ b/ship/SKILL.md.tmpl @@ -226,6 +226,8 @@ If multiple suites need to run, run them sequentially (each needs a test lane). {{PLAN_VERIFICATION_EXEC}} +{{LEARNINGS_SEARCH}} + --- ## Step 3.5: Pre-Landing Review @@ -240,6 +242,8 @@ Review the diff for structural issues that tests don't catch. 
- **Pass 1 (CRITICAL):** SQL & Data Safety, LLM Output Trust Boundary - **Pass 2 (INFORMATIONAL):** All remaining categories +{{CONFIDENCE_CALIBRATION}} + {{DESIGN_REVIEW_LITE}} Include any design findings alongside the code review findings. They follow the same Fix-First flow below. @@ -316,6 +320,8 @@ For each classified comment: {{ADVERSARIAL_STEP}} +{{LEARNINGS_LOG}} + ## Step 4: Version bump (auto-decide) 1. Read the current `VERSION` file (4-digit format: `MAJOR.MINOR.PATCH.MICRO`)