Merge origin/main, resolve CHANGELOG conflict, bump to v0.13.7.0

Main landed v0.13.6.0 (GStack Learns) while this branch also had v0.13.6.0 (Community Wave). Resolved by keeping both CHANGELOG entries and bumping this branch to v0.13.7.0. Regenerated the SKILL.md files to pick up the new learn skill and to apply the `find -exec rm` fix from this branch.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-06 21:46:40 +02:00 · 2026-03-29 20:03:18 -07:00
parent fed92d053c ae0a9ad195
commit f21edb5943
49 changed files with 2379 additions and 3 deletions
@@ -0,0 +1,37 @@
+/**
+ * Confidence calibration resolver
+ *
+ * Adds confidence scoring rubric to review-producing skills.
+ * Every finding includes a 1-10 score that gates display:
+ *   7+: show normally
+ *   5-6: show with caveat
+ *   <5: suppress from main report
+ */
+import type { TemplateContext } from './types';
+
+/**
+ * Builds the "Confidence Calibration" Markdown section for review-producing skills.
+ *
+ * The section is static text: the 1-10 scoring rubric table with its display
+ * rules, the required finding format with two examples, and the
+ * calibration-learning rule.
+ *
+ * @param _ctx - Template context; unused, accepted so the function fits the
+ *   `(ctx: TemplateContext) => string` resolver signature.
+ * @returns The Markdown section as a single string.
+ */
+export function generateConfidenceCalibration(_ctx: TemplateContext): string {
+  // Backticks inside the template literal are escaped exactly once (\`), so the
+  // generated Markdown contains plain inline-code spans. Escaping them as \\\`
+  // would emit a literal backslash before each backtick.
+  return `## Confidence Calibration
+
+Every finding MUST include a confidence score (1-10):
+
+| Score | Meaning | Display rule |
+|-------|---------|-------------|
+| 9-10 | Verified by reading specific code. Concrete bug or exploit demonstrated. | Show normally |
+| 7-8 | High confidence pattern match. Very likely correct. | Show normally |
+| 5-6 | Moderate. Could be a false positive. | Show with caveat: "Medium confidence, verify this is actually an issue" |
+| 3-4 | Low confidence. Pattern is suspicious but may be fine. | Suppress from main report. Include in appendix only. |
+| 1-2 | Speculation. | Only report if severity would be P0. |
+
+**Finding format:**
+
+\`[SEVERITY] (confidence: N/10) file:line — description\`
+
+Example:
+\`[P1] (confidence: 9/10) app/models/user.rb:42 — SQL injection via string interpolation in where clause\`
+\`[P2] (confidence: 5/10) app/controllers/api/v1/users_controller.rb:18 — Possible N+1 query, verify with production logs\`
+
+**Calibration learning:** If you report a finding with confidence < 7 and the user
+confirms it IS a real issue, that is a calibration event. Your initial confidence was
+too low. Log the corrected pattern as a learning so future reviews catch it with
+higher confidence.`;
+}
@@ -13,6 +13,8 @@ import { generateDesignMethodology, generateDesignHardRules, generateDesignOutsi
 import { generateTestBootstrap, generateTestCoverageAuditPlan, generateTestCoverageAuditShip, generateTestCoverageAuditReview } from './testing';
 import { generateReviewDashboard, generatePlanFileReviewReport, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec } from './review';
 import { generateSlugEval, generateSlugSetup, generateBaseBranchDetect, generateDeployBootstrap, generateQAMethodology, generateCoAuthorTrailer } from './utility';
+import { generateLearningsSearch, generateLearningsLog } from './learnings';
+import { generateConfidenceCalibration } from './confidence';

 export const RESOLVERS: Record<string, (ctx: TemplateContext) => string> = {
  SLUG_EVAL: generateSlugEval,
@@ -48,4 +50,7 @@ export const RESOLVERS: Record<string, (ctx: TemplateContext) => string> = {
  PLAN_COMPLETION_AUDIT_REVIEW: generatePlanCompletionAuditReview,
  PLAN_VERIFICATION_EXEC: generatePlanVerificationExec,
  CO_AUTHOR_TRAILER: generateCoAuthorTrailer,
+  LEARNINGS_SEARCH: generateLearningsSearch,
+  LEARNINGS_LOG: generateLearningsLog,
+  CONFIDENCE_CALIBRATION: generateConfidenceCalibration,
 };
@@ -0,0 +1,96 @@
+/**
+ * Learnings resolver — cross-skill institutional memory
+ *
+ * Learnings are stored per-project at ~/.gstack/projects/{slug}/learnings.jsonl.
+ * Each entry is a JSONL line with: ts, skill, type, key, insight, confidence,
+ * source, branch, commit, files[].
+ *
+ * Storage is append-only. Duplicates (same key+type) are resolved at read time
+ * by gstack-learnings-search ("latest winner" per key+type).
+ *
+ * Cross-project discovery is opt-in. The resolver asks the user once via
+ * AskUserQuestion and persists the preference via gstack-config.
+ */
+import type { TemplateContext } from './types';
+
+/**
+ * Builds the "Prior Learnings" Markdown section that tells the agent to run
+ * `gstack-learnings-search` and fold any results into its analysis.
+ *
+ * Two variants are produced, selected by `ctx.host`:
+ * - `'codex'`: a short section that runs `$GSTACK_BIN/gstack-learnings-search
+ *   --limit 10` and contains no cross-project handling.
+ * - any other host: a section whose bash snippet reads the
+ *   `cross_project_learnings` setting through `gstack-config` (falling back to
+ *   the string "unset" when the lookup fails), adds `--cross-project` only when
+ *   the value is "true", and instructs the agent to ask the user via
+ *   AskUserQuestion and persist the answer when the value is "unset".
+ *
+ * @param ctx - Template context; `ctx.host` picks the variant and
+ *   `ctx.paths.binDir` is interpolated as the command prefix in the non-codex
+ *   variant.
+ * @returns The Markdown section as a single string.
+ */
+export function generateLearningsSearch(ctx: TemplateContext): string {
+  if (ctx.host === 'codex') {
+    // Codex: simpler version, no cross-project, uses $GSTACK_BIN
+    return `## Prior Learnings
+
+Search for relevant learnings from previous sessions on this project:
+
+\`\`\`bash
+$GSTACK_BIN/gstack-learnings-search --limit 10 2>/dev/null || true
+\`\`\`
+
+If learnings are found, incorporate them into your analysis. When a review finding
+matches a past learning, note it: "Prior learning applied: [key] (confidence N, from [date])"`;
+  }
+
+  // Non-codex hosts: full version with the opt-in cross-project search. Every
+  // command in the snippet ends in `2>/dev/null || true` (or `|| echo "unset"`),
+  // so a missing binary or config does not fail the snippet.
+  return `## Prior Learnings
+
+Search for relevant learnings from previous sessions:
+
+\`\`\`bash
+_CROSS_PROJ=$(${ctx.paths.binDir}/gstack-config get cross_project_learnings 2>/dev/null || echo "unset")
+echo "CROSS_PROJECT: $_CROSS_PROJ"
+if [ "$_CROSS_PROJ" = "true" ]; then
+  ${ctx.paths.binDir}/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true
+else
+  ${ctx.paths.binDir}/gstack-learnings-search --limit 10 2>/dev/null || true
+fi
+\`\`\`
+
+If \`CROSS_PROJECT\` is \`unset\` (first time): Use AskUserQuestion:
+
+> gstack can search learnings from your other projects on this machine to find
+> patterns that might apply here. This stays local (no data leaves your machine).
+> Recommended for solo developers. Skip if you work on multiple client codebases
+> where cross-contamination would be a concern.
+
+Options:
+- A) Enable cross-project learnings (recommended)
+- B) Keep learnings project-scoped only
+
+If A: run \`${ctx.paths.binDir}/gstack-config set cross_project_learnings true\`
+If B: run \`${ctx.paths.binDir}/gstack-config set cross_project_learnings false\`
+
+Then re-run the search with the appropriate flag.
+
+If learnings are found, incorporate them into your analysis. When a review finding
+matches a past learning, display:
+
+**"Prior learning applied: [key] (confidence N/10, from [date])"**
+
+This makes the compounding visible. The user should see that gstack is getting
+smarter on their codebase over time.`;
+}
+
+/**
+ * Builds the "Capture Learnings" Markdown section that tells the agent how to
+ * record a new learning with `gstack-learnings-log`.
+ *
+ * The section contains one example command whose JSON argument has the
+ * `skill` field pre-filled from `ctx.skillName`; the remaining fields are
+ * uppercase placeholders (TYPE, SHORT_KEY, DESCRIPTION, N, SOURCE) followed by
+ * guidance on the allowed types, sources, confidence scale and `files` list.
+ *
+ * @param ctx - Template context; `ctx.host` selects the command prefix and
+ *   `ctx.skillName` is embedded in the example JSON.
+ * @returns The Markdown section as a single string.
+ */
+export function generateLearningsLog(ctx: TemplateContext): string {
+  // Codex gets the literal shell variable $GSTACK_BIN; other hosts get the
+  // configured bin directory path.
+  const binDir = ctx.host === 'codex' ? '$GSTACK_BIN' : ctx.paths.binDir;
+
+  // NOTE(review): ctx.skillName is interpolated into single-quoted shell JSON
+  // without escaping; presumably skill names never contain quotes. Confirm.
+  return `## Capture Learnings
+
+If you discovered a non-obvious pattern, pitfall, or architectural insight during
+this session, log it for future sessions:
+
+\`\`\`bash
+${binDir}/gstack-learnings-log '{"skill":"${ctx.skillName}","type":"TYPE","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"SOURCE","files":["path/to/relevant/file"]}'
+\`\`\`
+
+**Types:** \`pattern\` (reusable approach), \`pitfall\` (what NOT to do), \`preference\`
+(user stated), \`architecture\` (structural decision), \`tool\` (library/framework insight).
+
+**Sources:** \`observed\` (you found this in the code), \`user-stated\` (user told you),
+\`inferred\` (AI deduction), \`cross-model\` (both Claude and Codex agree).
+
+**Confidence:** 1-10. Be honest. An observed pattern you verified in the code is 8-9.
+An inference you're not sure about is 4-5. A user preference they explicitly stated is 10.
+
+**files:** Include the specific file paths this learning references. This enables
+staleness detection: if those files are later deleted, the learning can be flagged.
+
+**Only log genuine discoveries.** Don't log obvious things. Don't log things the user
+already knows. A good test: would this insight save time in a future session? If yes, log it.`;
+}
@@ -68,6 +68,15 @@ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null
  fi
  break
 done
+# Learnings count
+eval "$(${ctx.paths.binDir}/gstack-slug 2>/dev/null)" 2>/dev/null || true
+_LEARN_FILE="\${GSTACK_HOME:-$HOME/.gstack}/projects/\${SLUG:-unknown}/learnings.jsonl"
+if [ -f "$_LEARN_FILE" ]; then
+  _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ')
+  echo "LEARNINGS: $_LEARN_COUNT entries loaded"
+else
+  echo "LEARNINGS: 0"
+fi
 \`\`\``;
 }