diff --git a/scripts/resolvers/confidence.ts b/scripts/resolvers/confidence.ts
new file mode 100644
index 00000000..e5539f73
--- /dev/null
+++ b/scripts/resolvers/confidence.ts
@@ -0,0 +1,37 @@
+/**
+ * Confidence calibration resolver
+ *
+ * Adds confidence scoring rubric to review-producing skills.
+ * Every finding includes a 1-10 score that gates display:
+ * 7+: show normally
+ * 5-6: show with caveat
+ * <5: suppress from main report
+ */
+import type { TemplateContext } from './types';
+
+export function generateConfidenceCalibration(_ctx: TemplateContext): string {
+  return `## Confidence Calibration
+
+Every finding MUST include a confidence score (1-10):
+
+| Score | Meaning | Display rule |
+|-------|---------|-------------|
+| 9-10 | Verified by reading specific code. Concrete bug or exploit demonstrated. | Show normally |
+| 7-8 | High confidence pattern match. Very likely correct. | Show normally |
+| 5-6 | Moderate. Could be a false positive. | Show with caveat: "Medium confidence, verify this is actually an issue" |
+| 3-4 | Low confidence. Pattern is suspicious but may be fine. | Suppress from main report. Include in appendix only. |
+| 1-2 | Speculation. | Only report if severity would be P0. |
+
+**Finding format:**
+
+\`[SEVERITY] (confidence: N/10) file:line — description\`
+
+Example:
+\`[P1] (confidence: 9/10) app/models/user.rb:42 — SQL injection via string interpolation in where clause\`
+\`[P2] (confidence: 5/10) app/controllers/api/v1/users_controller.rb:18 — Possible N+1 query, verify with production logs\`
+
+**Calibration learning:** If you report a finding with confidence < 7 and the user
+confirms it IS a real issue, that is a calibration event. Your initial confidence was
+too low. Log the corrected pattern as a learning so future reviews catch it with
+higher confidence.`;
+}
diff --git a/scripts/resolvers/index.ts b/scripts/resolvers/index.ts
index 3d2b9dbb..6b5a9e4e 100644
--- a/scripts/resolvers/index.ts
+++ b/scripts/resolvers/index.ts
@@ -13,6 +13,8 @@ import { generateDesignMethodology, generateDesignHardRules, generateDesignOutsi
 import { generateTestBootstrap, generateTestCoverageAuditPlan, generateTestCoverageAuditShip, generateTestCoverageAuditReview } from './testing';
 import { generateReviewDashboard, generatePlanFileReviewReport, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec } from './review';
 import { generateSlugEval, generateSlugSetup, generateBaseBranchDetect, generateDeployBootstrap, generateQAMethodology, generateCoAuthorTrailer } from './utility';
+import { generateLearningsSearch, generateLearningsLog } from './learnings';
+import { generateConfidenceCalibration } from './confidence';
 
 export const RESOLVERS: Record<string, (ctx: TemplateContext) => string> = {
   SLUG_EVAL: generateSlugEval,
@@ -48,4 +50,7 @@ export const RESOLVERS: Record<string, (ctx: TemplateContext) => string> = {
   PLAN_COMPLETION_AUDIT_REVIEW: generatePlanCompletionAuditReview,
   PLAN_VERIFICATION_EXEC: generatePlanVerificationExec,
   CO_AUTHOR_TRAILER: generateCoAuthorTrailer,
+  LEARNINGS_SEARCH: generateLearningsSearch,
+  LEARNINGS_LOG: generateLearningsLog,
+  CONFIDENCE_CALIBRATION: generateConfidenceCalibration,
 };
diff --git a/scripts/resolvers/learnings.ts b/scripts/resolvers/learnings.ts
new file mode 100644
index 00000000..3bcba7b1
--- /dev/null
+++ b/scripts/resolvers/learnings.ts
@@ -0,0 +1,96 @@
+/**
+ * Learnings resolver — cross-skill institutional memory
+ *
+ * Learnings are stored per-project at ~/.gstack/projects/{slug}/learnings.jsonl.
+ * Each entry is a JSONL line with: ts, skill, type, key, insight, confidence,
+ * source, branch, commit, files[].
+ *
+ * Storage is append-only. Duplicates (same key+type) are resolved at read time
+ * by gstack-learnings-search ("latest winner" per key+type).
+ *
+ * Cross-project discovery is opt-in. The resolver asks the user once via
+ * AskUserQuestion and persists the preference via gstack-config.
+ */
+import type { TemplateContext } from './types';
+
+export function generateLearningsSearch(ctx: TemplateContext): string {
+  if (ctx.host === 'codex') {
+    // Codex: simpler version, no cross-project, uses $GSTACK_BIN
+    return `## Prior Learnings
+
+Search for relevant learnings from previous sessions on this project:
+
+\`\`\`bash
+$GSTACK_BIN/gstack-learnings-search --limit 10 2>/dev/null || true
+\`\`\`
+
+If learnings are found, incorporate them into your analysis. When a review finding
+matches a past learning, note it: "Prior learning applied: [key] (confidence N, from [date])"`;
+  }
+
+  return `## Prior Learnings
+
+Search for relevant learnings from previous sessions:
+
+\`\`\`bash
+_CROSS_PROJ=$(${ctx.paths.binDir}/gstack-config get cross_project_learnings 2>/dev/null || echo "unset")
+echo "CROSS_PROJECT: $_CROSS_PROJ"
+if [ "$_CROSS_PROJ" = "true" ]; then
+  ${ctx.paths.binDir}/gstack-learnings-search --limit 10 --cross-project 2>/dev/null || true
+else
+  ${ctx.paths.binDir}/gstack-learnings-search --limit 10 2>/dev/null || true
+fi
+\`\`\`
+
+If \`CROSS_PROJECT\` is \`unset\` (first time): Use AskUserQuestion:
+
+> gstack can search learnings from your other projects on this machine to find
+> patterns that might apply here. This stays local (no data leaves your machine).
+> Recommended for solo developers. Skip if you work on multiple client codebases
+> where cross-contamination would be a concern.
+
+Options:
+- A) Enable cross-project learnings (recommended)
+- B) Keep learnings project-scoped only
+
+If A: run \`${ctx.paths.binDir}/gstack-config set cross_project_learnings true\`
+If B: run \`${ctx.paths.binDir}/gstack-config set cross_project_learnings false\`
+
+Then re-run the search with the appropriate flag.
+
+If learnings are found, incorporate them into your analysis. When a review finding
+matches a past learning, display:
+
+**"Prior learning applied: [key] (confidence N/10, from [date])"**
+
+This makes the compounding visible. The user should see that gstack is getting
+smarter on their codebase over time.`;
+}
+
+export function generateLearningsLog(ctx: TemplateContext): string {
+  const binDir = ctx.host === 'codex' ? '$GSTACK_BIN' : ctx.paths.binDir;
+
+  return `## Capture Learnings
+
+If you discovered a non-obvious pattern, pitfall, or architectural insight during
+this session, log it for future sessions:
+
+\`\`\`bash
+${binDir}/gstack-learnings-log '{"skill":"${ctx.skillName}","type":"TYPE","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"SOURCE","files":["path/to/relevant/file"]}'
+\`\`\`
+
+**Types:** \`pattern\` (reusable approach), \`pitfall\` (what NOT to do), \`preference\`
+(user stated), \`architecture\` (structural decision), \`tool\` (library/framework insight).
+
+**Sources:** \`observed\` (you found this in the code), \`user-stated\` (user told you),
+\`inferred\` (AI deduction), \`cross-model\` (both Claude and Codex agree).
+
+**Confidence:** 1-10. Be honest. An observed pattern you verified in the code is 8-9.
+An inference you're not sure about is 4-5. A user preference they explicitly stated is 10.
+
+**files:** Include the specific file paths this learning references. This enables
+staleness detection: if those files are later deleted, the learning can be flagged.
+
+**Only log genuine discoveries.** Don't log obvious things. Don't log things the user
+already knows. A good test: would this insight save time in a future session? If yes, log it.`;
+}