mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 19:25:10 +02:00
feat: GStack Learns — per-project self-learning infrastructure (v0.13.4.0) (#622)
* feat: learnings + confidence resolvers — cross-skill memory infrastructure Three new resolvers for the self-learning system: - LEARNINGS_SEARCH: tells skills to load prior learnings before analysis - LEARNINGS_LOG: tells skills to capture discoveries after completing work - CONFIDENCE_CALIBRATION: adds 1-10 confidence scoring to all review findings Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: learnings bin scripts — append-only JSONL read/write gstack-learnings-log: validates JSON, auto-injects timestamp, appends to ~/.gstack/projects/$SLUG/learnings.jsonl. Append-only (no mutation). gstack-learnings-search: reads/filters/dedupes learnings with confidence decay (observed/inferred lose 1pt/30d), cross-project discovery, and "latest winner" resolution per key+type. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: learnings count in preamble output Every skill now prints "LEARNINGS: N entries loaded" during preamble, making the compounding loop visible to the user. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: integrate learnings + confidence into 9 skill templates Add {{LEARNINGS_SEARCH}}, {{LEARNINGS_LOG}}, and {{CONFIDENCE_CALIBRATION}} placeholders to review, ship, plan-eng-review, plan-ceo-review, office-hours, investigate, retro, and cso templates. Regenerated all SKILL.md files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: /learn skill — manage project learnings New skill for reviewing, searching, pruning, and exporting what gstack has learned across sessions. Commands: /learn, /learn search, /learn prune, /learn export, /learn stats, /learn add. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * docs: self-learning roadmap — 5-release design doc Covers: R1 GStack Learns (v0.14), R2 Review Army (v0.15), R3 Smart Ceremony (v0.16), R4 /autoship (v0.17), R5 Studio (v0.18). 
Inspired by Compound Engineering, adapted to GStack's architecture. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: learnings bin script unit tests — 13 tests, free Tests gstack-learnings-log (valid/invalid JSON, timestamp injection, append-only) and gstack-learnings-search (dedup, type/query/limit filters, confidence decay, user-stated no-decay, malformed JSONL skip). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: bump version and changelog (v0.13.4.0) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: learnings resolver + bin script edge case tests — 21 new tests, free Adds gen-skill-docs coverage for LEARNINGS_SEARCH, LEARNINGS_LOG, and CONFIDENCE_CALIBRATION resolvers. Adds bin script edge cases: timestamp preservation, special characters, files array, sort order, type grouping, combined filtering, missing fields, confidence floor at 0. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: sync package.json version with VERSION file (0.13.4.0) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: gitignore .factory/ — generated output, not source Same pattern as .claude/skills/ and .agents/. These SKILL.md files are generated from .tmpl templates by gen:skill-docs --host factory. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: /learn E2E — seed 3 learnings, verify agent surfaces them Seeds N+1 query pattern, stale cache pitfall, and rubocop preference into learnings.jsonl, then runs /learn and checks that at least 2/3 appear in the agent's output. Gate tier, ~$0.25/run. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2123,3 +2123,113 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
|
||||
expect(violations).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Learnings + Confidence Resolver Tests ─────────────────────
|
||||
|
||||
describe('LEARNINGS_SEARCH resolver', () => {
|
||||
const SEARCH_SKILLS = ['review', 'ship', 'plan-eng-review', 'investigate', 'office-hours', 'plan-ceo-review'];
|
||||
|
||||
for (const skill of SEARCH_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains learnings search`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Prior Learnings');
|
||||
expect(content).toContain('gstack-learnings-search');
|
||||
});
|
||||
}
|
||||
|
||||
test('learnings search includes cross-project config check', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('cross_project_learnings');
|
||||
expect(content).toContain('--cross-project');
|
||||
});
|
||||
|
||||
test('learnings search includes AskUserQuestion for first-time cross-project opt-in', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Enable cross-project learnings');
|
||||
expect(content).toContain('project-scoped only');
|
||||
});
|
||||
|
||||
test('learnings search mentions prior learning applied display format', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Prior learning applied');
|
||||
});
|
||||
});
|
||||
|
||||
describe('LEARNINGS_LOG resolver', () => {
|
||||
const LOG_SKILLS = ['review', 'retro', 'investigate'];
|
||||
|
||||
for (const skill of LOG_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains learnings log`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Capture Learnings');
|
||||
expect(content).toContain('gstack-learnings-log');
|
||||
});
|
||||
}
|
||||
|
||||
test('learnings log documents all type values', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
for (const type of ['pattern', 'pitfall', 'preference', 'architecture', 'tool']) {
|
||||
expect(content).toContain(type);
|
||||
}
|
||||
});
|
||||
|
||||
test('learnings log documents all source values', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
for (const source of ['observed', 'user-stated', 'inferred', 'cross-model']) {
|
||||
expect(content).toContain(source);
|
||||
}
|
||||
});
|
||||
|
||||
test('learnings log includes files field for staleness detection', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('"files"');
|
||||
expect(content).toContain('staleness detection');
|
||||
});
|
||||
});
|
||||
|
||||
describe('CONFIDENCE_CALIBRATION resolver', () => {
|
||||
const CONFIDENCE_SKILLS = ['review', 'ship', 'plan-eng-review', 'cso'];
|
||||
|
||||
for (const skill of CONFIDENCE_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains confidence calibration`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Confidence Calibration');
|
||||
expect(content).toContain('confidence score');
|
||||
});
|
||||
}
|
||||
|
||||
test('confidence calibration includes scoring rubric with all tiers', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('9-10');
|
||||
expect(content).toContain('7-8');
|
||||
expect(content).toContain('5-6');
|
||||
expect(content).toContain('3-4');
|
||||
expect(content).toContain('1-2');
|
||||
});
|
||||
|
||||
test('confidence calibration includes display rules', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Show normally');
|
||||
expect(content).toContain('Suppress from main report');
|
||||
});
|
||||
|
||||
test('confidence calibration includes finding format example', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('[P1] (confidence:');
|
||||
expect(content).toContain('SQL injection');
|
||||
});
|
||||
|
||||
test('confidence calibration includes calibration learning feedback loop', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('calibration event');
|
||||
expect(content).toContain('Log the corrected pattern');
|
||||
});
|
||||
|
||||
test('skills without confidence calibration do NOT contain it', () => {
|
||||
// office-hours and retro do NOT use confidence calibration
|
||||
for (const skill of ['office-hours', 'retro']) {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).not.toContain('## Confidence Calibration');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -95,6 +95,9 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'cso-diff-mode': ['cso/**'],
|
||||
'cso-infra-scope': ['cso/**'],
|
||||
|
||||
// Learnings
|
||||
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
|
||||
|
||||
// Document-release
|
||||
'document-release': ['document-release/**'],
|
||||
|
||||
@@ -238,6 +241,9 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'cso-diff-mode': 'gate',
|
||||
'cso-infra-scope': 'periodic',
|
||||
|
||||
// Learnings — gate (functional guardrail: seeded learnings must appear)
|
||||
'learnings-show': 'gate',
|
||||
|
||||
// Document-release — gate (CHANGELOG guardrail)
|
||||
'document-release': 'gate',
|
||||
|
||||
|
||||
@@ -0,0 +1,283 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin');
|
||||
|
||||
let tmpDir: string;
|
||||
let slugDir: string;
|
||||
let learningsFile: string;
|
||||
|
||||
// Run bin/gstack-learnings-log with `input` as its single shell argument,
// pointing GSTACK_HOME at the per-test tmpDir.
// Returns { stdout, exitCode }. By default a non-zero exit rethrows; pass
// { expectFail: true } to capture stderr and the exit status instead.
function runLog(input: string, opts: { expectFail?: boolean } = {}): { stdout: string; exitCode: number } {
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };
  try {
    // POSIX-safe single-quote escaping: each ' in the payload becomes '\''.
    const stdout = execSync(`${BIN}/gstack-learnings-log '${input.replace(/'/g, "'\\''")}'`, execOpts).trim();
    return { stdout, exitCode: 0 };
  } catch (e: any) {
    if (opts.expectFail) {
      // On expected failure, surface stderr in the stdout slot (what the
      // failing script printed) plus the exit status (default 1 if the
      // process was killed and status is null).
      return { stdout: e.stderr?.toString() || '', exitCode: e.status || 1 };
    }
    throw e;
  }
}
|
||||
|
||||
// Run bin/gstack-learnings-search with raw CLI `args`, pointing GSTACK_HOME
// at the per-test tmpDir, and return its trimmed stdout.
// NOTE(review): the bare catch swallows ALL failures (non-zero exit, timeout)
// and returns '' — so the "exits 0 when no learnings file exists" test cannot
// actually distinguish exit 0 from a crash. Consider capturing the exit code
// like runLog does — confirm intended script behavior first.
function runSearch(args: string = ''): string {
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };
  try {
    return execSync(`${BIN}/gstack-learnings-search ${args}`, execOpts).trim();
  } catch {
    return '';
  }
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-learn-'));
|
||||
slugDir = path.join(tmpDir, 'projects');
|
||||
fs.mkdirSync(slugDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
function findLearningsFile(): string | null {
|
||||
const projectDirs = fs.readdirSync(slugDir);
|
||||
if (projectDirs.length === 0) return null;
|
||||
const f = path.join(slugDir, projectDirs[0], 'learnings.jsonl');
|
||||
return fs.existsSync(f) ? f : null;
|
||||
}
|
||||
|
||||
describe('gstack-learnings-log', () => {
|
||||
test('appends valid JSON to learnings.jsonl', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"test-key","insight":"test insight","confidence":8,"source":"observed"}';
|
||||
const result = runLog(input);
|
||||
expect(result.exitCode).toBe(0);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const content = fs.readFileSync(f!, 'utf-8').trim();
|
||||
const parsed = JSON.parse(content);
|
||||
expect(parsed.skill).toBe('review');
|
||||
expect(parsed.key).toBe('test-key');
|
||||
expect(parsed.confidence).toBe(8);
|
||||
});
|
||||
|
||||
test('auto-injects timestamp when ts is missing', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"ts-test","insight":"test","confidence":5,"source":"observed"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBeDefined();
|
||||
expect(new Date(parsed.ts).getTime()).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('rejects non-JSON input with non-zero exit code', () => {
|
||||
const result = runLog('not json at all', { expectFail: true });
|
||||
expect(result.exitCode).not.toBe(0);
|
||||
});
|
||||
|
||||
test('append-only: duplicate keys create multiple entries', () => {
|
||||
const input1 = '{"skill":"review","type":"pattern","key":"dup-key","insight":"first version","confidence":6,"source":"observed"}';
|
||||
const input2 = '{"skill":"review","type":"pattern","key":"dup-key","insight":"second version","confidence":8,"source":"observed"}';
|
||||
runLog(input1);
|
||||
runLog(input2);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const lines = fs.readFileSync(f!, 'utf-8').trim().split('\n');
|
||||
expect(lines.length).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-search', () => {
|
||||
test('returns empty and exits 0 when no learnings file exists', () => {
|
||||
const output = runSearch();
|
||||
expect(output).toBe('');
|
||||
});
|
||||
|
||||
test('returns formatted output when learnings exist', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"test-search","insight":"search test insight","confidence":7,"source":"observed"}');
|
||||
const output = runSearch();
|
||||
expect(output).toContain('LEARNINGS:');
|
||||
expect(output).toContain('test-search');
|
||||
expect(output).toContain('search test insight');
|
||||
});
|
||||
|
||||
test('deduplicates entries by key+type (latest wins)', () => {
|
||||
const old = JSON.stringify({ skill: 'review', type: 'pattern', key: 'dedup-test', insight: 'old version', confidence: 5, source: 'observed', ts: '2026-01-01T00:00:00Z' });
|
||||
const newer = JSON.stringify({ skill: 'review', type: 'pattern', key: 'dedup-test', insight: 'new version', confidence: 8, source: 'observed', ts: '2026-03-28T00:00:00Z' });
|
||||
runLog(old);
|
||||
runLog(newer);
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('new version');
|
||||
expect(output).not.toContain('old version');
|
||||
expect(output).toContain('1 loaded');
|
||||
});
|
||||
|
||||
test('filters by --type', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"p1","insight":"a pattern","confidence":7,"source":"observed"}');
|
||||
runLog('{"skill":"review","type":"pitfall","key":"p2","insight":"a pitfall","confidence":7,"source":"observed"}');
|
||||
|
||||
const patternOnly = runSearch('--type pattern');
|
||||
expect(patternOnly).toContain('p1');
|
||||
expect(patternOnly).not.toContain('p2');
|
||||
});
|
||||
|
||||
test('filters by --query', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"auth-bypass","insight":"check session tokens","confidence":7,"source":"observed"}');
|
||||
runLog('{"skill":"review","type":"pattern","key":"n-plus-one","insight":"use includes for associations","confidence":7,"source":"observed"}');
|
||||
|
||||
const authOnly = runSearch('--query auth');
|
||||
expect(authOnly).toContain('auth-bypass');
|
||||
expect(authOnly).not.toContain('n-plus-one');
|
||||
});
|
||||
|
||||
test('respects --limit', () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
runLog(`{"skill":"review","type":"pattern","key":"limit-${i}","insight":"insight ${i}","confidence":7,"source":"observed"}`);
|
||||
}
|
||||
|
||||
const limited = runSearch('--limit 2');
|
||||
// Should show 2, not 5
|
||||
expect(limited).toContain('2 loaded');
|
||||
});
|
||||
|
||||
test('applies confidence decay for observed/inferred sources', () => {
|
||||
// Entry from 90 days ago with source=observed, confidence=8
|
||||
// Should decay to 8 - floor(90/30) = 8 - 3 = 5
|
||||
const ts = new Date(Date.now() - 90 * 86400000).toISOString();
|
||||
runLog(`{"skill":"review","type":"pattern","key":"decay-test","insight":"old observation","confidence":8,"source":"observed","ts":"${ts}"}`);
|
||||
|
||||
const output = runSearch();
|
||||
// Should show confidence 5 (decayed from 8)
|
||||
expect(output).toContain('confidence: 5/10');
|
||||
});
|
||||
|
||||
test('does NOT decay user-stated learnings', () => {
|
||||
const ts = new Date(Date.now() - 90 * 86400000).toISOString();
|
||||
runLog(`{"skill":"review","type":"preference","key":"no-decay-test","insight":"user preference","confidence":9,"source":"user-stated","ts":"${ts}"}`);
|
||||
|
||||
const output = runSearch();
|
||||
// Should still show confidence 9 (no decay for user-stated)
|
||||
expect(output).toContain('confidence: 9/10');
|
||||
});
|
||||
|
||||
test('skips malformed JSONL lines gracefully', () => {
|
||||
// Write a valid entry, then manually append a bad line
|
||||
runLog('{"skill":"review","type":"pattern","key":"valid-entry","insight":"valid","confidence":7,"source":"observed"}');
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
fs.appendFileSync(f!, '\nthis is not json\n');
|
||||
fs.appendFileSync(f!, '{"skill":"review","type":"pattern","key":"also-valid","insight":"also valid","confidence":6,"source":"observed","ts":"2026-03-28T00:00:00Z"}\n');
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('valid-entry');
|
||||
expect(output).toContain('also-valid');
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-log edge cases', () => {
|
||||
test('preserves existing timestamp when ts is present', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"ts-preserve","insight":"test","confidence":5,"source":"observed","ts":"2025-06-15T10:00:00Z"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBe('2025-06-15T10:00:00Z');
|
||||
});
|
||||
|
||||
test('handles JSON with special characters in insight', () => {
|
||||
const input = JSON.stringify({ skill: 'review', type: 'pattern', key: 'special-chars', insight: 'Use "quotes" and \\backslashes', confidence: 7, source: 'observed' });
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.insight).toContain('quotes');
|
||||
expect(parsed.insight).toContain('backslashes');
|
||||
});
|
||||
|
||||
test('handles JSON with files array field', () => {
|
||||
const input = JSON.stringify({ skill: 'review', type: 'architecture', key: 'with-files', insight: 'test', confidence: 8, source: 'observed', files: ['src/auth.ts', 'src/db.ts'] });
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.files).toEqual(['src/auth.ts', 'src/db.ts']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-search edge cases', () => {
|
||||
test('sorts by confidence then recency', () => {
|
||||
// Two entries: one high confidence old, one lower confidence recent
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'high-conf', insight: 'high confidence entry', confidence: 9, source: 'user-stated', ts: '2026-01-01T00:00:00Z' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'recent', insight: 'recent entry', confidence: 5, source: 'observed', ts: '2026-03-28T00:00:00Z' }));
|
||||
|
||||
const output = runSearch();
|
||||
const highIdx = output.indexOf('high-conf');
|
||||
const recentIdx = output.indexOf('recent');
|
||||
// High confidence should appear first
|
||||
expect(highIdx).toBeLessThan(recentIdx);
|
||||
});
|
||||
|
||||
test('groups output by type', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'p1', insight: 'a pattern', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'pit1', insight: 'a pitfall', confidence: 7, source: 'observed' }));
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('## Patterns');
|
||||
expect(output).toContain('## Pitfalls');
|
||||
});
|
||||
|
||||
test('combined --type and --query filtering', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'auth-token', insight: 'check token expiry', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'auth-leak', insight: 'auth token in logs', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'cache-key', insight: 'cache invalidation', confidence: 7, source: 'observed' }));
|
||||
|
||||
const output = runSearch('--type pattern --query auth');
|
||||
expect(output).toContain('auth-token');
|
||||
expect(output).not.toContain('auth-leak'); // wrong type
|
||||
expect(output).not.toContain('cache-key'); // wrong query
|
||||
});
|
||||
|
||||
test('entries with missing key or type are skipped', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'valid', insight: 'valid entry', confidence: 7, source: 'observed' }));
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
// Append entries missing key and type
|
||||
fs.appendFileSync(f!, JSON.stringify({ skill: 'review', type: 'pattern', insight: 'no key', confidence: 7, source: 'observed' }) + '\n');
|
||||
fs.appendFileSync(f!, JSON.stringify({ skill: 'review', key: 'no-type', insight: 'no type', confidence: 7, source: 'observed' }) + '\n');
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('valid');
|
||||
expect(output).not.toContain('no key');
|
||||
expect(output).not.toContain('no-type');
|
||||
});
|
||||
|
||||
test('confidence decay floors at 0 (never negative)', () => {
|
||||
// Entry from 1 year ago with confidence 3 — decay would be 12, clamped to 0
|
||||
const ts = new Date(Date.now() - 365 * 86400000).toISOString();
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'ancient', insight: 'very old', confidence: 3, source: 'observed', ts }));
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('confidence: 0/10');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,132 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
copyDirSync, logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Shared eval-metrics collector for this E2E file; finalized in afterAll.
const evalCollector = createEvalCollector('e2e-learnings');
|
||||
|
||||
// --- Learnings E2E: seed learnings, run /learn, verify output ---
|
||||
|
||||
// E2E: seed three learnings into a fake GSTACK_HOME, run the /learn skill
// through an agent session, and verify the seeded learnings surface in the
// agent's output. Selected via the 'learnings-show' test id.
describeIfSelected('Learnings E2E', ['learnings-show'], () => {
  let workDir: string;    // throwaway git repo the agent works in
  let gstackHome: string; // fake GSTACK_HOME inside workDir

  beforeAll(() => {
    workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-learnings-'));
    gstackHome = path.join(workDir, '.gstack-home');

    // Init git repo
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    // Copy the /learn skill
    copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn'));

    // Copy bin scripts needed by /learn (made executable, since copyFileSync
    // does not guarantee mode preservation)
    const binDir = path.join(workDir, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    for (const script of ['gstack-learnings-search', 'gstack-learnings-log', 'gstack-slug']) {
      fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script));
      fs.chmodSync(path.join(binDir, script), 0o755);
    }

    // Seed learnings JSONL with 3 entries of different types
    // (pattern / pitfall / preference — each should be independently
    // recognizable in the agent output)
    const slug = 'test-project';
    const projectDir = path.join(gstackHome, 'projects', slug);
    fs.mkdirSync(projectDir, { recursive: true });

    const learnings = [
      {
        skill: 'review', type: 'pattern', key: 'n-plus-one-queries',
        insight: 'ActiveRecord associations in loops cause N+1 queries. Always use includes/preload.',
        confidence: 9, source: 'observed', ts: new Date().toISOString(),
        files: ['app/models/user.rb'],
      },
      {
        skill: 'investigate', type: 'pitfall', key: 'stale-cache-after-deploy',
        insight: 'Redis cache not invalidated on deploy causes stale data for 5 minutes.',
        confidence: 7, source: 'observed', ts: new Date().toISOString(),
        files: ['config/redis.yml'],
      },
      {
        skill: 'ship', type: 'preference', key: 'always-run-rubocop',
        insight: 'User wants rubocop to run before every commit, no exceptions.',
        confidence: 10, source: 'user-stated', ts: new Date().toISOString(),
      },
    ];

    fs.writeFileSync(
      path.join(projectDir, 'learnings.jsonl'),
      learnings.map(l => JSON.stringify(l)).join('\n') + '\n',
    );
  });

  afterAll(() => {
    // Best-effort cleanup; never let teardown failure mask test results.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
    finalizeEvalCollector(evalCollector);
  });

  testConcurrentIfSelected('learnings-show', async () => {
    const result = await runSkillTest({
      prompt: `Read the file learn/SKILL.md for the /learn skill instructions.

Run the /learn command (no arguments — show recent learnings).

IMPORTANT:
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
  Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
- Replace any references to ~/.claude/skills/gstack/bin/gstack-slug with ./bin/gstack-slug.
- Do NOT use AskUserQuestion.
- Do NOT implement code changes.
- Just show the learnings and summarize what you found.`,
      workingDirectory: workDir,
      maxTurns: 15,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'learnings-show',
      runId,
    });

    logCost('/learn show', result);

    const output = result.output.toLowerCase();

    // The agent should have found and displayed the seeded learnings
    const mentionsNPlusOne = output.includes('n-plus-one') || output.includes('n+1');
    const mentionsCache = output.includes('stale') || output.includes('cache');
    const mentionsRubocop = output.includes('rubocop');

    // At least 2 of 3 learnings should appear in the output (tolerates one
    // paraphrase the substring checks miss)
    const foundCount = [mentionsNPlusOne, mentionsCache, mentionsRubocop].filter(Boolean).length;

    // error_max_turns still counts as a completed session for this check
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, '/learn', 'Learnings show E2E', result, {
      passed: exitOk && foundCount >= 2,
    });

    expect(exitOk).toBe(true);
    expect(foundCount).toBeGreaterThanOrEqual(2);

    if (foundCount === 3) {
      console.log('All 3 seeded learnings found in output');
    } else {
      console.warn(`Only ${foundCount}/3 learnings found (N+1: ${mentionsNPlusOne}, cache: ${mentionsCache}, rubocop: ${mentionsRubocop})`);
    }
  }, 180_000);
});
|
||||
Reference in New Issue
Block a user