mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-01 19:25:10 +02:00
feat: GStack Learns — per-project self-learning infrastructure (v0.13.4.0) (#622)
* feat: learnings + confidence resolvers — cross-skill memory infrastructure Three new resolvers for the self-learning system: - LEARNINGS_SEARCH: tells skills to load prior learnings before analysis - LEARNINGS_LOG: tells skills to capture discoveries after completing work - CONFIDENCE_CALIBRATION: adds 1-10 confidence scoring to all review findings Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: learnings bin scripts — append-only JSONL read/write gstack-learnings-log: validates JSON, auto-injects timestamp, appends to ~/.gstack/projects/$SLUG/learnings.jsonl. Append-only (no mutation). gstack-learnings-search: reads/filters/dedupes learnings with confidence decay (observed/inferred lose 1pt/30d), cross-project discovery, and "latest winner" resolution per key+type. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: learnings count in preamble output Every skill now prints "LEARNINGS: N entries loaded" during preamble, making the compounding loop visible to the user. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: integrate learnings + confidence into 9 skill templates Add {{LEARNINGS_SEARCH}}, {{LEARNINGS_LOG}}, and {{CONFIDENCE_CALIBRATION}} placeholders to review, ship, plan-eng-review, plan-ceo-review, office-hours, investigate, retro, and cso templates. Regenerated all SKILL.md files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: /learn skill — manage project learnings New skill for reviewing, searching, pruning, and exporting what gstack has learned across sessions. Commands: /learn, /learn search, /learn prune, /learn export, /learn stats, /learn add. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * docs: self-learning roadmap — 5-release design doc Covers: R1 GStack Learns (v0.14), R2 Review Army (v0.15), R3 Smart Ceremony (v0.16), R4 /autoship (v0.17), R5 Studio (v0.18). 
Inspired by Compound Engineering, adapted to GStack's architecture. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: learnings bin script unit tests — 13 tests, free Tests gstack-learnings-log (valid/invalid JSON, timestamp injection, append-only) and gstack-learnings-search (dedup, type/query/limit filters, confidence decay, user-stated no-decay, malformed JSONL skip). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: bump version and changelog (v0.13.4.0) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: learnings resolver + bin script edge case tests — 21 new tests, free Adds gen-skill-docs coverage for LEARNINGS_SEARCH, LEARNINGS_LOG, and CONFIDENCE_CALIBRATION resolvers. Adds bin script edge cases: timestamp preservation, special characters, files array, sort order, type grouping, combined filtering, missing fields, confidence floor at 0. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: sync package.json version with VERSION file (0.13.4.0) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * chore: gitignore .factory/ — generated output, not source Same pattern as .claude/skills/ and .agents/. These SKILL.md files are generated from .tmpl templates by gen:skill-docs --host factory. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * test: /learn E2E — seed 3 learnings, verify agent surfaces them Seeds N+1 query pattern, stale cache pitfall, and rubocop preference into learnings.jsonl, then runs /learn and checks that at least 2/3 appear in the agent's output. Gate tier, ~$0.25/run. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2123,3 +2123,113 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
|
||||
expect(violations).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Learnings + Confidence Resolver Tests ─────────────────────
|
||||
|
||||
describe('LEARNINGS_SEARCH resolver', () => {
|
||||
const SEARCH_SKILLS = ['review', 'ship', 'plan-eng-review', 'investigate', 'office-hours', 'plan-ceo-review'];
|
||||
|
||||
for (const skill of SEARCH_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains learnings search`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Prior Learnings');
|
||||
expect(content).toContain('gstack-learnings-search');
|
||||
});
|
||||
}
|
||||
|
||||
test('learnings search includes cross-project config check', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('cross_project_learnings');
|
||||
expect(content).toContain('--cross-project');
|
||||
});
|
||||
|
||||
test('learnings search includes AskUserQuestion for first-time cross-project opt-in', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Enable cross-project learnings');
|
||||
expect(content).toContain('project-scoped only');
|
||||
});
|
||||
|
||||
test('learnings search mentions prior learning applied display format', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Prior learning applied');
|
||||
});
|
||||
});
|
||||
|
||||
describe('LEARNINGS_LOG resolver', () => {
|
||||
const LOG_SKILLS = ['review', 'retro', 'investigate'];
|
||||
|
||||
for (const skill of LOG_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains learnings log`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Capture Learnings');
|
||||
expect(content).toContain('gstack-learnings-log');
|
||||
});
|
||||
}
|
||||
|
||||
test('learnings log documents all type values', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
for (const type of ['pattern', 'pitfall', 'preference', 'architecture', 'tool']) {
|
||||
expect(content).toContain(type);
|
||||
}
|
||||
});
|
||||
|
||||
test('learnings log documents all source values', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
for (const source of ['observed', 'user-stated', 'inferred', 'cross-model']) {
|
||||
expect(content).toContain(source);
|
||||
}
|
||||
});
|
||||
|
||||
test('learnings log includes files field for staleness detection', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('"files"');
|
||||
expect(content).toContain('staleness detection');
|
||||
});
|
||||
});
|
||||
|
||||
describe('CONFIDENCE_CALIBRATION resolver', () => {
|
||||
const CONFIDENCE_SKILLS = ['review', 'ship', 'plan-eng-review', 'cso'];
|
||||
|
||||
for (const skill of CONFIDENCE_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains confidence calibration`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Confidence Calibration');
|
||||
expect(content).toContain('confidence score');
|
||||
});
|
||||
}
|
||||
|
||||
test('confidence calibration includes scoring rubric with all tiers', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('9-10');
|
||||
expect(content).toContain('7-8');
|
||||
expect(content).toContain('5-6');
|
||||
expect(content).toContain('3-4');
|
||||
expect(content).toContain('1-2');
|
||||
});
|
||||
|
||||
test('confidence calibration includes display rules', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Show normally');
|
||||
expect(content).toContain('Suppress from main report');
|
||||
});
|
||||
|
||||
test('confidence calibration includes finding format example', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('[P1] (confidence:');
|
||||
expect(content).toContain('SQL injection');
|
||||
});
|
||||
|
||||
test('confidence calibration includes calibration learning feedback loop', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('calibration event');
|
||||
expect(content).toContain('Log the corrected pattern');
|
||||
});
|
||||
|
||||
test('skills without confidence calibration do NOT contain it', () => {
|
||||
// office-hours and retro do NOT use confidence calibration
|
||||
for (const skill of ['office-hours', 'retro']) {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).not.toContain('## Confidence Calibration');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -95,6 +95,9 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'cso-diff-mode': ['cso/**'],
|
||||
'cso-infra-scope': ['cso/**'],
|
||||
|
||||
// Learnings
|
||||
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
|
||||
|
||||
// Document-release
|
||||
'document-release': ['document-release/**'],
|
||||
|
||||
@@ -238,6 +241,9 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'cso-diff-mode': 'gate',
|
||||
'cso-infra-scope': 'periodic',
|
||||
|
||||
// Learnings — gate (functional guardrail: seeded learnings must appear)
|
||||
'learnings-show': 'gate',
|
||||
|
||||
// Document-release — gate (CHANGELOG guardrail)
|
||||
'document-release': 'gate',
|
||||
|
||||
|
||||
@@ -0,0 +1,283 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin');
|
||||
|
||||
let tmpDir: string;
|
||||
let slugDir: string;
|
||||
let learningsFile: string;
|
||||
|
||||
// Run bin/gstack-learnings-log with `input` as its single shell argument,
// pointing GSTACK_HOME at the per-test tmpDir.
// Returns { stdout, exitCode }. By default a non-zero exit rethrows; pass
// { expectFail: true } to capture stderr and the exit status instead.
function runLog(input: string, opts: { expectFail?: boolean } = {}): { stdout: string; exitCode: number } {
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };
  try {
    // POSIX-safe single-quote escaping: each ' in the payload becomes '\''.
    const stdout = execSync(`${BIN}/gstack-learnings-log '${input.replace(/'/g, "'\\''")}'`, execOpts).trim();
    return { stdout, exitCode: 0 };
  } catch (e: any) {
    if (opts.expectFail) {
      // On expected failure, surface stderr in the stdout slot (what the
      // failing script printed) plus the exit status (default 1 if the
      // process was killed and status is null).
      return { stdout: e.stderr?.toString() || '', exitCode: e.status || 1 };
    }
    throw e;
  }
}
|
||||
|
||||
// Run bin/gstack-learnings-search with raw CLI `args`, pointing GSTACK_HOME
// at the per-test tmpDir, and return its trimmed stdout.
// NOTE(review): the bare catch swallows ALL failures (non-zero exit, timeout)
// and returns '' — so the "exits 0 when no learnings file exists" test cannot
// actually distinguish exit 0 from a crash. Consider capturing the exit code
// like runLog does — confirm intended script behavior first.
function runSearch(args: string = ''): string {
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };
  try {
    return execSync(`${BIN}/gstack-learnings-search ${args}`, execOpts).trim();
  } catch {
    return '';
  }
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-learn-'));
|
||||
slugDir = path.join(tmpDir, 'projects');
|
||||
fs.mkdirSync(slugDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
function findLearningsFile(): string | null {
|
||||
const projectDirs = fs.readdirSync(slugDir);
|
||||
if (projectDirs.length === 0) return null;
|
||||
const f = path.join(slugDir, projectDirs[0], 'learnings.jsonl');
|
||||
return fs.existsSync(f) ? f : null;
|
||||
}
|
||||
|
||||
describe('gstack-learnings-log', () => {
|
||||
test('appends valid JSON to learnings.jsonl', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"test-key","insight":"test insight","confidence":8,"source":"observed"}';
|
||||
const result = runLog(input);
|
||||
expect(result.exitCode).toBe(0);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const content = fs.readFileSync(f!, 'utf-8').trim();
|
||||
const parsed = JSON.parse(content);
|
||||
expect(parsed.skill).toBe('review');
|
||||
expect(parsed.key).toBe('test-key');
|
||||
expect(parsed.confidence).toBe(8);
|
||||
});
|
||||
|
||||
test('auto-injects timestamp when ts is missing', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"ts-test","insight":"test","confidence":5,"source":"observed"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBeDefined();
|
||||
expect(new Date(parsed.ts).getTime()).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('rejects non-JSON input with non-zero exit code', () => {
|
||||
const result = runLog('not json at all', { expectFail: true });
|
||||
expect(result.exitCode).not.toBe(0);
|
||||
});
|
||||
|
||||
test('append-only: duplicate keys create multiple entries', () => {
|
||||
const input1 = '{"skill":"review","type":"pattern","key":"dup-key","insight":"first version","confidence":6,"source":"observed"}';
|
||||
const input2 = '{"skill":"review","type":"pattern","key":"dup-key","insight":"second version","confidence":8,"source":"observed"}';
|
||||
runLog(input1);
|
||||
runLog(input2);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const lines = fs.readFileSync(f!, 'utf-8').trim().split('\n');
|
||||
expect(lines.length).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-search', () => {
|
||||
test('returns empty and exits 0 when no learnings file exists', () => {
|
||||
const output = runSearch();
|
||||
expect(output).toBe('');
|
||||
});
|
||||
|
||||
test('returns formatted output when learnings exist', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"test-search","insight":"search test insight","confidence":7,"source":"observed"}');
|
||||
const output = runSearch();
|
||||
expect(output).toContain('LEARNINGS:');
|
||||
expect(output).toContain('test-search');
|
||||
expect(output).toContain('search test insight');
|
||||
});
|
||||
|
||||
test('deduplicates entries by key+type (latest wins)', () => {
|
||||
const old = JSON.stringify({ skill: 'review', type: 'pattern', key: 'dedup-test', insight: 'old version', confidence: 5, source: 'observed', ts: '2026-01-01T00:00:00Z' });
|
||||
const newer = JSON.stringify({ skill: 'review', type: 'pattern', key: 'dedup-test', insight: 'new version', confidence: 8, source: 'observed', ts: '2026-03-28T00:00:00Z' });
|
||||
runLog(old);
|
||||
runLog(newer);
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('new version');
|
||||
expect(output).not.toContain('old version');
|
||||
expect(output).toContain('1 loaded');
|
||||
});
|
||||
|
||||
test('filters by --type', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"p1","insight":"a pattern","confidence":7,"source":"observed"}');
|
||||
runLog('{"skill":"review","type":"pitfall","key":"p2","insight":"a pitfall","confidence":7,"source":"observed"}');
|
||||
|
||||
const patternOnly = runSearch('--type pattern');
|
||||
expect(patternOnly).toContain('p1');
|
||||
expect(patternOnly).not.toContain('p2');
|
||||
});
|
||||
|
||||
test('filters by --query', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"auth-bypass","insight":"check session tokens","confidence":7,"source":"observed"}');
|
||||
runLog('{"skill":"review","type":"pattern","key":"n-plus-one","insight":"use includes for associations","confidence":7,"source":"observed"}');
|
||||
|
||||
const authOnly = runSearch('--query auth');
|
||||
expect(authOnly).toContain('auth-bypass');
|
||||
expect(authOnly).not.toContain('n-plus-one');
|
||||
});
|
||||
|
||||
test('respects --limit', () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
runLog(`{"skill":"review","type":"pattern","key":"limit-${i}","insight":"insight ${i}","confidence":7,"source":"observed"}`);
|
||||
}
|
||||
|
||||
const limited = runSearch('--limit 2');
|
||||
// Should show 2, not 5
|
||||
expect(limited).toContain('2 loaded');
|
||||
});
|
||||
|
||||
test('applies confidence decay for observed/inferred sources', () => {
|
||||
// Entry from 90 days ago with source=observed, confidence=8
|
||||
// Should decay to 8 - floor(90/30) = 8 - 3 = 5
|
||||
const ts = new Date(Date.now() - 90 * 86400000).toISOString();
|
||||
runLog(`{"skill":"review","type":"pattern","key":"decay-test","insight":"old observation","confidence":8,"source":"observed","ts":"${ts}"}`);
|
||||
|
||||
const output = runSearch();
|
||||
// Should show confidence 5 (decayed from 8)
|
||||
expect(output).toContain('confidence: 5/10');
|
||||
});
|
||||
|
||||
test('does NOT decay user-stated learnings', () => {
|
||||
const ts = new Date(Date.now() - 90 * 86400000).toISOString();
|
||||
runLog(`{"skill":"review","type":"preference","key":"no-decay-test","insight":"user preference","confidence":9,"source":"user-stated","ts":"${ts}"}`);
|
||||
|
||||
const output = runSearch();
|
||||
// Should still show confidence 9 (no decay for user-stated)
|
||||
expect(output).toContain('confidence: 9/10');
|
||||
});
|
||||
|
||||
test('skips malformed JSONL lines gracefully', () => {
|
||||
// Write a valid entry, then manually append a bad line
|
||||
runLog('{"skill":"review","type":"pattern","key":"valid-entry","insight":"valid","confidence":7,"source":"observed"}');
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
fs.appendFileSync(f!, '\nthis is not json\n');
|
||||
fs.appendFileSync(f!, '{"skill":"review","type":"pattern","key":"also-valid","insight":"also valid","confidence":6,"source":"observed","ts":"2026-03-28T00:00:00Z"}\n');
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('valid-entry');
|
||||
expect(output).toContain('also-valid');
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-log edge cases', () => {
|
||||
test('preserves existing timestamp when ts is present', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"ts-preserve","insight":"test","confidence":5,"source":"observed","ts":"2025-06-15T10:00:00Z"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBe('2025-06-15T10:00:00Z');
|
||||
});
|
||||
|
||||
test('handles JSON with special characters in insight', () => {
|
||||
const input = JSON.stringify({ skill: 'review', type: 'pattern', key: 'special-chars', insight: 'Use "quotes" and \\backslashes', confidence: 7, source: 'observed' });
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.insight).toContain('quotes');
|
||||
expect(parsed.insight).toContain('backslashes');
|
||||
});
|
||||
|
||||
test('handles JSON with files array field', () => {
|
||||
const input = JSON.stringify({ skill: 'review', type: 'architecture', key: 'with-files', insight: 'test', confidence: 8, source: 'observed', files: ['src/auth.ts', 'src/db.ts'] });
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.files).toEqual(['src/auth.ts', 'src/db.ts']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-search edge cases', () => {
|
||||
test('sorts by confidence then recency', () => {
|
||||
// Two entries: one high confidence old, one lower confidence recent
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'high-conf', insight: 'high confidence entry', confidence: 9, source: 'user-stated', ts: '2026-01-01T00:00:00Z' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'recent', insight: 'recent entry', confidence: 5, source: 'observed', ts: '2026-03-28T00:00:00Z' }));
|
||||
|
||||
const output = runSearch();
|
||||
const highIdx = output.indexOf('high-conf');
|
||||
const recentIdx = output.indexOf('recent');
|
||||
// High confidence should appear first
|
||||
expect(highIdx).toBeLessThan(recentIdx);
|
||||
});
|
||||
|
||||
test('groups output by type', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'p1', insight: 'a pattern', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'pit1', insight: 'a pitfall', confidence: 7, source: 'observed' }));
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('## Patterns');
|
||||
expect(output).toContain('## Pitfalls');
|
||||
});
|
||||
|
||||
test('combined --type and --query filtering', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'auth-token', insight: 'check token expiry', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'auth-leak', insight: 'auth token in logs', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'cache-key', insight: 'cache invalidation', confidence: 7, source: 'observed' }));
|
||||
|
||||
const output = runSearch('--type pattern --query auth');
|
||||
expect(output).toContain('auth-token');
|
||||
expect(output).not.toContain('auth-leak'); // wrong type
|
||||
expect(output).not.toContain('cache-key'); // wrong query
|
||||
});
|
||||
|
||||
test('entries with missing key or type are skipped', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'valid', insight: 'valid entry', confidence: 7, source: 'observed' }));
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
// Append entries missing key and type
|
||||
fs.appendFileSync(f!, JSON.stringify({ skill: 'review', type: 'pattern', insight: 'no key', confidence: 7, source: 'observed' }) + '\n');
|
||||
fs.appendFileSync(f!, JSON.stringify({ skill: 'review', key: 'no-type', insight: 'no type', confidence: 7, source: 'observed' }) + '\n');
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('valid');
|
||||
expect(output).not.toContain('no key');
|
||||
expect(output).not.toContain('no-type');
|
||||
});
|
||||
|
||||
test('confidence decay floors at 0 (never negative)', () => {
|
||||
// Entry from 1 year ago with confidence 3 — decay would be 12, clamped to 0
|
||||
const ts = new Date(Date.now() - 365 * 86400000).toISOString();
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'ancient', insight: 'very old', confidence: 3, source: 'observed', ts }));
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('confidence: 0/10');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,132 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
copyDirSync, logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Shared eval-metrics collector for this E2E file; finalized in afterAll.
const evalCollector = createEvalCollector('e2e-learnings');
|
||||
|
||||
// --- Learnings E2E: seed learnings, run /learn, verify output ---
|
||||
|
||||
// E2E: seed three learnings into a fake GSTACK_HOME, run the /learn skill
// through an agent session, and verify the seeded learnings surface in the
// agent's output. Selected via the 'learnings-show' test id.
describeIfSelected('Learnings E2E', ['learnings-show'], () => {
  let workDir: string;    // throwaway git repo the agent works in
  let gstackHome: string; // fake GSTACK_HOME inside workDir

  beforeAll(() => {
    workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-learnings-'));
    gstackHome = path.join(workDir, '.gstack-home');

    // Init git repo
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    // Copy the /learn skill
    copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn'));

    // Copy bin scripts needed by /learn (made executable, since copyFileSync
    // does not guarantee mode preservation)
    const binDir = path.join(workDir, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    for (const script of ['gstack-learnings-search', 'gstack-learnings-log', 'gstack-slug']) {
      fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script));
      fs.chmodSync(path.join(binDir, script), 0o755);
    }

    // Seed learnings JSONL with 3 entries of different types
    // (pattern / pitfall / preference — each should be independently
    // recognizable in the agent output)
    const slug = 'test-project';
    const projectDir = path.join(gstackHome, 'projects', slug);
    fs.mkdirSync(projectDir, { recursive: true });

    const learnings = [
      {
        skill: 'review', type: 'pattern', key: 'n-plus-one-queries',
        insight: 'ActiveRecord associations in loops cause N+1 queries. Always use includes/preload.',
        confidence: 9, source: 'observed', ts: new Date().toISOString(),
        files: ['app/models/user.rb'],
      },
      {
        skill: 'investigate', type: 'pitfall', key: 'stale-cache-after-deploy',
        insight: 'Redis cache not invalidated on deploy causes stale data for 5 minutes.',
        confidence: 7, source: 'observed', ts: new Date().toISOString(),
        files: ['config/redis.yml'],
      },
      {
        skill: 'ship', type: 'preference', key: 'always-run-rubocop',
        insight: 'User wants rubocop to run before every commit, no exceptions.',
        confidence: 10, source: 'user-stated', ts: new Date().toISOString(),
      },
    ];

    fs.writeFileSync(
      path.join(projectDir, 'learnings.jsonl'),
      learnings.map(l => JSON.stringify(l)).join('\n') + '\n',
    );
  });

  afterAll(() => {
    // Best-effort cleanup; never let teardown failure mask test results.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
    finalizeEvalCollector(evalCollector);
  });

  testConcurrentIfSelected('learnings-show', async () => {
    const result = await runSkillTest({
      prompt: `Read the file learn/SKILL.md for the /learn skill instructions.

Run the /learn command (no arguments — show recent learnings).

IMPORTANT:
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
  Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
- Replace any references to ~/.claude/skills/gstack/bin/gstack-slug with ./bin/gstack-slug.
- Do NOT use AskUserQuestion.
- Do NOT implement code changes.
- Just show the learnings and summarize what you found.`,
      workingDirectory: workDir,
      maxTurns: 15,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'learnings-show',
      runId,
    });

    logCost('/learn show', result);

    const output = result.output.toLowerCase();

    // The agent should have found and displayed the seeded learnings
    const mentionsNPlusOne = output.includes('n-plus-one') || output.includes('n+1');
    const mentionsCache = output.includes('stale') || output.includes('cache');
    const mentionsRubocop = output.includes('rubocop');

    // At least 2 of 3 learnings should appear in the output (tolerates one
    // paraphrase the substring checks miss)
    const foundCount = [mentionsNPlusOne, mentionsCache, mentionsRubocop].filter(Boolean).length;

    // error_max_turns still counts as a completed session for this check
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, '/learn', 'Learnings show E2E', result, {
      passed: exitOk && foundCount >= 2,
    });

    expect(exitOk).toBe(true);
    expect(foundCount).toBeGreaterThanOrEqual(2);

    if (foundCount === 3) {
      console.log('All 3 seeded learnings found in output');
    } else {
      console.warn(`Only ${foundCount}/3 learnings found (N+1: ${mentionsNPlusOne}, cache: ${mentionsCache}, rubocop: ${mentionsRubocop})`);
    }
  }, 180_000);
});
|
||||
Reference in New Issue
Block a user