mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-07 14:06:42 +02:00
Merge origin/main, resolve CHANGELOG conflict, bump to v0.13.7.0
Main landed v0.13.6.0 (GStack Learns) while this branch had v0.13.6.0 (Community Wave). Resolved by keeping both entries and bumping this branch to v0.13.7.0. Regenerated SKILL.md files to pick up new learn skill and apply the find -exec rm fix from this branch. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2217,3 +2217,113 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
|
||||
expect(violations).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Learnings + Confidence Resolver Tests ─────────────────────
|
||||
|
||||
// Verifies that generated SKILL.md files contain the learnings-search text
// this suite expects.
describe('LEARNINGS_SEARCH resolver', () => {
  // Read inside each test (not at describe time) so an unreadable file fails
  // only the test that needs it.
  const readSkillDoc = (skillName: string): string =>
    fs.readFileSync(path.join(ROOT, skillName, 'SKILL.md'), 'utf-8');

  const skillsWithSearch = ['review', 'ship', 'plan-eng-review', 'investigate', 'office-hours', 'plan-ceo-review'];

  skillsWithSearch.forEach((skillName) => {
    test(`${skillName} generated SKILL.md contains learnings search`, () => {
      const doc = readSkillDoc(skillName);
      for (const marker of ['Prior Learnings', 'gstack-learnings-search']) {
        expect(doc).toContain(marker);
      }
    });
  });

  // The remaining checks inspect only the review skill's document.
  test('learnings search includes cross-project config check', () => {
    const doc = readSkillDoc('review');
    for (const marker of ['cross_project_learnings', '--cross-project']) {
      expect(doc).toContain(marker);
    }
  });

  test('learnings search includes AskUserQuestion for first-time cross-project opt-in', () => {
    const doc = readSkillDoc('review');
    for (const marker of ['Enable cross-project learnings', 'project-scoped only']) {
      expect(doc).toContain(marker);
    }
  });

  test('learnings search mentions prior learning applied display format', () => {
    expect(readSkillDoc('review')).toContain('Prior learning applied');
  });
});
|
||||
|
||||
// Verifies that generated SKILL.md files contain the learnings-log text this
// suite expects.
describe('LEARNINGS_LOG resolver', () => {
  // Read inside each test so a missing file is reported against that test.
  const readSkillDoc = (skillName: string): string =>
    fs.readFileSync(path.join(ROOT, skillName, 'SKILL.md'), 'utf-8');

  // Asserts, in order, that every marker occurs somewhere in the document.
  const expectAllPresent = (doc: string, markers: string[]): void => {
    markers.forEach((marker) => {
      expect(doc).toContain(marker);
    });
  };

  const skillsWithLog = ['review', 'retro', 'investigate'];

  skillsWithLog.forEach((skillName) => {
    test(`${skillName} generated SKILL.md contains learnings log`, () => {
      expectAllPresent(readSkillDoc(skillName), ['Capture Learnings', 'gstack-learnings-log']);
    });
  });

  test('learnings log documents all type values', () => {
    expectAllPresent(readSkillDoc('review'), ['pattern', 'pitfall', 'preference', 'architecture', 'tool']);
  });

  test('learnings log documents all source values', () => {
    expectAllPresent(readSkillDoc('review'), ['observed', 'user-stated', 'inferred', 'cross-model']);
  });

  test('learnings log includes files field for staleness detection', () => {
    expectAllPresent(readSkillDoc('review'), ['"files"', 'staleness detection']);
  });
});
|
||||
|
||||
// Verifies which generated SKILL.md files contain the confidence-calibration
// text this suite expects, and that two other skills do not carry the heading.
describe('CONFIDENCE_CALIBRATION resolver', () => {
  // Read inside each test so a missing file is reported against that test.
  const readSkillDoc = (skillName: string): string =>
    fs.readFileSync(path.join(ROOT, skillName, 'SKILL.md'), 'utf-8');

  // Asserts, in order, that every marker occurs somewhere in the document.
  const expectAllPresent = (doc: string, markers: string[]): void => {
    markers.forEach((marker) => {
      expect(doc).toContain(marker);
    });
  };

  const skillsWithCalibration = ['review', 'ship', 'plan-eng-review', 'cso'];

  skillsWithCalibration.forEach((skillName) => {
    test(`${skillName} generated SKILL.md contains confidence calibration`, () => {
      expectAllPresent(readSkillDoc(skillName), ['Confidence Calibration', 'confidence score']);
    });
  });

  test('confidence calibration includes scoring rubric with all tiers', () => {
    expectAllPresent(readSkillDoc('review'), ['9-10', '7-8', '5-6', '3-4', '1-2']);
  });

  test('confidence calibration includes display rules', () => {
    expectAllPresent(readSkillDoc('review'), ['Show normally', 'Suppress from main report']);
  });

  test('confidence calibration includes finding format example', () => {
    expectAllPresent(readSkillDoc('review'), ['[P1] (confidence:', 'SQL injection']);
  });

  test('confidence calibration includes calibration learning feedback loop', () => {
    expectAllPresent(readSkillDoc('review'), ['calibration event', 'Log the corrected pattern']);
  });

  test('skills without confidence calibration do NOT contain it', () => {
    // office-hours and retro do NOT use confidence calibration
    ['office-hours', 'retro'].forEach((skillName) => {
      expect(readSkillDoc(skillName)).not.toContain('## Confidence Calibration');
    });
  });
});
|
||||
|
||||
@@ -95,6 +95,9 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'cso-diff-mode': ['cso/**'],
|
||||
'cso-infra-scope': ['cso/**'],
|
||||
|
||||
// Learnings
|
||||
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
|
||||
|
||||
// Document-release
|
||||
'document-release': ['document-release/**'],
|
||||
|
||||
@@ -238,6 +241,9 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'cso-diff-mode': 'gate',
|
||||
'cso-infra-scope': 'periodic',
|
||||
|
||||
// Learnings — gate (functional guardrail: seeded learnings must appear)
|
||||
'learnings-show': 'gate',
|
||||
|
||||
// Document-release — gate (CHANGELOG guardrail)
|
||||
'document-release': 'gate',
|
||||
|
||||
|
||||
@@ -0,0 +1,283 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Parent of this test file's directory, and the bin/ directory beneath it
// that holds the scripts under test.
const ROOT = path.resolve(import.meta.dir, '..');
const BIN = path.join(ROOT, 'bin');

// Per-test scratch locations; both are (re)assigned in beforeEach.
// tmpDir is passed to the scripts as GSTACK_HOME; slugDir is its projects/ subdirectory.
let tmpDir: string;
let slugDir: string;
|
||||
|
||||
// Runs bin/gstack-learnings-log with `input` as its single shell argument,
// using the per-test tmpDir as GSTACK_HOME.
//
// Returns the trimmed stdout with exitCode 0 on success. When the command
// fails and `opts.expectFail` is set, the `stdout` field carries the captured
// stderr text (or '') and `exitCode` the process status (1 when unavailable).
// Without `expectFail`, the failure is rethrown.
function runLog(input: string, opts: { expectFail?: boolean } = {}): { stdout: string; exitCode: number } {
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    timeout: 15000,
    encoding: 'utf-8',
    env: { ...process.env, GSTACK_HOME: tmpDir },
    cwd: ROOT,
  };

  try {
    // Shell single-quote escaping: close the quote, emit \', reopen the quote.
    const quotedInput = `'${input.replace(/'/g, "'\\''")}'`;
    const rawOutput = execSync(`${BIN}/gstack-learnings-log ${quotedInput}`, execOpts);
    return { stdout: rawOutput.trim(), exitCode: 0 };
  } catch (e: any) {
    if (!opts.expectFail) {
      throw e;
    }
    const failureText = e.stderr?.toString() || '';
    const failureCode = e.status || 1;
    return { stdout: failureText, exitCode: failureCode };
  }
}
|
||||
|
||||
// Runs bin/gstack-learnings-search with the raw `args` string appended to the
// command line, using the per-test tmpDir as GSTACK_HOME, and returns the
// trimmed stdout.
//
// Any failure (non-zero exit, timeout, missing script) is collapsed to '', so
// callers cannot tell "no output" apart from "the command failed".
function runSearch(args: string = ''): string {
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    timeout: 15000,
    encoding: 'utf-8',
    env: { ...process.env, GSTACK_HOME: tmpDir },
    cwd: ROOT,
  };

  let output = '';
  try {
    output = execSync(`${BIN}/gstack-learnings-search ${args}`, execOpts).trim();
  } catch {
    // Deliberate best-effort: keep the empty default.
  }
  return output;
}
|
||||
|
||||
// Each test gets a fresh temporary directory (used as GSTACK_HOME by the
// runners) with an empty projects/ subdirectory inside it.
beforeEach(() => {
  const scratchHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-learn-'));
  tmpDir = scratchHome;
  slugDir = path.join(scratchHome, 'projects');
  fs.mkdirSync(slugDir, { recursive: true });
});

// Remove the scratch directory; `force` avoids an error if it is already gone.
afterEach(() => {
  const cleanupOpts = { recursive: true, force: true };
  fs.rmSync(tmpDir, cleanupOpts);
});
|
||||
|
||||
// Returns the path to learnings.jsonl inside the first entry that readdirSync
// lists under slugDir, or null when slugDir is empty or that file is absent.
function findLearningsFile(): string | null {
  const [firstProject] = fs.readdirSync(slugDir);
  if (firstProject === undefined) {
    return null;
  }

  const candidate = path.join(slugDir, firstProject, 'learnings.jsonl');
  if (!fs.existsSync(candidate)) {
    return null;
  }
  return candidate;
}
|
||||
|
||||
// Exercises bin/gstack-learnings-log through runLog and inspects the
// learnings.jsonl file it produces under the per-test GSTACK_HOME.
describe('gstack-learnings-log', () => {
  // Asserts that a learnings file exists, then returns its trimmed contents.
  const readLearnings = (): string => {
    const file = findLearningsFile();
    expect(file).not.toBeNull();
    return fs.readFileSync(file!, 'utf-8').trim();
  };

  test('appends valid JSON to learnings.jsonl', () => {
    const outcome = runLog(
      '{"skill":"review","type":"pattern","key":"test-key","insight":"test insight","confidence":8,"source":"observed"}',
    );
    expect(outcome.exitCode).toBe(0);

    const entry = JSON.parse(readLearnings());
    expect(entry.skill).toBe('review');
    expect(entry.key).toBe('test-key');
    expect(entry.confidence).toBe(8);
  });

  test('auto-injects timestamp when ts is missing', () => {
    runLog('{"skill":"review","type":"pattern","key":"ts-test","insight":"test","confidence":5,"source":"observed"}');

    const entry = JSON.parse(readLearnings());
    expect(entry.ts).toBeDefined();
    // An unparseable ts would give NaN here and fail the comparison.
    expect(new Date(entry.ts).getTime()).toBeGreaterThan(0);
  });

  test('rejects non-JSON input with non-zero exit code', () => {
    const { exitCode } = runLog('not json at all', { expectFail: true });
    expect(exitCode).not.toBe(0);
  });

  test('append-only: duplicate keys create multiple entries', () => {
    const payloads = [
      '{"skill":"review","type":"pattern","key":"dup-key","insight":"first version","confidence":6,"source":"observed"}',
      '{"skill":"review","type":"pattern","key":"dup-key","insight":"second version","confidence":8,"source":"observed"}',
    ];
    payloads.forEach((payload) => {
      runLog(payload);
    });

    const storedLines = readLearnings().split('\n');
    expect(storedLines.length).toBe(2);
  });
});
|
||||
|
||||
// Exercises bin/gstack-learnings-search through runSearch, seeding entries
// with runLog (or by appending to the file directly).
describe('gstack-learnings-search', () => {
  test('returns empty and exits 0 when no learnings file exists', () => {
    // NOTE(review): runSearch maps any failure to '', so this assertion alone
    // does not distinguish a clean exit from an error.
    expect(runSearch()).toBe('');
  });

  test('returns formatted output when learnings exist', () => {
    runLog('{"skill":"review","type":"pattern","key":"test-search","insight":"search test insight","confidence":7,"source":"observed"}');

    const report = runSearch();
    for (const fragment of ['LEARNINGS:', 'test-search', 'search test insight']) {
      expect(report).toContain(fragment);
    }
  });

  test('deduplicates entries by key+type (latest wins)', () => {
    // Both entries share key and type; only insight, confidence and ts differ.
    const identity = { skill: 'review', type: 'pattern', key: 'dedup-test' };
    const older = JSON.stringify({ ...identity, insight: 'old version', confidence: 5, source: 'observed', ts: '2026-01-01T00:00:00Z' });
    const latest = JSON.stringify({ ...identity, insight: 'new version', confidence: 8, source: 'observed', ts: '2026-03-28T00:00:00Z' });
    runLog(older);
    runLog(latest);

    const report = runSearch();
    expect(report).toContain('new version');
    expect(report).not.toContain('old version');
    expect(report).toContain('1 loaded');
  });

  test('filters by --type', () => {
    runLog('{"skill":"review","type":"pattern","key":"p1","insight":"a pattern","confidence":7,"source":"observed"}');
    runLog('{"skill":"review","type":"pitfall","key":"p2","insight":"a pitfall","confidence":7,"source":"observed"}');

    const report = runSearch('--type pattern');
    expect(report).toContain('p1');
    expect(report).not.toContain('p2');
  });

  test('filters by --query', () => {
    runLog('{"skill":"review","type":"pattern","key":"auth-bypass","insight":"check session tokens","confidence":7,"source":"observed"}');
    runLog('{"skill":"review","type":"pattern","key":"n-plus-one","insight":"use includes for associations","confidence":7,"source":"observed"}');

    const report = runSearch('--query auth');
    expect(report).toContain('auth-bypass');
    expect(report).not.toContain('n-plus-one');
  });

  test('respects --limit', () => {
    [0, 1, 2, 3, 4].forEach((i) => {
      runLog(`{"skill":"review","type":"pattern","key":"limit-${i}","insight":"insight ${i}","confidence":7,"source":"observed"}`);
    });

    // Should show 2, not 5
    expect(runSearch('--limit 2')).toContain('2 loaded');
  });

  test('applies confidence decay for observed/inferred sources', () => {
    // Entry from 90 days ago with source=observed, confidence=8
    // Should decay to 8 - floor(90/30) = 8 - 3 = 5
    const ninetyDaysAgo = new Date(Date.now() - 90 * 86400000).toISOString();
    runLog(`{"skill":"review","type":"pattern","key":"decay-test","insight":"old observation","confidence":8,"source":"observed","ts":"${ninetyDaysAgo}"}`);

    // Should show confidence 5 (decayed from 8)
    expect(runSearch()).toContain('confidence: 5/10');
  });

  test('does NOT decay user-stated learnings', () => {
    const ninetyDaysAgo = new Date(Date.now() - 90 * 86400000).toISOString();
    runLog(`{"skill":"review","type":"preference","key":"no-decay-test","insight":"user preference","confidence":9,"source":"user-stated","ts":"${ninetyDaysAgo}"}`);

    // Should still show confidence 9 (no decay for user-stated)
    expect(runSearch()).toContain('confidence: 9/10');
  });

  test('skips malformed JSONL lines gracefully', () => {
    // Write a valid entry, then manually append a bad line and another valid one
    runLog('{"skill":"review","type":"pattern","key":"valid-entry","insight":"valid","confidence":7,"source":"observed"}');
    const file = findLearningsFile();
    expect(file).not.toBeNull();
    fs.appendFileSync(file!, '\nthis is not json\n');
    fs.appendFileSync(file!, '{"skill":"review","type":"pattern","key":"also-valid","insight":"also valid","confidence":6,"source":"observed","ts":"2026-03-28T00:00:00Z"}\n');

    const report = runSearch();
    for (const key of ['valid-entry', 'also-valid']) {
      expect(report).toContain(key);
    }
  });
});
|
||||
|
||||
// Further gstack-learnings-log checks: caller-supplied timestamps, escaping of
// special characters, and array-valued fields.
describe('gstack-learnings-log edge cases', () => {
  // Asserts that a learnings file exists, then parses its trimmed contents as
  // one JSON document (each test here logs exactly one entry).
  const readSingleEntry = () => {
    const file = findLearningsFile();
    expect(file).not.toBeNull();
    return JSON.parse(fs.readFileSync(file!, 'utf-8').trim());
  };

  test('preserves existing timestamp when ts is present', () => {
    runLog('{"skill":"review","type":"pattern","key":"ts-preserve","insight":"test","confidence":5,"source":"observed","ts":"2025-06-15T10:00:00Z"}');

    expect(readSingleEntry().ts).toBe('2025-06-15T10:00:00Z');
  });

  test('handles JSON with special characters in insight', () => {
    // The insight contains double quotes and a literal backslash.
    const payload = JSON.stringify({ skill: 'review', type: 'pattern', key: 'special-chars', insight: 'Use "quotes" and \\backslashes', confidence: 7, source: 'observed' });
    runLog(payload);

    const { insight } = readSingleEntry();
    expect(insight).toContain('quotes');
    expect(insight).toContain('backslashes');
  });

  test('handles JSON with files array field', () => {
    const payload = JSON.stringify({ skill: 'review', type: 'architecture', key: 'with-files', insight: 'test', confidence: 8, source: 'observed', files: ['src/auth.ts', 'src/db.ts'] });
    runLog(payload);

    expect(readSingleEntry().files).toEqual(['src/auth.ts', 'src/db.ts']);
  });
});
|
||||
|
||||
// Further gstack-learnings-search checks: ordering, grouping, combined
// filters, tolerance of incomplete entries, and the lower bound of decay.
describe('gstack-learnings-search edge cases', () => {
  test('sorts by confidence then recency', () => {
    // Two entries: one high confidence old, one lower confidence recent
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'high-conf', insight: 'high confidence entry', confidence: 9, source: 'user-stated', ts: '2026-01-01T00:00:00Z' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'recent', insight: 'recent entry', confidence: 5, source: 'observed', ts: '2026-03-28T00:00:00Z' }));

    const output = runSearch();
    const highIdx = output.indexOf('high-conf');
    const recentIdx = output.indexOf('recent');

    // indexOf returns -1 for a missing entry, and -1 is less than any valid
    // index, so without this guard the ordering check below would pass even
    // if the high-confidence entry were absent from the output.
    expect(highIdx).toBeGreaterThanOrEqual(0);
    // High confidence should appear first
    expect(highIdx).toBeLessThan(recentIdx);
  });

  test('groups output by type', () => {
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'p1', insight: 'a pattern', confidence: 7, source: 'observed' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'pit1', insight: 'a pitfall', confidence: 7, source: 'observed' }));

    const output = runSearch();
    expect(output).toContain('## Patterns');
    expect(output).toContain('## Pitfalls');
  });

  test('combined --type and --query filtering', () => {
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'auth-token', insight: 'check token expiry', confidence: 7, source: 'observed' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'auth-leak', insight: 'auth token in logs', confidence: 7, source: 'observed' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'cache-key', insight: 'cache invalidation', confidence: 7, source: 'observed' }));

    const output = runSearch('--type pattern --query auth');
    expect(output).toContain('auth-token');
    expect(output).not.toContain('auth-leak'); // wrong type
    expect(output).not.toContain('cache-key'); // wrong query
  });

  test('entries with missing key or type are skipped', () => {
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'valid', insight: 'valid entry', confidence: 7, source: 'observed' }));
    const f = findLearningsFile();
    expect(f).not.toBeNull();
    // Append entries missing key and type (written directly, bypassing the log script)
    fs.appendFileSync(f!, JSON.stringify({ skill: 'review', type: 'pattern', insight: 'no key', confidence: 7, source: 'observed' }) + '\n');
    fs.appendFileSync(f!, JSON.stringify({ skill: 'review', key: 'no-type', insight: 'no type', confidence: 7, source: 'observed' }) + '\n');

    const output = runSearch();
    expect(output).toContain('valid');
    expect(output).not.toContain('no key');
    expect(output).not.toContain('no-type');
  });

  test('confidence decay floors at 0 (never negative)', () => {
    // Entry from 1 year ago with confidence 3 — decay would be 12, clamped to 0
    const ts = new Date(Date.now() - 365 * 86400000).toISOString();
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'ancient', insight: 'very old', confidence: 3, source: 'observed', ts }));

    const output = runSearch();
    expect(output).toContain('confidence: 0/10');
  });
});
|
||||
@@ -0,0 +1,132 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
copyDirSync, logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-learnings');

// --- Learnings E2E: seed learnings, run /learn, verify output ---

describeIfSelected('Learnings E2E', ['learnings-show'], () => {
  let workDir: string;
  let gstackHome: string;

  // Builds a throwaway git repo containing the /learn skill, the bin scripts
  // it needs, and a seeded learnings.jsonl under a private GSTACK_HOME.
  beforeAll(() => {
    workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-learnings-'));
    gstackHome = path.join(workDir, '.gstack-home');

    // Init git repo (spawnSync results are not inspected)
    const git = (...gitArgs: string[]) =>
      spawnSync('git', gitArgs, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
    git('init', '-b', 'main');
    git('config', 'user.email', 'test@test.com');
    git('config', 'user.name', 'Test');
    fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
    git('add', '.');
    git('commit', '-m', 'initial');

    // Copy the /learn skill
    copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn'));

    // Copy bin scripts needed by /learn and mark them executable
    const binDir = path.join(workDir, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    ['gstack-learnings-search', 'gstack-learnings-log', 'gstack-slug'].forEach((script) => {
      const target = path.join(binDir, script);
      fs.copyFileSync(path.join(ROOT, 'bin', script), target);
      fs.chmodSync(target, 0o755);
    });

    // Seed learnings JSONL with 3 entries of different types
    const projectDir = path.join(gstackHome, 'projects', 'test-project');
    fs.mkdirSync(projectDir, { recursive: true });

    const seededEntries = [
      {
        skill: 'review', type: 'pattern', key: 'n-plus-one-queries',
        insight: 'ActiveRecord associations in loops cause N+1 queries. Always use includes/preload.',
        confidence: 9, source: 'observed', ts: new Date().toISOString(),
        files: ['app/models/user.rb'],
      },
      {
        skill: 'investigate', type: 'pitfall', key: 'stale-cache-after-deploy',
        insight: 'Redis cache not invalidated on deploy causes stale data for 5 minutes.',
        confidence: 7, source: 'observed', ts: new Date().toISOString(),
        files: ['config/redis.yml'],
      },
      {
        skill: 'ship', type: 'preference', key: 'always-run-rubocop',
        insight: 'User wants rubocop to run before every commit, no exceptions.',
        confidence: 10, source: 'user-stated', ts: new Date().toISOString(),
      },
    ];

    // One JSON document per line, each line newline-terminated.
    const jsonl = seededEntries.map((entry) => `${JSON.stringify(entry)}\n`).join('');
    fs.writeFileSync(path.join(projectDir, 'learnings.jsonl'), jsonl);
  });

  afterAll(() => {
    // Best-effort cleanup: a failed removal must not prevent finalizing the collector.
    try {
      fs.rmSync(workDir, { recursive: true, force: true });
    } catch {}
    finalizeEvalCollector(evalCollector);
  });

  testConcurrentIfSelected('learnings-show', async () => {
    const prompt = `Read the file learn/SKILL.md for the /learn skill instructions.

Run the /learn command (no arguments — show recent learnings).

IMPORTANT:
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
- Replace any references to ~/.claude/skills/gstack/bin/gstack-slug with ./bin/gstack-slug.
- Do NOT use AskUserQuestion.
- Do NOT implement code changes.
- Just show the learnings and summarize what you found.`;

    const result = await runSkillTest({
      prompt,
      workingDirectory: workDir,
      maxTurns: 15,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'learnings-show',
      runId,
    });

    logCost('/learn show', result);

    // Matching is case-insensitive: the output is lowercased before searching.
    const output = result.output.toLowerCase();
    const mentionsAny = (needles: string[]): boolean =>
      needles.some((needle) => output.includes(needle));

    // The agent should have found and displayed the seeded learnings
    const mentionsNPlusOne = mentionsAny(['n-plus-one', 'n+1']);
    const mentionsCache = mentionsAny(['stale', 'cache']);
    const mentionsRubocop = mentionsAny(['rubocop']);

    // At least 2 of 3 learnings should appear in the output
    let foundCount = 0;
    for (const hit of [mentionsNPlusOne, mentionsCache, mentionsRubocop]) {
      if (hit) foundCount += 1;
    }

    // Either of these two exit reasons is accepted.
    const exitOk = result.exitReason === 'success' || result.exitReason === 'error_max_turns';

    recordE2E(evalCollector, '/learn', 'Learnings show E2E', result, {
      passed: exitOk && foundCount >= 2,
    });

    expect(exitOk).toBe(true);
    expect(foundCount).toBeGreaterThanOrEqual(2);

    if (foundCount !== 3) {
      console.warn(`Only ${foundCount}/3 learnings found (N+1: ${mentionsNPlusOne}, cache: ${mentionsCache}, rubocop: ${mentionsRubocop})`);
    } else {
      console.log('All 3 seeded learnings found in output');
    }
  }, 180_000);
});
|
||||
Reference in New Issue
Block a user