diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index 981459b2..b475daad 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -95,6 +95,9 @@ export const E2E_TOUCHFILES: Record = { 'cso-diff-mode': ['cso/**'], 'cso-infra-scope': ['cso/**'], + // Learnings + 'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'], + // Document-release 'document-release': ['document-release/**'], @@ -238,6 +241,9 @@ export const E2E_TIERS: Record = { 'cso-diff-mode': 'gate', 'cso-infra-scope': 'periodic', + // Learnings — gate (functional guardrail: seeded learnings must appear) + 'learnings-show': 'gate', + // Document-release — gate (CHANGELOG guardrail) 'document-release': 'gate', diff --git a/test/skill-e2e-learnings.test.ts b/test/skill-e2e-learnings.test.ts new file mode 100644 index 00000000..dfd18513 --- /dev/null +++ b/test/skill-e2e-learnings.test.ts @@ -0,0 +1,132 @@ +import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; +import { runSkillTest } from './helpers/session-runner'; +import { + ROOT, runId, evalsEnabled, + describeIfSelected, testConcurrentIfSelected, + copyDirSync, logCost, recordE2E, + createEvalCollector, finalizeEvalCollector, +} from './helpers/e2e-helpers'; +import { spawnSync } from 'child_process'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; + +const evalCollector = createEvalCollector('e2e-learnings'); + +// --- Learnings E2E: seed learnings, run /learn, verify output --- + +describeIfSelected('Learnings E2E', ['learnings-show'], () => { + let workDir: string; + let gstackHome: string; + + beforeAll(() => { + workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-learnings-')); + gstackHome = path.join(workDir, '.gstack-home'); + + // Init git repo + const run = (cmd: string, args: string[]) => + spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 }); + run('git', ['init', '-b', 'main']); + run('git', ['config', 'user.email', 'test@test.com']); + run('git', ['config', 'user.name', 'Test']); + fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n'); + run('git', ['add', '.']); + run('git', ['commit', '-m', 'initial']); + + // Copy the /learn skill + copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn')); + + // Copy bin scripts needed by /learn + const binDir = path.join(workDir, 'bin'); + fs.mkdirSync(binDir, { recursive: true }); + for (const script of ['gstack-learnings-search', 'gstack-learnings-log', 'gstack-slug']) { + fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script)); + fs.chmodSync(path.join(binDir, script), 0o755); + } + + // Seed learnings JSONL with 3 entries of different types + const slug = 'test-project'; + const projectDir = path.join(gstackHome, 'projects', slug); + fs.mkdirSync(projectDir, { recursive: true }); + + const learnings = [ + { + skill: 'review', type: 'pattern', key: 'n-plus-one-queries', + insight: 'ActiveRecord associations in loops cause N+1 queries. Always use includes/preload.', + confidence: 9, source: 'observed', ts: new Date().toISOString(), + files: ['app/models/user.rb'], + }, + { + skill: 'investigate', type: 'pitfall', key: 'stale-cache-after-deploy', + insight: 'Redis cache not invalidated on deploy causes stale data for 5 minutes.', + confidence: 7, source: 'observed', ts: new Date().toISOString(), + files: ['config/redis.yml'], + }, + { + skill: 'ship', type: 'preference', key: 'always-run-rubocop', + insight: 'User wants rubocop to run before every commit, no exceptions.', + confidence: 10, source: 'user-stated', ts: new Date().toISOString(), + }, + ]; + + fs.writeFileSync( + path.join(projectDir, 'learnings.jsonl'), + learnings.map(l => JSON.stringify(l)).join('\n') + '\n', + ); + }); + + afterAll(() => { + try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {} + finalizeEvalCollector(evalCollector); + }); + + testConcurrentIfSelected('learnings-show', async () => { + const result = await runSkillTest({ + prompt: `Read the file learn/SKILL.md for the /learn skill instructions. + +Run the /learn command (no arguments — show recent learnings). + +IMPORTANT: +- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts. +- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/. + Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands. +- Replace any references to ~/.claude/skills/gstack/bin/gstack-slug with ./bin/gstack-slug. +- Do NOT use AskUserQuestion. +- Do NOT implement code changes. +- Just show the learnings and summarize what you found.`, + workingDirectory: workDir, + maxTurns: 15, + allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'], + timeout: 120_000, + testName: 'learnings-show', + runId, + }); + + logCost('/learn show', result); + + const output = result.output.toLowerCase(); + + // The agent should have found and displayed the seeded learnings + const mentionsNPlusOne = output.includes('n-plus-one') || output.includes('n+1'); + const mentionsCache = output.includes('stale') || output.includes('cache'); + const mentionsRubocop = output.includes('rubocop'); + + // At least 2 of 3 learnings should appear in the output + const foundCount = [mentionsNPlusOne, mentionsCache, mentionsRubocop].filter(Boolean).length; + + const exitOk = ['success', 'error_max_turns'].includes(result.exitReason); + + recordE2E(evalCollector, '/learn', 'Learnings show E2E', result, { + passed: exitOk && foundCount >= 2, + }); + + expect(exitOk).toBe(true); + expect(foundCount).toBeGreaterThanOrEqual(2); + + if (foundCount === 3) { + console.log('All 3 seeded learnings found in output'); + } else { + console.warn(`Only ${foundCount}/3 learnings found (N+1: ${mentionsNPlusOne}, cache: ${mentionsCache}, rubocop: ${mentionsRubocop})`); + } + }, 180_000); +});