From 835cc172ef78954a02967a7cd42698785b7a1825 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 29 Mar 2026 21:09:11 -0700 Subject: [PATCH] test: add operational-learning E2E test (gate-tier) Validates the write path: agent encounters a CLI failure, logs an operational learning to JSONL via gstack-learnings-log. Replaces the removed contributor-mode E2E test. Setup: temp git repo, copy bin scripts, set GSTACK_HOME. Prompt: simulated npm test failure needing --experimental-vm-modules. Assert: learnings.jsonl exists with type=operational entry. Co-Authored-By: Claude Opus 4.6 (1M context) --- test/helpers/touchfiles.ts | 2 + test/skill-e2e-bws.test.ts | 93 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index f28a933e..d21ef347 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -42,6 +42,7 @@ export const E2E_TOUCHFILES: Record = { 'skillmd-outside-git': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'], 'session-awareness': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'], + 'operational-learning': ['scripts/resolvers/preamble.ts', 'bin/gstack-learnings-log'], // QA (+ test-server dependency) 'qa-quick': ['qa/**', 'browse/src/**', 'browse/test/test-server.ts'], @@ -182,6 +183,7 @@ export const E2E_TIERS: Record = { 'skillmd-no-local-binary': 'gate', 'skillmd-outside-git': 'gate', 'session-awareness': 'gate', + 'operational-learning': 'gate', // QA — gate for functional, periodic for quality/benchmarks 'qa-quick': 'gate', diff --git a/test/skill-e2e-bws.test.ts b/test/skill-e2e-bws.test.ts index d207f416..c1a1be15 100644 --- a/test/skill-e2e-bws.test.ts +++ b/test/skill-e2e-bws.test.ts @@ -20,6 +20,7 @@ let tmpDir: string; describeIfSelected('Skill E2E tests', [ 'browse-basic', 'browse-snapshot', 'skillmd-setup-discovery', 'skillmd-no-local-binary', 'skillmd-outside-git', 'session-awareness', + 'operational-learning', ], () => { beforeAll(() => { testServer = startTestServer(); @@ -177,6 +178,98 @@ Report the exact output — either "READY: " or "NEEDS_SETUP".`, try { fs.rmSync(nonGitDir, { recursive: true, force: true }); } catch {} }, 60_000); + testConcurrentIfSelected('operational-learning', async () => { + const opDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-oplearn-')); + const gstackHome = path.join(opDir, '.gstack-home'); + + // Init git repo + const run = (cmd: string, args: string[]) => + spawnSync(cmd, args, { cwd: opDir, stdio: 'pipe', timeout: 5000 }); + run('git', ['init', '-b', 'main']); + run('git', ['config', 'user.email', 'test@test.com']); + run('git', ['config', 'user.name', 'Test']); + fs.writeFileSync(path.join(opDir, 'app.ts'), 'console.log("hello");\n'); + run('git', ['add', '.']); + run('git', ['commit', '-m', 'initial']); + + // Copy bin scripts + const binDir = path.join(opDir, 'bin'); + fs.mkdirSync(binDir, { recursive: true }); + for (const script of ['gstack-learnings-log', 'gstack-slug']) { + fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script)); + fs.chmodSync(path.join(binDir, script), 0o755); + } + + // gstack-learnings-log will create the project dir automatically via gstack-slug + + const result = await runSkillTest({ + prompt: `You just ran \`npm test\` in this project and it failed with this error: + +Error: --experimental-vm-modules flag is required for ESM support in this project. +Run: npm test --experimental-vm-modules + +Per the Operational Self-Improvement instructions below, log an operational learning about this failure. + +## Operational Self-Improvement + +Before completing, reflect on this session: +- Did any commands fail unexpectedly? + +If yes, log an operational learning for future sessions: + +\`\`\`bash +GSTACK_HOME="${gstackHome}" ${binDir}/gstack-learnings-log '{"skill":"qa","type":"operational","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"observed"}' +\`\`\` + +Replace SHORT_KEY with a kebab-case key like "esm-vm-modules-flag". +Replace DESCRIPTION with a one-sentence description of what you learned. +Replace N with a confidence score 1-10. + +Log the operational learning now. Then say what you logged.`, + workingDirectory: opDir, + maxTurns: 5, + timeout: 30_000, + testName: 'operational-learning', + runId, + }); + + logCost('operational learning', result); + + const exitOk = ['success', 'error_max_turns'].includes(result.exitReason); + + // Check if learnings file was created with an operational entry + // The slug is derived from the git repo (dirname), so search all project dirs + let hasOperational = false; + const projectsDir = path.join(gstackHome, 'projects'); + if (fs.existsSync(projectsDir)) { + for (const slug of fs.readdirSync(projectsDir)) { + const lPath = path.join(projectsDir, slug, 'learnings.jsonl'); + if (fs.existsSync(lPath)) { + const jsonl = fs.readFileSync(lPath, 'utf-8').trim(); + if (jsonl) { + const entries = jsonl.split('\n').map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean); + const opEntry = entries.find(e => e.type === 'operational'); + if (opEntry) { + hasOperational = true; + console.log(`Operational learning logged: key="${opEntry.key}" insight="${opEntry.insight}" (slug: ${slug})`); + break; + } + } + } + } + } + + recordE2E(evalCollector, 'operational learning', 'Skill E2E tests', result, { + passed: exitOk && hasOperational, + }); + + expect(exitOk).toBe(true); + expect(hasOperational).toBe(true); + + // Clean up + try { fs.rmSync(opDir, { recursive: true, force: true }); } catch {} + }, 90_000); + testConcurrentIfSelected('session-awareness', async () => { const sessionDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-session-'));