mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-05 13:15:24 +02:00
test: add operational-learning E2E test (gate-tier)
Validates the write path: the agent encounters a CLI failure and logs an operational learning to JSONL via gstack-learnings-log. Replaces the removed contributor-mode E2E test.
Setup: temp git repo, copy bin scripts, set GSTACK_HOME.
Prompt: simulated npm test failure needing --experimental-vm-modules.
Assert: learnings.jsonl exists with a type=operational entry.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -42,6 +42,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'skillmd-outside-git': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
|
||||
'session-awareness': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
'operational-learning': ['scripts/resolvers/preamble.ts', 'bin/gstack-learnings-log'],
|
||||
|
||||
// QA (+ test-server dependency)
|
||||
'qa-quick': ['qa/**', 'browse/src/**', 'browse/test/test-server.ts'],
|
||||
@@ -182,6 +183,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'skillmd-no-local-binary': 'gate',
|
||||
'skillmd-outside-git': 'gate',
|
||||
'session-awareness': 'gate',
|
||||
'operational-learning': 'gate',
|
||||
|
||||
// QA — gate for functional, periodic for quality/benchmarks
|
||||
'qa-quick': 'gate',
|
||||
|
||||
@@ -20,6 +20,7 @@ let tmpDir: string;
|
||||
describeIfSelected('Skill E2E tests', [
|
||||
'browse-basic', 'browse-snapshot', 'skillmd-setup-discovery',
|
||||
'skillmd-no-local-binary', 'skillmd-outside-git', 'session-awareness',
|
||||
'operational-learning',
|
||||
], () => {
|
||||
beforeAll(() => {
|
||||
testServer = startTestServer();
|
||||
@@ -177,6 +178,98 @@ Report the exact output — either "READY: <path>" or "NEEDS_SETUP".`,
|
||||
try { fs.rmSync(nonGitDir, { recursive: true, force: true }); } catch {}
|
||||
}, 60_000);
|
||||
|
||||
/**
 * E2E (gate tier): operational-learning write path.
 *
 * Builds a throwaway git repo in the OS temp dir, copies the
 * `gstack-learnings-log` and `gstack-slug` scripts from ROOT/bin into it,
 * and prompts the agent with a simulated `npm test` failure plus the
 * "Operational Self-Improvement" instructions. The test passes when the run
 * ends with `success` or `error_max_turns` AND some
 * `<GSTACK_HOME>/projects/<slug>/learnings.jsonl` contains an entry whose
 * `type` is `"operational"`.
 *
 * The temp repo is removed in a `finally` block so it does not leak when the
 * skill run throws or an assertion fails.
 */
testConcurrentIfSelected('operational-learning', async () => {
  const opDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-oplearn-'));

  try {
    // GSTACK_HOME lives inside the temp repo so everything is cleaned up together.
    const gstackHome = path.join(opDir, '.gstack-home');

    // Init git repo with a single commit.
    // NOTE(review): exit statuses of these git commands are not checked here.
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: opDir, stdio: 'pipe', timeout: 5000 });
    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    fs.writeFileSync(path.join(opDir, 'app.ts'), 'console.log("hello");\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    // Copy bin scripts and make them executable
    const binDir = path.join(opDir, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    for (const script of ['gstack-learnings-log', 'gstack-slug']) {
      const dest = path.join(binDir, script);
      fs.copyFileSync(path.join(ROOT, 'bin', script), dest);
      fs.chmodSync(dest, 0o755);
    }

    // gstack-learnings-log will create the project dir automatically via gstack-slug

    // The script path is quoted (like GSTACK_HOME) so the command in the
    // prompt stays valid when the temp directory path contains spaces.
    const result = await runSkillTest({
      prompt: `You just ran \`npm test\` in this project and it failed with this error:

Error: --experimental-vm-modules flag is required for ESM support in this project.
Run: npm test --experimental-vm-modules

Per the Operational Self-Improvement instructions below, log an operational learning about this failure.

## Operational Self-Improvement

Before completing, reflect on this session:
- Did any commands fail unexpectedly?

If yes, log an operational learning for future sessions:

\`\`\`bash
GSTACK_HOME="${gstackHome}" "${binDir}/gstack-learnings-log" '{"skill":"qa","type":"operational","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"observed"}'
\`\`\`

Replace SHORT_KEY with a kebab-case key like "esm-vm-modules-flag".
Replace DESCRIPTION with a one-sentence description of what you learned.
Replace N with a confidence score 1-10.

Log the operational learning now. Then say what you logged.`,
      workingDirectory: opDir,
      maxTurns: 5,
      timeout: 30_000,
      testName: 'operational-learning',
      runId,
    });

    logCost('operational learning', result);

    // Hitting the turn cap is tolerated; the JSONL check below decides
    // whether the learning was actually written.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    // Check if learnings file was created with an operational entry.
    // The slug is derived from the git repo (dirname), so search all project dirs.
    let hasOperational = false;
    const projectsDir = path.join(gstackHome, 'projects');
    if (fs.existsSync(projectsDir)) {
      for (const slug of fs.readdirSync(projectsDir)) {
        const lPath = path.join(projectsDir, slug, 'learnings.jsonl');
        if (!fs.existsSync(lPath)) continue;

        const jsonl = fs.readFileSync(lPath, 'utf-8').trim();
        if (!jsonl) continue;

        // Lines that are not valid JSON are skipped rather than failing the test.
        const entries = jsonl
          .split('\n')
          .map(l => { try { return JSON.parse(l); } catch { return null; } })
          .filter(Boolean);
        const opEntry = entries.find(e => e.type === 'operational');
        if (opEntry) {
          hasOperational = true;
          console.log(`Operational learning logged: key="${opEntry.key}" insight="${opEntry.insight}" (slug: ${slug})`);
          break;
        }
      }
    }

    // Record the outcome before asserting so a failing run is still collected.
    recordE2E(evalCollector, 'operational learning', 'Skill E2E tests', result, {
      passed: exitOk && hasOperational,
    });

    expect(exitOk).toBe(true);
    expect(hasOperational).toBe(true);
  } finally {
    // Clean up (best-effort): runs on success, assertion failure, or thrown error.
    try { fs.rmSync(opDir, { recursive: true, force: true }); } catch {}
  }
}, 90_000);
|
||||
|
||||
testConcurrentIfSelected('session-awareness', async () => {
|
||||
const sessionDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-session-'));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user