From 0427c957f25721d679319ce5930062777300c6b3 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Fri, 17 Apr 2026 06:45:09 +0800
Subject: [PATCH] test: end-to-end pipeline + preamble injection coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added 6 tests to test/plan-tune.test.ts:

Preamble injection (3 tests):
  - tier 2+ includes Question Tuning section with preference check, log,
    and user-origin gate language ('profile-poisoning defense', 'inline-user')
  - tier 1 does NOT include the prose section (QUESTION_TUNING bash echo
    still fires since it's in the bash block all tiers share)
  - codex host swaps binDir references to $GSTACK_BIN

End-to-end pipeline (3 tests) — real binaries working together, not mocks:
  - Log 5 expand choices → --derive → profile shows scope_appetite > 0.5
    (full log → registry lookup → signal map → normalization round-trip)
  - --write source: inline-tool-output rejected; --read confirms no pref
    was persisted (the profile-poisoning defense actually works end-to-end)
  - Migrate a 3-session legacy file; confirm legacy gstack-builder-profile
    shim still returns SESSION_COUNT: 3, TIER: welcome_back,
    CROSS_PROJECT: true

test/plan-tune.test.ts now has 47 tests total.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 Reviewer note: hunk amended by hand. Exit-status assertions were added ahead
 of the two JSON.parse calls in the end-to-end tests, and the hunk/diffstat
 counts were updated to match. The post-image blob id on the index line
 predates this amendment.

 test/plan-tune.test.ts | 184 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)

diff --git a/test/plan-tune.test.ts b/test/plan-tune.test.ts
index de0c74a8..9e83a0b4 100644
--- a/test/plan-tune.test.ts
+++ b/test/plan-tune.test.ts
@@ -454,6 +454,190 @@ describe('one-way-doors classifier', () => {
   });
 });
 
+// -----------------------------------------------------------------------
+// Preamble injection — the QUESTION_TUNING section must appear for tier >=2
+// -----------------------------------------------------------------------
+
+describe('preamble — QUESTION_TUNING injection', () => {
+  test('tier 2+ skills include the Question Tuning section', async () => {
+    const { generatePreamble } = await import('../scripts/resolvers/preamble');
+    const ctx = {
+      skillName: 'test-skill',
+      tmplPath: 'test.tmpl',
+      host: 'claude' as const,
+      paths: {
+        skillRoot: '~/.claude/skills/gstack',
+        localSkillRoot: '.claude/skills/gstack',
+        binDir: '~/.claude/skills/gstack/bin',
+        browseDir: '~/.claude/skills/gstack/browse/dist',
+        designDir: '~/.claude/skills/gstack/design/dist',
+      },
+      preambleTier: 2,
+    };
+    const out = generatePreamble(ctx);
+    expect(out).toContain('QUESTION_TUNING: $_QUESTION_TUNING');
+    expect(out).toContain('## Question Tuning');
+    expect(out).toContain('gstack-question-preference --check');
+    expect(out).toContain('gstack-question-log');
+    expect(out).toContain('profile-poisoning defense');
+    expect(out).toContain('inline-user');
+  });
+
+  test('tier 1 skills do NOT include Question Tuning section', async () => {
+    const { generatePreamble } = await import('../scripts/resolvers/preamble');
+    const ctx = {
+      skillName: 'test-skill',
+      tmplPath: 'test.tmpl',
+      host: 'claude' as const,
+      paths: {
+        skillRoot: '~/.claude/skills/gstack',
+        localSkillRoot: '.claude/skills/gstack',
+        binDir: '~/.claude/skills/gstack/bin',
+        browseDir: '~/.claude/skills/gstack/browse/dist',
+        designDir: '~/.claude/skills/gstack/design/dist',
+      },
+      preambleTier: 1,
+    };
+    const out = generatePreamble(ctx);
+    // QUESTION_TUNING config echo still fires (it's in the bash block which all tiers get),
+    // but the prose section should NOT be present for tier 1.
+    expect(out).not.toContain('## Question Tuning');
+  });
+
+  test('codex host produces different paths', async () => {
+    const { generateQuestionTuning } = await import('../scripts/resolvers/question-tuning');
+    const codexCtx = {
+      skillName: 'test',
+      tmplPath: 'x',
+      host: 'codex' as const,
+      paths: {
+        skillRoot: '$GSTACK_ROOT',
+        localSkillRoot: '.agents/skills/gstack',
+        binDir: '$GSTACK_BIN',
+        browseDir: '$GSTACK_BROWSE',
+        designDir: '$GSTACK_DESIGN',
+      },
+    };
+    const out = generateQuestionTuning(codexCtx);
+    expect(out).toContain('$GSTACK_BIN/gstack-question-preference');
+    expect(out).toContain('$GSTACK_BIN/gstack-question-log');
+  });
+});
+
+// -----------------------------------------------------------------------
+// End-to-end: log → preference → derive pipeline
+//
+// Exercises the real binaries (not mocks) to make sure the schema contract
+// between them actually holds.
+//
+// Every test hands the binaries a throwaway GSTACK_HOME and deletes it in `finally`.
+// -----------------------------------------------------------------------
+
+describe('end-to-end pipeline (binaries working together)', () => {
+  test('log many expand choices → derive pushes scope_appetite up', () => {
+    const tmpHome = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-'));
+    try {
+      const env = { ...process.env, GSTACK_HOME: tmpHome };
+      const { spawnSync } = require('child_process');
+      const logBin = path.join(ROOT, 'bin', 'gstack-question-log');
+      const devBin = path.join(ROOT, 'bin', 'gstack-developer-profile');
+
+      for (let i = 0; i < 5; i++) {
+        const r = spawnSync(
+          logBin,
+          [
+            JSON.stringify({
+              skill: 'plan-ceo-review',
+              question_id: 'plan-ceo-review-mode',
+              question_summary: 'mode?',
+              user_choice: 'expand',
+              session_id: `s${i}`,
+              ts: `2026-04-0${i + 1}T10:00:00Z`,
+            }),
+          ],
+          { env, cwd: ROOT, encoding: 'utf-8' },
+        );
+        expect(r.status).toBe(0);
+      }
+
+      const derive = spawnSync(devBin, ['--derive'], { env, cwd: ROOT, encoding: 'utf-8' });
+      expect(derive.status).toBe(0);
+
+      const profileOut = spawnSync(devBin, ['--profile'], { env, cwd: ROOT, encoding: 'utf-8' });
+      // A non-zero exit should fail here, not as a JSON.parse error on empty stdout.
+      expect(profileOut.status).toBe(0);
+      const p = JSON.parse(profileOut.stdout);
+      expect(p.inferred.sample_size).toBe(5);
+      expect(p.inferred.values.scope_appetite).toBeGreaterThan(0.5);
+    } finally {
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+    }
+  });
+
+  test('preference blocks tune: write from inline-tool-output in full pipeline', () => {
+    const tmpHome = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-'));
+    try {
+      const env = { ...process.env, GSTACK_HOME: tmpHome };
+      const { spawnSync } = require('child_process');
+      const prefBin = path.join(ROOT, 'bin', 'gstack-question-preference');
+
+      const r = spawnSync(
+        prefBin,
+        [
+          '--write',
+          JSON.stringify({ question_id: 'fake-id', preference: 'never-ask', source: 'inline-tool-output' }),
+        ],
+        { env, cwd: ROOT, encoding: 'utf-8' },
+      );
+      expect(r.status).toBe(2);
+      expect(r.stderr).toContain('poisoning');
+
+      // Verify no preference was written
+      const read = spawnSync(prefBin, ['--read'], { env, cwd: ROOT, encoding: 'utf-8' });
+      expect(read.status).toBe(0);
+      const prefs = JSON.parse(read.stdout);
+      expect(prefs['fake-id']).toBeUndefined();
+    } finally {
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+    }
+  });
+
+  test('migration preserves sessions, builder-profile shim still works', () => {
+    const tmpHome = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-'));
+    try {
+      const env = { ...process.env, GSTACK_HOME: tmpHome };
+      const { spawnSync } = require('child_process');
+      const devBin = path.join(ROOT, 'bin', 'gstack-developer-profile');
+      const shimBin = path.join(ROOT, 'bin', 'gstack-builder-profile');
+
+      // Seed a legacy file
+      fs.writeFileSync(
+        path.join(tmpHome, 'builder-profile.jsonl'),
+        [
+          { date: '2026-01-01', mode: 'builder', project_slug: 'x', signals: ['taste'] },
+          { date: '2026-02-01', mode: 'startup', project_slug: 'x', signals: ['named_users'] },
+          { date: '2026-03-01', mode: 'builder', project_slug: 'y', signals: ['agency'] },
+        ]
+          .map((e) => JSON.stringify(e))
+          .join('\n') + '\n',
+      );
+
+      // Migrate
+      const m = spawnSync(devBin, ['--migrate'], { env, cwd: ROOT, encoding: 'utf-8' });
+      expect(m.status).toBe(0);
+
+      // Legacy shim should still return the same KEY: VALUE shape
+      const shimOut = spawnSync(shimBin, [], { env, cwd: ROOT, encoding: 'utf-8' });
+      expect(shimOut.status).toBe(0);
+      expect(shimOut.stdout).toContain('SESSION_COUNT: 3');
+      expect(shimOut.stdout).toContain('TIER: welcome_back');
+      expect(shimOut.stdout).toContain('CROSS_PROJECT: true');
+    } finally {
+      fs.rmSync(tmpHome, { recursive: true, force: true });
+    }
+  });
+});
+
 function findAllTemplates(): string[] {
   const results: string[] = [];
   function walk(dir: string) {