/** * /plan-tune tests (gate tier) * * Covers the foundation of /plan-tune v1: * - Question registry schema validation * - Registry completeness (every AskUserQuestion pattern has an id) * - Id uniqueness (no duplicates) * - One-way door safety declarations * - Signal map references valid registry ids * * Binary-level tests (question-log, question-preference, developer-profile) * and migration tests live in sibling files created as those binaries ship. */ import { describe, test, expect } from 'bun:test'; import { QUESTIONS, getQuestion, getOneWayDoorIds, getAllRegisteredIds, getRegistryStats, type QuestionDef, } from '../scripts/question-registry'; import { classifyQuestion, isOneWayDoor, DESTRUCTIVE_PATTERN_LIST, ONE_WAY_SKILL_CATEGORY_SET, } from '../scripts/one-way-doors'; import { SIGNAL_MAP, applySignal, validateRegistrySignalKeys, newDimensionTotals, normalizeToDimensionValue, ALL_DIMENSIONS, } from '../scripts/psychographic-signals'; import { ARCHETYPES, FALLBACK_ARCHETYPE, matchArchetype, getAllArchetypeNames, } from '../scripts/archetypes'; import * as fs from 'fs'; import * as path from 'path'; const ROOT = path.resolve(import.meta.dir, '..'); // ----------------------------------------------------------------------- // Schema validation // ----------------------------------------------------------------------- describe('question-registry schema', () => { test('every entry has required fields', () => { for (const [key, q] of Object.entries(QUESTIONS as Record)) { expect(q.id).toBeDefined(); expect(q.skill).toBeDefined(); expect(q.category).toBeDefined(); expect(q.door_type).toBeDefined(); expect(q.description).toBeDefined(); expect(q.description.length).toBeGreaterThan(0); expect(q.id).toBe(key); // key and id must match } }); test('all ids are kebab-case and start with skill name', () => { for (const q of Object.values(QUESTIONS as Record)) { expect(q.id).toMatch(/^[a-z0-9-]+$/); expect(q.id.startsWith(q.skill + '-')).toBe(true); expect(q.id.length).toBeLessThanOrEqual(64); } }); test('no duplicate ids (keys and id fields are 1:1 by construction)', () => { const ids = Object.values(QUESTIONS as Record).map((q) => q.id); const unique = new Set(ids); expect(unique.size).toBe(ids.length); }); test('category is one of the allowed values', () => { const ALLOWED = new Set(['approval', 'clarification', 'routing', 'cherry-pick', 'feedback-loop']); for (const q of Object.values(QUESTIONS as Record)) { expect(ALLOWED.has(q.category)).toBe(true); } }); test('door_type is one-way or two-way', () => { for (const q of Object.values(QUESTIONS as Record)) { expect(q.door_type === 'one-way' || q.door_type === 'two-way').toBe(true); } }); test('options (if present) are non-empty arrays of strings', () => { for (const q of Object.values(QUESTIONS as Record)) { if (q.options) { expect(Array.isArray(q.options)).toBe(true); expect(q.options.length).toBeGreaterThan(0); for (const opt of q.options) { expect(typeof opt).toBe('string'); expect(opt.length).toBeGreaterThan(0); } } } }); test('descriptions are short and informative (<= 200 chars, no newlines)', () => { for (const q of Object.values(QUESTIONS as Record)) { expect(q.description.length).toBeLessThanOrEqual(200); expect(q.description.includes('\n')).toBe(false); } }); }); // ----------------------------------------------------------------------- // Runtime helpers // ----------------------------------------------------------------------- describe('question-registry helpers', () => { test('getQuestion returns entry for known id', () => { const q = getQuestion('ship-test-failure-triage'); expect(q).toBeDefined(); expect(q?.skill).toBe('ship'); expect(q?.door_type).toBe('one-way'); }); test('getQuestion returns undefined for unknown id', () => { expect(getQuestion('this-is-not-registered')).toBeUndefined(); }); test('getOneWayDoorIds returns Set of one-way ids', () => { const ids = getOneWayDoorIds(); expect(ids.has('ship-test-failure-triage')).toBe(true); expect(ids.has('review-sql-safety')).toBe(true); expect(ids.has('land-and-deploy-merge-confirm')).toBe(true); // And does NOT include a known two-way door: expect(ids.has('ship-changelog-voice-polish')).toBe(false); }); test('getAllRegisteredIds count matches QUESTIONS keys', () => { expect(getAllRegisteredIds().size).toBe(Object.keys(QUESTIONS).length); }); test('getRegistryStats totals are consistent', () => { const stats = getRegistryStats(); expect(stats.total).toBe(Object.keys(QUESTIONS).length); expect(stats.one_way + stats.two_way).toBe(stats.total); const bySkillSum = Object.values(stats.by_skill).reduce((a, b) => a + b, 0); expect(bySkillSum).toBe(stats.total); const byCategorySum = Object.values(stats.by_category).reduce((a, b) => a + b, 0); expect(byCategorySum).toBe(stats.total); }); }); // ----------------------------------------------------------------------- // Safety contract — one-way doors // ----------------------------------------------------------------------- describe('one-way door safety', () => { test('every destructive/security question is declared one-way', () => { // Safety-critical question ids must exist and be one-way. const mustBeOneWay = [ 'ship-test-failure-triage', // shipping broken tests 'review-sql-safety', // SQL injection path 'review-llm-trust-boundary', // LLM trust boundary 'cso-global-scan-approval', // scans outside branch 'cso-finding-fix', // security finding 'land-and-deploy-merge-confirm', // actual merge 'land-and-deploy-rollback', // rollback decision 'investigate-fix-apply', // applying a fix 'plan-ceo-review-premise-revise', // changing agreed premise 'plan-eng-review-arch-finding', // architecture change 'office-hours-landscape-privacy-gate',// sending data to search provider 'autoplan-user-challenge', // scope direction change ]; const oneWayIds = getOneWayDoorIds(); for (const id of mustBeOneWay) { expect(getQuestion(id)).toBeDefined(); expect(oneWayIds.has(id)).toBe(true); } }); test('at least 10 one-way doors are declared', () => { // Sanity check — if we lose one-way classification on critical questions, // this fails before safety bugs ship. expect(getOneWayDoorIds().size).toBeGreaterThanOrEqual(10); }); }); // ----------------------------------------------------------------------- // Coverage breadth — make sure we span the high-volume skills // ----------------------------------------------------------------------- describe('registry breadth', () => { test('high-volume skills have at least one registered question', () => { const stats = getRegistryStats(); const highVolume = [ 'ship', 'review', 'office-hours', 'plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review', 'qa', 'investigate', 'land-and-deploy', 'cso', ]; for (const skill of highVolume) { expect(stats.by_skill[skill] ?? 0).toBeGreaterThan(0); } }); test('preamble one-time prompts are registered (telemetry, proactive, routing)', () => { expect(getQuestion('preamble-telemetry-consent')).toBeDefined(); expect(getQuestion('preamble-proactive-behavior')).toBeDefined(); expect(getQuestion('preamble-routing-injection')).toBeDefined(); }); test('/plan-tune itself registers its enable + setup + mutation-confirm', () => { expect(getQuestion('plan-tune-enable-setup')).toBeDefined(); expect(getQuestion('plan-tune-declared-dimension')).toBeDefined(); expect(getQuestion('plan-tune-confirm-mutation')).toBeDefined(); }); }); // ----------------------------------------------------------------------- // Signal map consistency // ----------------------------------------------------------------------- describe('psychographic signal map', () => { test('signal_keys in registry are typed strings', () => { for (const q of Object.values(QUESTIONS as Record)) { if (q.signal_key !== undefined) { expect(typeof q.signal_key).toBe('string'); expect(q.signal_key.length).toBeGreaterThan(0); expect(q.signal_key).toMatch(/^[a-z0-9-]+$/); } } }); test('every signal_key in registry has a SIGNAL_MAP entry', () => { const { missing } = validateRegistrySignalKeys(); expect(missing).toEqual([]); }); test('applySignal mutates dimension totals per mapping', () => { const dims = newDimensionTotals(); const applied = applySignal(dims, 'scope-appetite', 'expand'); expect(applied.length).toBeGreaterThan(0); expect(dims.scope_appetite).toBeCloseTo(0.06, 5); }); test('applySignal returns [] for unknown signal_key', () => { const dims = newDimensionTotals(); const applied = applySignal(dims, 'no-such-signal', 'anything'); expect(applied).toEqual([]); expect(dims.scope_appetite).toBe(0); }); test('applySignal returns [] for unknown user_choice', () => { const dims = newDimensionTotals(); const applied = applySignal(dims, 'scope-appetite', 'definitely-not-a-real-choice'); expect(applied).toEqual([]); }); test('normalizeToDimensionValue maps 0 → 0.5 (neutral)', () => { expect(normalizeToDimensionValue(0)).toBeCloseTo(0.5, 5); }); test('normalizeToDimensionValue returns values in [0, 1]', () => { for (const total of [-10, -1, -0.5, 0, 0.5, 1, 10]) { const v = normalizeToDimensionValue(total); expect(v).toBeGreaterThanOrEqual(0); expect(v).toBeLessThanOrEqual(1); } }); test('ALL_DIMENSIONS has 5 entries', () => { expect(ALL_DIMENSIONS.length).toBe(5); }); test('no extra SIGNAL_MAP keys without registry reference (informational)', () => { // Extra keys are allowed (a signal might be reserved for upcoming registry // entries). But list them so drift is visible. const { extra } = validateRegistrySignalKeys(); // Allow up to 3 "reserved" extras before flagging. Tighten later. expect(extra.length).toBeLessThanOrEqual(3); }); }); // ----------------------------------------------------------------------- // Archetypes // ----------------------------------------------------------------------- describe('archetypes', () => { test('each archetype has name, description, center, tightness', () => { for (const arch of ARCHETYPES) { expect(arch.name).toBeDefined(); expect(arch.description).toBeDefined(); expect(arch.center).toBeDefined(); expect(arch.tightness).toBeGreaterThan(0); for (const d of ALL_DIMENSIONS) { expect(typeof arch.center[d]).toBe('number'); expect(arch.center[d]).toBeGreaterThanOrEqual(0); expect(arch.center[d]).toBeLessThanOrEqual(1); } } }); test('archetype names are unique', () => { const names = ARCHETYPES.map((a) => a.name); expect(new Set(names).size).toBe(names.length); }); test('matchArchetype returns Cathedral Builder for boil-the-ocean profile', () => { const dims = { scope_appetite: 0.88, risk_tolerance: 0.55, detail_preference: 0.5, autonomy: 0.5, architecture_care: 0.85, }; const match = matchArchetype(dims); expect(match.name).toBe('Cathedral Builder'); }); test('matchArchetype returns Ship-It Pragmatist for small-scope/fast profile', () => { const dims = { scope_appetite: 0.22, risk_tolerance: 0.78, detail_preference: 0.25, autonomy: 0.7, architecture_care: 0.38, }; const match = matchArchetype(dims); expect(match.name).toBe('Ship-It Pragmatist'); }); test('matchArchetype returns Polymath for extreme-outlier profile', () => { const dims = { scope_appetite: 0.05, risk_tolerance: 0.95, detail_preference: 0.95, autonomy: 0.05, architecture_care: 0.05, }; const match = matchArchetype(dims); expect(match.name).toBe(FALLBACK_ARCHETYPE.name); }); test('getAllArchetypeNames includes Polymath fallback', () => { const names = getAllArchetypeNames(); expect(names).toContain('Polymath'); expect(names.length).toBe(ARCHETYPES.length + 1); }); }); // ----------------------------------------------------------------------- // Registry completeness — warn about SKILL.md.tmpl AskUserQuestion calls // that don't appear to map to any registry entry. // // This is NOT a strict CI failure. Many AskUserQuestion invocations are // dynamic (agent generates question text at runtime), which is fine — the // agent picks the best-fitting registry id or generates an ad-hoc id. // // The test reports a count for visibility. A future enhancement will scan // for specific question_id references in template prose and require those // referenced ids to exist in the registry. // ----------------------------------------------------------------------- describe('AskUserQuestion template coverage (informational)', () => { test('count of templates using AskUserQuestion is non-trivial', () => { const templates = findAllTemplates(); const usingAsk = templates.filter((p) => fs.readFileSync(p, 'utf-8').includes('AskUserQuestion'), ); // At the time of writing, ~35 templates reference AskUserQuestion. // This sanity check catches an accidental global removal. expect(usingAsk.length).toBeGreaterThan(20); }); test('registry covers >= 10 skills from template files', () => { const stats = getRegistryStats(); expect(Object.keys(stats.by_skill).length).toBeGreaterThanOrEqual(10); }); }); // ----------------------------------------------------------------------- // One-way door classifier (belt-and-suspenders keyword fallback) // ----------------------------------------------------------------------- describe('one-way-doors classifier', () => { test('registry lookup wins when question_id is known', () => { const result = classifyQuestion({ question_id: 'ship-test-failure-triage' }); expect(result.oneWay).toBe(true); expect(result.reason).toBe('registry'); const safeResult = classifyQuestion({ question_id: 'ship-changelog-voice-polish' }); expect(safeResult.oneWay).toBe(false); expect(safeResult.reason).toBe('registry'); }); test('unknown question_id falls through to other checks', () => { const result = classifyQuestion({ question_id: 'some-ad-hoc-question-id' }); expect(result.reason).not.toBe('registry'); }); test('keyword fallback catches destructive summaries', () => { const cases = [ 'Delete this directory and all its contents?', 'Run rm -rf /tmp/scratch — proceed?', 'Force-push main?', 'git reset --hard origin/main — ok?', 'DROP TABLE users — confirm?', 'kubectl delete namespace prod', 'terraform destroy the staging cluster', 'rotate the API key', 'breaking change to the public API — ship anyway?', ]; for (const summary of cases) { const result = classifyQuestion({ summary }); expect(result.oneWay).toBe(true); expect(result.reason).toBe('keyword'); expect(result.matched).toBeDefined(); } }); test('skill-category fallback fires for cso:approval and land-and-deploy:approval', () => { expect(isOneWayDoor({ skill: 'cso', category: 'approval' })).toBe(true); expect(isOneWayDoor({ skill: 'land-and-deploy', category: 'approval' })).toBe(true); }); test('benign questions default to two-way', () => { const benign = [ 'Want to update the changelog voice?', 'Which mode should plan review use?', 'Open the essay in your browser?', ]; for (const summary of benign) { const result = classifyQuestion({ summary }); expect(result.oneWay).toBe(false); expect(result.reason).toBe('default-two-way'); } }); test('keyword patterns are non-empty', () => { expect(DESTRUCTIVE_PATTERN_LIST.length).toBeGreaterThan(15); }); test('skill-category set covers security + deploy', () => { expect(ONE_WAY_SKILL_CATEGORY_SET.has('cso:approval')).toBe(true); expect(ONE_WAY_SKILL_CATEGORY_SET.has('land-and-deploy:approval')).toBe(true); }); }); // ----------------------------------------------------------------------- // Preamble injection — the QUESTION_TUNING section must appear for tier >=2 // ----------------------------------------------------------------------- describe('preamble — QUESTION_TUNING injection', () => { test('tier 2+ skills include the Question Tuning section', async () => { const { generatePreamble } = await import('../scripts/resolvers/preamble'); const ctx = { skillName: 'test-skill', tmplPath: 'test.tmpl', host: 'claude' as const, paths: { skillRoot: '~/.claude/skills/gstack', localSkillRoot: '.claude/skills/gstack', binDir: '~/.claude/skills/gstack/bin', browseDir: '~/.claude/skills/gstack/browse/dist', designDir: '~/.claude/skills/gstack/design/dist', }, preambleTier: 2, }; const out = generatePreamble(ctx); expect(out).toContain('QUESTION_TUNING: $_QUESTION_TUNING'); expect(out).toContain('## Question Tuning'); expect(out).toContain('gstack-question-preference --check'); expect(out).toContain('gstack-question-log'); expect(out).toContain('profile-poisoning defense'); expect(out).toContain('inline-user'); }); test('tier 1 skills do NOT include Question Tuning section', async () => { const { generatePreamble } = await import('../scripts/resolvers/preamble'); const ctx = { skillName: 'test-skill', tmplPath: 'test.tmpl', host: 'claude' as const, paths: { skillRoot: '~/.claude/skills/gstack', localSkillRoot: '.claude/skills/gstack', binDir: '~/.claude/skills/gstack/bin', browseDir: '~/.claude/skills/gstack/browse/dist', designDir: '~/.claude/skills/gstack/design/dist', }, preambleTier: 1, }; const out = generatePreamble(ctx); // QUESTION_TUNING config echo still fires (it's in the bash block which all tiers get), // but the prose section should NOT be present for tier 1. expect(out).not.toContain('## Question Tuning'); }); test('codex host produces different paths', async () => { const { generateQuestionTuning } = await import('../scripts/resolvers/question-tuning'); const codexCtx = { skillName: 'test', tmplPath: 'x', host: 'codex' as const, paths: { skillRoot: '$GSTACK_ROOT', localSkillRoot: '.agents/skills/gstack', binDir: '$GSTACK_BIN', browseDir: '$GSTACK_BROWSE', designDir: '$GSTACK_DESIGN', }, }; const out = generateQuestionTuning(codexCtx); expect(out).toContain('$GSTACK_BIN/gstack-question-preference'); expect(out).toContain('$GSTACK_BIN/gstack-question-log'); }); }); // ----------------------------------------------------------------------- // End-to-end: log → preference → derive pipeline // // Exercises the real binaries (not mocks) to make sure the schema contract // between them actually holds. // ----------------------------------------------------------------------- describe('end-to-end pipeline (binaries working together)', () => { test('log many expand choices → derive pushes scope_appetite up', () => { const tmpHome = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-')); try { const env = { ...process.env, GSTACK_HOME: tmpHome }; const { spawnSync } = require('child_process'); const logBin = path.join(ROOT, 'bin', 'gstack-question-log'); const devBin = path.join(ROOT, 'bin', 'gstack-developer-profile'); for (let i = 0; i < 5; i++) { const r = spawnSync( logBin, [ JSON.stringify({ skill: 'plan-ceo-review', question_id: 'plan-ceo-review-mode', question_summary: 'mode?', user_choice: 'expand', session_id: `s${i}`, ts: `2026-04-0${i + 1}T10:00:00Z`, }), ], { env, cwd: ROOT, encoding: 'utf-8' }, ); expect(r.status).toBe(0); } const derive = spawnSync(devBin, ['--derive'], { env, cwd: ROOT, encoding: 'utf-8' }); expect(derive.status).toBe(0); const profileOut = spawnSync(devBin, ['--profile'], { env, cwd: ROOT, encoding: 'utf-8' }); const p = JSON.parse(profileOut.stdout); expect(p.inferred.sample_size).toBe(5); expect(p.inferred.values.scope_appetite).toBeGreaterThan(0.5); } finally { fs.rmSync(tmpHome, { recursive: true, force: true }); } }); test('preference blocks tune: write from inline-tool-output in full pipeline', () => { const tmpHome = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-')); try { const env = { ...process.env, GSTACK_HOME: tmpHome }; const { spawnSync } = require('child_process'); const prefBin = path.join(ROOT, 'bin', 'gstack-question-preference'); const r = spawnSync( prefBin, [ '--write', JSON.stringify({ question_id: 'fake-id', preference: 'never-ask', source: 'inline-tool-output' }), ], { env, cwd: ROOT, encoding: 'utf-8' }, ); expect(r.status).toBe(2); expect(r.stderr).toContain('poisoning'); // Verify no preference was written const read = spawnSync(prefBin, ['--read'], { env, cwd: ROOT, encoding: 'utf-8' }); const prefs = JSON.parse(read.stdout); expect(prefs['fake-id']).toBeUndefined(); } finally { fs.rmSync(tmpHome, { recursive: true, force: true }); } }); test('migration preserves sessions, builder-profile shim still works', () => { const tmpHome = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-')); try { const env = { ...process.env, GSTACK_HOME: tmpHome }; const { spawnSync } = require('child_process'); const devBin = path.join(ROOT, 'bin', 'gstack-developer-profile'); const shimBin = path.join(ROOT, 'bin', 'gstack-builder-profile'); // Seed a legacy file fs.writeFileSync( path.join(tmpHome, 'builder-profile.jsonl'), [ { date: '2026-01-01', mode: 'builder', project_slug: 'x', signals: ['taste'] }, { date: '2026-02-01', mode: 'startup', project_slug: 'x', signals: ['named_users'] }, { date: '2026-03-01', mode: 'builder', project_slug: 'y', signals: ['agency'] }, ] .map((e) => JSON.stringify(e)) .join('\n') + '\n', ); // Migrate const m = spawnSync(devBin, ['--migrate'], { env, cwd: ROOT, encoding: 'utf-8' }); expect(m.status).toBe(0); // Legacy shim should still return the same KEY: VALUE shape const shimOut = spawnSync(shimBin, [], { env, cwd: ROOT, encoding: 'utf-8' }); expect(shimOut.status).toBe(0); expect(shimOut.stdout).toContain('SESSION_COUNT: 3'); expect(shimOut.stdout).toContain('TIER: welcome_back'); expect(shimOut.stdout).toContain('CROSS_PROJECT: true'); } finally { fs.rmSync(tmpHome, { recursive: true, force: true }); } }); }); function findAllTemplates(): string[] { const results: string[] = []; function walk(dir: string) { let entries: fs.Dirent[]; try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } for (const entry of entries) { const full = path.join(dir, entry.name); if (entry.isDirectory()) { // Skip node_modules and dotfiles if (entry.name === 'node_modules' || entry.name.startsWith('.')) continue; walk(full); } else if (entry.isFile() && entry.name === 'SKILL.md.tmpl') { results.push(full); } } } walk(ROOT); return results; }