test: end-to-end pipeline + preamble injection coverage

Added 6 tests to test/plan-tune.test.ts:

Preamble injection (3 tests):
- tier 2+ includes Question Tuning section with preference check, log,
  and user-origin gate language ('profile-poisoning defense', 'inline-user')
- tier 1 does NOT include the prose section (QUESTION_TUNING bash echo
  still fires since it's in the bash block all tiers share)
- codex host swaps binDir references to $GSTACK_BIN

End-to-end pipeline (3 tests) — real binaries working together, not mocks:
- Log 5 expand choices → --derive → profile shows scope_appetite > 0.5
  (full log → registry lookup → signal map → normalization round-trip)
- --write with source: inline-tool-output is rejected; --read confirms no
  preference was persisted (the profile-poisoning defense actually works end-to-end)
- Migrate a 3-session legacy file; confirm legacy gstack-builder-profile
  shim still returns SESSION_COUNT: 3, TIER: welcome_back, CROSS_PROJECT: true

test/plan-tune.test.ts now has 47 tests total.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-17 06:45:09 +08:00
parent b7c6150b6f
commit 0427c957f2
+179
View File
@@ -454,6 +454,185 @@ describe('one-way-doors classifier', () => {
});
});
// -----------------------------------------------------------------------
// Preamble injection — the QUESTION_TUNING section must appear for tier >=2
// -----------------------------------------------------------------------
describe('preamble — QUESTION_TUNING injection', () => {
  /**
   * Builds the claude-host resolver context shared by the tier tests.
   * Only the preamble tier varies between cases.
   */
  const claudeContextForTier = (tier: number) => ({
    skillName: 'test-skill',
    tmplPath: 'test.tmpl',
    host: 'claude' as const,
    paths: {
      skillRoot: '~/.claude/skills/gstack',
      localSkillRoot: '.claude/skills/gstack',
      binDir: '~/.claude/skills/gstack/bin',
      browseDir: '~/.claude/skills/gstack/browse/dist',
      designDir: '~/.claude/skills/gstack/design/dist',
    },
    preambleTier: tier,
  });

  test('tier 2+ skills include the Question Tuning section', async () => {
    const preambleModule = await import('../scripts/resolvers/preamble');
    const rendered = preambleModule.generatePreamble(claudeContextForTier(2));

    // Every fragment below must be present in a tier-2 preamble: the config
    // echo, the prose heading, both helper binaries, and the user-origin gate wording.
    const requiredFragments = [
      'QUESTION_TUNING: $_QUESTION_TUNING',
      '## Question Tuning',
      'gstack-question-preference --check',
      'gstack-question-log',
      'profile-poisoning defense',
      'inline-user',
    ];
    for (const fragment of requiredFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  test('tier 1 skills do NOT include Question Tuning section', async () => {
    const preambleModule = await import('../scripts/resolvers/preamble');
    const rendered = preambleModule.generatePreamble(claudeContextForTier(1));

    // Only the prose heading is checked here. The QUESTION_TUNING config echo
    // lives in the bash block that every tier receives, so it is not asserted absent.
    expect(rendered).not.toContain('## Question Tuning');
  });

  test('codex host produces different paths', async () => {
    const tuningModule = await import('../scripts/resolvers/question-tuning');

    // Codex contexts use environment-variable placeholders instead of
    // home-relative paths; note this context carries no preambleTier.
    const codexContext = {
      skillName: 'test',
      tmplPath: 'x',
      host: 'codex' as const,
      paths: {
        skillRoot: '$GSTACK_ROOT',
        localSkillRoot: '.agents/skills/gstack',
        binDir: '$GSTACK_BIN',
        browseDir: '$GSTACK_BROWSE',
        designDir: '$GSTACK_DESIGN',
      },
    };
    const rendered = tuningModule.generateQuestionTuning(codexContext);

    // Both helper binaries must be referenced through the codex binDir.
    for (const binName of ['gstack-question-preference', 'gstack-question-log']) {
      expect(rendered).toContain(`$GSTACK_BIN/${binName}`);
    }
  });
});
// -----------------------------------------------------------------------
// End-to-end: log → preference → derive pipeline
//
// Exercises the real binaries (not mocks) to make sure the schema contract
// between them actually holds.
// -----------------------------------------------------------------------
describe('end-to-end pipeline (binaries working together)', () => {
  /** The fields of a finished child process that these tests inspect. */
  type RunResult = { status: number | null; stdout: string; stderr: string };

  /** Spawns `<ROOT>/bin/<binName>` with the sandbox environment and returns its result. */
  type Runner = (binName: string, args: string[]) => RunResult;

  /**
   * Runs `body` against a throwaway GSTACK_HOME directory.
   *
   * A fresh temp directory is created, exposed to child processes through the
   * GSTACK_HOME environment variable, and removed afterwards even when the
   * body throws (for example on a failed expectation).
   */
  const inSandbox = (body: (home: string, run: Runner) => void): void => {
    const home = fs.mkdtempSync(path.join(require('os').tmpdir(), 'gstack-e2e-'));
    try {
      const env = { ...process.env, GSTACK_HOME: home };
      const { spawnSync } = require('child_process');
      const run: Runner = (binName, args) =>
        spawnSync(path.join(ROOT, 'bin', binName), args, { env, cwd: ROOT, encoding: 'utf-8' });
      body(home, run);
    } finally {
      fs.rmSync(home, { recursive: true, force: true });
    }
  };

  test('log many expand choices → derive pushes scope_appetite up', () => {
    inSandbox((_home, run) => {
      // Record five 'expand' answers, one per session, dated on consecutive days.
      for (const day of [1, 2, 3, 4, 5]) {
        const event = {
          skill: 'plan-ceo-review',
          question_id: 'plan-ceo-review-mode',
          question_summary: 'mode?',
          user_choice: 'expand',
          session_id: `s${day - 1}`,
          ts: `2026-04-0${day}T10:00:00Z`,
        };
        const logged = run('gstack-question-log', [JSON.stringify(event)]);
        expect(logged.status).toBe(0);
      }

      const derived = run('gstack-developer-profile', ['--derive']);
      expect(derived.status).toBe(0);

      // The derived profile should count all five events and lean toward expansion.
      const shown = run('gstack-developer-profile', ['--profile']);
      const profile = JSON.parse(shown.stdout);
      expect(profile.inferred.sample_size).toBe(5);
      expect(profile.inferred.values.scope_appetite).toBeGreaterThan(0.5);
    });
  });

  test('preference blocks tune: write from inline-tool-output in full pipeline', () => {
    inSandbox((_home, run) => {
      const payload = JSON.stringify({
        question_id: 'fake-id',
        preference: 'never-ask',
        source: 'inline-tool-output',
      });

      // A write whose source is tool output must be refused with exit code 2.
      const rejected = run('gstack-question-preference', ['--write', payload]);
      expect(rejected.status).toBe(2);
      expect(rejected.stderr).toContain('poisoning');

      // Reading back confirms the refused write left nothing behind.
      const readBack = run('gstack-question-preference', ['--read']);
      const stored = JSON.parse(readBack.stdout);
      expect(stored['fake-id']).toBeUndefined();
    });
  });

  test('migration preserves sessions, builder-profile shim still works', () => {
    inSandbox((home, run) => {
      // Seed a legacy JSONL file: three sessions spread over two project slugs.
      const legacySessions = [
        { date: '2026-01-01', mode: 'builder', project_slug: 'x', signals: ['taste'] },
        { date: '2026-02-01', mode: 'startup', project_slug: 'x', signals: ['named_users'] },
        { date: '2026-03-01', mode: 'builder', project_slug: 'y', signals: ['agency'] },
      ];
      const jsonLines = legacySessions.map((session) => JSON.stringify(session));
      fs.writeFileSync(path.join(home, 'builder-profile.jsonl'), `${jsonLines.join('\n')}\n`);

      const migrated = run('gstack-developer-profile', ['--migrate']);
      expect(migrated.status).toBe(0);

      // After migration the legacy shim must still print its KEY: VALUE lines.
      const shim = run('gstack-builder-profile', []);
      expect(shim.status).toBe(0);
      for (const line of ['SESSION_COUNT: 3', 'TIER: welcome_back', 'CROSS_PROJECT: true']) {
        expect(shim.stdout).toContain(line);
      }
    });
  });
});
function findAllTemplates(): string[] {
const results: string[] = [];
function walk(dir: string) {