mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-17 23:30:09 +02:00
Merge origin/main into garrytan/upgrade-gstack-gbrain-v1
Catch up to main (1.52.0.0, plan-tune cathedral + browse memory work). Branch bumps to 1.52.1.0 — PATCH above main. Conflict resolutions: - VERSION / package.json → 1.52.1.0 (monotonic above main's 1.52.0.0) - CHANGELOG.md → reconstructed reverse-chronological: this branch's brain-aware-planning + save-results entry renumbered 1.51.1.0 → 1.52.1.0 on top, then main's 1.52.0.0 / 1.51.0.0 / 1.49.0.0 entries, then shared history. No entries dropped or orphaned. - setup → kept both endgame blocks (my gbrain detection + main's plan-tune cathedral hook install); they're independent. - SKILL.md files → regenerated from merged templates via bun run gen:skill-docs (canonical no-gbrain), not accepted from either merge side, per CLAUDE.md. Idempotent (0 STALE on re-run). - bin/gstack-config → both sides' additions present (main's GSTACK_STATE_ROOT support + this branch's gbrain-refresh subcommand). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
/**
|
||||
* Declared annotation helper (plan-tune cathedral T7) — unit tests.
|
||||
*
|
||||
* Verifies the helper's contract:
|
||||
* - Returns null for unknown signal_key.
|
||||
* - Returns null when the profile doesn't exist or declared is unset.
|
||||
* - Returns a phrase when declared >= 0.7 (strong high band).
|
||||
* - Returns a phrase when declared <= 0.3 (strong low band).
|
||||
* - Returns null when declared is in the middle band (0.3 < x < 0.7).
|
||||
* - primaryDimensionFor picks the dimension with largest |delta| total.
|
||||
* - Maps kebab signal_key to underscore Dimension correctly (D2 fix).
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
import { getDeclaredAnnotation, primaryDimensionFor } from '../scripts/declared-annotation';
|
||||
|
||||
let prevStateRoot: string | undefined;
|
||||
let prevHome: string | undefined;
|
||||
let stateRoot: string;
|
||||
|
||||
// Per-test sandbox. Each test gets a fresh temp directory as its gstack state
// root; the two gstack env vars are snapshotted first so they can be put back
// exactly as they were.
beforeEach(() => {
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-annot-'));
  prevStateRoot = process.env.GSTACK_STATE_ROOT;
  prevHome = process.env.GSTACK_HOME;
  process.env.GSTACK_STATE_ROOT = stateRoot;
  // NOTE(review): presumably cleared so it cannot compete with
  // GSTACK_STATE_ROOT when the helper resolves the profile path — confirm.
  delete process.env.GSTACK_HOME;
});

// Restores both env vars to their pre-test state and removes the temp dir.
// A variable that was unset before the test is unset again, so a value set
// while a test runs cannot leak into later tests or other test files.
afterEach(() => {
  if (prevStateRoot !== undefined) process.env.GSTACK_STATE_ROOT = prevStateRoot;
  else delete process.env.GSTACK_STATE_ROOT;
  if (prevHome !== undefined) process.env.GSTACK_HOME = prevHome;
  else delete process.env.GSTACK_HOME;
  fs.rmSync(stateRoot, { recursive: true, force: true });
});
|
||||
|
||||
/**
 * Writes a developer profile fixture into the temp state root.
 *
 * @param declared Map of dimension name to declared value; stored under the
 *   top-level `declared` key of developer-profile.json (pretty-printed).
 */
function writeProfile(declared: Record<string, number>): void {
  const serialized = JSON.stringify({ declared }, null, 2);
  fs.writeFileSync(path.join(stateRoot, 'developer-profile.json'), serialized);
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// primaryDimensionFor — kebab→underscore mapping
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('primaryDimensionFor', () => {
  // Each row: [test title, signal_key passed in, expected return value].
  const cases = [
    ['scope-appetite → scope_appetite (largest |delta| total)', 'scope-appetite', 'scope_appetite'],
    ['architecture-care → architecture_care (top dim by |delta|)', 'architecture-care', 'architecture_care'],
    ['unknown signal_key → null', 'totally-not-a-key', null],
    ['empty/garbage input → null', '', null],
  ] as const;

  for (const [title, signalKey, expected] of cases) {
    test(title, () => {
      expect(primaryDimensionFor(signalKey)).toBe(expected);
    });
  }
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// getDeclaredAnnotation
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('getDeclaredAnnotation', () => {
  // No profile file is written, so the state root is an empty temp dir.
  test('returns null when no profile exists', () => {
    expect(getDeclaredAnnotation('scope-appetite')).toBe(null);
  });

  test('returns null when declared unset for the dimension', () => {
    writeProfile({});
    expect(getDeclaredAnnotation('scope-appetite')).toBe(null);
  });

  test('returns null when declared is in middle band (0.5)', () => {
    writeProfile({ scope_appetite: 0.5 });
    expect(getDeclaredAnnotation('scope-appetite')).toBe(null);
  });

  test('returns high-band phrase when declared >= 0.7', () => {
    writeProfile({ scope_appetite: 0.85 });
    const annot = getDeclaredAnnotation('scope-appetite');
    expect(annot).toBeTruthy();
    expect(annot).toContain('boil the ocean');
  });

  // Boundary: 0.7 itself must already count as the high band.
  test('returns high-band phrase at the exact 0.7 threshold', () => {
    writeProfile({ scope_appetite: 0.7 });
    expect(getDeclaredAnnotation('scope-appetite')).toContain('boil the ocean');
  });

  test('returns low-band phrase when declared <= 0.3', () => {
    writeProfile({ scope_appetite: 0.2 });
    const annot = getDeclaredAnnotation('scope-appetite');
    expect(annot).toBeTruthy();
    expect(annot).toContain('ship-small-fast');
  });

  // Boundary: 0.3 itself must already count as the low band.
  test('returns low-band phrase at the exact 0.3 threshold', () => {
    writeProfile({ scope_appetite: 0.3 });
    expect(getDeclaredAnnotation('scope-appetite')).toContain('ship-small-fast');
  });

  test('returns null for unknown signal_key even when profile populated', () => {
    writeProfile({ scope_appetite: 0.85 });
    expect(getDeclaredAnnotation('totally-not-a-key')).toBe(null);
  });

  test('all 5 dimensions render distinct high-band phrases', () => {
    // All five dimensions are declared strongly high, but only the two
    // signal_keys exercised elsewhere in this file are queried below.
    // TODO: add the remaining three once their signal_keys and phrases are pinned.
    writeProfile({
      scope_appetite: 0.9,
      risk_tolerance: 0.9,
      detail_preference: 0.9,
      autonomy: 0.9,
      architecture_care: 0.9,
    });
    const scope = getDeclaredAnnotation('scope-appetite');
    const arch = getDeclaredAnnotation('architecture-care');
    expect(scope).toContain('boil the ocean');
    expect(arch).toContain('design-right');
    // The title promises distinct phrases, so assert it explicitly rather
    // than relying on the two substring checks alone.
    expect(scope).not.toBe(arch);
  });
});
|
||||
@@ -0,0 +1,300 @@
|
||||
/**
|
||||
* gstack-distill-apply — Layer 8 proposal application (plan-tune cathedral T11).
|
||||
*
|
||||
* Verifies the three apply paths:
|
||||
* - memory-nugget → appended to ~/.gstack/free-text-memory.json (local
|
||||
* source-of-truth; gbrain is mirror when configured).
|
||||
* - preference → routed through gstack-question-preference with
|
||||
* source=plan-tune (user-origin gate cleared).
|
||||
* - declared-nudge → atomic update to developer-profile.json declared dim,
|
||||
* small=0.05, medium=0.10, large=0.15, clamped to [0,1].
|
||||
* Plus:
|
||||
* - --list shows proposals with kind, confidence, rationale, quotes.
|
||||
* - Applied proposals get applied_at + gbrain_published flag.
|
||||
* - Bad --proposal index errors with non-zero exit.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin', 'gstack-distill-apply');
|
||||
|
||||
let stateRoot: string;
|
||||
let fixtureCwd: string;
|
||||
let cwdSlug: string;
|
||||
let proposalFile: string;
|
||||
|
||||
// Builds an isolated state root per test:
//   <stateRoot>/apply-fixture/            — cwd the CLI is spawned in
//   <stateRoot>/projects/apply-fixture/   — where the proposals file lives
beforeEach(() => {
  cwdSlug = 'apply-fixture';
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-apply-'));
  fixtureCwd = path.join(stateRoot, cwdSlug);
  const projectDir = path.join(stateRoot, 'projects', cwdSlug);
  for (const dir of [fixtureCwd, projectDir]) {
    fs.mkdirSync(dir, { recursive: true });
  }
  proposalFile = path.join(projectDir, 'distillation-proposals.json');
});

// Removes the whole temp tree; `force` keeps teardown quiet if it is already gone.
afterEach(() => {
  fs.rmSync(stateRoot, { recursive: true, force: true });
});
|
||||
|
||||
/**
 * Writes the distillation-proposals fixture for the current test.
 *
 * @param proposals Proposal records, stored verbatim under `proposals`,
 *   alongside a `generated_at` timestamp and a fixed `source_event_count` of 1.
 */
function writeProposals(proposals: Array<Record<string, unknown>>): void {
  const doc = {
    generated_at: new Date().toISOString(),
    source_event_count: 1,
    proposals,
  };
  fs.writeFileSync(proposalFile, JSON.stringify(doc, null, 2));
}
|
||||
|
||||
/**
 * Spawns the gstack-distill-apply binary synchronously inside the fixture cwd.
 *
 * The child inherits the current environment, with GSTACK_STATE_ROOT pointed
 * at the temp state root, GSTACK_QUESTION_LOG_NO_DERIVE set to '1', and
 * GSTACK_HOME removed.
 *
 * @param args CLI arguments passed to the binary.
 * @returns Captured stdout/stderr ('' when absent) and the exit status
 *   (-1 when the child reported none).
 */
function run(args: string[]): { stdout: string; stderr: string; status: number } {
  const childEnv: Record<string, string> = {};
  Object.entries(process.env).forEach(([key, value]) => {
    if (value !== undefined) childEnv[key] = value;
  });
  Object.assign(childEnv, {
    GSTACK_STATE_ROOT: stateRoot,
    GSTACK_QUESTION_LOG_NO_DERIVE: '1',
  });
  delete childEnv.GSTACK_HOME;

  const { stdout, stderr, status } = spawnSync(BIN, args, {
    env: childEnv,
    encoding: 'utf-8',
    cwd: fixtureCwd,
  });
  return { stdout: stdout ?? '', stderr: stderr ?? '', status: status ?? -1 };
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// --list
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('--list', () => {
  test('handles missing proposals file', () => {
    const result = run(['--list']);
    expect(result.status).toBe(0);
    expect(result.stdout).toMatch(/NO_PROPOSALS/);
  });

  test('renders all 3 kinds + source quotes', () => {
    // One proposal of each kind the CLI knows how to list.
    const preference = {
      kind: 'preference',
      confidence: 0.9,
      question_id: 'ship-changelog-voice-polish',
      preference: 'never-ask',
      rationale: 'user repeatedly skipped this',
      source_quotes: ['skip the polish for typo PRs'],
    };
    const nudge = {
      kind: 'declared-nudge',
      confidence: 0.85,
      dimension: 'scope_appetite',
      direction: 'up',
      magnitude: 'medium',
    };
    const nugget = {
      kind: 'memory-nugget',
      confidence: 0.95,
      nugget: 'User prefers complete edge cases',
      applies_to_signal_keys: ['scope-appetite'],
    };
    writeProposals([preference, nudge, nugget]);

    const result = run(['--list']);
    expect(result.status).toBe(0);
    const expectedFragments = [
      'preference',
      'declared-nudge',
      'memory-nugget',
      'skip the polish for typo PRs',
      'scope-appetite',
    ];
    for (const fragment of expectedFragments) {
      expect(result.stdout).toContain(fragment);
    }
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// memory-nugget application
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('memory-nugget apply', () => {
  test('appends to ~/.gstack/free-text-memory.json with full metadata', () => {
    writeProposals([
      {
        kind: 'memory-nugget',
        confidence: 0.9,
        nugget: 'User prefers verbose explanations with tradeoffs',
        applies_to_signal_keys: ['detail-preference'],
        source_quotes: ['always explain the tradeoffs'],
      },
    ]);
    const r = run(['--proposal', '0', '--gbrain-published', 'true']);
    expect(r.status).toBe(0);
    expect(r.stdout).toContain('APPLIED: memory-nugget');

    // GSTACK_STATE_ROOT redirects the memory file into the temp state root.
    const memPath = path.join(stateRoot, 'free-text-memory.json');
    const mem = JSON.parse(fs.readFileSync(memPath, 'utf-8'));
    expect(mem.nuggets.length).toBe(1);
    expect(mem.nuggets[0].nugget).toContain('verbose explanations');
    expect(mem.nuggets[0].applies_to_signal_keys).toEqual(['detail-preference']);
    expect(mem.nuggets[0].gbrain_published).toBe(true);
    expect(mem.nuggets[0].source_quotes).toEqual(['always explain the tradeoffs']);
  });

  test('appends without clobbering existing nuggets', () => {
    const memPath = path.join(stateRoot, 'free-text-memory.json');
    fs.writeFileSync(
      memPath,
      JSON.stringify({ nuggets: [{ nugget: 'pre-existing', applies_to_signal_keys: [] }] }),
    );
    writeProposals([
      {
        kind: 'memory-nugget',
        confidence: 0.9,
        nugget: 'new nugget',
        applies_to_signal_keys: [],
      },
    ]);
    // Check the exit status first, so a failed apply is reported as such
    // rather than as a confusing nugget-count mismatch below.
    const r = run(['--proposal', '0']);
    expect(r.status).toBe(0);

    const mem = JSON.parse(fs.readFileSync(memPath, 'utf-8'));
    expect(mem.nuggets.length).toBe(2);
    expect(mem.nuggets[0].nugget).toBe('pre-existing');
    expect(mem.nuggets[1].nugget).toBe('new nugget');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// preference application
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('preference apply', () => {
  test('routes through gstack-question-preference with source=plan-tune', () => {
    const questionId = 'ship-changelog-voice-polish';
    writeProposals([
      {
        kind: 'preference',
        confidence: 0.9,
        question_id: questionId,
        preference: 'never-ask',
        source_quotes: ['skip the polish for typo PRs'],
      },
    ]);

    const result = run(['--proposal', '0']);
    expect(result.status).toBe(0);
    expect(result.stdout).toContain('APPLIED: preference');

    // The preference is expected in the per-project preferences file.
    const prefsFile = path.join(stateRoot, 'projects', cwdSlug, 'question-preferences.json');
    const stored = JSON.parse(fs.readFileSync(prefsFile, 'utf-8'));
    expect(stored[questionId]).toBe('never-ask');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// declared-nudge application
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('declared-nudge apply', () => {
  const profilePath = (): string => path.join(stateRoot, 'developer-profile.json');

  /**
   * Applies a single scope_appetite nudge proposal and returns the resulting
   * declared value read back from developer-profile.json.
   *
   * @param direction 'up' or 'down'.
   * @param magnitude 'small' | 'medium' | 'large'.
   * @param initial Pre-existing declared value; when omitted, no profile file
   *   is written before the CLI runs.
   * @throws If the CLI exits non-zero, with its stderr in the message, so a
   *   failed apply is not misreported as a missing file or a stale value.
   */
  function applyNudge(
    direction: 'up' | 'down',
    magnitude: 'small' | 'medium' | 'large',
    initial?: number,
  ): number {
    if (initial !== undefined) {
      fs.writeFileSync(profilePath(), JSON.stringify({ declared: { scope_appetite: initial } }));
    }
    writeProposals([
      {
        kind: 'declared-nudge',
        confidence: 0.9,
        dimension: 'scope_appetite',
        direction,
        magnitude,
      },
    ]);
    const r = run(['--proposal', '0']);
    if (r.status !== 0) {
      throw new Error(`gstack-distill-apply exited with status ${r.status}: ${r.stderr}`);
    }
    const profile = JSON.parse(fs.readFileSync(profilePath(), 'utf-8'));
    return profile.declared.scope_appetite;
  }

  test('medium up nudge on unset dim → 0.5 + 0.10 = 0.6', () => {
    expect(applyNudge('up', 'medium')).toBe(0.6);
  });

  test('small down nudge on existing value', () => {
    expect(applyNudge('down', 'small', 0.8)).toBe(0.75);
  });

  // 0.95 + 0.15 would overshoot; the result must be capped at 1.
  test('clamps to [0, 1]', () => {
    expect(applyNudge('up', 'large', 0.95)).toBe(1);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Proposal marked applied
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('proposal marked applied', () => {
  test('applied_at + gbrain_published written back to proposals.json', () => {
    writeProposals([
      {
        kind: 'memory-nugget',
        confidence: 0.9,
        nugget: 'something',
        applies_to_signal_keys: [],
      },
    ]);
    // Check the exit status first, so a failed apply is not misreported as a
    // missing applied_at field.
    const r = run(['--proposal', '0', '--gbrain-published', 'true']);
    expect(r.status).toBe(0);

    // The CLI is expected to write the bookkeeping fields back in place.
    const p = JSON.parse(fs.readFileSync(proposalFile, 'utf-8'));
    expect(p.proposals[0].applied_at).toBeTruthy();
    expect(p.proposals[0].gbrain_published).toBe(true);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Error paths
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('error paths', () => {
  // Both tests start from the same single-proposal file, so index 0 is the
  // only valid --proposal value.
  const seedSingleProposal = (): void => {
    writeProposals([
      { kind: 'memory-nugget', confidence: 0.9, nugget: 'x', applies_to_signal_keys: [] },
    ]);
  };

  test('bad --proposal index exits non-zero', () => {
    seedSingleProposal();
    const outcome = run(['--proposal', '99']);
    expect(outcome.status).not.toBe(0);
    expect(outcome.stderr).toContain('invalid --proposal');
  });

  test('missing --proposal exits non-zero', () => {
    seedSingleProposal();
    const outcome = run([]);
    expect(outcome.status).not.toBe(0);
    expect(outcome.stderr).toContain('--proposal');
  });
});
|
||||
@@ -0,0 +1,205 @@
|
||||
/**
|
||||
* gstack-distill-free-text — Layer 8 dream cycle (plan-tune cathedral T10).
|
||||
*
|
||||
 * Covers the SDK-free paths: status, dry-run, no rate cap, no-event handling, missing API key, background spawn.
|
||||
* The real API call path is exercised by the E2E test in T16; here we
|
||||
* verify the bin's deterministic plumbing without burning tokens.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin', 'gstack-distill-free-text');
|
||||
const QLOG_BIN = path.join(ROOT, 'bin', 'gstack-question-log');
|
||||
|
||||
let stateRoot: string;
|
||||
let fixtureCwd: string;
|
||||
let cwdSlug: string;
|
||||
|
||||
// Fresh temp state root per test, with a `distill-fixture` subdirectory that
// serves as the cwd for every spawned binary.
beforeEach(() => {
  cwdSlug = 'distill-fixture';
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-dist-'));
  fixtureCwd = path.join(stateRoot, cwdSlug);
  fs.mkdirSync(fixtureCwd, { recursive: true });
});

// Removes the whole temp tree; `force` keeps teardown quiet if it is already gone.
afterEach(() => {
  fs.rmSync(stateRoot, { recursive: true, force: true });
});
|
||||
|
||||
/**
 * Builds the environment for spawned binaries: a copy of the current
 * environment with GSTACK_STATE_ROOT pointed at the temp state root,
 * GSTACK_QUESTION_LOG_NO_DERIVE set to '1', and GSTACK_HOME removed.
 *
 * @param extra Overrides merged last, so they win over every default above.
 * @returns A new plain object; process.env itself is not modified.
 */
function makeEnv(extra: Record<string, string> = {}): Record<string, string> {
  const base: Record<string, string> = {};
  Object.entries(process.env).forEach(([name, value]) => {
    if (value !== undefined) base[name] = value;
  });
  base.GSTACK_STATE_ROOT = stateRoot;
  base.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
  delete base.GSTACK_HOME;
  return Object.assign({}, base, extra);
}
|
||||
|
||||
/**
 * Spawns the gstack-distill-free-text binary synchronously in the fixture cwd
 * with the environment from makeEnv().
 *
 * @param args CLI arguments passed to the binary.
 * @returns Captured stdout/stderr ('' when absent) and the exit status
 *   (-1 when the child reported none).
 */
function run(args: string[]): { stdout: string; stderr: string; status: number } {
  const { stdout, stderr, status } = spawnSync(BIN, args, {
    env: makeEnv(),
    encoding: 'utf-8',
    cwd: fixtureCwd,
  });
  return { stdout: stdout ?? '', stderr: stderr ?? '', status: status ?? -1 };
}
|
||||
|
||||
/**
 * Records one `auq-other` free-text event by invoking the gstack-question-log
 * binary with a JSON payload, in the fixture cwd and sandboxed environment.
 *
 * @param text Free-text answer stored in the event's `free_text` field.
 * @throws If the binary cannot be spawned or exits non-zero. Without this, a
 *   broken fixture write would only show up later as an unrelated assertion
 *   failure (e.g. NO_LOG instead of the expected output).
 */
function writeAuqOtherEvent(text: string): void {
  const event = {
    skill: 'plan-tune',
    question_id: 'hook-distill00',
    question_summary: 'Test question for distillation',
    options_count: 2,
    user_choice: 'Other',
    source: 'auq-other',
    free_text: text,
    session_id: 's-distill',
    // Random suffix gives each event its own tool_use_id.
    // NOTE(review): presumably needed so the log's dedup keeps both events — confirm.
    tool_use_id: 'tu-distill-' + Math.random().toString(36).slice(2, 8),
  };
  const res = spawnSync(QLOG_BIN, [JSON.stringify(event)], {
    env: makeEnv(),
    cwd: fixtureCwd,
    encoding: 'utf-8',
  });
  if (res.error) throw res.error;
  if (res.status !== 0) {
    throw new Error(`gstack-question-log exited with status ${res.status}: ${res.stderr ?? ''}`);
  }
}
|
||||
|
||||
/**
 * Appends one JSON line to distill-cost.jsonl in the temp state root.
 *
 * @param slug Project slug recorded in the entry.
 * @param dateIso Timestamp string stored as `ts`.
 */
function writeCostLogEntry(slug: string, dateIso: string): void {
  const entry = { ts: dateIso, slug, proposals_count: 0, cost_usd_est: 0 };
  fs.mkdirSync(stateRoot, { recursive: true });
  fs.appendFileSync(path.join(stateRoot, 'distill-cost.jsonl'), `${JSON.stringify(entry)}\n`);
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Status subcommand
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('--status', () => {
  test('reports "no runs yet" when cost log absent', () => {
    const result = run(['--status']);
    expect(result.status).toBe(0);
    expect(result.stdout).toMatch(/no distill runs/);
  });

  test('reports counts when prior runs exist', () => {
    // Two entries, each stamped with the current time.
    for (let n = 0; n < 2; n += 1) {
      writeCostLogEntry(cwdSlug, new Date().toISOString());
    }
    const result = run(['--status']);
    expect(result.status).toBe(0);
    expect(result.stdout).toContain('RUNS: 2');
    expect(result.stdout).toMatch(/TODAY: 2 run\(s\)/);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// No rate cap (v1.52.0.0 cap audit) — the natural rate of free-text events
|
||||
// is rare enough that count-based capping was theatrical. Cost log alone
|
||||
// provides auditability via --status.
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('no rate cap (audit removed)', () => {
  test('never exits with RATE_CAPPED, even with many runs today', () => {
    // Ten cost-log entries sharing one current timestamp.
    const stamp = new Date().toISOString();
    Array.from({ length: 10 }).forEach(() => writeCostLogEntry(cwdSlug, stamp));

    const result = run([]);
    expect(result.status).toBe(0);
    expect(result.stdout).not.toMatch(/RATE_CAPPED/);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// No events / no log
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('no-event paths', () => {
  test('exits NO_LOG when question-log.jsonl missing', () => {
    const result = run([]);
    expect(result.status).toBe(0);
    expect(result.stdout).toMatch(/NO_LOG/);
  });

  test('exits NO_FREE_TEXT when log has events but none are auq-other', () => {
    // Log one ordinary hook-sourced answer (no free text) via the log binary.
    const hookEvent = {
      skill: 'plan-tune',
      question_id: 'hook-other00',
      question_summary: 'Q',
      options_count: 2,
      user_choice: 'A',
      source: 'hook',
      session_id: 's',
      tool_use_id: 'tu-x',
    };
    spawnSync(QLOG_BIN, [JSON.stringify(hookEvent)], {
      env: makeEnv(),
      cwd: fixtureCwd,
      encoding: 'utf-8',
    });

    const result = run([]);
    expect(result.status).toBe(0);
    expect(result.stdout).toMatch(/NO_FREE_TEXT/);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Dry-run
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('--dry-run', () => {
  test('emits the distill prompt + events JSON without calling API', () => {
    const answers = [
      'I always include tests with new features',
      'Skip design review for typo fixes',
    ];
    for (const answer of answers) writeAuqOtherEvent(answer);

    // With no ANTHROPIC_API_KEY in the child env, a zero exit shows the
    // dry-run path never needed the API.
    const childEnv = makeEnv();
    delete childEnv.ANTHROPIC_API_KEY;
    const outcome = spawnSync(BIN, ['--dry-run'], {
      env: childEnv,
      cwd: fixtureCwd,
      encoding: 'utf-8',
    });
    expect(outcome.status).toBe(0);
    expect(outcome.stdout).toContain('DISTILL PROMPT');
    expect(outcome.stdout).toContain('always include tests');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// API key required
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('API auth', () => {
  test('fails loud when ANTHROPIC_API_KEY missing on sync run', () => {
    // A free-text event must exist, otherwise the run stops before auth matters.
    writeAuqOtherEvent('Some free text response that needs distilling');

    const childEnv = makeEnv();
    delete childEnv.ANTHROPIC_API_KEY;
    const outcome = spawnSync(BIN, [], {
      env: childEnv,
      cwd: fixtureCwd,
      encoding: 'utf-8',
    });
    expect(outcome.status).not.toBe(0);
    for (const pattern of [/ANTHROPIC_API_KEY/, /separate billing/]) {
      expect(outcome.stderr).toMatch(pattern);
    }
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Background spawn
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('--background', () => {
  test('detaches and exits with DISTILL_SPAWNED', () => {
    // The parent should return immediately and report the spawned child's pid.
    const outcome = run(['--background']);
    expect(outcome.status).toBe(0);
    expect(outcome.stdout).toMatch(/DISTILL_SPAWNED: pid=\d+/);
  });
});
|
||||
+28
-1
@@ -650,7 +650,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
|
||||
|
||||
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `~/.claude/skills/gstack/bin/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
|
||||
|
||||
After answer, log best-effort:
|
||||
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Add `<gstack-qid:{question_id}>` to the rendered question (the leading or trailing line is fine; wrapped in HTML-style angle brackets, the marker doesn't render visibly to the user, and the hook strips it). Without the marker, the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
|
||||
|
||||
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
|
||||
|
||||
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
|
||||
```bash
|
||||
~/.claude/skills/gstack/bin/gstack-question-log '{"skill":"ship","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
|
||||
```
|
||||
@@ -3082,6 +3086,29 @@ This step is automatic — never skip it, never ask for confirmation.
|
||||
|
||||
---
|
||||
|
||||
## Step 21: Plan-tune discoverability nudge (first-successful-ship only)
|
||||
|
||||
Plan-tune cathedral T15. After a successful ship, surface /plan-tune once
|
||||
per machine. A few lines, non-blocking, marker-gated so it never re-fires.
|
||||
|
||||
```bash
|
||||
# Show the /plan-tune nudge at most once per machine, and only while
# question_tuning is still off.
_NUDGE_MARKER="$HOME/.gstack/.plan-tune-nudge-shown"
# A failed lookup is treated as "false" so the nudge can still be shown.
_QT=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning 2>/dev/null || echo "false")
if [ ! -f "$_NUDGE_MARKER" ] && [ "$_QT" = "false" ]; then
  echo ""
  echo "gstack can learn from your AskUserQuestion answers. Run /plan-tune to opt in"
  echo "— it captures which prompts you find valuable vs noisy and (with hooks installed)"
  echo "auto-decides your never-ask preferences."
  # Create the state dir first: if touch fails, the marker never lands and the
  # nudge re-fires on every ship. Failures stay non-blocking.
  mkdir -p "$(dirname "$_NUDGE_MARKER")" 2>/dev/null || true
  touch "$_NUDGE_MARKER" 2>/dev/null || true
fi
|
||||
```
|
||||
|
||||
If the marker exists, OR question_tuning is already on, the nudge is a
|
||||
no-op. The marker guarantees at-most-once per machine. To re-enable:
|
||||
`rm ~/.gstack/.plan-tune-nudge-shown` before next ship.
|
||||
|
||||
---
|
||||
|
||||
## Important Rules
|
||||
|
||||
- **Never skip tests.** If tests fail, stop.
|
||||
|
||||
+28
-1
@@ -636,7 +636,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
|
||||
|
||||
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `$GSTACK_BIN/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
|
||||
|
||||
After answer, log best-effort:
|
||||
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Add `<gstack-qid:{question_id}>` to the rendered question (the leading or trailing line is fine; wrapped in HTML-style angle brackets, the marker doesn't render visibly to the user, and the hook strips it). Without the marker, the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
|
||||
|
||||
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
|
||||
|
||||
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
|
||||
```bash
|
||||
$GSTACK_BIN/gstack-question-log '{"skill":"ship","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
|
||||
```
|
||||
@@ -2692,6 +2696,29 @@ This step is automatic — never skip it, never ask for confirmation.
|
||||
|
||||
---
|
||||
|
||||
## Step 21: Plan-tune discoverability nudge (first-successful-ship only)
|
||||
|
||||
Plan-tune cathedral T15. After a successful ship, surface /plan-tune once
|
||||
per machine. A few lines, non-blocking, marker-gated so it never re-fires.
|
||||
|
||||
```bash
|
||||
# Show the /plan-tune nudge at most once per machine, and only while
# question_tuning is still off.
_NUDGE_MARKER="$HOME/.gstack/.plan-tune-nudge-shown"
# Command path quoted so a GSTACK_ROOT containing spaces still resolves.
# A failed lookup is treated as "false" so the nudge can still be shown.
_QT=$("$GSTACK_ROOT/bin/gstack-config" get question_tuning 2>/dev/null || echo "false")
if [ ! -f "$_NUDGE_MARKER" ] && [ "$_QT" = "false" ]; then
  echo ""
  echo "gstack can learn from your AskUserQuestion answers. Run /plan-tune to opt in"
  echo "— it captures which prompts you find valuable vs noisy and (with hooks installed)"
  echo "auto-decides your never-ask preferences."
  # Create the state dir first: if touch fails, the marker never lands and the
  # nudge re-fires on every ship. Failures stay non-blocking.
  mkdir -p "$(dirname "$_NUDGE_MARKER")" 2>/dev/null || true
  touch "$_NUDGE_MARKER" 2>/dev/null || true
fi
|
||||
```
|
||||
|
||||
If the marker exists, OR question_tuning is already on, the nudge is a
|
||||
no-op. The marker guarantees at-most-once per machine. To re-enable:
|
||||
`rm ~/.gstack/.plan-tune-nudge-shown` before next ship.
|
||||
|
||||
---
|
||||
|
||||
## Important Rules
|
||||
|
||||
- **Never skip tests.** If tests fail, stop.
|
||||
|
||||
+28
-1
@@ -638,7 +638,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
|
||||
|
||||
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `$GSTACK_BIN/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
|
||||
|
||||
After answer, log best-effort:
|
||||
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question (the leading or trailing line is fine; because it is wrapped in HTML-style angle brackets the marker does not render visibly to the user, and the hook strips it). Without the marker, the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
|
||||
|
||||
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
|
||||
|
||||
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
|
||||
```bash
|
||||
$GSTACK_BIN/gstack-question-log '{"skill":"ship","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
|
||||
```
|
||||
@@ -3070,6 +3074,29 @@ This step is automatic — never skip it, never ask for confirmation.
|
||||
|
||||
---
|
||||
|
||||
## Step 21: Plan-tune discoverability nudge (first-successful-ship only)
|
||||
|
||||
Plan-tune cathedral T15. After a successful ship, surface /plan-tune once
|
||||
per machine. One short message, non-blocking, marker-gated so it never re-fires.
|
||||
|
||||
```bash
|
||||
# Show the /plan-tune nudge at most once per machine; the marker file records
# that it has already been shown.
_NUDGE_MARKER="$HOME/.gstack/.plan-tune-nudge-shown"
# A missing or failing gstack-config is treated as "question tuning is off".
_QT=$("$GSTACK_ROOT/bin/gstack-config" get question_tuning 2>/dev/null || echo "false")
if [ ! -f "$_NUDGE_MARKER" ] && [ "$_QT" = "false" ]; then
  echo ""
  echo "gstack can learn from your AskUserQuestion answers. Run /plan-tune to opt in"
  echo "— it captures which prompts you find valuable vs noisy and (with hooks installed)"
  echo "auto-decides your never-ask preferences."
  # Create the state dir first: if it is missing, touch fails and the nudge
  # would re-fire on every ship. Best-effort so the ship is never blocked.
  mkdir -p "$HOME/.gstack" 2>/dev/null || true
  touch "$_NUDGE_MARKER" 2>/dev/null || true
fi
|
||||
```
|
||||
|
||||
If the marker exists, OR question_tuning is already on, the nudge is a
|
||||
no-op. The marker guarantees at-most-once per machine. To re-enable:
|
||||
`rm ~/.gstack/.plan-tune-nudge-shown` before next ship.
|
||||
|
||||
---
|
||||
|
||||
## Important Rules
|
||||
|
||||
- **Never skip tests.** If tests fail, stop.
|
||||
|
||||
+6
-5
@@ -491,13 +491,14 @@
|
||||
},
|
||||
"plan-tune": {
|
||||
"skill": "plan-tune",
|
||||
"skillMdBytes": 51717,
|
||||
"skillMdLines": 1077,
|
||||
"estTokens": 12929,
|
||||
"tmplBytes": 15586,
|
||||
"skillMdBytes": 64017,
|
||||
"skillMdLines": 1357,
|
||||
"estTokens": 16004,
|
||||
"tmplBytes": 25196,
|
||||
"descriptionLen": 325,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
"hasPeriodicEval": false,
|
||||
"_baseline_note": "Rebased from 51717 → 64017 in plan-tune cathedral v1.52.0.0 (T13). Cathedral added Dream cycle, Recent auto-decisions, Audit unmarked, Dream cycle review/distill sections — all load-bearing for hook substrate. See CHANGELOG.md [1.52.0.0]."
|
||||
},
|
||||
"qa": {
|
||||
"skill": "qa",
|
||||
|
||||
@@ -323,10 +323,17 @@ describe('gen-skill-docs', () => {
|
||||
// Ratcheted 36500 → 39000 in the contributor wave when #1205 added the
|
||||
// \\u-escape CJK rule (rule 12 + self-check item) to the AskUserQuestion
|
||||
// preamble.
|
||||
// Ratcheted 39000 → 40000 in plan-tune cathedral T14: question-tuning
|
||||
// resolver gained the <gstack-qid:...> marker convention + the
|
||||
// (recommended) label requirement (D2 + D18 — both load-bearing for
|
||||
// hook enforcement). Adds ~700 bytes.
|
||||
// Ratcheted 40000 → 60000 in v1.52.0.0 cap audit: ~20K headroom so
|
||||
// future preamble adds don't trip the gate on each PR. Real runaway
|
||||
// (preamble doubling) still trips; normal scope growth doesn't.
|
||||
for (const skill of reviewSkills) {
|
||||
const content = fs.readFileSync(skill.path, 'utf-8');
|
||||
const preamble = extractPreambleBeforeWorkflow(content, skill.markers);
|
||||
expect(Buffer.byteLength(preamble, 'utf-8')).toBeLessThan(39_000);
|
||||
expect(Buffer.byteLength(preamble, 'utf-8')).toBeLessThan(60_000);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -0,0 +1,206 @@
|
||||
/**
|
||||
* gstack-codex-session-import — backfill question-log from Codex JSONL.
|
||||
*
|
||||
* Plan-tune cathedral T9. Verifies the structured-file parser (D5) handles
|
||||
* the two-tier recovery strategy from docs/spikes/codex-session-format.md:
|
||||
* - Marker-first: <gstack-qid:foo-bar> → source=codex-import-marker.
|
||||
* - Pattern fallback: D-numbered brief → source=codex-import-pattern,
|
||||
* hash-only question_id.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin', 'gstack-codex-session-import');
|
||||
|
||||
let stateRoot: string;
|
||||
let fixtureCwd: string;
|
||||
let cwdSlug: string;
|
||||
|
||||
beforeEach(() => {
|
||||
stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-cdximp-'));
|
||||
cwdSlug = 'codex-fixture-slug';
|
||||
fixtureCwd = path.join(stateRoot, cwdSlug);
|
||||
fs.mkdirSync(fixtureCwd, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(stateRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
/**
 * Write a rollout JSONL fixture into the per-test state root.
 *
 * The first line is a `session_meta` record carrying the session id and the
 * fixture cwd; every entry of `events` follows as its own JSON line, in order.
 *
 * @param events - records appended after the session_meta line
 * @param sessionId - id stored in the session_meta payload
 * @returns path of the fixture file that was written
 */
function writeSessionFile(events: Array<Record<string, unknown>>, sessionId = 'sess-fixture'): string {
  const sessionMeta = {
    timestamp: new Date().toISOString(),
    type: 'session_meta',
    payload: { id: sessionId, cwd: fixtureCwd },
  };
  const serialized = [sessionMeta, ...events].map((record) => JSON.stringify(record));
  const target = path.join(stateRoot, 'rollout-fixture.jsonl');
  // One JSON document per line, with a trailing newline after the last one.
  fs.writeFileSync(target, `${serialized.join('\n')}\n`);
  return target;
}
|
||||
|
||||
/**
 * Build an `event_msg` record whose payload is an `agent_message` carrying
 * `text`, timestamped with the current time in ISO-8601 form.
 */
function agentMessage(text: string): Record<string, unknown> {
  const payload = { type: 'agent_message', message: text };
  return { timestamp: new Date().toISOString(), type: 'event_msg', payload };
}
|
||||
|
||||
/**
 * Build an `event_msg` record whose payload is a `user_message` carrying
 * `text`, timestamped with the current time in ISO-8601 form.
 */
function userMessage(text: string): Record<string, unknown> {
  const payload = { type: 'user_message', message: text };
  return { timestamp: new Date().toISOString(), type: 'event_msg', payload };
}
|
||||
|
||||
/**
 * Run the import bin against one session file inside the test sandbox.
 *
 * The child inherits the current environment, except that GSTACK_STATE_ROOT is
 * pointed at the per-test temp dir, GSTACK_QUESTION_LOG_NO_DERIVE is set to '1'
 * and GSTACK_HOME is removed.
 *
 * @param sessionPath - path passed to the bin as its only argument
 * @returns captured stdout/stderr ('' when absent) and the exit status
 *          (-1 when the process reported none)
 */
function runImport(sessionPath: string): { stdout: string; stderr: string; status: number } {
  const childEnv: Record<string, string> = {};
  Object.entries(process.env).forEach(([name, value]) => {
    if (value !== undefined) childEnv[name] = value;
  });
  childEnv.GSTACK_STATE_ROOT = stateRoot;
  // NOTE(review): presumably turns off derivation side effects in the question log — confirm against the bin.
  childEnv.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
  delete childEnv.GSTACK_HOME;

  const { stdout, stderr, status } = spawnSync(BIN, [sessionPath], {
    env: childEnv,
    encoding: 'utf-8',
    cwd: ROOT,
  });
  return { stdout: stdout ?? '', stderr: stderr ?? '', status: status ?? -1 };
}
|
||||
|
||||
/**
 * Read back the question log for the fixture project from the state root.
 *
 * @returns one parsed object per non-empty line of
 *          `projects/<cwdSlug>/question-log.jsonl`, or [] when the file is absent
 */
function readImportedEvents(): Array<Record<string, unknown>> {
  const logFile = path.join(stateRoot, 'projects', cwdSlug, 'question-log.jsonl');
  if (!fs.existsSync(logFile)) {
    return [];
  }
  const parsed: Array<Record<string, unknown>> = [];
  for (const line of fs.readFileSync(logFile, 'utf-8').trim().split('\n')) {
    // Skip empty lines (e.g. an empty file yields a single '' entry).
    if (line) parsed.push(JSON.parse(line));
  }
  return parsed;
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Marker-first path
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('marker-first import (source=codex-import-marker)', () => {
  test('extracts marker id from agent_message and pairs with next user_message', () => {
    // Agent brief carrying a <gstack-qid:...> marker, answered by the user with "A".
    const brief = agentMessage(
      'D1 — Test\nELI10: blah\n<gstack-qid:ship-test-failure-triage> Tests failed.\nRecommendation: A\nA) Fix now (recommended)\nB) Investigate\nC) Ack and ship',
    );
    const result = runImport(writeSessionFile([brief, userMessage('A')]));

    expect(result.status).toBe(0);
    expect(result.stdout).toContain('IMPORTED: 1');

    const imported = readImportedEvents();
    expect(imported.length).toBe(1);
    const first = imported[0];
    expect(first.source).toBe('codex-import-marker');
    expect(first.question_id).toBe('ship-test-failure-triage');
    // Both the chosen and the recommended option are expected to contain the option text.
    expect(first.user_choice).toContain('Fix now');
    expect(first.recommended).toContain('Fix now');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Pattern fallback
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('pattern fallback (source=codex-import-pattern)', () => {
  test('D-numbered brief without marker → hash id + source=codex-import-pattern', () => {
    // No <gstack-qid:...> marker here: only the "D2 — ..." brief shape is available.
    const brief = agentMessage('D2 — Unmarked brief\nA) Foo (recommended)\nB) Bar');
    const result = runImport(writeSessionFile([brief, userMessage('A')]));

    expect(result.status).toBe(0);

    const imported = readImportedEvents();
    expect(imported.length).toBe(1);
    const first = imported[0];
    expect(first.source).toBe('codex-import-pattern');
    expect((first.question_id as string).startsWith('hook-')).toBe(true);
    expect(first.user_choice).toContain('Foo');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Edge cases
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('edge cases', () => {
  test('no AUQ-shaped events → 0 imported, exit 0', () => {
    const sessionPath = writeSessionFile([
      agentMessage('Just doing some work, nothing to ask.'),
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    expect(r.stdout).toContain('IMPORTED: 0');
  });

  test('agent_message with marker but no following user_message → skipped', () => {
    const sessionPath = writeSessionFile([
      agentMessage('<gstack-qid:test-q> D1 — Q\nA) Foo\nB) Bar'),
      // no user_message
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    expect(readImportedEvents().length).toBe(0);
  });

  test('two D-briefs in sequence → both imported', () => {
    const sessionPath = writeSessionFile([
      agentMessage('D1 — First <gstack-qid:q1>\nA) Foo (recommended)\nB) Bar'),
      userMessage('A'),
      agentMessage('D2 — Second <gstack-qid:q2>\nA) Baz (recommended)\nB) Qux'),
      userMessage('B'),
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    const events = readImportedEvents();
    expect(events.length).toBe(2);
    // Events are expected in the same order as the briefs appeared in the session.
    expect(events[0].question_id).toBe('q1');
    expect(events[1].question_id).toBe('q2');
  });

  // The reply is a letter followed by free prose (not a number), so the title
  // says that; the exit status is also checked, as in the sibling tests.
  test('letter response with trailing prose resolves to that option', () => {
    const sessionPath = writeSessionFile([
      agentMessage('D1 — Test <gstack-qid:numeric-q>\nA) Foo\nB) Bar\nC) Baz'),
      userMessage('B - I think B is right'),
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    const events = readImportedEvents();
    expect(events.length).toBe(1);
    expect(events[0].user_choice).toContain('Bar');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Default-mode (latest session) behavior
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('default mode (no args → latest)', () => {
  test('returns NO_SESSIONS when sessions dir is empty', () => {
    // A dedicated empty directory stands in for the sessions root.
    const emptyDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-empty-cdx-'));
    try {
      const childEnv: Record<string, string> = {};
      Object.entries(process.env).forEach(([name, value]) => {
        if (value !== undefined) childEnv[name] = value;
      });
      childEnv.GSTACK_STATE_ROOT = stateRoot;
      childEnv.CODEX_SESSIONS_ROOT = emptyDir;

      // No positional argument: the bin runs in its default mode.
      const { status, stdout } = spawnSync(BIN, [], { env: childEnv, encoding: 'utf-8', cwd: ROOT });
      expect(status).toBe(0);
      expect(stdout).toMatch(/NO_SESSIONS/);
    } finally {
      fs.rmSync(emptyDir, { recursive: true, force: true });
    }
  });
});
|
||||
@@ -0,0 +1,302 @@
|
||||
/**
|
||||
* gstack-settings-hook schema-aware surface (T3 plan-tune cathedral).
|
||||
*
|
||||
* Verifies add-event / remove-source / diff-event / rollback / list-sources
|
||||
* for PreToolUse + PostToolUse registration. Existing team-mode.test.ts
|
||||
* covers the legacy `add <cmd>` / `remove <cmd>` shape; this file only
|
||||
* covers the new surface introduced for the plan-tune cathedral.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const SETTINGS_HOOK = path.join(ROOT, 'bin', 'gstack-settings-hook');
|
||||
|
||||
let tmpDir: string;
|
||||
let settingsFile: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-shsa-'));
|
||||
settingsFile = path.join(tmpDir, 'settings.json');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
/**
 * Invoke the settings-hook bin through the shell with GSTACK_SETTINGS_FILE
 * pointing at the per-test settings file.
 *
 * Every word (bin path and arguments) is single-quoted for the shell. Embedded
 * single quotes are escaped with the `'\''` sequence so an argument or a repo
 * path containing `'` can neither break the command line nor inject into it.
 *
 * @param args - arguments passed to the bin, in order
 * @returns stdout, stderr and exit code. On success stderr is always '' because
 *          execSync only returns stdout; on failure the values come from the
 *          thrown error (exit code defaults to 1 when the error carries none).
 */
function run(args: string[]): { stdout: string; stderr: string; exitCode: number } {
  const shellQuote = (word: string): string => "'" + word.replace(/'/g, "'\\''") + "'";
  const commandLine = [SETTINGS_HOOK, ...args].map(shellQuote).join(' ');
  try {
    const stdout = execSync(commandLine, {
      env: { ...process.env, GSTACK_SETTINGS_FILE: settingsFile },
      encoding: 'utf-8',
      timeout: 10000,
    });
    return { stdout, stderr: '', exitCode: 0 };
  } catch (e: any) {
    // execSync throws on non-zero exit or timeout; the error carries the captured output.
    return { stdout: e.stdout || '', stderr: e.stderr || '', exitCode: e.status ?? 1 };
  }
}
|
||||
|
||||
/** Parse and return the current contents of the per-test settings file (throws if it is missing or not valid JSON). */
function settings(): any {
  const raw = fs.readFileSync(settingsFile, 'utf-8');
  return JSON.parse(raw);
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// add-event
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('add-event', () => {
  const SOURCE_TAG = 'plan-tune-cathedral';

  /** Run `add-event` for `event` with the flags in the order event, matcher, command, source. */
  const addEvent = (event: string, command: string, matcher = 'AskUserQuestion') =>
    run(['add-event', '--event', event, '--matcher', matcher, '--command', command, '--source', SOURCE_TAG]);

  test('registers a PreToolUse hook with matcher + source tag', () => {
    const result = run([
      'add-event',
      '--event', 'PreToolUse',
      '--matcher', '(AskUserQuestion|mcp__.*__AskUserQuestion)',
      '--command', '/abs/path/to/question-preference-hook',
      '--source', 'plan-tune-cathedral',
      '--timeout', '5',
    ]);
    expect(result.exitCode).toBe(0);

    const registered = settings().hooks.PreToolUse;
    expect(registered).toHaveLength(1);
    const entry = registered[0];
    expect(entry.matcher).toBe('(AskUserQuestion|mcp__.*__AskUserQuestion)');
    expect(entry._gstack_source).toBe('plan-tune-cathedral');
    expect(entry.hooks[0].command).toBe('/abs/path/to/question-preference-hook');
    // The timeout is passed as the string '5' and expected back as a number.
    expect(entry.hooks[0].timeout).toBe(5);
  });

  test('registers a PostToolUse hook independently of PreToolUse', () => {
    addEvent('PreToolUse', '/pre');
    const result = addEvent('PostToolUse', '/post');
    expect(result.exitCode).toBe(0);

    const { hooks } = settings();
    expect(hooks.PreToolUse).toHaveLength(1);
    expect(hooks.PostToolUse).toHaveLength(1);
    expect(hooks.PreToolUse[0].hooks[0].command).toBe('/pre');
    expect(hooks.PostToolUse[0].hooks[0].command).toBe('/post');
  });

  test('idempotent: re-adding same (event, matcher, source) updates in place', () => {
    addEvent('PreToolUse', '/v1');
    addEvent('PreToolUse', '/v2');

    const { hooks } = settings();
    // Still a single entry, now carrying the second command.
    expect(hooks.PreToolUse).toHaveLength(1);
    expect(hooks.PreToolUse[0].hooks[0].command).toBe('/v2');
  });

  test('preserves unrelated existing hooks', () => {
    const preexisting = {
      hooks: {
        PreToolUse: [
          {
            matcher: 'Bash',
            hooks: [{ type: 'command', command: '/user-own-hook' }],
          },
        ],
      },
    };
    fs.writeFileSync(settingsFile, JSON.stringify(preexisting, null, 2));

    addEvent('PreToolUse', '/gstack-hook');

    const { hooks } = settings();
    expect(hooks.PreToolUse).toHaveLength(2);
    // User's Bash hook still present
    const bash = hooks.PreToolUse.find((e: any) => e.matcher === 'Bash');
    expect(bash).toBeDefined();
    expect(bash.hooks[0].command).toBe('/user-own-hook');
  });

  test('writes a timestamped backup before mutating', () => {
    fs.writeFileSync(settingsFile, JSON.stringify({ existing: 'value' }));
    addEvent('PreToolUse', '/gstack');

    const backups = fs.readdirSync(tmpDir).filter((name) => name.startsWith('settings.json.bak.'));
    expect(backups.length).toBeGreaterThanOrEqual(1);

    // The backup must hold the pre-mutation content: original key present, no hooks yet.
    const backupContent = JSON.parse(fs.readFileSync(path.join(tmpDir, backups[0]), 'utf-8'));
    expect(backupContent.existing).toBe('value');
    expect(backupContent.hooks).toBeUndefined();
  });

  test('rejects invalid --event', () => {
    const result = run([
      'add-event',
      '--event', 'NotAnEvent',
      '--command', '/x',
      '--source', 'plan-tune',
    ]);
    expect(result.exitCode).not.toBe(0);
    expect(result.stderr).toMatch(/invalid --event/);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// remove-source
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('remove-source', () => {
  test('removes all entries with a given source tag, leaves others alone', () => {
    // Seed one untagged user hook that must survive the removal.
    const seeded = {
      hooks: {
        PreToolUse: [
          { matcher: 'Bash', hooks: [{ command: '/keep-me' }] },
        ],
      },
    };
    fs.writeFileSync(settingsFile, JSON.stringify(seeded));

    // Register one tagged hook on each event.
    const tagged: Array<[string, string]> = [
      ['PreToolUse', '/a'],
      ['PostToolUse', '/b'],
    ];
    for (const [event, command] of tagged) {
      run([
        'add-event',
        '--event', event,
        '--matcher', 'AskUserQuestion',
        '--command', command,
        '--source', 'plan-tune-cathedral',
      ]);
    }

    const result = run(['remove-source', '--source', 'plan-tune-cathedral']);
    expect(result.exitCode).toBe(0);
    expect(result.stdout).toMatch(/removed 2 hook/);

    const { hooks } = settings();
    expect(hooks.PostToolUse).toBeUndefined();
    expect(hooks.PreToolUse).toHaveLength(1);
    expect(hooks.PreToolUse[0].hooks[0].command).toBe('/keep-me');
  });

  test('safely no-ops when settings.json missing', () => {
    const result = run(['remove-source', '--source', 'plan-tune-cathedral']);
    expect(result.exitCode).toBe(0);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// diff-event
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('diff-event', () => {
  test('emits BEFORE + AFTER without mutating settings.json', () => {
    const original = { existing: 'value' };
    fs.writeFileSync(settingsFile, JSON.stringify(original));

    const { exitCode, stdout } = run([
      'diff-event',
      '--event', 'PreToolUse',
      '--matcher', 'AskUserQuestion',
      '--command', '/gstack',
      '--source', 'plan-tune-cathedral',
    ]);

    expect(exitCode).toBe(0);
    for (const expected of ['--- BEFORE', '--- AFTER', 'plan-tune-cathedral']) {
      expect(stdout).toContain(expected);
    }
    // Settings file unchanged.
    expect(JSON.parse(fs.readFileSync(settingsFile, 'utf-8'))).toEqual({ existing: 'value' });
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// rollback
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('rollback', () => {
  test('restores latest backup', () => {
    fs.writeFileSync(settingsFile, JSON.stringify({ original: true }));
    run([
      'add-event',
      '--event', 'PreToolUse',
      '--matcher', 'AskUserQuestion',
      '--command', '/gstack',
      '--source', 'plan-tune-cathedral',
    ]);
    // Sanity check: the add-event call really did mutate the file.
    expect(settings().hooks).toBeDefined();

    const { exitCode } = run(['rollback']);
    expect(exitCode).toBe(0);

    const restored = settings();
    expect(restored.original).toBe(true);
    expect(restored.hooks).toBeUndefined();
  });

  test('fails clearly when no backup pointer exists', () => {
    const { exitCode, stderr } = run(['rollback']);
    expect(exitCode).not.toBe(0);
    expect(stderr).toMatch(/no backup pointer/);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// list-sources
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('list-sources', () => {
  test('shows source-tagged hooks across all events', () => {
    // Register one tagged hook per event before listing.
    const registrations: Array<[string, string]> = [
      ['PreToolUse', '/pre'],
      ['PostToolUse', '/post'],
    ];
    for (const [event, command] of registrations) {
      run([
        'add-event',
        '--event', event,
        '--matcher', 'AskUserQuestion',
        '--command', command,
        '--source', 'plan-tune-cathedral',
      ]);
    }

    const { exitCode, stdout } = run(['list-sources']);
    expect(exitCode).toBe(0);
    expect(stdout).toContain('PreToolUse');
    expect(stdout).toContain('PostToolUse');
    expect(stdout).toContain('plan-tune-cathedral');
  });

  test('empty when no settings file', () => {
    const { exitCode, stdout } = run(['list-sources']);
    expect(exitCode).toBe(0);
    expect(stdout).toMatch(/no settings file/);
  });
});
|
||||
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* GSTACK_STATE_ROOT override — verifies the 3 plan-tune bins honor
|
||||
* GSTACK_STATE_ROOT as a higher-priority override over GSTACK_HOME.
|
||||
*
|
||||
* Surfaced by plan-tune cathedral D16 (Codex outside voice): tests can't
|
||||
* isolate from real ~/.gstack today because the bins ignore STATE_ROOT.
|
||||
* Without this override, the cathedral's E2E + integration tests would
|
||||
* silently pollute the user's real profile.
|
||||
*
|
||||
* Contract:
|
||||
* - GSTACK_STATE_ROOT set → bins write under STATE_ROOT (HOME ignored).
|
||||
* - Only GSTACK_HOME set → bins write under HOME (existing behavior).
|
||||
* - Neither set → falls back to $HOME/.gstack (existing behavior).
|
||||
* - Both set → STATE_ROOT wins.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN_LOG = path.join(ROOT, 'bin', 'gstack-question-log');
|
||||
const BIN_PREF = path.join(ROOT, 'bin', 'gstack-question-preference');
|
||||
const BIN_DEV = path.join(ROOT, 'bin', 'gstack-developer-profile');
|
||||
|
||||
let stateRoot: string;
|
||||
let homeRoot: string;
|
||||
|
||||
beforeEach(() => {
|
||||
stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-state-'));
|
||||
homeRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-home-'));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(stateRoot, { recursive: true, force: true });
|
||||
fs.rmSync(homeRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
/**
 * Spawn `bin` with `args` from the repo root under a controlled environment.
 *
 * The child environment is the current process environment overlaid with `env`,
 * with every undefined value dropped. GSTACK_STATE_ROOT and GSTACK_HOME are
 * additionally removed whenever `env` does not define them, so a value
 * inherited from the caller's shell cannot leak into the override matrix.
 *
 * @param bin - path of the executable to run
 * @param args - arguments passed to it
 * @param env - per-call overrides; `undefined` means "make sure it is unset"
 * @returns captured stdout/stderr ('' when absent) and the exit status
 *          (-1 when the process reported none)
 */
function runBin(
  bin: string,
  args: string[],
  env: Record<string, string | undefined>,
): { stdout: string; stderr: string; status: number } {
  const merged: Record<string, string | undefined> = { ...process.env, ...env };
  const childEnv: Record<string, string> = {};
  for (const name of Object.keys(merged)) {
    const value = merged[name];
    if (value !== undefined) childEnv[name] = value;
  }
  // Strip these from process.env so the override matrix is clean.
  for (const name of ['GSTACK_STATE_ROOT', 'GSTACK_HOME']) {
    if (env[name] === undefined) delete childEnv[name];
  }

  const { stdout, stderr, status } = spawnSync(bin, args, {
    env: childEnv,
    encoding: 'utf-8',
    cwd: ROOT,
  });
  return { stdout: stdout ?? '', stderr: stderr ?? '', status: status ?? -1 };
}
|
||||
|
||||
const SAMPLE_LOG = {
|
||||
skill: 'plan-tune',
|
||||
question_id: 'state-root-test',
|
||||
question_summary: 'Test STATE_ROOT honoring',
|
||||
category: 'clarification',
|
||||
door_type: 'two-way',
|
||||
options_count: 2,
|
||||
user_choice: 'a',
|
||||
recommended: 'a',
|
||||
session_id: 'state-root-test-session',
|
||||
};
|
||||
|
||||
describe('gstack-question-log honors GSTACK_STATE_ROOT', () => {
  /** Log SAMPLE_LOG through the bin under the given STATE_ROOT / HOME combination. */
  const logSample = (env: Record<string, string | undefined>) =>
    runBin(BIN_LOG, [JSON.stringify(SAMPLE_LOG)], env);

  /** Assert `root` holds no project entries: either no projects/ dir, or an empty one. */
  const expectNoProjects = (root: string): void => {
    const projectsDir = path.join(root, 'projects');
    if (fs.existsSync(projectsDir)) {
      expect(fs.readdirSync(projectsDir).length).toBe(0);
    }
  };

  test('STATE_ROOT set, HOME unset → writes under STATE_ROOT', () => {
    const result = logSample({ GSTACK_STATE_ROOT: stateRoot, GSTACK_HOME: undefined });
    expect(result.status).toBe(0);

    // The slug is derived from cwd; just check at least one log file exists.
    const slugs = fs.readdirSync(path.join(stateRoot, 'projects'));
    expect(slugs.length).toBeGreaterThanOrEqual(1);
    const logPath = path.join(stateRoot, 'projects', slugs[0], 'question-log.jsonl');
    expect(fs.existsSync(logPath)).toBe(true);
  });

  test('STATE_ROOT wins over HOME when both set', () => {
    const result = logSample({ GSTACK_STATE_ROOT: stateRoot, GSTACK_HOME: homeRoot });
    expect(result.status).toBe(0);

    // STATE_ROOT must have the file.
    expect(fs.readdirSync(path.join(stateRoot, 'projects')).length).toBeGreaterThanOrEqual(1);
    // HOME must NOT have a projects dir (or it must be empty).
    expectNoProjects(homeRoot);
  });

  test('only HOME set → preserves existing behavior (writes under HOME)', () => {
    const result = logSample({ GSTACK_STATE_ROOT: undefined, GSTACK_HOME: homeRoot });
    expect(result.status).toBe(0);

    expect(fs.readdirSync(path.join(homeRoot, 'projects')).length).toBeGreaterThanOrEqual(1);
    // STATE_ROOT must NOT have anything.
    expectNoProjects(stateRoot);
  });
});
|
||||
|
||||
describe('gstack-question-preference honors GSTACK_STATE_ROOT', () => {
  test('STATE_ROOT set → preferences file lives under STATE_ROOT', () => {
    const preference = {
      question_id: 'state-root-pref-test',
      preference: 'never-ask',
      source: 'plan-tune',
    };
    const result = runBin(BIN_PREF, ['--write', JSON.stringify(preference)], {
      GSTACK_STATE_ROOT: stateRoot,
      GSTACK_HOME: undefined,
    });
    expect(result.status).toBe(0);

    // Only that some project dir exists is checked; the first one is inspected.
    const slugs = fs.readdirSync(path.join(stateRoot, 'projects'));
    expect(slugs.length).toBeGreaterThanOrEqual(1);

    const prefPath = path.join(stateRoot, 'projects', slugs[0], 'question-preferences.json');
    expect(fs.existsSync(prefPath)).toBe(true);

    // The file is expected to map question_id directly to the preference string.
    const stored = JSON.parse(fs.readFileSync(prefPath, 'utf-8'));
    expect(stored['state-root-pref-test']).toBe('never-ask');
  });
});
|
||||
|
||||
describe('gstack-developer-profile honors GSTACK_STATE_ROOT', () => {
  test('STATE_ROOT set → profile file lives under STATE_ROOT, not HOME', () => {
    // --read creates a stub profile if missing.
    const { status } = runBin(BIN_DEV, ['--read'], {
      GSTACK_STATE_ROOT: stateRoot,
      GSTACK_HOME: homeRoot,
    });
    expect(status).toBe(0);

    // With both roots set, the profile must appear only under STATE_ROOT.
    const profileIn = (root: string): boolean => fs.existsSync(path.join(root, 'developer-profile.json'));
    expect(profileIn(stateRoot)).toBe(true);
    expect(profileIn(homeRoot)).toBe(false);
  });
});
|
||||
@@ -191,6 +191,13 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
// /plan-tune (v1 observational)
|
||||
'plan-tune-inspect': ['plan-tune/**', 'scripts/question-registry.ts', 'scripts/psychographic-signals.ts', 'scripts/one-way-doors.ts', 'bin/gstack-question-log', 'bin/gstack-question-preference', 'bin/gstack-developer-profile'],
|
||||
|
||||
// /plan-tune cathedral (T16 — 5 E2E scenarios, all gate per D12)
|
||||
'plan-tune-hook-capture': ['hosts/claude/hooks/**', 'bin/gstack-question-log', 'bin/gstack-developer-profile', 'plan-tune/**'],
|
||||
'plan-tune-enforcement': ['hosts/claude/hooks/**', 'bin/gstack-question-preference', 'scripts/question-registry.ts'],
|
||||
'plan-tune-annotation': ['hosts/claude/hooks/**', 'scripts/declared-annotation.ts', 'scripts/psychographic-signals.ts', 'scripts/question-registry.ts'],
|
||||
'plan-tune-codex-import': ['bin/gstack-codex-session-import', 'bin/gstack-question-log', 'docs/spikes/codex-session-format.md'],
|
||||
'plan-tune-dream-cycle': ['bin/gstack-distill-free-text', 'bin/gstack-distill-apply', 'hosts/claude/hooks/**', 'plan-tune/**'],
|
||||
|
||||
// Codex offering verification
|
||||
'codex-offered-office-hours': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
|
||||
'codex-offered-ceo-review': ['plan-ceo-review/**', 'scripts/gen-skill-docs.ts'],
|
||||
@@ -564,6 +571,13 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
// /plan-tune — gate (core v1 DX promise: plain-English intent routing)
|
||||
'plan-tune-inspect': 'gate',
|
||||
|
||||
// /plan-tune cathedral (T16 per D12 — all gate)
|
||||
'plan-tune-hook-capture': 'gate',
|
||||
'plan-tune-enforcement': 'gate',
|
||||
'plan-tune-annotation': 'gate',
|
||||
'plan-tune-codex-import': 'gate',
|
||||
'plan-tune-dream-cycle': 'gate',
|
||||
|
||||
// Codex offering verification
|
||||
'codex-offered-office-hours': 'gate',
|
||||
'codex-offered-ceo-review': 'gate',
|
||||
|
||||
@@ -0,0 +1,220 @@
|
||||
/**
|
||||
* Layer 8 memory cache + injection (plan-tune cathedral T12).
|
||||
*
|
||||
* Verifies the PreToolUse hook reads ~/.gstack/free-text-memory.json and
|
||||
* surfaces matching nuggets via additionalContext on the hook response.
|
||||
* Cache: per-session memory-cache.json populated on first read, sub-1ms
|
||||
* thereafter (D13 perf).
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-preference-hook');
|
||||
|
||||
let stateRoot: string;
|
||||
let fixtureCwd: string;
|
||||
let cwdSlug: string;
|
||||
|
||||
// Every test gets a throwaway state root with a fixture working directory
// nested inside it; the whole tree is deleted again once the test finishes.
beforeEach(() => {
  const tmpPrefix = path.join(os.tmpdir(), 'gstack-memcache-');
  stateRoot = fs.mkdtempSync(tmpPrefix);
  cwdSlug = 'memcache-fixture';
  fixtureCwd = path.join(stateRoot, cwdSlug);
  fs.mkdirSync(fixtureCwd, { recursive: true });
});

afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
|
||||
|
||||
/**
 * Writes `free-text-memory.json` into the per-test state root, wrapping the
 * supplied nuggets as `{ nuggets }`.
 */
function writeMemory(nuggets: Array<{ nugget: string; applies_to_signal_keys: string[]; applied_at?: string }>) {
  const memoryFile = path.join(stateRoot, 'free-text-memory.json');
  const serialized = JSON.stringify({ nuggets });
  fs.writeFileSync(memoryFile, serialized);
}
|
||||
|
||||
/**
 * Spawns the question-preference hook with `stdin` (plus the fixture cwd)
 * serialized as JSON on standard input.
 *
 * The child inherits the current environment with GSTACK_STATE_ROOT pointed
 * at the per-test state root, GSTACK_QUESTION_LOG_NO_DERIVE set to '1', and
 * GSTACK_HOME removed.
 *
 * @returns the captured streams, the exit status (-1 when unavailable), and
 *          the stdout parsed as JSON (`null` when stdout is not valid JSON).
 */
function runHook(stdin: object): { stdout: string; stderr: string; status: number; parsed: any } {
  const inherited = Object.entries(process.env).filter(
    (entry): entry is [string, string] => entry[1] !== undefined,
  );
  const env: Record<string, string> = {
    ...Object.fromEntries(inherited),
    GSTACK_STATE_ROOT: stateRoot,
    GSTACK_QUESTION_LOG_NO_DERIVE: '1',
  };
  delete env.GSTACK_HOME;

  const payload = JSON.stringify({ ...stdin, cwd: fixtureCwd });
  const child = spawnSync(HOOK, [], { env, input: payload, encoding: 'utf-8', cwd: ROOT });

  // Empty stdout is treated as an empty object; unparseable stdout yields null.
  const parsed = ((): any => {
    try {
      return JSON.parse(child.stdout || '{}');
    } catch {
      return null;
    }
  })();

  return {
    stdout: child.stdout ?? '',
    stderr: child.stderr ?? '',
    status: child.status ?? -1,
    parsed,
  };
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Injection behavior
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('memory injection', () => {
  /**
   * Builds the AskUserQuestion payload shared by every test in this suite:
   * a single question carrying the ship-todos-reorganize marker. Only the
   * session and tool-use ids differ between tests.
   */
  const reorganizeQuestion = (sessionId: string, toolUseId: string) => ({
    session_id: sessionId,
    tool_name: 'AskUserQuestion',
    tool_use_id: toolUseId,
    tool_input: {
      questions: [
        {
          question: '<gstack-qid:ship-todos-reorganize> Reorganize?',
          options: ['A) Accept (recommended)', 'B) Skip'],
        },
      ],
    },
  });

  test('injects matching nugget into additionalContext on defer', () => {
    writeMemory([
      {
        nugget: 'User prefers verbose explanations with tradeoffs',
        applies_to_signal_keys: ['detail-preference'],
        applied_at: '2026-05-01T00:00:00Z',
      },
    ]);
    // ship-todos-reorganize has signal_key 'detail-preference' per registry.
    const r = runHook(reorganizeQuestion('s1', 'tu-1'));
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(r.parsed?.hookSpecificOutput?.additionalContext).toContain('verbose explanations');
  });

  test('does not inject when no nugget matches the signal_key', () => {
    writeMemory([
      {
        nugget: 'Unrelated nugget',
        applies_to_signal_keys: ['totally-different-key'],
      },
    ]);
    const r = runHook(reorganizeQuestion('s2', 'tu-2'));
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(r.parsed?.hookSpecificOutput?.additionalContext).toBeUndefined();
  });

  test('caps to 3 most-recent nuggets when many match', () => {
    writeMemory([
      { nugget: 'old-1', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-01-01T00:00:00Z' },
      { nugget: 'old-2', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-02-01T00:00:00Z' },
      { nugget: 'old-3', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-03-01T00:00:00Z' },
      { nugget: 'old-4', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-04-01T00:00:00Z' },
      { nugget: 'newest', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-05-01T00:00:00Z' },
    ]);
    const r = runHook(reorganizeQuestion('s3', 'tu-3'));
    const ctx = r.parsed?.hookSpecificOutput?.additionalContext || '';
    // The three newest entries must be surfaced...
    expect(ctx).toContain('newest');
    expect(ctx).toContain('old-4');
    expect(ctx).toContain('old-3');
    // ...and both older ones dropped. Checking old-2 as well as old-1 is what
    // pins the cap at exactly three; old-1 alone would also pass with a cap of 4.
    expect(ctx).not.toContain('old-2');
    expect(ctx).not.toContain('old-1');
  });

  test('memory injection works alongside deny enforcement', () => {
    writeMemory([
      {
        nugget: 'User prefers reorganizing for clarity',
        applies_to_signal_keys: ['detail-preference'],
        applied_at: '2026-05-01T00:00:00Z',
      },
    ]);
    // Set a never-ask preference for the question so enforcement applies.
    const projectDir = path.join(stateRoot, 'projects', cwdSlug);
    fs.mkdirSync(projectDir, { recursive: true });
    fs.writeFileSync(
      path.join(projectDir, 'question-preferences.json'),
      JSON.stringify({ 'ship-todos-reorganize': 'never-ask' }),
    );
    const r = runHook(reorganizeQuestion('s4', 'tu-4'));
    // ship-todos-reorganize is two-way per registry — enforcement should fire.
    // Memory context is not asserted on the deny path; only the decision and
    // the auto-decide wording of the reason are checked here.
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('deny');
    expect(r.parsed?.hookSpecificOutput?.permissionDecisionReason).toContain('plan-tune auto-decide');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Cache behavior
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('per-session memory cache', () => {
  // Both tests ask the same marker-tagged question; only the ids vary.
  const markedQuestion = { question: '<gstack-qid:ship-todos-reorganize> Q', options: ['A', 'B'] };

  test('first read writes cache; subsequent reads use cache', () => {
    writeMemory([{ nugget: 'cached nugget', applies_to_signal_keys: ['detail-preference'] }]);

    runHook({
      session_id: 'cache-test',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-c1',
      tool_input: { questions: [markedQuestion] },
    });

    // After one hook run the per-session cache file must exist and mirror
    // the canonical memory file's single nugget.
    const cachePath = path.join(stateRoot, 'sessions', 'cache-test', 'memory-cache.json');
    expect(fs.existsSync(cachePath)).toBe(true);
    const { nuggets } = JSON.parse(fs.readFileSync(cachePath, 'utf-8'));
    expect(nuggets).toHaveLength(1);
    expect(nuggets[0].nugget).toBe('cached nugget');
  });

  test('cache miss when canonical file empty/missing → empty nuggets', () => {
    // No writeMemory() call: the canonical memory file does not exist.
    const outcome = runHook({
      session_id: 'empty',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-e',
      tool_input: { questions: [markedQuestion] },
    });
    const hookOutput = outcome.parsed?.hookSpecificOutput;
    expect(hookOutput?.permissionDecision).toBe('defer');
    expect(hookOutput?.additionalContext).toBeUndefined();
  });
});
|
||||
@@ -0,0 +1,212 @@
|
||||
/**
|
||||
* Plan-tune v1.49 gate regression tests.
|
||||
*
|
||||
* v1.49 shipped two prose-driven implicit gates inside plan-tune/SKILL.md.tmpl
|
||||
* Step 0:
|
||||
* - Consent gate: question_tuning=false AND ~/.gstack/.question-tuning-prompted missing
|
||||
* → run "Consent + opt-in".
|
||||
* - Setup gate: question_tuning=true AND declared empty AND
|
||||
* ~/.gstack/.declared-setup-prompted missing → run "5-Q setup".
|
||||
*
|
||||
* The gates are evaluated by the agent reading the template's bash + prose.
|
||||
* The cathedral (T5/T6) replaces enforcement with hooks, but it must NOT break
|
||||
* these v1.49 gates — they're the only path from "feature off" to "feature on"
|
||||
* for first-time users.
|
||||
*
|
||||
* Three regression tests, all FREE tier, IRON RULE (no opt-out):
|
||||
* 1. consent-gate fires under the right conditions and stops re-firing after marker.
|
||||
* 2. setup-gate fires under the right conditions and stops re-firing after marker.
|
||||
* 3. marker idempotency: re-invoking after either decision produces zero re-prompts.
|
||||
*
|
||||
* Strategy: exercise the helpers the gates depend on (gstack-config get,
|
||||
* developer-profile.json schema, marker file paths). If those break, the
|
||||
* gates break. Plus a static-template assertion so the gate language can't
|
||||
* be silently deleted from the template.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN_CONFIG = path.join(ROOT, 'bin', 'gstack-config');
|
||||
const BIN_DEV = path.join(ROOT, 'bin', 'gstack-developer-profile');
|
||||
const SKILL_TMPL = path.join(ROOT, 'plan-tune', 'SKILL.md.tmpl');
|
||||
|
||||
let stateRoot: string;
|
||||
|
||||
// Fresh temporary state root per test, removed again afterwards.
beforeEach(() => {
  const tmpPrefix = path.join(os.tmpdir(), 'gstack-gate-');
  stateRoot = fs.mkdtempSync(tmpPrefix);
});

afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
|
||||
|
||||
/**
 * Runs one of the gstack binaries synchronously from the repo root.
 *
 * The child inherits the current environment, except that GSTACK_STATE_ROOT
 * is pointed at the per-test state root and GSTACK_HOME is removed.
 *
 * @param bin  path of the executable to spawn
 * @param args command-line arguments
 * @returns captured stdout/stderr (empty string when absent) and the exit
 *          status (-1 when the child produced none)
 */
function runBin(
  bin: string,
  args: string[],
): { stdout: string; stderr: string; status: number } {
  const inherited = Object.entries(process.env).filter(
    (entry): entry is [string, string] => entry[1] !== undefined,
  );
  const env: Record<string, string> = {
    ...Object.fromEntries(inherited),
    GSTACK_STATE_ROOT: stateRoot,
  };
  delete env.GSTACK_HOME;

  const { stdout, stderr, status } = spawnSync(bin, args, { env, encoding: 'utf-8', cwd: ROOT });
  return {
    stdout: stdout ?? '',
    stderr: stderr ?? '',
    status: status ?? -1,
  };
}
|
||||
|
||||
/**
 * Test-side model of the consent gate described in Step 0 of
 * plan-tune/SKILL.md.tmpl (§"Implicit gates run first" → "Consent gate").
 *
 * @returns true when `gstack-config get question_tuning` reports 'false'
 *          (empty output counts as 'false') and the
 *          `.question-tuning-prompted` marker is absent from the state root.
 */
function evaluateConsentGate(): boolean {
  const reported = runBin(BIN_CONFIG, ['get', 'question_tuning']).stdout.trim();
  const questionTuning = reported || 'false';
  if (questionTuning !== 'false') {
    return false;
  }
  const marker = path.join(stateRoot, '.question-tuning-prompted');
  return !fs.existsSync(marker);
}
|
||||
|
||||
/**
 * Test-side model of the setup gate (plan-tune/SKILL.md.tmpl §"Setup gate").
 *
 * @returns true when question_tuning is 'true', the profile's `declared`
 *          section is missing or has no keys (a missing profile file counts
 *          as empty), and the `.declared-setup-prompted` marker is absent.
 */
function evaluateSetupGate(): boolean {
  const tuningEnabled =
    (runBin(BIN_CONFIG, ['get', 'question_tuning']).stdout.trim() || 'false') === 'true';

  // The profile is inspected regardless of the tuning flag, so a corrupt
  // profile file surfaces as a thrown parse error rather than being masked.
  const profilePath = path.join(stateRoot, 'developer-profile.json');
  const declaredEmpty = ((): boolean => {
    if (!fs.existsSync(profilePath)) return true;
    const { declared } = JSON.parse(fs.readFileSync(profilePath, 'utf-8'));
    return !declared || Object.keys(declared).length === 0;
  })();

  const marker = path.join(stateRoot, '.declared-setup-prompted');
  return tuningEnabled && declaredEmpty && !fs.existsSync(marker);
}
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 1: consent gate fires + idempotent on marker write
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 consent gate', () => {
  // Persists the question_tuning flag through the real gstack-config binary.
  const setTuning = (value: 'true' | 'false') =>
    runBin(BIN_CONFIG, ['set', 'question_tuning', value]);

  test('fires when question_tuning=false AND no marker', () => {
    setTuning('false');
    expect(evaluateConsentGate()).toBe(true);
  });

  test('does NOT fire after marker is written (decline path)', () => {
    setTuning('false');
    const marker = path.join(stateRoot, '.question-tuning-prompted');
    fs.writeFileSync(marker, '');
    expect(evaluateConsentGate()).toBe(false);
  });

  test('does NOT fire after question_tuning flipped to true (accept path)', () => {
    setTuning('true');
    expect(evaluateConsentGate()).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 2: setup gate fires + idempotent on marker write
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 setup gate', () => {
  /**
   * Sets question_tuning, then runs `gstack-developer-profile --read`, which
   * creates a stub profile with empty declared.
   */
  const primeProfile = (tuning: 'true' | 'false'): void => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', tuning]);
    runBin(BIN_DEV, ['--read']);
  };

  test('fires when question_tuning=true AND declared empty AND no marker', () => {
    primeProfile('true');
    expect(evaluateSetupGate()).toBe(true);
  });

  test('does NOT fire after declared populated (post-setup)', () => {
    primeProfile('true');

    // Simulate setup completion: populate declared.
    const profilePath = path.join(stateRoot, 'developer-profile.json');
    const profile = JSON.parse(fs.readFileSync(profilePath, 'utf-8'));
    profile.declared = {
      scope_appetite: 0.85,
      risk_tolerance: 0.7,
      detail_preference: 0.5,
      autonomy: 0.5,
      architecture_care: 0.85,
    };
    fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2));

    expect(evaluateSetupGate()).toBe(false);
  });

  test('does NOT fire after marker is written even if declared still empty (bail path)', () => {
    primeProfile('true');
    const marker = path.join(stateRoot, '.declared-setup-prompted');
    fs.writeFileSync(marker, '');
    expect(evaluateSetupGate()).toBe(false);
  });

  test('does NOT fire when question_tuning still false (consent comes first)', () => {
    primeProfile('false');
    expect(evaluateSetupGate()).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 3: marker idempotency across re-invocations
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 marker idempotency', () => {
  // Number of simulated re-invocations per idempotency check.
  const REPEATS = 5;
  const repeat = (check: () => void): void => {
    Array.from({ length: REPEATS }).forEach(() => check());
  };
  // Creates an empty marker file in the state root.
  const touchMarker = (name: string): void => {
    fs.writeFileSync(path.join(stateRoot, name), '');
  };

  test('consent gate stays silent across 5 re-invocations after one decline', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'false']);
    touchMarker('.question-tuning-prompted');
    repeat(() => {
      expect(evaluateConsentGate()).toBe(false);
    });
  });

  test('setup gate stays silent across 5 re-invocations after one bail', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
    runBin(BIN_DEV, ['--read']);
    touchMarker('.declared-setup-prompted');
    repeat(() => {
      expect(evaluateSetupGate()).toBe(false);
    });
  });

  test('both markers honored independently', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
    runBin(BIN_DEV, ['--read']);
    // Touch consent marker only; setup gate should still fire.
    touchMarker('.question-tuning-prompted');
    expect(evaluateConsentGate()).toBe(false);
    expect(evaluateSetupGate()).toBe(true);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 4: static-template assertion (catches accidental deletion of gate prose)
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 gate prose survives in skill template', () => {
  // Read once while the suite is being registered; each test only
  // pattern-matches against this text.
  const tmpl = fs.readFileSync(SKILL_TMPL, 'utf-8');

  // Asserts that every pattern occurs somewhere in the template.
  const expectAll = (patterns: RegExp[]): void => {
    for (const pattern of patterns) {
      expect(tmpl).toMatch(pattern);
    }
  };

  test('Consent gate condition is present', () => {
    expectAll([/Consent gate/i, /question-tuning-prompted/, /question_tuning.*false/]);
  });

  test('Setup gate condition is present', () => {
    expectAll([/Setup gate/i, /declared-setup-prompted/, /declared.*empty/i]);
  });

  test('marker writes documented for both gates', () => {
    expectAll([/touch.*question-tuning-prompted/, /touch.*declared-setup-prompted/]);
  });
});
|
||||
@@ -0,0 +1,285 @@
|
||||
/**
|
||||
* PostToolUse hook (plan-tune cathedral T5) — unit tests.
|
||||
*
|
||||
* Feeds the hook synthetic Claude Code hook payloads via stdin and asserts
|
||||
* the resulting question-log.jsonl reflects the right schema. Covers:
|
||||
* - Marker-first question_id (D18 progressive markers)
|
||||
* - Hash fallback when no marker
|
||||
* - source=hook tagging
|
||||
* - source=auq-other when free_text present
|
||||
* - Dedup on (source, tool_use_id) composite (D3)
|
||||
* - Hook exits 0 even on malformed input (never blocks user session)
|
||||
* - mcp__*__AskUserQuestion matcher acceptance
|
||||
* - "(recommended)" label parse → recommended field populated
|
||||
* - Refuse-on-ambiguous: two (recommended) labels → recommended omitted
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-log-hook');
|
||||
|
||||
let stateRoot: string;
|
||||
|
||||
beforeEach(() => {
  const tmpPrefix = path.join(os.tmpdir(), 'gstack-hooklog-');
  stateRoot = fs.mkdtempSync(tmpPrefix);
  // No project directory is pre-created here: readLog() scans every
  // directory that ends up under <stateRoot>/projects after the hook runs.
});

afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
|
||||
|
||||
/**
 * Spawns the question-log hook from the repo root with `stdin` serialized
 * as JSON on standard input.
 *
 * The child inherits the current environment with GSTACK_STATE_ROOT pointed
 * at the per-test state root, GSTACK_HOME removed, and
 * GSTACK_QUESTION_LOG_NO_DERIVE set to '1'.
 *
 * @returns captured stdout/stderr (empty string when absent) and the exit
 *          status (-1 when unavailable)
 */
function runHook(stdin: object): { stdout: string; stderr: string; status: number } {
  const inherited = Object.entries(process.env).filter(
    (entry): entry is [string, string] => entry[1] !== undefined,
  );
  const env: Record<string, string> = {
    ...Object.fromEntries(inherited),
    GSTACK_STATE_ROOT: stateRoot,
    GSTACK_QUESTION_LOG_NO_DERIVE: '1',
  };
  delete env.GSTACK_HOME;

  const child = spawnSync(HOOK, [], {
    env,
    input: JSON.stringify(stdin),
    encoding: 'utf-8',
    cwd: ROOT,
  });

  return {
    stdout: child.stdout ?? '',
    stderr: child.stderr ?? '',
    status: child.status ?? -1,
  };
}
|
||||
|
||||
/**
 * Collects every event from every `question-log.jsonl` found under
 * `<stateRoot>/projects/<dir>/`.
 *
 * Blank lines and lines that are not valid JSON are skipped. Returns an
 * empty array when the projects directory does not exist.
 */
function readLog(): Array<Record<string, unknown>> {
  const projectsRoot = path.join(stateRoot, 'projects');
  if (!fs.existsSync(projectsRoot)) return [];

  const events: Array<Record<string, unknown>> = [];
  fs.readdirSync(projectsRoot)
    .map((dir) => path.join(projectsRoot, dir, 'question-log.jsonl'))
    .filter((logFile) => fs.existsSync(logFile))
    .forEach((logFile) => {
      const rawLines = fs.readFileSync(logFile, 'utf-8').trim().split('\n');
      for (const line of rawLines) {
        if (!line) continue;
        try {
          events.push(JSON.parse(line));
        } catch {
          // skip malformed
        }
      }
    });
  return events;
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Native AskUserQuestion capture
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('PostToolUse hook (native AskUserQuestion)', () => {
  test('captures one event per question with source=hook and tool_use_id', () => {
    const chosen = 'A) Accept (recommended)';
    const payload = {
      session_id: 'sess1',
      hook_event_name: 'PostToolUse',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-1',
      tool_input: {
        questions: [
          {
            question: 'D1 — Test capture\nRecommendation: A',
            options: [chosen, 'B) Reject'],
            multiSelect: false,
          },
        ],
      },
      tool_response: {
        answers: [{ option_label: chosen }],
      },
      cwd: ROOT,
    };

    const outcome = runHook(payload);
    expect(outcome.status).toBe(0);

    const events = readLog();
    expect(events.length).toBe(1);
    const logged = events[0];
    expect(logged.source).toBe('hook');
    expect(logged.tool_use_id).toBe('tu-1');
    expect(logged.session_id).toBe('sess1');
    // Without a marker in the question text the id is expected to carry the
    // 'hook-' prefix.
    expect(typeof logged.question_id).toBe('string');
    expect((logged.question_id as string).startsWith('hook-')).toBe(true);
    expect(logged.user_choice).toContain('Accept');
    // Recommended parsed from (recommended) label
    expect(logged.recommended).toContain('Accept');
  });

  test('marker-first question_id when <gstack-qid:foo> present', () => {
    const chosen = 'A) Fix now (recommended)';
    runHook({
      session_id: 'sess2',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-2',
      tool_input: {
        questions: [
          {
            question: 'D2 — Marker test <gstack-qid:ship-test-failure-triage>\nRecommendation: A',
            options: [chosen, 'B) Investigate', 'C) Ack and ship'],
          },
        ],
      },
      tool_response: { answers: [{ option_label: chosen }] },
      cwd: ROOT,
    });

    const events = readLog();
    expect(events.length).toBe(1);
    const logged = events[0];
    expect(logged.question_id).toBe('ship-test-failure-triage');
    // Marker stripped from summary
    expect((logged.question_summary as string).includes('<gstack-qid:')).toBe(false);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// MCP AskUserQuestion variant (Conductor)
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('PostToolUse hook (mcp__*__AskUserQuestion variant)', () => {
  test('accepts mcp__conductor__AskUserQuestion tool_name', () => {
    const payload = {
      session_id: 'sess3',
      tool_name: 'mcp__conductor__AskUserQuestion',
      tool_use_id: 'tu-3',
      tool_input: {
        questions: [{ question: 'Test', options: ['A', 'B'] }],
      },
      tool_response: { answers: [{ option_label: 'A' }] },
      cwd: ROOT,
    };
    const outcome = runHook(payload);
    expect(outcome.status).toBe(0);
    // The MCP-prefixed tool name must be logged just like the native one.
    expect(readLog().length).toBe(1);
  });

  test('ignores unrelated tool_name (defensive)', () => {
    const payload = {
      session_id: 'sess4',
      tool_name: 'Bash',
      tool_use_id: 'tu-4',
      tool_input: {},
      cwd: ROOT,
    };
    const outcome = runHook(payload);
    expect(outcome.status).toBe(0);
    // A non-AskUserQuestion tool must leave the log untouched.
    expect(readLog().length).toBe(0);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Free-text capture (Layer 8 dream cycle)
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('PostToolUse hook (free-text "Other" responses)', () => {
  test('source=auq-other and free_text populated when user types free text', () => {
    const otherAnswer = {
      option_label: 'Other',
      free_text: 'I always include tests with new features',
    };
    runHook({
      session_id: 'sess5',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-5',
      tool_input: {
        questions: [{ question: 'D5 — Other test', options: ['A', 'B'] }],
      },
      tool_response: {
        answers: [otherAnswer],
      },
      cwd: ROOT,
    });

    const events = readLog();
    expect(events.length).toBe(1);
    const logged = events[0];
    // A typed answer is tagged with its own source and keeps the text.
    expect(logged.source).toBe('auq-other');
    expect(logged.free_text).toContain('always include tests');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Dedup
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('PostToolUse hook (dedup on source + tool_use_id)', () => {
  test('second fire with same (source, tool_use_id) is dropped', () => {
    const payload = {
      session_id: 'sess6',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-6',
      tool_input: { questions: [{ question: 'Dedup test', options: ['A'] }] },
      tool_response: { answers: [{ option_label: 'A' }] },
      cwd: ROOT,
    };

    // Fire the identical payload twice; only the first may be logged.
    for (let attempt = 0; attempt < 2; attempt += 1) {
      runHook(payload);
    }

    expect(readLog().length).toBe(1);
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Refuse-on-ambiguous (D2 safety)
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('PostToolUse hook (recommended parser safety)', () => {
  test('two (recommended) labels → recommended field omitted', () => {
    // Both options claim to be recommended, so no single one can be chosen.
    const ambiguousOptions = ['A) Foo (recommended)', 'B) Bar (recommended)'];
    runHook({
      session_id: 'sess7',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-7',
      tool_input: {
        questions: [
          {
            question: 'Ambiguous test',
            options: ambiguousOptions,
          },
        ],
      },
      tool_response: { answers: [{ option_label: 'A) Foo (recommended)' }] },
      cwd: ROOT,
    });

    const events = readLog();
    expect(events.length).toBe(1);
    expect(events[0].recommended).toBeUndefined();
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Crash safety
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('PostToolUse hook (crash safety)', () => {
  /**
   * Spawns the hook with raw stdin that is not necessarily valid JSON.
   *
   * Uses the same environment isolation as runHook: GSTACK_STATE_ROOT points
   * at the per-test state root and GSTACK_HOME is removed, so a GSTACK_HOME
   * exported in the developer's shell cannot influence where the hook
   * writes. The process cwd is inherited, as in the original tests.
   */
  const spawnWithRawInput = (input: string) => {
    const env: Record<string, string> = {};
    for (const [key, value] of Object.entries(process.env)) {
      if (value !== undefined) env[key] = value;
    }
    env.GSTACK_STATE_ROOT = stateRoot;
    delete env.GSTACK_HOME;
    env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
    return spawnSync(HOOK, [], { env, input, encoding: 'utf-8' });
  };

  test('exits 0 on empty stdin', () => {
    const res = spawnWithRawInput('');
    expect(res.status).toBe(0);
  });

  test('exits 0 on malformed JSON', () => {
    const res = spawnWithRawInput('not json');
    expect(res.status).toBe(0);
    // Error logged to hook-errors.log
    const errLog = path.join(stateRoot, 'hook-errors.log');
    expect(fs.existsSync(errLog)).toBe(true);
    expect(fs.readFileSync(errLog, 'utf-8')).toContain('stdin parse failed');
  });
});
|
||||
@@ -0,0 +1,385 @@
|
||||
/**
|
||||
* PreToolUse enforcement hook (plan-tune cathedral T6) — unit tests.
|
||||
*
|
||||
* Covers:
|
||||
* - never-ask + marker + two-way + clean recommendation → deny+reason
|
||||
* - never-ask + no marker → defer (D18 marker gate)
|
||||
* - never-ask + one-way → defer (safety override)
|
||||
* - never-ask + ambiguous recommendation → defer (D2 refuse-on-ambiguous)
|
||||
* - always-ask → defer
|
||||
* - no preference → defer
|
||||
* - project preference wins over global (D8 precedence)
|
||||
* - global preference applies when no project preference set
|
||||
* - mcp__*__AskUserQuestion matcher accepted
|
||||
* - empty stdin → defer (crash safety)
|
||||
* - auto-decided event logged via gstack-question-log (PostToolUse won't fire)
|
||||
* - auto-decided marker written to ~/.gstack/sessions/<id>/.auto-decided-<tool_use_id>
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-preference-hook');
|
||||
|
||||
let stateRoot: string;
|
||||
let cwdSlug: string;
|
||||
|
||||
let fixtureCwd: string;
|
||||
|
||||
beforeEach(() => {
  const tmpPrefix = path.join(os.tmpdir(), 'gstack-prefhook-');
  stateRoot = fs.mkdtempSync(tmpPrefix);
  cwdSlug = 'fixture-slug';

  // Project state directory that the preference and log helpers write into.
  const projectStateDir = path.join(stateRoot, 'projects', cwdSlug);
  fs.mkdirSync(projectStateDir, { recursive: true });

  // Real directory that the hook can chdir() into. gstack-slug derives the
  // slug from the basename of this cwd (no .git => basename fallback path).
  fixtureCwd = path.join(stateRoot, cwdSlug);
  fs.mkdirSync(fixtureCwd, { recursive: true });
});

afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
|
||||
|
||||
/**
 * Sets one entry in the fixture project's `question-preferences.json`,
 * keeping any entries already in the file.
 *
 * @param questionId question id used as the JSON key
 * @param preference value stored for that question (e.g. 'never-ask')
 */
function writeProjectPref(questionId: string, preference: string): void {
  const prefsFile = path.join(stateRoot, 'projects', cwdSlug, 'question-preferences.json');
  const prefs: Record<string, string> = fs.existsSync(prefsFile)
    ? JSON.parse(fs.readFileSync(prefsFile, 'utf-8'))
    : {};
  prefs[questionId] = preference;
  const serialized = JSON.stringify(prefs, null, 2);
  fs.writeFileSync(prefsFile, serialized);
}
|
||||
|
||||
/**
 * Sets one entry in `global-question-preferences.json` at the state root,
 * keeping any entries already in the file.
 *
 * @param questionId question id used as the JSON key
 * @param preference value stored for that question
 */
function writeGlobalPref(questionId: string, preference: string): void {
  const prefsFile = path.join(stateRoot, 'global-question-preferences.json');
  const prefs: Record<string, string> = fs.existsSync(prefsFile)
    ? JSON.parse(fs.readFileSync(prefsFile, 'utf-8'))
    : {};
  prefs[questionId] = preference;
  const serialized = JSON.stringify(prefs, null, 2);
  fs.writeFileSync(prefsFile, serialized);
}
|
||||
|
||||
/**
 * Spawns the question-preference hook from the repo root, feeding it `stdin`
 * as JSON with a `cwd` field added.
 *
 * The child inherits the current environment with GSTACK_STATE_ROOT pointed
 * at the per-test state root, GSTACK_HOME removed, and
 * GSTACK_QUESTION_LOG_NO_DERIVE set to '1'.
 *
 * @param stdin hook payload; its own `cwd`, if any, is overridden
 * @param cwd   working directory reported to the hook; the fixture directory
 *              is used when omitted or empty
 * @returns captured streams, exit status (-1 when unavailable), and stdout
 *          parsed as JSON (`null` when stdout is not valid JSON)
 */
function runHook(stdin: object, cwd?: string): {
  stdout: string;
  stderr: string;
  status: number;
  parsed: any;
} {
  const inherited = Object.entries(process.env).filter(
    (entry): entry is [string, string] => entry[1] !== undefined,
  );
  const env: Record<string, string> = {
    ...Object.fromEntries(inherited),
    GSTACK_STATE_ROOT: stateRoot,
    GSTACK_QUESTION_LOG_NO_DERIVE: '1',
  };
  delete env.GSTACK_HOME;

  const reportedCwd = cwd || fixtureCwd;
  const child = spawnSync(HOOK, [], {
    env,
    input: JSON.stringify({ ...stdin, cwd: reportedCwd }),
    encoding: 'utf-8',
    cwd: ROOT,
  });

  // Empty stdout is treated as an empty object; unparseable stdout yields null.
  const parsed = ((): any => {
    try {
      return JSON.parse(child.stdout || '{}');
    } catch {
      return null;
    }
  })();

  return {
    stdout: child.stdout ?? '',
    stderr: child.stderr ?? '',
    status: child.status ?? -1,
    parsed,
  };
}
|
||||
|
||||
/**
 * Read `<stateRoot>/projects/<cwdSlug>/question-log.jsonl` and return the
 * entries whose `source` is 'auto-decided'.
 *
 * @returns The matching events in file order; an empty array when the log
 *          file does not exist. Throws if any non-empty line is not valid JSON.
 */
function autoDecidedEvents(): Array<Record<string, unknown>> {
  const logFile = path.join(stateRoot, 'projects', cwdSlug, 'question-log.jsonl');
  if (!fs.existsSync(logFile)) {
    return [];
  }

  const rawLines = fs.readFileSync(logFile, 'utf-8').trim().split('\n');
  // Blank lines (including the single '' produced by an empty file) are dropped.
  const nonEmpty = rawLines.filter(Boolean);
  const allEvents = nonEmpty.map((line) => JSON.parse(line));
  return allEvents.filter((evt) => evt.source === 'auto-decided');
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Defer paths
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// Cases where the hook must NOT enforce anything: every test expects the
// hook's permissionDecision to be 'defer'.
describe('defers (no enforcement)', () => {
  test('no preference set → defer', () => {
    const r = runHook({
      session_id: 's1',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-1',
      tool_input: {
        questions: [
          { question: '<gstack-qid:test-q> Need approval?', options: ['A) Yes (recommended)', 'B) No'] },
        ],
      },
    });
    expect(r.status).toBe(0);
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('marker missing → defer (D18)', () => {
    // A never-ask preference exists, but the question text carries no
    // <gstack-qid:...> marker to match it against.
    writeProjectPref('test-q', 'never-ask');
    const r = runHook({
      session_id: 's2',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-2',
      tool_input: {
        questions: [
          { question: 'No marker here', options: ['A) Yes (recommended)', 'B) No'] },
        ],
      },
    });
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('always-ask preference → defer', () => {
    writeProjectPref('test-q', 'always-ask');
    const r = runHook({
      session_id: 's3',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-3',
      tool_input: {
        questions: [
          { question: '<gstack-qid:test-q> Yes?', options: ['A) Yes (recommended)', 'B) No'] },
        ],
      },
    });
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('empty stdin → defer (crash safety)', () => {
    // runHook always serialises a JSON payload, so the hook is spawned by hand
    // here in order to send genuinely empty stdin.
    const env: Record<string, string> = {};
    for (const [k, v] of Object.entries(process.env)) {
      if (v !== undefined) env[k] = v;
    }
    env.GSTACK_STATE_ROOT = stateRoot;
    // Same isolation as runHook: do not let an inherited GSTACK_HOME reach the hook.
    delete env.GSTACK_HOME;
    const res = spawnSync(HOOK, [], { env, input: '', encoding: 'utf-8' });
    expect(res.status).toBe(0);
    const parsed = JSON.parse(res.stdout || '{}');
    expect(parsed.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('non-AUQ tool_name → defer (defensive)', () => {
    writeProjectPref('test-q', 'never-ask');
    const r = runHook({ session_id: 's4', tool_name: 'Bash', tool_use_id: 'tu-4', tool_input: {} });
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Enforcement paths (deny+reason)
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// A never-ask preference is enforced (deny + reason) only for the clean case;
// the remaining tests pin the situations where the hook still defers.
describe('enforces never-ask preferences', () => {
  /**
   * Fire a single-question AskUserQuestion payload through the hook and
   * return the hook's `hookSpecificOutput` (undefined when absent).
   */
  const decide = (sessionId: string, toolUseId: string, question: string, options: string[]) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: { questions: [{ question, options }] },
    }).parsed?.hookSpecificOutput;

  test('marker + never-ask + two-way + clean recommendation → deny', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    const out = decide(
      's5',
      'tu-5',
      '<gstack-qid:ship-pre-landing-review-fix> Pre-landing review flagged issue.',
      ['A) Fix now (recommended)', 'B) Skip'],
    );
    expect(out?.permissionDecision).toBe('deny');
    expect(out?.permissionDecisionReason).toContain('plan-tune auto-decide');
    expect(out?.permissionDecisionReason).toContain('Fix now');
  });

  test('one-way door → defer even with never-ask (safety override)', () => {
    writeProjectPref('ship-test-failure-triage', 'never-ask');
    const out = decide(
      's6',
      'tu-6',
      '<gstack-qid:ship-test-failure-triage> Tests failed.',
      ['A) Fix now (recommended)', 'B) Investigate', 'C) Ack and ship'],
    );
    expect(out?.permissionDecision).toBe('defer');
  });

  test('ambiguous recommendation (two labels) → defer (D2 refuse-on-ambiguous)', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    const out = decide(
      's7',
      'tu-7',
      '<gstack-qid:ship-pre-landing-review-fix> Ambiguous',
      ['A) Fix now (recommended)', 'B) Skip (recommended)'],
    );
    expect(out?.permissionDecision).toBe('defer');
  });

  test('no recommendation marker AND no prose match → defer', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    const out = decide(
      's8',
      'tu-8',
      '<gstack-qid:ship-pre-landing-review-fix> No rec',
      ['A) Foo', 'B) Bar'],
    );
    expect(out?.permissionDecision).toBe('defer');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Precedence (D8)
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// A project-level preference overrides a global one for the same question id;
// the global file is only consulted when no project entry exists.
describe('precedence: project wins over global (D8)', () => {
  const QID = 'ship-pre-landing-review-fix';

  /** Ask the shared two-option question and return the hook's permissionDecision. */
  function decisionFor(sessionId: string, toolUseId: string): unknown {
    const result = runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:ship-pre-landing-review-fix> P?',
            options: ['A) Fix (recommended)', 'B) Skip'],
          },
        ],
      },
    });
    return result.parsed?.hookSpecificOutput?.permissionDecision;
  }

  test('project never-ask + global always-ask → enforce never-ask', () => {
    writeProjectPref(QID, 'never-ask');
    writeGlobalPref(QID, 'always-ask');
    expect(decisionFor('s9', 'tu-9')).toBe('deny');
  });

  test('only global never-ask → enforce (fallback path)', () => {
    writeGlobalPref(QID, 'never-ask');
    expect(decisionFor('s10', 'tu-10')).toBe('deny');
  });

  test('project always-ask + global never-ask → defer (project wins)', () => {
    writeProjectPref(QID, 'always-ask');
    writeGlobalPref(QID, 'never-ask');
    expect(decisionFor('s11', 'tu-11')).toBe('defer');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// MCP matcher acceptance
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// The MCP-prefixed tool name must be treated like plain AskUserQuestion.
describe('MCP variant', () => {
  test('mcp__conductor__AskUserQuestion accepted and enforced', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');

    const question = {
      question: '<gstack-qid:ship-pre-landing-review-fix> P?',
      options: ['A) Fix (recommended)', 'B) Skip'],
    };
    const payload = {
      session_id: 's12',
      tool_name: 'mcp__conductor__AskUserQuestion',
      tool_use_id: 'tu-12',
      tool_input: { questions: [question] },
    };

    const { parsed } = runHook(payload);
    expect(parsed?.hookSpecificOutput?.permissionDecision).toBe('deny');
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Auto-decided event logging (since PostToolUse never fires on deny)
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// When the hook enforces a preference it must leave a trace itself: one
// question-log event tagged source=auto-decided plus a per-tool-use marker file.
describe('auto-decided event tagging', () => {
  /** Payload for the shared never-ask question, varying only the ids. */
  const enforcedPayload = (sessionId: string, toolUseId: string) => ({
    session_id: sessionId,
    tool_name: 'AskUserQuestion',
    tool_use_id: toolUseId,
    tool_input: {
      questions: [
        {
          question: '<gstack-qid:ship-pre-landing-review-fix> P?',
          options: ['A) Fix (recommended)', 'B) Skip'],
        },
      ],
    },
  });

  test('logs source=auto-decided event when enforcing', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    runHook(enforcedPayload('s13', 'tu-13'), fixtureCwd);

    const events = autoDecidedEvents();
    expect(events.length).toBe(1);
    const [logged] = events;
    expect(logged.question_id).toBe('ship-pre-landing-review-fix');
    expect(logged.user_choice).toContain('Fix');
    expect(logged.tool_use_id).toBe('tu-13');
  });

  test('writes .auto-decided-<tool_use_id> marker for PostToolUse coordination', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    runHook(enforcedPayload('s14', 'tu-14'));

    const markerPath = path.join(stateRoot, 'sessions', 's14', '.auto-decided-tu-14');
    expect(fs.existsSync(markerPath)).toBe(true);
  });
});
|
||||
@@ -41,20 +41,24 @@ import { logBudgetOverride } from './helpers/budget-override';
|
||||
* v1.45.0.0 T5 — hard eval cost cap.
|
||||
*
|
||||
* Per-tier defaults (override via env):
|
||||
* EVALS_BUDGET_HARD_CAP_GATE default $25/run
|
||||
* EVALS_BUDGET_HARD_CAP_PERIODIC default $70/run
|
||||
* EVALS_BUDGET_HARD_CAP umbrella cap if a tier-specific isn't set; default $30
|
||||
* EVALS_BUDGET_HARD_CAP_GATE default $200/run
|
||||
* EVALS_BUDGET_HARD_CAP_PERIODIC default $500/run
|
||||
* EVALS_BUDGET_HARD_CAP umbrella cap if a tier-specific isn't set; default $300
|
||||
* EVALS_BUDGET_OVERRIDE_REASON if set, override fires AND audit-logs to
|
||||
* ~/.gstack/analytics/spend-overrides.jsonl
|
||||
*
|
||||
* Caps are dollars-per-run, not dollars-per-test. A test that legitimately
|
||||
* gets more expensive should bake into the baseline; a runaway eval (infinite
|
||||
* retry, model price change) gets stopped here.
|
||||
* Caps are dollars-per-run, not dollars-per-test. The cap exists to catch
|
||||
* runaway evals (infinite retry, model price change, prompt-blowup bug),
|
||||
* NOT to gate legitimate scope growth. Set high enough that real growth
|
||||
* never trips it — only obvious-bug territory does. Adjusted v1.52.0.0
|
||||
* (cathedral cap audit): $25 → $200 gate, $70 → $500 periodic. Prior
|
||||
* defaults tripped on normal-scope expansion; new ceilings are 8× the
|
||||
* historical worst-case eval run.
|
||||
*/
|
||||
const DEFAULT_HARD_CAP_USD = Number(process.env.EVALS_BUDGET_HARD_CAP) || 30;
|
||||
const DEFAULT_HARD_CAP_USD = Number(process.env.EVALS_BUDGET_HARD_CAP) || 300;
|
||||
const TIER_CAPS: Record<'e2e' | 'llm-judge', number> = {
|
||||
e2e: Number(process.env.EVALS_BUDGET_HARD_CAP_GATE) || DEFAULT_HARD_CAP_USD,
|
||||
'llm-judge': Number(process.env.EVALS_BUDGET_HARD_CAP_PERIODIC) || Math.max(70, DEFAULT_HARD_CAP_USD),
|
||||
e2e: Number(process.env.EVALS_BUDGET_HARD_CAP_GATE) || Math.min(200, DEFAULT_HARD_CAP_USD),
|
||||
'llm-judge': Number(process.env.EVALS_BUDGET_HARD_CAP_PERIODIC) || Math.max(500, DEFAULT_HARD_CAP_USD),
|
||||
};
|
||||
|
||||
function currentGitBranch(): string {
|
||||
|
||||
@@ -0,0 +1,458 @@
|
||||
/**
|
||||
* /plan-tune cathedral E2E (T16) — 5 scenarios, all gate tier per D12.
|
||||
*
|
||||
* Each scenario verifies that the cathedral's substrate works end-to-end
|
||||
* against a real `claude -p` invocation. Unit tests in test/{question-log-hook,
|
||||
* question-preference-hook, declared-annotation, distill-*}.test.ts cover
|
||||
* deterministic plumbing; this file proves the agent obeys the hook
|
||||
* contracts in a live session.
|
||||
*
|
||||
* Touchfile registration in test/helpers/touchfiles.ts:
|
||||
* - plan-tune-hook-capture
|
||||
* - plan-tune-enforcement
|
||||
* - plan-tune-annotation
|
||||
* - plan-tune-codex-import
|
||||
* - plan-tune-dream-cycle
|
||||
*
|
||||
* Each scenario uses GSTACK_STATE_ROOT to isolate from the user's real
|
||||
* ~/.gstack (per cathedral T1 + Codex D16 fix). Cost budget ~$3-4/scenario.
|
||||
*/
|
||||
|
||||
import { beforeAll, afterAll, expect } from 'bun:test';
|
||||
import {
|
||||
ROOT,
|
||||
describeIfSelected,
|
||||
testConcurrentIfSelected,
|
||||
copyDirSync,
|
||||
createEvalCollector,
|
||||
finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Eval collector for this file; handed to finalizeEvalCollector in the
// file-level afterAll hook below.
const collector = createEvalCollector('e2e-plan-tune-cathedral');

afterAll(function finalizeCollector() {
  finalizeEvalCollector(collector);
});
|
||||
|
||||
/**
 * Create a throwaway fixture project containing the bins, scripts and hooks
 * the cathedral scenarios need.
 *
 * Steps, in order: make a temp work dir named after `prefix`, create
 * `.gstack-state` inside it, initialise a git repo with one commit, copy the
 * listed bins (made executable) and scripts from ROOT, and copy the whole
 * `hosts/claude/hooks` directory. Bins/scripts missing under ROOT are skipped
 * silently, and git exit codes are not checked.
 *
 * @param prefix  Prefix for the temp directory name under os.tmpdir().
 * @returns `workDir` (fixture root), `stateRoot` (`<workDir>/.gstack-state`)
 *          and `slug` (basename of workDir with every character outside
 *          [a-zA-Z0-9._-] removed).
 */
function scaffoldFixture(prefix: string): { workDir: string; stateRoot: string; slug: string } {
  const binNames = [
    'gstack-slug',
    'gstack-config',
    'gstack-paths',
    'gstack-question-log',
    'gstack-question-preference',
    'gstack-developer-profile',
    'gstack-codex-session-import',
    'gstack-distill-free-text',
    'gstack-distill-apply',
  ];
  const scriptNames = [
    'question-registry.ts',
    'psychographic-signals.ts',
    'archetypes.ts',
    'one-way-doors.ts',
    'declared-annotation.ts',
  ];

  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
  const stateRoot = path.join(workDir, '.gstack-state');
  fs.mkdirSync(stateRoot, { recursive: true });

  // git init so gstack-slug resolves a deterministic slug.
  const git = (...args: string[]) => spawnSync('git', args, { cwd: workDir, stdio: 'pipe' });
  git('init', '-b', 'main');
  git('config', 'user.email', 't@t.com');
  git('config', 'user.name', 'T');
  fs.writeFileSync(path.join(workDir, 'README.md'), '# cathedral fixture\n');
  git('add', '.');
  git('commit', '-m', 'init');

  // Copy bins.
  const binDir = path.join(workDir, 'bin');
  fs.mkdirSync(binDir, { recursive: true });
  for (const name of binNames) {
    const from = path.join(ROOT, 'bin', name);
    if (!fs.existsSync(from)) continue;
    const to = path.join(binDir, name);
    fs.copyFileSync(from, to);
    fs.chmodSync(to, 0o755);
  }

  // Copy scripts that the bins import.
  const scriptsDir = path.join(workDir, 'scripts');
  fs.mkdirSync(scriptsDir, { recursive: true });
  for (const name of scriptNames) {
    const from = path.join(ROOT, 'scripts', name);
    if (!fs.existsSync(from)) continue;
    fs.copyFileSync(from, path.join(scriptsDir, name));
  }

  // Copy hooks dir.
  const hooksRelPath = ['hosts', 'claude', 'hooks'];
  copyDirSync(path.join(ROOT, ...hooksRelPath), path.join(workDir, ...hooksRelPath));

  const slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
  return { workDir, stateRoot, slug };
}
|
||||
|
||||
/**
 * Delete a fixture directory tree created by scaffoldFixture.
 *
 * Removal is best-effort: any error raised while deleting is swallowed so a
 * cleanup problem never fails the test run.
 *
 * @param workDir  Root directory of the fixture to remove.
 */
function cleanupFixture(workDir: string): void {
  const removeOptions = { recursive: true, force: true };
  try {
    fs.rmSync(workDir, removeOptions);
  } catch {
    // best-effort
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scenario 1: Hook capture — PostToolUse hook writes to question-log.jsonl
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Scenario 1: feed the question-log hook a PostToolUse-shaped payload and
// verify an event with source=hook lands in the project's question-log.jsonl.
describeIfSelected('PlanTune cathedral E2E: hook capture', ['plan-tune-hook-capture'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-cap-');
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('hook directly invoked → log fills', async () => {
    // Direct hook invocation simulates Claude Code's PostToolUse delivery.
    // E2E verifies the hook + bin chain works against real bins on disk
    // (the unit test exercises this with mocks).
    const hookPath = path.join(fixture.workDir, 'hosts', 'claude', 'hooks', 'question-log-hook');
    const payload = {
      session_id: 'cathedral-e2e-cap',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-cap-1',
      tool_input: {
        questions: [
          {
            question:
              'D1 — Cathedral E2E capture <gstack-qid:ship-test-failure-triage>\nRecommendation: A',
            options: ['A) Fix now (recommended)', 'B) Investigate'],
          },
        ],
      },
      tool_response: { answers: [{ option_label: 'A) Fix now (recommended)' }] },
      cwd: fixture.workDir,
    };

    const env: NodeJS.ProcessEnv = {
      ...process.env,
      GSTACK_STATE_ROOT: fixture.stateRoot,
      GSTACK_QUESTION_LOG_NO_DERIVE: '1',
    };
    // Drop any inherited GSTACK_HOME, as the unit-test runHook helper in
    // question-preference-hook.test.ts does. NOTE(review): presumably it can
    // redirect state away from GSTACK_STATE_ROOT — confirm against the bins.
    delete env.GSTACK_HOME;

    const res = spawnSync(hookPath, [], {
      env,
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(res.status).toBe(0);

    const logPath = path.join(fixture.stateRoot, 'projects', fixture.slug, 'question-log.jsonl');
    expect(fs.existsSync(logPath)).toBe(true);
    // filter(Boolean) drops the lone '' that split() yields for an empty file;
    // without it the length check below could never fail.
    const lines = fs.readFileSync(logPath, 'utf-8').trim().split('\n').filter(Boolean);
    expect(lines.length).toBeGreaterThanOrEqual(1);
    const evt = JSON.parse(lines[0]);
    expect(evt.source).toBe('hook');
    expect(evt.question_id).toBe('ship-test-failure-triage');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scenario 2: Enforcement — never-ask preference + marker + 2-way → deny
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Scenario 2: with a never-ask project preference on disk, the preference
// hook must deny the marked two-option question and log one auto-decided event.
describeIfSelected('PlanTune cathedral E2E: enforcement', ['plan-tune-enforcement'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-enf-');
    // Seed the project preference file the hook is expected to read.
    fs.mkdirSync(path.join(fixture.stateRoot, 'projects', fixture.slug), { recursive: true });
    fs.writeFileSync(
      path.join(fixture.stateRoot, 'projects', fixture.slug, 'question-preferences.json'),
      JSON.stringify({ 'ship-changelog-voice-polish': 'never-ask' }),
    );
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('PreToolUse hook denies + logs auto-decided event', async () => {
    const hookPath = path.join(
      fixture.workDir,
      'hosts',
      'claude',
      'hooks',
      'question-preference-hook',
    );
    const payload = {
      session_id: 'cathedral-e2e-enf',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-enf-1',
      tool_input: {
        questions: [
          {
            question:
              '<gstack-qid:ship-changelog-voice-polish> Polish CHANGELOG entry?',
            options: ['A) Accept (recommended)', 'B) Skip'],
          },
        ],
      },
      cwd: fixture.workDir,
    };

    const env: NodeJS.ProcessEnv = {
      ...process.env,
      GSTACK_STATE_ROOT: fixture.stateRoot,
      GSTACK_QUESTION_LOG_NO_DERIVE: '1',
    };
    // Drop any inherited GSTACK_HOME, as the unit-test runHook helper in
    // question-preference-hook.test.ts does. NOTE(review): presumably it can
    // redirect state away from GSTACK_STATE_ROOT — confirm against the bins.
    delete env.GSTACK_HOME;

    const res = spawnSync(hookPath, [], {
      env,
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(res.status).toBe(0);
    const parsed = JSON.parse(res.stdout || '{}');
    expect(parsed.hookSpecificOutput?.permissionDecision).toBe('deny');
    expect(parsed.hookSpecificOutput?.permissionDecisionReason).toContain('Accept');

    // Auto-decided event was logged.
    const logPath = path.join(fixture.stateRoot, 'projects', fixture.slug, 'question-log.jsonl');
    expect(fs.existsSync(logPath)).toBe(true);
    const events = fs
      .readFileSync(logPath, 'utf-8')
      .trim()
      .split('\n')
      .filter(Boolean)
      .map((l) => JSON.parse(l));
    const auto = events.filter((e) => e.source === 'auto-decided');
    expect(auto.length).toBe(1);
    expect(auto[0].question_id).toBe('ship-changelog-voice-polish');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scenario 3: Annotation — declared profile injected via additionalContext
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Scenario 3: with a declared profile and a memory nugget seeded in the state
// root, the preference hook must defer and surface the nugget text through
// additionalContext.
describeIfSelected('PlanTune cathedral E2E: annotation', ['plan-tune-annotation'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-ann-');
    // Strong declared profile that should annotate any signal_key=detail-preference question.
    fs.writeFileSync(
      path.join(fixture.stateRoot, 'developer-profile.json'),
      JSON.stringify({ declared: { detail_preference: 0.9 } }),
    );
    // Seed a memory nugget for the matching signal_key.
    fs.writeFileSync(
      path.join(fixture.stateRoot, 'free-text-memory.json'),
      JSON.stringify({
        nuggets: [
          {
            nugget: 'User prefers verbose explanations with tradeoffs',
            applies_to_signal_keys: ['detail-preference'],
            applied_at: new Date().toISOString(),
          },
        ],
      }),
    );
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('PreToolUse hook surfaces memory nugget on defer', async () => {
    const hookPath = path.join(
      fixture.workDir,
      'hosts',
      'claude',
      'hooks',
      'question-preference-hook',
    );
    const payload = {
      session_id: 'cathedral-e2e-ann',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-ann-1',
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:ship-todos-reorganize> Reorganize TODOs?',
            options: ['A) Accept (recommended)', 'B) Skip'],
          },
        ],
      },
      cwd: fixture.workDir,
    };

    const env: NodeJS.ProcessEnv = {
      ...process.env,
      GSTACK_STATE_ROOT: fixture.stateRoot,
      GSTACK_QUESTION_LOG_NO_DERIVE: '1',
    };
    // Drop any inherited GSTACK_HOME, as the unit-test runHook helper in
    // question-preference-hook.test.ts does. NOTE(review): presumably it can
    // redirect state away from GSTACK_STATE_ROOT — confirm against the bins.
    delete env.GSTACK_HOME;

    const res = spawnSync(hookPath, [], {
      env,
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(res.status).toBe(0);
    const parsed = JSON.parse(res.stdout || '{}');
    expect(parsed.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(parsed.hookSpecificOutput?.additionalContext).toContain('verbose explanations');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scenario 4: Codex import — JSONL session → import bin → log fills
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Scenario 4: run gstack-codex-session-import on a hand-written three-line
// JSONL session and verify exactly one codex-import-marker event is logged.
describeIfSelected('PlanTune cathedral E2E: codex import', ['plan-tune-codex-import'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;
  let sessionFile: string;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-cdx-');
    sessionFile = path.join(fixture.workDir, 'rollout-cathedral.jsonl');
    // Session = meta record, one agent message carrying a marked question,
    // and the user's one-letter reply.
    const lines = [
      JSON.stringify({
        type: 'session_meta',
        payload: { id: 'cathedral-sess-1', cwd: fixture.workDir },
      }),
      JSON.stringify({
        timestamp: new Date().toISOString(),
        type: 'event_msg',
        payload: {
          type: 'agent_message',
          message:
            'D1 — Cathedral import <gstack-qid:plan-eng-review-scope-reduce>\nRecommendation: A\nA) Reduce (recommended)\nB) Keep',
        },
      }),
      JSON.stringify({
        timestamp: new Date().toISOString(),
        type: 'event_msg',
        payload: { type: 'user_message', message: 'A' },
      }),
    ];
    fs.writeFileSync(sessionFile, lines.join('\n') + '\n');
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('importer extracts events with codex-import-marker source', async () => {
    const bin = path.join(fixture.workDir, 'bin', 'gstack-codex-session-import');

    const env: NodeJS.ProcessEnv = {
      ...process.env,
      GSTACK_STATE_ROOT: fixture.stateRoot,
      GSTACK_QUESTION_LOG_NO_DERIVE: '1',
    };
    // Drop any inherited GSTACK_HOME, as the unit-test runHook helper in
    // question-preference-hook.test.ts does. NOTE(review): presumably it can
    // redirect state away from GSTACK_STATE_ROOT — confirm against the bins.
    delete env.GSTACK_HOME;

    const res = spawnSync(bin, [sessionFile], {
      env,
      encoding: 'utf-8',
      cwd: fixture.workDir,
    });
    expect(res.status).toBe(0);
    expect(res.stdout).toContain('IMPORTED: 1');

    const logPath = path.join(fixture.stateRoot, 'projects', fixture.slug, 'question-log.jsonl');
    expect(fs.existsSync(logPath)).toBe(true);
    const events = fs
      .readFileSync(logPath, 'utf-8')
      .trim()
      .split('\n')
      .filter(Boolean)
      .map((l) => JSON.parse(l));
    expect(events.length).toBe(1);
    expect(events[0].source).toBe('codex-import-marker');
    expect(events[0].question_id).toBe('plan-eng-review-scope-reduce');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scenario 5: Dream cycle round-trip — capture → distill (mocked) → apply →
|
||||
// re-fire → memory injection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Scenario 5: apply a pre-seeded memory-nugget proposal with
// gstack-distill-apply, then re-fire a question through the preference hook
// and verify the nugget text shows up in additionalContext.
describeIfSelected('PlanTune cathedral E2E: dream cycle', ['plan-tune-dream-cycle'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-dream-');
    // Seed proposals file directly (the SDK call is exercised by the unit
    // test; here we verify apply → re-fire round-trip on top of a known
    // proposal shape).
    fs.mkdirSync(path.join(fixture.stateRoot, 'projects', fixture.slug), { recursive: true });
    fs.writeFileSync(
      path.join(fixture.stateRoot, 'projects', fixture.slug, 'distillation-proposals.json'),
      JSON.stringify({
        generated_at: new Date().toISOString(),
        source_event_count: 1,
        proposals: [
          {
            kind: 'memory-nugget',
            confidence: 0.95,
            nugget: 'User wants every fix tested before shipping',
            applies_to_signal_keys: ['test-discipline'],
            source_quotes: ['always add tests for any fix'],
          },
        ],
      }),
    );
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('apply → re-fire → memory injected via additionalContext', async () => {
    // 1. Apply the proposal via gstack-distill-apply.
    const applyBin = path.join(fixture.workDir, 'bin', 'gstack-distill-apply');
    const applyEnv: NodeJS.ProcessEnv = { ...process.env, GSTACK_STATE_ROOT: fixture.stateRoot };
    // Drop any inherited GSTACK_HOME, as the unit-test runHook helper in
    // question-preference-hook.test.ts does. NOTE(review): presumably it can
    // redirect state away from GSTACK_STATE_ROOT — confirm against the bins.
    delete applyEnv.GSTACK_HOME;
    const applyRes = spawnSync(applyBin, ['--proposal', '0'], {
      env: applyEnv,
      encoding: 'utf-8',
      cwd: fixture.workDir,
    });
    expect(applyRes.status).toBe(0);

    // Memory file should now contain the nugget.
    const memPath = path.join(fixture.stateRoot, 'free-text-memory.json');
    expect(fs.existsSync(memPath)).toBe(true);
    const mem = JSON.parse(fs.readFileSync(memPath, 'utf-8'));
    expect(mem.nuggets.length).toBe(1);

    // 2. Re-fire a question whose signal_key matches the nugget. PreToolUse
    //    hook should surface the nugget via additionalContext.
    const hookPath = path.join(
      fixture.workDir,
      'hosts',
      'claude',
      'hooks',
      'question-preference-hook',
    );
    const payload = {
      session_id: 'cathedral-e2e-dream',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-dream-1',
      tool_input: {
        questions: [
          {
            question:
              '<gstack-qid:plan-eng-review-test-gap> Add tests for this gap?',
            options: ['A) Add (recommended)', 'B) Skip'],
          },
        ],
      },
      cwd: fixture.workDir,
    };
    // Same environment as the apply step, plus the NO_DERIVE flag the hook runs use.
    const hookEnv: NodeJS.ProcessEnv = {
      ...applyEnv,
      GSTACK_QUESTION_LOG_NO_DERIVE: '1',
    };
    const hookRes = spawnSync(hookPath, [], {
      env: hookEnv,
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(hookRes.status).toBe(0);
    const parsed = JSON.parse(hookRes.stdout || '{}');
    expect(parsed.hookSpecificOutput?.additionalContext).toContain('User wants every fix tested');
  });
});
|
||||
@@ -37,13 +37,14 @@ import { logBudgetOverride } from './helpers/budget-override';
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.47.0.0.json');
|
||||
|
||||
// Default per-skill ratio is 1.05 (5% growth tolerance). T4 catalog trim
|
||||
// MOVES text from frontmatter (always-loaded catalog) to a body section
|
||||
// ("## When to invoke"), so small skills with already-short descriptions
|
||||
// see a tiny body growth from the section header itself (~20 bytes). The
|
||||
// 5% per-skill tolerance accommodates that while still catching real bloat;
|
||||
// the always-loaded catalog cost is enforced separately with a hard ceiling.
|
||||
const DEFAULT_RATIO = 1.05;
|
||||
// Default per-skill ratio is 1.50 (50% growth tolerance). Adjusted v1.52.0.0
|
||||
// (cathedral cap audit) from 1.05 → 1.50: a 5% ratio tripped on legitimate
|
||||
// feature additions (e.g., plan-tune cathedral T13 grew SKILL.md ×1.24
|
||||
// adding load-bearing Dream cycle + Audit unmarked + Recent auto-decisions
|
||||
// surfaces). Real bloat is 2-3×; this catches that while not tripping on
|
||||
// normal feature scope. The always-loaded catalog cost is enforced
|
||||
// separately with a hard ceiling.
|
||||
const DEFAULT_RATIO = 1.50;
|
||||
const RATIO = Number(process.env.GSTACK_SIZE_BUDGET_RATIO) || DEFAULT_RATIO;
|
||||
|
||||
interface Regression {
|
||||
|
||||
Reference in New Issue
Block a user