Merge origin/main (v1.52.1.0) into spec-pii-redaction-guard

Resolve bin/gstack-config (keep both redact_* and brain_* config keys).
Regenerate all SKILL.md from merged templates + resolvers (redact-doc resolver
now coexists with main's brain-aware-planning resolvers). Refresh ship goldens.
Move the redaction taxonomy reference in /cso and /spec to a pointer at
lib/redact-patterns.ts (single source of truth) so neither skill inlines the
full catalog — keeps both under the size budget after the merge.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-05-29 18:08:38 -07:00
145 changed files with 14101 additions and 412 deletions
+164
View File
@@ -0,0 +1,164 @@
/**
* brain-cache roundtrip integration tests (T2a / T19).
*
* Exercises the non-MCP-dependent parts of the cache layer:
* - Path resolution per scope (cross-project vs per-project)
* - Atomic _meta.json write/read
* - TTL staleness detection
* - Invalidate clears last_refresh
* - Schema-version mismatch triggers rebuild attempt (D4 A4)
* - Endpoint switch triggers rebuild attempt
*
* The brain-reachable refresh path (MCP fetch + compress) is tested
* separately in brain-cache-stale-but-usable.test.ts using a mocked
* spawnGbrain. T2a focuses on the cache-state machine.
*
* Uses tmp GSTACK_HOME per-test to avoid polluting the real ~/.gstack/.
* Gate-tier, free, ~50ms.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync, readdirSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
/** Scratch GSTACK_HOME for the test in progress; reassigned by beforeEach. */
let TMP_HOME: string;
/** GSTACK_HOME as it was when this file loaded, kept so afterEach can put it back. */
const ORIGINAL_HOME = process.env.GSTACK_HOME;

beforeEach(() => {
  // Every test gets its own scratch directory and points GSTACK_HOME at it.
  const scratchPrefix = join(tmpdir(), 'gstack-cache-test-');
  TMP_HOME = mkdtempSync(scratchPrefix);
  process.env.GSTACK_HOME = TMP_HOME;

  // Evict the cache module from require.cache so the next load is intended
  // to pick up the new GSTACK_HOME.
  const cacheModuleId = require.resolve('../bin/gstack-brain-cache');
  delete require.cache[cacheModuleId];
});
afterEach(() => {
  // Compare against undefined rather than truthiness: an inherited
  // empty-string GSTACK_HOME must be restored, not deleted.
  if (ORIGINAL_HOME !== undefined) {
    process.env.GSTACK_HOME = ORIGINAL_HOME;
  } else {
    delete process.env.GSTACK_HOME;
  }

  // Removing the scratch directory is best effort; a cleanup failure must
  // not turn a passing test into a failing one.
  try {
    rmSync(TMP_HOME, { recursive: true, force: true });
  } catch {
    /* best effort */
  }
});
/**
 * Dynamically loads the brain-cache CLI module.
 *
 * Loaded on demand inside each test (after beforeEach has pointed
 * GSTACK_HOME at the scratch directory) instead of via a static import.
 *
 * @returns the module namespace of ../bin/gstack-brain-cache
 */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const loaded = await import('../bin/gstack-brain-cache');
  return loaded;
}
// Path resolution: where entityPath() places each digest under GSTACK_HOME.
describe('brain-cache paths', () => {
  test('cross-project entity (user-profile) lives in ~/.gstack/brain-cache/', async () => {
    const cache = await importCache();
    const expected = join(TMP_HOME, 'brain-cache', 'user-profile.md');
    expect(cache.entityPath('user-profile', null)).toBe(expected);
  });

  test('per-project entity (product) lives in ~/.gstack/projects/<slug>/brain-cache/', async () => {
    const cache = await importCache();
    const expected = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache', 'product.md');
    expect(cache.entityPath('product', 'helsinki')).toBe(expected);
  });

  test('throws on unknown entity', async () => {
    const cache = await importCache();
    const resolveUnknown = () => cache.entityPath('not-an-entity', null);
    expect(resolveUnknown).toThrow();
  });

  test('per-project entity without slug throws', async () => {
    const cache = await importCache();
    const resolveWithoutSlug = () => cache.entityPath('product', null);
    expect(resolveWithoutSlug).toThrow();
  });
});
// _meta.json lifecycle: reading meta from an empty cache, and invalidating
// an entity that was never refreshed.
describe('brain-cache meta lifecycle', () => {
  test('cmdMeta on empty cache returns valid fresh meta', async () => {
    const cache = await importCache();
    const { schema_version, endpoint_hash, last_refresh } = cache.cmdMeta('helsinki');

    expect(schema_version).toMatch(/^\d+\.\d+\.\d+$/);
    expect(endpoint_hash).toMatch(/^[a-f0-9]{1,8}$|^local$/);
    expect(last_refresh).toEqual({});
  });

  test('cmdInvalidate writes meta even if no prior refresh', async () => {
    const cache = await importCache();
    cache.cmdInvalidate('product', 'helsinki');

    // Nothing was refreshed beforehand, so there is no `product` key to
    // drop; the test expects the meta file to exist on disk afterwards.
    const metaAfter = cache.cmdMeta('helsinki');
    expect(metaAfter.last_refresh.product).toBeUndefined();

    const metaFile = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache', '_meta.json');
    expect(existsSync(metaFile)).toBe(true);
  });
});
// Endpoint detection: detectEndpointHash() must always yield a usable hash.
describe('brain-cache endpoint detection', () => {
  test('detectEndpointHash returns "local" when no ~/.claude.json gbrain MCP', async () => {
    // No ~/.claude.json is written into the temp env. The developer's real
    // ~/.claude.json may still register a gbrain MCP server, in which case
    // the result is a short hex hash rather than "local" — both are valid.
    const mod = await importCache();
    const hash = mod.detectEndpointHash();

    expect(typeof hash).toBe('string');
    expect(hash.length).toBeGreaterThan(0);
    // Pin the format instead of accepting any non-empty string: this is the
    // same shape the cmdMeta test in this file requires of endpoint_hash.
    expect(hash).toMatch(/^[a-f0-9]{1,8}$|^local$/);
  });
});
// Schema drift: a digest written under a different schema version.
describe('brain-cache schema mismatch behavior', () => {
  test('schema-version mismatch in meta triggers full-rebuild attempt on next get', async () => {
    const cache = await importCache();

    // Seed a digest whose timestamp is fresh (warm by TTL) but whose meta
    // carries a different schema version.
    const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
    mkdirSync(cacheDir, { recursive: true });
    writeFileSync(join(cacheDir, 'product.md'), '# stale-from-old-schema\n');

    const outdatedMeta = {
      schema_version: '0.0.1',
      endpoint_hash: cache.detectEndpointHash(),
      last_refresh: { product: Date.now() },
      last_attempt: {},
    };
    writeFileSync(join(cacheDir, '_meta.json'), JSON.stringify(outdatedMeta));

    const { state } = cache.cmdGet('product', 'helsinki');

    // No gbrain mock is installed, so the refresh is expected to fail. The
    // resulting state depends on whether the rebuild step left a file
    // behind; any of these three outcomes is accepted.
    const acceptedStates = ['missing', 'cold-refreshed', 'stale-fallback'];
    expect(acceptedStates).toContain(state);
  });
});
// cmdGet state machine: warm / missing / stale-fallback.
describe('brain-cache state machine', () => {
  type CacheModule = Awaited<ReturnType<typeof importCache>>;

  /**
   * Writes a `product` digest plus a matching _meta.json for project
   * "helsinki" into the scratch GSTACK_HOME.
   *
   * @param cache       loaded cache module (supplies the endpoint hash)
   * @param body        contents of product.md
   * @param refreshedAt value stored as last_refresh.product (epoch ms)
   */
  function seedProductDigest(cache: CacheModule, body: string, refreshedAt: number): void {
    const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
    mkdirSync(cacheDir, { recursive: true });
    writeFileSync(join(cacheDir, 'product.md'), body);

    const meta = {
      schema_version: '1.0.0', // matches GSTACK_SCHEMA_PACK_VERSION
      endpoint_hash: cache.detectEndpointHash(),
      last_refresh: { product: refreshedAt },
      last_attempt: {},
    };
    writeFileSync(join(cacheDir, '_meta.json'), JSON.stringify(meta));
  }

  test('warm: pre-seeded fresh cache returns warm without touching brain', async () => {
    const cache = await importCache();
    const productContent = '# Product: helsinki\n\nA test product.\n';
    seedProductDigest(cache, productContent, Date.now()); // fresh

    const result = cache.cmdGet('product', 'helsinki');
    expect(result.state).toBe('warm');
    expect(readFileSync(result.path, 'utf-8')).toBe(productContent);
  });

  test('missing: no cache + no brain returns missing state', async () => {
    const cache = await importCache();
    const { state } = cache.cmdGet('brand', 'helsinki');
    expect(state).toBe('missing');
  });

  test('stale-fallback: stale cache with unreachable brain returns stale-fallback', async () => {
    const cache = await importCache();
    // last_refresh at the epoch start: far older than the product TTL.
    seedProductDigest(cache, '# stale\n', 0);

    // Brain unreachable -> cold refresh fails -> stale-but-usable fallback.
    const { state } = cache.cmdGet('product', 'helsinki');
    expect(state).toBe('stale-fallback');
  });
});
+169
View File
@@ -0,0 +1,169 @@
/**
* Brain cache spec internal-consistency invariants (T14 / D2).
*
* Asserts that scripts/brain-cache-spec.ts is self-consistent:
* - Every skill's subset only references entities that exist.
* - Per-skill budget cap is achievable given per-entity caps.
* - Cross-project entities are clearly distinguished from per-project.
* - Invalidation graph has no dangling skill references.
* - Helper functions throw on unknown names (defensive).
*
* Gate-tier, free, pure import + assertion. Runs in <100ms.
*/
import { describe, test, expect } from 'bun:test';
import {
BRAIN_CACHE_ENTITIES,
SKILL_DIGEST_SUBSETS,
SKILL_PREFLIGHT_BUDGET_BYTES,
AUTOPLAN_PREFLIGHT_BUDGET_BYTES,
SALIENCE_DEFAULT_ALLOWLIST,
SKILL_CALIBRATION_WEIGHTS,
TRANSPORT_DEFAULT_POLICY,
USER_SLUG_RESOLUTION_ORDER,
GSTACK_SCHEMA_PACK_NAME,
GSTACK_SCHEMA_PACK_VERSION,
CACHE_REFRESH_LOCK_TIMEOUT_MS,
SKILL_RUN_RETENTION_DAYS,
getCacheFile,
getSkillSubset,
getSkillBudget,
getInvalidationTargets,
getPreflightSkills,
getMaxSubsetBytes,
} from '../scripts/brain-cache-spec';
// Self-consistency invariants of scripts/brain-cache-spec.ts. Pure import +
// assertion: nothing here touches disk or the network.
describe('brain-cache-spec internal consistency', () => {
  test('every skill subset references only known entities', () => {
    const entityNames = new Set<string>(Object.keys(BRAIN_CACHE_ENTITIES));
    // Collect offenders so a failure names the skill and the dangling entity
    // instead of reporting a bare "expected true, received false".
    const unknownRefs: string[] = [];
    for (const [skill, subset] of Object.entries(SKILL_DIGEST_SUBSETS)) {
      for (const name of subset) {
        if (!entityNames.has(name)) unknownRefs.push(`${skill} -> ${name}`);
      }
    }
    expect(unknownRefs).toEqual([]);
  });

  test('every skill with a subset has a budget', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      expect(SKILL_PREFLIGHT_BUDGET_BYTES[skill]).toBeGreaterThan(0);
    }
  });

  test('per-skill budget is achievable given per-entity budgets', () => {
    // Per-entity budgets are hard ceilings on each digest's own file size.
    // Per-skill budget is enforced by the compressor on the SUM injected into
    // the skill's preflight context — the same entity may be sampled (top-N)
    // rather than verbatim. So the sum may legitimately exceed the skill
    // budget; the compressor trims at write time. 3x is only a sanity ceiling
    // here (test/skill-preflight-budget.test.ts enforces the real cap).
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const maxBytes = getMaxSubsetBytes(skill);
      const skillBudget = getSkillBudget(skill);
      expect(maxBytes).toBeLessThanOrEqual(skillBudget * 3);
    }
  });

  test('autoplan total budget covers the 4 plan-* skills (excluding office-hours)', () => {
    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    const sum = autoplanSkills.reduce((acc, s) => acc + getSkillBudget(s), 0);
    expect(sum).toBeLessThanOrEqual(AUTOPLAN_PREFLIGHT_BUDGET_BYTES);
  });

  test('every entity has a positive TTL and a positive budget', () => {
    // Only the entity records are inspected, so iterate values rather than
    // destructuring a key that is never read.
    for (const entity of Object.values(BRAIN_CACHE_ENTITIES)) {
      expect(entity.ttl_ms).toBeGreaterThan(0);
      expect(entity.budget_bytes).toBeGreaterThan(0);
      expect(entity.file).toMatch(/\.md$/);
      expect(['cross-project', 'per-project']).toContain(entity.scope);
    }
  });

  test('user-profile is the only cross-project entity', () => {
    const crossProject = Object.entries(BRAIN_CACHE_ENTITIES)
      .filter(([, e]) => e.scope === 'cross-project')
      .map(([n]) => n);
    expect(crossProject).toEqual(['user-profile']);
  });

  test('salience entity has shortest TTL (changes hourly)', () => {
    const ttls = Object.values(BRAIN_CACHE_ENTITIES).map((e) => e.ttl_ms);
    expect(BRAIN_CACHE_ENTITIES.salience.ttl_ms).toBe(Math.min(...ttls));
  });

  test('salience allowlist has sane defaults (no personal/family/therapy)', () => {
    const blocked = ['personal/', 'family/', 'therapy/', 'reflection'];
    for (const prefix of blocked) {
      expect(SALIENCE_DEFAULT_ALLOWLIST.some((p) => p.startsWith(prefix))).toBe(false);
    }
    // Must contain at least projects/ + gstack/ (work-flow surfaces).
    expect(SALIENCE_DEFAULT_ALLOWLIST).toContain('projects/');
    expect(SALIENCE_DEFAULT_ALLOWLIST).toContain('gstack/');
  });

  test('calibration weights are bounded 0-1 and present for all preflight skills', () => {
    for (const skill of getPreflightSkills()) {
      const weight = SKILL_CALIBRATION_WEIGHTS[skill];
      expect(weight).toBeGreaterThan(0);
      expect(weight).toBeLessThanOrEqual(1);
    }
  });

  test('transport policy defaults exist for all transport modes', () => {
    const required = ['local-pglite', 'local-stdio', 'remote-http-single-tenant', 'remote-http-ambiguous'];
    for (const transport of required) {
      expect(TRANSPORT_DEFAULT_POLICY[transport]).toBeDefined();
    }
    // Local transports must default personal (D4 / Phase 1.5 default rule).
    expect(TRANSPORT_DEFAULT_POLICY['local-pglite']).toBe('personal');
    expect(TRANSPORT_DEFAULT_POLICY['local-stdio']).toBe('personal');
    // Ambiguous remote MUST require explicit ask (never silent default).
    expect(TRANSPORT_DEFAULT_POLICY['remote-http-ambiguous']).toBe('unset');
  });

  test('user-slug resolution chain has 4 deterministic fallbacks ending in non-empty', () => {
    expect(USER_SLUG_RESOLUTION_ORDER.length).toBe(4);
    expect(USER_SLUG_RESOLUTION_ORDER[USER_SLUG_RESOLUTION_ORDER.length - 1]).toBe('anonymous_hostname_sha8');
  });

  test('schema pack identity is stable strings', () => {
    expect(GSTACK_SCHEMA_PACK_NAME).toBe('gstack-core');
    expect(GSTACK_SCHEMA_PACK_VERSION).toMatch(/^\d+\.\d+\.\d+$/);
  });

  test('refresh lock timeout matches /sync-gbrain convention (5 min)', () => {
    expect(CACHE_REFRESH_LOCK_TIMEOUT_MS).toBe(5 * 60_000);
  });

  test('skill-run retention is 90 days per D10 lifecycle policy', () => {
    expect(SKILL_RUN_RETENTION_DAYS).toBe(90);
  });

  test('invalidation graph: every "skill-run-write" target also depends on it', () => {
    // What is actually checked: a skill-run-write event invalidates the
    // recent-decisions digest.
    const targets = getInvalidationTargets('skill-run-write');
    expect(targets).toContain('recent-decisions');
  });

  test('invalidation graph: /plan-ceo-review invalidates product + goals + recent-decisions chain', () => {
    // Only product and goals are asserted directly here.
    const targets = getInvalidationTargets('/plan-ceo-review');
    expect(targets).toContain('product');
    expect(targets).toContain('goals');
  });

  test('helpers throw on unknown names (defensive)', () => {
    expect(() => getCacheFile('nonsense-entity')).toThrow();
    expect(() => getSkillSubset('not-a-skill')).toThrow();
    expect(() => getSkillBudget('not-a-skill')).toThrow();
  });

  test('helpers return correct values for known names', () => {
    expect(getCacheFile('product')).toBe('product.md');
    expect(getSkillSubset('plan-eng-review')).toEqual(['product', 'recent-decisions']);
    expect(getSkillBudget('office-hours')).toBe(5120);
  });

  test('all 5 preflight skills are real planning-skill names', () => {
    const expected = ['office-hours', 'plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    // Sort copies: Array.prototype.sort works in place, and the array
    // returned by the helper should not be reordered by a test.
    expect([...getPreflightSkills()].sort()).toEqual([...expected].sort());
  });
});
+166
View File
@@ -0,0 +1,166 @@
/**
* Brain-aware planning resolver tests (T4 / T19).
*
* Verifies the three resolvers in scripts/resolvers/gbrain.ts:
* - generateBrainPreflight — fires for preflight skills, empty for others
* - generateBrainCacheRefresh — same gating
* - generateBrainWriteBack — same gating; only weighted skills emit
*
* Gate-tier, free, pure import + render.
*/
import { describe, test, expect } from 'bun:test';
import {
generateBrainPreflight,
generateBrainCacheRefresh,
generateBrainWriteBack,
} from '../scripts/resolvers/gbrain';
import { SKILL_DIGEST_SUBSETS } from '../scripts/brain-cache-spec';
import { HOST_PATHS } from '../scripts/resolvers/types';
import type { TemplateContext } from '../scripts/resolvers/types';
/**
 * Builds the TemplateContext handed to a resolver for one skill, always on
 * the `claude` host.
 *
 * @param skillName skill whose template is being rendered
 * @returns context with a synthetic /tmp template path for that skill
 */
function buildCtx(skillName: string): TemplateContext {
  const host = 'claude';
  const tmplPath = `/tmp/${skillName}/SKILL.md.tmpl`;
  return { skillName, tmplPath, host, paths: HOST_PATHS[host] };
}
// generateBrainPreflight: emits the "## Brain Context" block for skills that
// have a digest subset, and nothing for every other skill.
describe('generateBrainPreflight', () => {
  test('emits content for every registered preflight skill', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const out = generateBrainPreflight(buildCtx(skill));
      expect(out.length).toBeGreaterThan(0);
      expect(out).toContain('## Brain Context');
      expect(out).toContain('gstack-brain-cache get');
    }
  });

  test('emits empty string for non-preflight skills (no behavior)', () => {
    const nonPlanning = ['ship', 'qa', 'investigate', 'retro', 'design-review'];
    for (const skill of nonPlanning) {
      expect(generateBrainPreflight(buildCtx(skill))).toBe('');
    }
  });

  test('includes per-skill subset entities (office-hours loads 5 digests)', () => {
    const out = generateBrainPreflight(buildCtx('office-hours'));
    // office-hours loads: product, goals, user-profile, recent-decisions, salience
    expect(out).toContain('product');
    expect(out).toContain('goals');
    expect(out).toContain('user-profile');
    expect(out).toContain('recent-decisions');
    expect(out).toContain('salience');
  });

  test('plan-eng-review loads minimal subset (2 digests)', () => {
    const out = generateBrainPreflight(buildCtx('plan-eng-review'));
    expect(out).toContain('product');
    expect(out).toContain('recent-decisions');
    // Should NOT load brand or developer-persona.
    expect(out).not.toContain('gstack-brain-cache get brand');
    expect(out).not.toContain('gstack-brain-cache get developer-persona');
  });

  test('mentions D9 salience privacy in the prose (transparency)', () => {
    const out = generateBrainPreflight(buildCtx('office-hours'));
    expect(out.toLowerCase()).toContain('privacy');
    expect(out.toLowerCase()).toContain('allowlist');
  });

  test('user-profile is loaded WITHOUT --project flag (cross-project)', () => {
    const out = generateBrainPreflight(buildCtx('office-hours'));
    // Match the get call itself, not any line that merely mentions
    // user-profile (a comment may mention --project).
    const getLine = out.split('\n').find((l) => l.includes('gstack-brain-cache get user-profile'));
    // Require the line to exist: with a '' fallback the negative assertion
    // below would pass even if the get call were missing entirely.
    expect(getLine).toBeDefined();
    expect(getLine ?? '').not.toContain('--project');
  });

  test('per-project entities are loaded WITH --project "$SLUG"', () => {
    const out = generateBrainPreflight(buildCtx('plan-eng-review'));
    expect(out).toContain('--project "$SLUG"');
  });
});
// generateBrainCacheRefresh: same gating as the preflight resolver.
describe('generateBrainCacheRefresh', () => {
  test('emits refresh hook for preflight skills', () => {
    const rendered = generateBrainCacheRefresh(buildCtx('plan-ceo-review'));
    for (const marker of ['Background Refresh', 'gstack-brain-cache refresh']) {
      expect(rendered).toContain(marker);
    }
  });

  test('empty for non-preflight skills', () => {
    const rendered = generateBrainCacheRefresh(buildCtx('ship'));
    expect(rendered).toBe('');
  });

  test('uses background backgrounding (does not block user)', () => {
    const rendered = generateBrainCacheRefresh(buildCtx('plan-ceo-review'));
    // The emitted shell is expected to background the refresh, hence an `&`.
    expect(rendered).toContain('&');
  });
});
// generateBrainWriteBack: calibration write-back block for preflight skills.
describe('generateBrainWriteBack', () => {
  test('emits write-back block for all 5 weighted preflight skills', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const rendered = generateBrainWriteBack(buildCtx(skill));
      expect(rendered.length).toBeGreaterThan(0);
      expect(rendered).toContain('Calibration Write-Back');
      expect(rendered).toContain('BRAIN_CALIBRATION_WRITEBACK');
    }
  });

  test('empty for non-preflight skills', () => {
    const rendered = generateBrainWriteBack(buildCtx('ship'));
    expect(rendered).toBe('');
  });

  test('includes per-skill calibration weight (E5)', () => {
    // [skill, marker expected in that skill's rendered block]
    const expectedWeights: Array<[string, string]> = [
      ['plan-ceo-review', 'weight: 0.8'], // SKILL_CALIBRATION_WEIGHTS['plan-ceo-review'] = 0.8
      ['office-hours', 'weight: 0.9'], // strongest calibration weight
      ['plan-design-review', 'weight: 0.5'], // weakest (design predictions are noisy)
    ];
    for (const [skill, marker] of expectedWeights) {
      expect(generateBrainWriteBack(buildCtx(skill))).toContain(marker);
    }
  });

  test('mentions personal trust policy gate (D11 codex tension)', () => {
    const rendered = generateBrainWriteBack(buildCtx('plan-ceo-review'));
    const lowered = rendered.toLowerCase();
    expect(lowered).toContain('personal');
    expect(rendered).toContain('brain_trust_policy');
  });

  test('mentions fallback path when takes_add MCP op unavailable (upstream T8)', () => {
    const rendered = generateBrainWriteBack(buildCtx('plan-ceo-review'));
    for (const marker of ['put_page', 'takes']) {
      expect(rendered).toContain(marker);
    }
  });

  test('emits invalidation bash for affected cache digests', () => {
    const rendered = generateBrainWriteBack(buildCtx('plan-ceo-review'));
    // Only the presence of an invalidate command is checked, not which
    // digests it names.
    expect(rendered).toContain('gstack-brain-cache invalidate');
  });
});
// Registration: the three brain placeholders must be present in RESOLVERS.
describe('resolver registration in index.ts', () => {
  /** Loads the resolver registry on demand, once per test. */
  const loadResolvers = async () => {
    const registryModule = await import('../scripts/resolvers/index');
    return registryModule.RESOLVERS;
  };

  test('BRAIN_PREFLIGHT placeholder is registered', async () => {
    const registry = await loadResolvers();
    const resolver = registry.BRAIN_PREFLIGHT;
    expect(resolver).toBeDefined();
    expect(typeof resolver).toBe('function');
  });

  test('BRAIN_CACHE_REFRESH placeholder is registered', async () => {
    const registry = await loadResolvers();
    expect(registry.BRAIN_CACHE_REFRESH).toBeDefined();
  });

  test('BRAIN_WRITE_BACK placeholder is registered', async () => {
    const registry = await loadResolvers();
    expect(registry.BRAIN_WRITE_BACK).toBeDefined();
  });
});
+153
View File
@@ -0,0 +1,153 @@
/**
* Concurrent-refresh lockfile dedup (T15 / D3).
*
* When autoplan dispatches 4 planning skills back-to-back and they all hit a
* cold-miss on the same digest, only ONE should actually fetch from the brain;
* the rest dedup via the project-scoped lockfile at
* ~/.gstack/projects/<slug>/brain-cache/.refresh.lock. Stale locks (process
* dead, or older than CACHE_REFRESH_LOCK_TIMEOUT_MS) are taken over.
*
* Gate-tier, free, pure file-IO. Uses tmp GSTACK_HOME.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync, unlinkSync } from 'fs';
import { join } from 'path';
import { tmpdir, hostname } from 'os';
/** Scratch GSTACK_HOME for the test in progress; reassigned by beforeEach. */
let TMP_HOME: string;
/** GSTACK_HOME as it was when this file loaded, kept so afterEach can put it back. */
const ORIGINAL_HOME = process.env.GSTACK_HOME;

beforeEach(() => {
  // Every test gets its own scratch directory and points GSTACK_HOME at it.
  const scratchPrefix = join(tmpdir(), 'gstack-lock-test-');
  TMP_HOME = mkdtempSync(scratchPrefix);
  process.env.GSTACK_HOME = TMP_HOME;

  // Evict the cache module from require.cache ahead of the next load.
  const cacheModuleId = require.resolve('../bin/gstack-brain-cache');
  delete require.cache[cacheModuleId];
});
afterEach(() => {
  // Compare against undefined rather than truthiness: an inherited
  // empty-string GSTACK_HOME must be restored, not deleted.
  if (ORIGINAL_HOME !== undefined) {
    process.env.GSTACK_HOME = ORIGINAL_HOME;
  } else {
    delete process.env.GSTACK_HOME;
  }

  // Removing the scratch directory is best effort; a cleanup failure must
  // not turn a passing test into a failing one.
  try {
    rmSync(TMP_HOME, { recursive: true, force: true });
  } catch {
    /* best effort */
  }
});
/**
 * Dynamically loads the brain-cache CLI module.
 *
 * Loaded on demand inside each test (after beforeEach has pointed
 * GSTACK_HOME at the scratch directory) instead of via a static import.
 *
 * @returns the module namespace of ../bin/gstack-brain-cache
 */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const loaded = await import('../bin/gstack-brain-cache');
  return loaded;
}
// withRefreshLock: acquisition, dedup against a live foreign lock, takeover
// of stale / dead / corrupt locks, and release on both success and throw.
describe('concurrent-refresh lockfile dedup', () => {
  // Resolved lazily: TMP_HOME is reassigned before every test.
  const projectCacheDir = (): string => join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
  const projectLockFile = (): string => join(projectCacheDir(), '.refresh.lock');

  test('first caller acquires lock; second concurrent caller deduplicates', async () => {
    const mod = await importCache();
    // Pre-create the directory so first use cannot race on mkdir.
    mkdirSync(projectCacheDir(), { recursive: true });
    const lockFile = projectLockFile();

    // First caller: no lock exists, so the callback runs and its value is
    // returned.
    let callbackRan = 0;
    const firstResult = mod.withRefreshLock('helsinki', () => {
      callbackRan++;
      return 'first';
    });
    expect(firstResult).toBe('first');
    expect(callbackRan).toBe(1);

    // withRefreshLock is synchronous, so a genuinely in-flight holder cannot
    // be kept alive inside this process. Simulate one with a fresh lock
    // owned by another host.
    writeFileSync(lockFile, JSON.stringify({
      pid: 999999, // unlikely-to-exist pid on host
      host: 'some-other-host',
      ts: Date.now(),
    }));
    const innerResult = mod.withRefreshLock('helsinki', () => 'inner');
    expect(innerResult).toBe('dedup');

    // Cleanup
    try { unlinkSync(lockFile); } catch { /* best effort */ }
  });

  test('stale lock (older than timeout) is taken over', async () => {
    const mod = await importCache();
    mkdirSync(projectCacheDir(), { recursive: true });
    // Lock is 10 minutes old — way past the 5-min timeout.
    writeFileSync(projectLockFile(), JSON.stringify({
      pid: 999999,
      host: 'some-other-host',
      ts: Date.now() - 10 * 60_000,
    }));
    const result = mod.withRefreshLock('helsinki', () => 'took-over');
    expect(result).toBe('took-over');
  });

  test('lock from same host with dead PID is taken over', async () => {
    const mod = await importCache();
    mkdirSync(projectCacheDir(), { recursive: true });
    // Same host, but PID 999999 which is unlikely to exist.
    writeFileSync(projectLockFile(), JSON.stringify({
      pid: 999999,
      host: hostname(),
      ts: Date.now(),
    }));
    const result = mod.withRefreshLock('helsinki', () => 'took-over-dead-pid');
    expect(result).toBe('took-over-dead-pid');
  });

  test('lock is released after callback runs', async () => {
    const mod = await importCache();
    mkdirSync(projectCacheDir(), { recursive: true });
    mod.withRefreshLock('helsinki', () => 'done');
    expect(existsSync(projectLockFile())).toBe(false);
  });

  test('lock is released even when callback throws', async () => {
    const mod = await importCache();
    mkdirSync(projectCacheDir(), { recursive: true });
    expect(() => {
      mod.withRefreshLock('helsinki', () => {
        throw new Error('callback failed');
      });
    }).toThrow();
    expect(existsSync(projectLockFile())).toBe(false);
  });

  test('corrupt lock file is taken over (defensive)', async () => {
    const mod = await importCache();
    mkdirSync(projectCacheDir(), { recursive: true });
    writeFileSync(projectLockFile(), 'not valid json {{{');
    const result = mod.withRefreshLock('helsinki', () => 'recovered');
    expect(result).toBe('recovered');
  });

  test('cross-project lock uses ~/.gstack/brain-cache/.refresh.lock', async () => {
    const mod = await importCache();
    mkdirSync(join(TMP_HOME, 'brain-cache'), { recursive: true });
    const lockFile = join(TMP_HOME, 'brain-cache', '.refresh.lock');

    // Observe the lock from inside the callback. Asserting only that the
    // file is absent afterwards would pass even if the lock never lived at
    // this path.
    const lockSeenWhileHeld: boolean[] = [];
    mod.withRefreshLock(null, () => {
      lockSeenWhileHeld.push(existsSync(lockFile));
      return 'cross-project';
    });
    expect(lockSeenWhileHeld).toEqual([true]);
    expect(existsSync(lockFile)).toBe(false); // released
  });
});
+18 -12
View File
@@ -1,7 +1,10 @@
/**
* Cross-skill taxonomy alignment. /cso renders the full generated taxonomy table;
* /spec references it without inlining. Both derive from lib/redact-patterns via
* the shared resolver, so a manual edit to the wrong place is caught here.
* Cross-skill taxonomy alignment. The canonical taxonomy lives in
* lib/redact-patterns.ts (single source of truth). /spec and /cso both reference
* it by pointer rather than inlining the full catalog (size discipline). This
* test guards that the recognizable HIGH-tier prefixes stay present in /cso's
* archaeology prose and that the resolver-generated table stays derived from the
* lib (no drift between the generator and the pattern source).
*/
import { describe, test, expect } from "bun:test";
import * as fs from "fs";
@@ -15,17 +18,20 @@ const CSO = fs.readFileSync(path.join(ROOT, "cso", "SKILL.md"), "utf-8");
const ctx = { skillName: "cso", tmplPath: "", host: "claude" as const, paths: HOST_PATHS["claude"] };
describe("cso/spec taxonomy alignment", () => {
test("cso renders the full generated taxonomy table verbatim", () => {
const table = generateRedactTaxonomyTable(ctx);
// A couple of representative lines from the generated table must appear in /cso.
const line = table.split("\n").find((l) => l.includes("`aws.access_key`"));
expect(line).toBeTruthy();
expect(CSO).toContain(line!);
test("cso archaeology names the recognizable HIGH-tier prefixes", () => {
for (const s of ["AKIA", "ghp_", "sk-ant-", "BEGIN"]) {
expect(CSO).toContain(s);
}
});
test("cso lists every HIGH-tier credential id (the archaeology contract, no drift)", () => {
for (const p of PATTERNS.filter((x) => x.tier === "HIGH")) {
expect(CSO).toContain(`\`${p.id}\``);
test("cso points to lib/redact-patterns.ts as the single source of truth", () => {
expect(CSO).toContain("lib/redact-patterns.ts");
});
test("the generated taxonomy table is derived from lib (every pattern id present)", () => {
const table = generateRedactTaxonomyTable(ctx);
for (const p of PATTERNS) {
expect(table).toContain(`\`${p.id}\``);
}
});
+129
View File
@@ -0,0 +1,129 @@
/**
* Declared annotation helper (plan-tune cathedral T7) — unit tests.
*
* Verifies the helper's contract:
* - Returns null for unknown signal_key.
* - Returns null when the profile doesn't exist or declared is unset.
* - Returns a phrase when declared >= 0.7 (strong high band).
* - Returns a phrase when declared <= 0.3 (strong low band).
* - Returns null when declared is in the middle band (0.3 < x < 0.7).
* - primaryDimensionFor picks the dimension with largest |delta| total.
* - Maps kebab signal_key to underscore Dimension correctly (D2 fix).
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { getDeclaredAnnotation, primaryDimensionFor } from '../scripts/declared-annotation';
/** GSTACK_STATE_ROOT as it was before the current test, restored in afterEach. */
let prevStateRoot: string | undefined;
/** GSTACK_HOME as it was before the current test, restored in afterEach. */
let prevHome: string | undefined;
/** Scratch state directory for the current test. */
let stateRoot: string;

beforeEach(() => {
  // Snapshot both variables before touching either of them.
  const { GSTACK_STATE_ROOT, GSTACK_HOME } = process.env;
  prevStateRoot = GSTACK_STATE_ROOT;
  prevHome = GSTACK_HOME;

  // Point GSTACK_STATE_ROOT at a fresh scratch directory and make sure
  // GSTACK_HOME is unset while the test runs.
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-annot-'));
  process.env.GSTACK_STATE_ROOT = stateRoot;
  delete process.env.GSTACK_HOME;
});
afterEach(() => {
  // Restore both variables symmetrically: put the saved value back, or
  // remove the variable when it was originally unset.
  if (prevStateRoot !== undefined) process.env.GSTACK_STATE_ROOT = prevStateRoot;
  else delete process.env.GSTACK_STATE_ROOT;

  // beforeEach unsets GSTACK_HOME, but a test may set it again; clear it
  // explicitly so nothing leaks into the next test.
  if (prevHome !== undefined) process.env.GSTACK_HOME = prevHome;
  else delete process.env.GSTACK_HOME;

  fs.rmSync(stateRoot, { recursive: true, force: true });
});
/**
 * Writes developer-profile.json into the scratch state root, containing only
 * a `declared` section.
 *
 * @param declared declared value per dimension key
 */
function writeProfile(declared: Record<string, number>): void {
  const profileFile = path.join(stateRoot, 'developer-profile.json');
  const serialized = JSON.stringify({ declared }, null, 2);
  fs.writeFileSync(profileFile, serialized);
}
// ----------------------------------------------------------------------
// primaryDimensionFor — kebab→underscore mapping
// ----------------------------------------------------------------------
// Kebab-case signal keys map to underscore dimension names; anything the
// helper does not recognize yields null.
describe('primaryDimensionFor', () => {
  test('scope-appetite → scope_appetite (largest |delta| total)', () => {
    const dimension = primaryDimensionFor('scope-appetite');
    expect(dimension).toBe('scope_appetite');
  });

  test('architecture-care → architecture_care (top dim by |delta|)', () => {
    const dimension = primaryDimensionFor('architecture-care');
    expect(dimension).toBe('architecture_care');
  });

  test('unknown signal_key → null', () => {
    const dimension = primaryDimensionFor('totally-not-a-key');
    expect(dimension).toBeNull();
  });

  test('empty/garbage input → null', () => {
    const dimension = primaryDimensionFor('');
    expect(dimension).toBeNull();
  });
});
// ----------------------------------------------------------------------
// getDeclaredAnnotation
// ----------------------------------------------------------------------
// Band behaviour: a phrase at declared >= 0.7 or <= 0.3, null in between,
// and null when the profile, the dimension, or the signal key is missing.
describe('getDeclaredAnnotation', () => {
  const SIGNAL = 'scope-appetite';

  test('returns null when no profile exists', () => {
    expect(getDeclaredAnnotation(SIGNAL)).toBeNull();
  });

  test('returns null when declared unset for the dimension', () => {
    writeProfile({});
    expect(getDeclaredAnnotation(SIGNAL)).toBeNull();
  });

  test('returns null when declared is in middle band (0.5)', () => {
    writeProfile({ scope_appetite: 0.5 });
    expect(getDeclaredAnnotation(SIGNAL)).toBeNull();
  });

  test('returns high-band phrase when declared >= 0.7', () => {
    writeProfile({ scope_appetite: 0.85 });
    const phrase = getDeclaredAnnotation(SIGNAL);
    expect(phrase).toBeTruthy();
    expect(phrase).toContain('boil the ocean');
  });

  test('returns high-band phrase at the exact 0.7 threshold', () => {
    writeProfile({ scope_appetite: 0.7 });
    const phrase = getDeclaredAnnotation(SIGNAL);
    expect(phrase).toContain('boil the ocean');
  });

  test('returns low-band phrase when declared <= 0.3', () => {
    writeProfile({ scope_appetite: 0.2 });
    const phrase = getDeclaredAnnotation(SIGNAL);
    expect(phrase).toBeTruthy();
    expect(phrase).toContain('ship-small-fast');
  });

  test('returns low-band phrase at the exact 0.3 threshold', () => {
    writeProfile({ scope_appetite: 0.3 });
    const phrase = getDeclaredAnnotation(SIGNAL);
    expect(phrase).toContain('ship-small-fast');
  });

  test('returns null for unknown signal_key even when profile populated', () => {
    writeProfile({ scope_appetite: 0.85 });
    expect(getDeclaredAnnotation('totally-not-a-key')).toBeNull();
  });

  test('all 5 dimensions render distinct high-band phrases', () => {
    // All five dimensions are populated in the profile.
    // NOTE(review): only scope-appetite and architecture-care are asserted
    // below; the other three dimensions' phrases are not checked here.
    const allHigh = {
      scope_appetite: 0.9,
      risk_tolerance: 0.9,
      detail_preference: 0.9,
      autonomy: 0.9,
      architecture_care: 0.9,
    };
    writeProfile(allHigh);

    const scopePhrase = getDeclaredAnnotation('scope-appetite');
    const archPhrase = getDeclaredAnnotation('architecture-care');
    expect(scopePhrase).toContain('boil the ocean');
    expect(archPhrase).toContain('design-right');
  });
});
+300
View File
@@ -0,0 +1,300 @@
/**
* gstack-distill-apply — Layer 8 proposal application (plan-tune cathedral T11).
*
* Verifies the three apply paths:
* - memory-nugget → appended to ~/.gstack/free-text-memory.json (local
* source-of-truth; gbrain is mirror when configured).
* - preference → routed through gstack-question-preference with
* source=plan-tune (user-origin gate cleared).
* - declared-nudge → atomic update to developer-profile.json declared dim,
* small=0.05, medium=0.10, large=0.15, clamped to [0,1].
* Plus:
* - --list shows proposals with kind, confidence, rationale, quotes.
* - Applied proposals get applied_at + gbrain_published flag.
* - Bad --proposal index errors with non-zero exit.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
const ROOT = path.resolve(import.meta.dir, '..');
const BIN = path.join(ROOT, 'bin', 'gstack-distill-apply');
let stateRoot: string;
let fixtureCwd: string;
let cwdSlug: string;
let proposalFile: string;
// Every test gets its own throwaway state root containing the fixture working
// directory and the per-project directory where the proposals file lives.
beforeEach(() => {
  cwdSlug = 'apply-fixture';
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-apply-'));
  fixtureCwd = path.join(stateRoot, cwdSlug);
  const projectDir = path.join(stateRoot, 'projects', cwdSlug);
  for (const dir of [fixtureCwd, projectDir]) {
    fs.mkdirSync(dir, { recursive: true });
  }
  proposalFile = path.join(projectDir, 'distillation-proposals.json');
});

// Remove the whole state root; `force` keeps teardown quiet if it is already gone.
afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
/**
 * Write the proposals fixture to `proposalFile` as pretty-printed JSON.
 *
 * The envelope carries a fresh `generated_at` timestamp and a fixed
 * `source_event_count` of 1 alongside the given proposals.
 *
 * @param proposals proposal records to store, in order
 */
function writeProposals(proposals: Array<Record<string, unknown>>): void {
  const envelope = {
    generated_at: new Date().toISOString(),
    source_event_count: 1,
    proposals,
  };
  const serialized = JSON.stringify(envelope, null, 2);
  fs.writeFileSync(proposalFile, serialized);
}
/**
 * Run the gstack-distill-apply binary synchronously from the fixture cwd.
 *
 * The child inherits the current environment with GSTACK_STATE_ROOT pointed
 * at the per-test state root, GSTACK_QUESTION_LOG_NO_DERIVE set to '1', and
 * GSTACK_HOME removed.
 *
 * @param args CLI arguments passed to the binary
 * @returns captured stdout/stderr and the exit status; status is -1 when the
 *   process produced no exit code (for example, it could not be spawned). In
 *   that case the spawn error message is prepended to stderr so the failure
 *   is not reported as an empty result.
 */
function run(args: string[]): { stdout: string; stderr: string; status: number } {
  const env: Record<string, string> = {};
  for (const [k, v] of Object.entries(process.env)) {
    if (v !== undefined) env[k] = v;
  }
  env.GSTACK_STATE_ROOT = stateRoot;
  env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
  delete env.GSTACK_HOME;
  const res = spawnSync(BIN, args, { env, encoding: 'utf-8', cwd: fixtureCwd });
  // spawnSync reports launch failures via `error` instead of throwing.
  const spawnFailure = res.error ? `spawn ${BIN} failed: ${res.error.message}\n` : '';
  return {
    stdout: res.stdout ?? '',
    stderr: spawnFailure + (res.stderr ?? ''),
    status: res.status ?? -1,
  };
}
// ----------------------------------------------------------------------
// --list
// ----------------------------------------------------------------------
describe('--list', () => {
  test('handles missing proposals file', () => {
    // No proposals file has been written for this test.
    const { status, stdout } = run(['--list']);
    expect(status).toBe(0);
    expect(stdout).toMatch(/NO_PROPOSALS/);
  });

  test('renders all 3 kinds + source quotes', () => {
    const preference = {
      kind: 'preference',
      confidence: 0.9,
      question_id: 'ship-changelog-voice-polish',
      preference: 'never-ask',
      rationale: 'user repeatedly skipped this',
      source_quotes: ['skip the polish for typo PRs'],
    };
    const declaredNudge = {
      kind: 'declared-nudge',
      confidence: 0.85,
      dimension: 'scope_appetite',
      direction: 'up',
      magnitude: 'medium',
    };
    const memoryNugget = {
      kind: 'memory-nugget',
      confidence: 0.95,
      nugget: 'User prefers complete edge cases',
      applies_to_signal_keys: ['scope-appetite'],
    };
    writeProposals([preference, declaredNudge, memoryNugget]);

    const { status, stdout } = run(['--list']);
    expect(status).toBe(0);
    // Each kind label, the quoted source text and the nugget's signal key
    // must all appear in the listing.
    const expectedFragments = [
      'preference',
      'declared-nudge',
      'memory-nugget',
      'skip the polish for typo PRs',
      'scope-appetite',
    ];
    for (const fragment of expectedFragments) {
      expect(stdout).toContain(fragment);
    }
  });
});
// ----------------------------------------------------------------------
// memory-nugget application
// ----------------------------------------------------------------------
describe('memory-nugget apply', () => {
  // Resolved lazily because stateRoot is reassigned before every test.
  const memoryPath = (): string => path.join(stateRoot, 'free-text-memory.json');
  const readMemory = () => JSON.parse(fs.readFileSync(memoryPath(), 'utf-8'));

  test('appends to ~/.gstack/free-text-memory.json with full metadata', () => {
    writeProposals([
      {
        kind: 'memory-nugget',
        confidence: 0.9,
        nugget: 'User prefers verbose explanations with tradeoffs',
        applies_to_signal_keys: ['detail-preference'],
        source_quotes: ['always explain the tradeoffs'],
      },
    ]);
    const r = run(['--proposal', '0', '--gbrain-published', 'true']);
    expect(r.status).toBe(0);
    expect(r.stdout).toContain('APPLIED: memory-nugget');
    const mem = readMemory();
    expect(mem.nuggets.length).toBe(1);
    expect(mem.nuggets[0].nugget).toContain('verbose explanations');
    expect(mem.nuggets[0].applies_to_signal_keys).toEqual(['detail-preference']);
    expect(mem.nuggets[0].gbrain_published).toBe(true);
    expect(mem.nuggets[0].source_quotes).toEqual(['always explain the tradeoffs']);
  });

  test('appends without clobbering existing nuggets', () => {
    fs.writeFileSync(
      memoryPath(),
      JSON.stringify({ nuggets: [{ nugget: 'pre-existing', applies_to_signal_keys: [] }] }),
    );
    writeProposals([
      {
        kind: 'memory-nugget',
        confidence: 0.9,
        nugget: 'new nugget',
        applies_to_signal_keys: [],
      },
    ]);
    const r = run(['--proposal', '0']);
    // Check the exit status first so a failed apply is reported as such
    // rather than as a nugget-count mismatch.
    expect(r.status).toBe(0);
    const mem = readMemory();
    expect(mem.nuggets.length).toBe(2);
    expect(mem.nuggets[0].nugget).toBe('pre-existing');
    expect(mem.nuggets[1].nugget).toBe('new nugget');
  });
});
// ----------------------------------------------------------------------
// preference application
// ----------------------------------------------------------------------
describe('preference apply', () => {
  test('routes through gstack-question-preference with source=plan-tune', () => {
    const questionId = 'ship-changelog-voice-polish';
    writeProposals([
      {
        kind: 'preference',
        confidence: 0.9,
        question_id: questionId,
        preference: 'never-ask',
        source_quotes: ['skip the polish for typo PRs'],
      },
    ]);

    const { status, stdout } = run(['--proposal', '0']);
    expect(status).toBe(0);
    expect(stdout).toContain('APPLIED: preference');

    // The applied preference is read back from the per-project preferences file.
    const prefsFile = path.join(stateRoot, 'projects', cwdSlug, 'question-preferences.json');
    const stored = JSON.parse(fs.readFileSync(prefsFile, 'utf-8'));
    expect(stored[questionId]).toBe('never-ask');
  });
});
// ----------------------------------------------------------------------
// declared-nudge application
// ----------------------------------------------------------------------
describe('declared-nudge apply', () => {
  // Resolved lazily because stateRoot is reassigned before every test.
  const profilePath = (): string => path.join(stateRoot, 'developer-profile.json');

  /** Seed developer-profile.json with a declared scope_appetite value. */
  function seedScopeAppetite(value: number): void {
    fs.writeFileSync(profilePath(), JSON.stringify({ declared: { scope_appetite: value } }));
  }

  /**
   * Apply a single scope_appetite nudge proposal and return the resulting
   * declared value. The exit status is asserted before the profile is read,
   * so a failed apply is reported as a non-zero status rather than as a
   * missing file or an unchanged value.
   */
  function applyScopeNudge(direction: 'up' | 'down', magnitude: 'small' | 'medium' | 'large'): number {
    writeProposals([
      {
        kind: 'declared-nudge',
        confidence: 0.9,
        dimension: 'scope_appetite',
        direction,
        magnitude,
      },
    ]);
    const r = run(['--proposal', '0']);
    expect(r.status).toBe(0);
    const profile = JSON.parse(fs.readFileSync(profilePath(), 'utf-8'));
    return profile.declared.scope_appetite;
  }

  test('medium up nudge on unset dim → 0.5 + 0.10 = 0.6', () => {
    expect(applyScopeNudge('up', 'medium')).toBe(0.6);
  });

  test('small down nudge on existing value', () => {
    seedScopeAppetite(0.8);
    expect(applyScopeNudge('down', 'small')).toBe(0.75);
  });

  test('clamps to [0, 1]', () => {
    seedScopeAppetite(0.95);
    expect(applyScopeNudge('up', 'large')).toBe(1);
  });
});
// ----------------------------------------------------------------------
// Proposal marked applied
// ----------------------------------------------------------------------
describe('proposal marked applied', () => {
  test('applied_at + gbrain_published written back to proposals.json', () => {
    writeProposals([
      {
        kind: 'memory-nugget',
        confidence: 0.9,
        nugget: 'something',
        applies_to_signal_keys: [],
      },
    ]);
    const r = run(['--proposal', '0', '--gbrain-published', 'true']);
    // Check the exit status first so a failed apply is reported directly
    // instead of as missing write-back fields.
    expect(r.status).toBe(0);
    const p = JSON.parse(fs.readFileSync(proposalFile, 'utf-8'));
    expect(p.proposals[0].applied_at).toBeTruthy();
    expect(p.proposals[0].gbrain_published).toBe(true);
  });
});
// ----------------------------------------------------------------------
// Error paths
// ----------------------------------------------------------------------
describe('error paths', () => {
  // Both cases start from the same single-proposal file; built fresh per call.
  const oneNugget = (): Array<Record<string, unknown>> => [
    { kind: 'memory-nugget', confidence: 0.9, nugget: 'x', applies_to_signal_keys: [] },
  ];

  test('bad --proposal index exits non-zero', () => {
    writeProposals(oneNugget());
    // Index 99 is out of range for a one-element proposals list.
    const { status, stderr } = run(['--proposal', '99']);
    expect(status).not.toBe(0);
    expect(stderr).toContain('invalid --proposal');
  });

  test('missing --proposal exits non-zero', () => {
    writeProposals(oneNugget());
    const { status, stderr } = run([]);
    expect(status).not.toBe(0);
    expect(stderr).toContain('--proposal');
  });
});
+205
View File
@@ -0,0 +1,205 @@
/**
* gstack-distill-free-text — Layer 8 dream cycle (plan-tune cathedral T10).
*
* Covers the SDK-free paths: status, dry-run, rate cap, no-event handling.
* The real API call path is exercised by the E2E test in T16; here we
* verify the bin's deterministic plumbing without burning tokens.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
const ROOT = path.resolve(import.meta.dir, '..');
const BIN = path.join(ROOT, 'bin', 'gstack-distill-free-text');
const QLOG_BIN = path.join(ROOT, 'bin', 'gstack-question-log');
let stateRoot: string;
let fixtureCwd: string;
let cwdSlug: string;
// Every test gets its own throwaway state root with a fixture working
// directory inside it.
beforeEach(() => {
  cwdSlug = 'distill-fixture';
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-dist-'));
  fixtureCwd = path.join(stateRoot, cwdSlug);
  fs.mkdirSync(fixtureCwd, { recursive: true });
});

// Remove the whole state root; `force` keeps teardown quiet if it is already gone.
afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
/**
 * Build the environment for child processes spawned by these tests.
 *
 * Starts from the current process environment minus GSTACK_HOME, points
 * GSTACK_STATE_ROOT at the per-test state root, sets
 * GSTACK_QUESTION_LOG_NO_DERIVE to '1', then applies `extra` last so callers
 * can override any of those.
 *
 * @param extra additional or overriding variables
 * @returns a fresh object; process.env itself is not modified
 */
function makeEnv(extra: Record<string, string> = {}): Record<string, string> {
  const inherited: Record<string, string> = {};
  Object.entries(process.env).forEach(([name, value]) => {
    if (value === undefined || name === 'GSTACK_HOME') return;
    inherited[name] = value;
  });
  return {
    ...inherited,
    GSTACK_STATE_ROOT: stateRoot,
    GSTACK_QUESTION_LOG_NO_DERIVE: '1',
    ...extra,
  };
}
/**
 * Run the gstack-distill-free-text binary synchronously from the fixture cwd
 * with the environment produced by makeEnv().
 *
 * @param args CLI arguments passed to the binary
 * @returns captured stdout/stderr and the exit status; status is -1 when the
 *   process produced no exit code (for example, it could not be spawned). In
 *   that case the spawn error message is prepended to stderr so the failure
 *   is not reported as an empty result.
 */
function run(args: string[]): { stdout: string; stderr: string; status: number } {
  const res = spawnSync(BIN, args, {
    env: makeEnv(),
    encoding: 'utf-8',
    cwd: fixtureCwd,
  });
  // spawnSync reports launch failures via `error` instead of throwing.
  const spawnFailure = res.error ? `spawn ${BIN} failed: ${res.error.message}\n` : '';
  return {
    stdout: res.stdout ?? '',
    stderr: spawnFailure + (res.stderr ?? ''),
    status: res.status ?? -1,
  };
}
/**
 * Seed one `auq-other` free-text event by invoking the gstack-question-log
 * binary with a JSON payload, using the test environment and fixture cwd.
 *
 * Each call uses a random `tool_use_id` suffix so repeated calls within a
 * test produce distinct ids.
 *
 * @param text free-text answer stored in the event's `free_text` field
 * @throws Error when the logger cannot be spawned or exits non-zero, so a
 *   broken fixture fails here instead of surfacing later as a misleading
 *   "no events" result
 */
function writeAuqOtherEvent(text: string): void {
  const event = {
    skill: 'plan-tune',
    question_id: 'hook-distill00',
    question_summary: 'Test question for distillation',
    options_count: 2,
    user_choice: 'Other',
    source: 'auq-other',
    free_text: text,
    session_id: 's-distill',
    tool_use_id: 'tu-distill-' + Math.random().toString(36).slice(2, 8),
  };
  const res = spawnSync(QLOG_BIN, [JSON.stringify(event)], {
    env: makeEnv(),
    cwd: fixtureCwd,
    encoding: 'utf-8',
  });
  if (res.error || res.status !== 0) {
    const detail = res.error
      ? res.error.message
      : `exit ${res.status}: ${res.stderr ?? ''}`;
    throw new Error(`gstack-question-log fixture write failed (${detail})`);
  }
}
/**
 * Append one run record to `distill-cost.jsonl` under the state root,
 * creating the state root directory if needed.
 *
 * @param slug project slug stored in the record
 * @param dateIso timestamp stored as the record's `ts`
 */
function writeCostLogEntry(slug: string, dateIso: string): void {
  const record = { ts: dateIso, slug, proposals_count: 0, cost_usd_est: 0 };
  const logPath = path.join(stateRoot, 'distill-cost.jsonl');
  fs.mkdirSync(stateRoot, { recursive: true });
  // JSONL: one JSON document per line.
  fs.appendFileSync(logPath, `${JSON.stringify(record)}\n`);
}
// ----------------------------------------------------------------------
// Status subcommand
// ----------------------------------------------------------------------
describe('--status', () => {
  test('reports "no runs yet" when cost log absent', () => {
    const { status, stdout } = run(['--status']);
    expect(status).toBe(0);
    expect(stdout).toMatch(/no distill runs/);
  });

  test('reports counts when prior runs exist', () => {
    // Two cost-log records stamped "now" for this project slug.
    for (let i = 0; i < 2; i += 1) {
      writeCostLogEntry(cwdSlug, new Date().toISOString());
    }
    const { status, stdout } = run(['--status']);
    expect(status).toBe(0);
    expect(stdout).toContain('RUNS: 2');
    expect(stdout).toMatch(/TODAY: 2 run\(s\)/);
  });
});
// ----------------------------------------------------------------------
// No rate cap (v1.52.0.0 cap audit) — the natural rate of free-text events
// is rare enough that count-based capping was theatrical. Cost log alone
// provides auditability via --status.
// ----------------------------------------------------------------------
describe('no rate cap (audit removed)', () => {
  test('never exits with RATE_CAPPED, even with many runs today', () => {
    // Ten cost-log records sharing one "now" timestamp.
    // NOTE(review): no question-log events are seeded here, so this run
    // presumably ends on the same path as the NO_LOG test — confirm a cap
    // check would actually be reached in this setup.
    const stamp = new Date().toISOString();
    Array.from({ length: 10 }).forEach(() => writeCostLogEntry(cwdSlug, stamp));

    const { status, stdout } = run([]);
    expect(status).toBe(0);
    expect(stdout).not.toMatch(/RATE_CAPPED/);
  });
});
// ----------------------------------------------------------------------
// No events / no log
// ----------------------------------------------------------------------
describe('no-event paths', () => {
  test('exits NO_LOG when question-log.jsonl missing', () => {
    const { status, stdout } = run([]);
    expect(status).toBe(0);
    expect(stdout).toMatch(/NO_LOG/);
  });

  test('exits NO_FREE_TEXT when log has events but none are auq-other', () => {
    // Seed a single event whose source is 'hook' rather than 'auq-other'.
    const hookEvent = {
      skill: 'plan-tune',
      question_id: 'hook-other00',
      question_summary: 'Q',
      options_count: 2,
      user_choice: 'A',
      source: 'hook',
      session_id: 's',
      tool_use_id: 'tu-x',
    };
    const spawnOptions = { env: makeEnv(), cwd: fixtureCwd, encoding: 'utf-8' as const };
    spawnSync(QLOG_BIN, [JSON.stringify(hookEvent)], spawnOptions);

    const { status, stdout } = run([]);
    expect(status).toBe(0);
    expect(stdout).toMatch(/NO_FREE_TEXT/);
  });
});
// ----------------------------------------------------------------------
// Dry-run
// ----------------------------------------------------------------------
describe('--dry-run', () => {
  test('emits the distill prompt + events JSON without calling API', () => {
    const answers = [
      'I always include tests with new features',
      'Skip design review for typo fixes',
    ];
    for (const answer of answers) {
      writeAuqOtherEvent(answer);
    }

    // Run without ANTHROPIC_API_KEY to prove no API call happens.
    const childEnv = makeEnv();
    delete childEnv.ANTHROPIC_API_KEY;
    const outcome = spawnSync(BIN, ['--dry-run'], {
      env: childEnv,
      cwd: fixtureCwd,
      encoding: 'utf-8',
    });

    expect(outcome.status).toBe(0);
    expect(outcome.stdout).toContain('DISTILL PROMPT');
    expect(outcome.stdout).toContain('always include tests');
  });
});
// ----------------------------------------------------------------------
// API key required
// ----------------------------------------------------------------------
describe('API auth', () => {
  test('fails loud when ANTHROPIC_API_KEY missing on sync run', () => {
    writeAuqOtherEvent('Some free text response that needs distilling');

    // With a free-text event queued and no key in the environment, a plain
    // (non-dry-run) invocation must fail and name the missing variable.
    const childEnv = makeEnv();
    delete childEnv.ANTHROPIC_API_KEY;
    const outcome = spawnSync(BIN, [], {
      env: childEnv,
      cwd: fixtureCwd,
      encoding: 'utf-8',
    });

    expect(outcome.status).not.toBe(0);
    for (const pattern of [/ANTHROPIC_API_KEY/, /separate billing/]) {
      expect(outcome.stderr).toMatch(pattern);
    }
  });
});
// ----------------------------------------------------------------------
// Background spawn
// ----------------------------------------------------------------------
describe('--background', () => {
  test('detaches and exits with DISTILL_SPAWNED', () => {
    // NOTE(review): the spawned child presumably outlives this call while
    // afterEach deletes the state root — confirm that race is harmless.
    const { status, stdout } = run(['--background']);
    expect(status).toBe(0);
    expect(stdout).toMatch(/DISTILL_SPAWNED: pid=\d+/);
  });
});
+28 -1
View File
@@ -650,7 +650,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `~/.claude/skills/gstack/bin/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
After answer, log best-effort:
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question (the leading or trailing line is fine; wrapped in HTML-style angle brackets, the marker does not render visibly to the user, and the hook strips it). Without the marker, the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
```bash
~/.claude/skills/gstack/bin/gstack-question-log '{"skill":"ship","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
```
@@ -3109,6 +3113,29 @@ This step is automatic — never skip it, never ask for confirmation.
---
## Step 21: Plan-tune discoverability nudge (first-successful-ship only)
Plan-tune cathedral T15. After a successful ship, surface /plan-tune once
per machine. A short, non-blocking message, marker-gated so it never re-fires.
```bash
_NUDGE_MARKER="$HOME/.gstack/.plan-tune-nudge-shown"
_QT=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning 2>/dev/null || echo "false")
if [ ! -f "$_NUDGE_MARKER" ] && [ "$_QT" = "false" ]; then
echo ""
echo "gstack can learn from your AskUserQuestion answers. Run /plan-tune to opt in"
echo "— it captures which prompts you find valuable vs noisy and (with hooks installed)"
echo "auto-decides your never-ask preferences."
touch "$_NUDGE_MARKER"
fi
```
If the marker exists, OR question_tuning is already on, the nudge is a
no-op. The marker guarantees at-most-once per machine. To re-enable:
`rm ~/.gstack/.plan-tune-nudge-shown` before next ship.
---
## Important Rules
- **Never skip tests.** If tests fail, stop.
+28 -1
View File
@@ -636,7 +636,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `$GSTACK_BIN/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
After answer, log best-effort:
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question (the leading or trailing line is fine; wrapped in HTML-style angle brackets, the marker does not render visibly to the user, and the hook strips it). Without the marker, the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
```bash
$GSTACK_BIN/gstack-question-log '{"skill":"ship","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
```
@@ -2719,6 +2723,29 @@ This step is automatic — never skip it, never ask for confirmation.
---
## Step 21: Plan-tune discoverability nudge (first-successful-ship only)
Plan-tune cathedral T15. After a successful ship, surface /plan-tune once
per machine. A short, non-blocking message, marker-gated so it never re-fires.
```bash
_NUDGE_MARKER="$HOME/.gstack/.plan-tune-nudge-shown"
_QT=$($GSTACK_ROOT/bin/gstack-config get question_tuning 2>/dev/null || echo "false")
if [ ! -f "$_NUDGE_MARKER" ] && [ "$_QT" = "false" ]; then
echo ""
echo "gstack can learn from your AskUserQuestion answers. Run /plan-tune to opt in"
echo "— it captures which prompts you find valuable vs noisy and (with hooks installed)"
echo "auto-decides your never-ask preferences."
touch "$_NUDGE_MARKER"
fi
```
If the marker exists, OR question_tuning is already on, the nudge is a
no-op. The marker guarantees at-most-once per machine. To re-enable:
`rm ~/.gstack/.plan-tune-nudge-shown` before next ship.
---
## Important Rules
- **Never skip tests.** If tests fail, stop.
+28 -1
View File
@@ -638,7 +638,11 @@ If you are looping on the same diagnostic, same file, or failed fix variants, ST
Before each AskUserQuestion, choose `question_id` from `scripts/question-registry.ts` or `{skill}-{slug}`, then run `$GSTACK_BIN/gstack-question-preference --check "<id>"`. `AUTO_DECIDE` means choose the recommended option and say "Auto-decided [summary] → [option] (your preference). Change with /plan-tune." `ASK_NORMALLY` means ask.
After answer, log best-effort:
**Embed the question_id as a marker in the question text** so hooks can identify it deterministically (plan-tune cathedral T14 / D18 progressive markers). Append `<gstack-qid:{question_id}>` somewhere in the rendered question (the leading or trailing line is fine; wrapped in HTML-style angle brackets, the marker does not render visibly to the user, and the hook strips it). Without the marker, the PreToolUse enforcement hook treats the AUQ as observed-only and never auto-decides — so always include it when the question matches a registered `question_id`.
**Embed the option recommendation via the `(recommended)` label suffix** on exactly one option per AUQ. The PreToolUse hook parses `(recommended)` first, falls back to "Recommendation: X" prose, and refuses to auto-decide if ambiguous. Two `(recommended)` labels = refuse.
After answer, log best-effort (PostToolUse hook also captures deterministically when installed; dedup on (source, tool_use_id) handles double-writes):
```bash
$GSTACK_BIN/gstack-question-log '{"skill":"ship","question_id":"<id>","question_summary":"<short>","category":"<approval|clarification|routing|cherry-pick|feedback-loop>","door_type":"<one-way|two-way>","options_count":N,"user_choice":"<key>","recommended":"<key>","session_id":"'"$_SESSION_ID"'"}' 2>/dev/null || true
```
@@ -3097,6 +3101,29 @@ This step is automatic — never skip it, never ask for confirmation.
---
## Step 21: Plan-tune discoverability nudge (first-successful-ship only)
Plan-tune cathedral T15. After a successful ship, surface /plan-tune once
per machine. A short, non-blocking message, marker-gated so it never re-fires.
```bash
_NUDGE_MARKER="$HOME/.gstack/.plan-tune-nudge-shown"
_QT=$($GSTACK_ROOT/bin/gstack-config get question_tuning 2>/dev/null || echo "false")
if [ ! -f "$_NUDGE_MARKER" ] && [ "$_QT" = "false" ]; then
echo ""
echo "gstack can learn from your AskUserQuestion answers. Run /plan-tune to opt in"
echo "— it captures which prompts you find valuable vs noisy and (with hooks installed)"
echo "auto-decides your never-ask preferences."
touch "$_NUDGE_MARKER"
fi
```
If the marker exists, OR question_tuning is already on, the nudge is a
no-op. The marker guarantees at-most-once per machine. To re-enable:
`rm ~/.gstack/.plan-tune-nudge-shown` before next ship.
---
## Important Rules
- **Never skip tests.** If tests fail, stop.
+30
View File
@@ -0,0 +1,30 @@
# Founder pitch — pixel.fund
Founder: Maya Chen (CEO, ex-Stripe), co-founder Aria Patel (CTO,
ex-Robinhood). YC W26.
## What
A donation-budget tool for solo creators. Set a monthly $ floor for
causes you care about, pixel.fund auto-allocates each dollar across your
chosen orgs (Direct Relief, GiveDirectly, etc.) the moment a Stripe
payout lands. One-line embeddable receipt. 1% platform fee.
## Traction
- 2026-04-01 launched private beta with 14 creators from her newsletter
- 2026-05-15 hit 51 paying creators, $4,200 MRR
- Waitlist of 230 from a single tweet by a tech-Twitter influencer
- Two creators asked about a "team plan" (multi-seat) unprompted
## Status quo
Creators today either (a) write checks ad-hoc and forget about it, or
(b) use Patreon-style platforms where the "cause" is opaque (general
fund). Maya talked to 40 creators in YC interviews — 31 said they "want
to give more but it's mental overhead."
## What Maya wants from office hours
Should she chase the team-plan signal, or go deeper on the solo flow
first? She's two weeks from running out of YC dorm food.
+6 -5
View File
@@ -491,13 +491,14 @@
},
"plan-tune": {
"skill": "plan-tune",
"skillMdBytes": 51717,
"skillMdLines": 1077,
"estTokens": 12929,
"tmplBytes": 15586,
"skillMdBytes": 64017,
"skillMdLines": 1357,
"estTokens": 16004,
"tmplBytes": 25196,
"descriptionLen": 325,
"hasGateEval": true,
"hasPeriodicEval": false
"hasPeriodicEval": false,
"_baseline_note": "Rebased from 51717 → 64017 in plan-tune cathedral v1.52.0.0 (T13). Cathedral added Dream cycle, Recent auto-decisions, Audit unmarked, Dream cycle review/distill sections — all load-bearing for hook substrate. See CHANGELOG.md [1.52.0.0]."
},
"qa": {
"skill": "qa",
+193
View File
@@ -0,0 +1,193 @@
/**
* Regression pin for the setup-time gbrain detection → gen-skill-docs
* override (T2 / v1.50.0.0).
*
* The override mechanism lives in scripts/gen-skill-docs.ts: when invoked
* with --respect-detection, it reads ~/.gstack/gbrain-detection.json and
* un-suppresses GBRAIN_CONTEXT_LOAD + GBRAIN_SAVE_RESULTS for hosts that
* statically list them in suppressedResolvers (claude, codex, slate,
* factory, opencode, openclaw, cursor, kiro).
*
* Tests drive gen-skill-docs as a subprocess against a temp GSTACK_HOME
* with each detection state, then assert what landed in the generated
* Claude-host SKILL.md. This is end-to-end through the actual override
* pipeline — no mocking — so it catches regressions in either the loader
* or the suppressedResolvers filter.
*
* Gate-tier, free, ~3-5s per test (gen-skill-docs runs the full skill
* generation against the real repo; --host claude scopes to one host).
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { execFileSync } from 'child_process';
import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
const REPO_ROOT = join(import.meta.dir, '..');
interface FixtureEnv {
tmpHome: string;
cleanup: () => void;
}
/**
 * Create a throwaway directory to serve as GSTACK_HOME for one test.
 *
 * @param detectionJson raw text to write as `gbrain-detection.json` inside
 *   the directory, or null to leave the file absent
 * @returns the directory path plus a `cleanup` callback that removes it and
 *   never throws
 */
function makeFixture(detectionJson: string | null): FixtureEnv {
  const tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-detect-test-'));
  if (detectionJson !== null) {
    const detectionPath = join(tmpHome, 'gbrain-detection.json');
    writeFileSync(detectionPath, detectionJson);
  }

  const cleanup = (): void => {
    try {
      rmSync(tmpHome, { recursive: true, force: true });
    } catch {
      // Best effort: a failed removal must not fail the test.
    }
  };

  return { tmpHome, cleanup };
}
/**
 * Regenerate the Claude-host skill docs in place and return the regenerated
 * text of `opts.files`.
 *
 * Runs `bun run scripts/gen-skill-docs.ts --host claude` from REPO_ROOT (plus
 * `--respect-detection` when requested) with GSTACK_HOME pointed at
 * `opts.tmpHome`. The generator is run for real: a dry run writes nothing to
 * read back, and the generator exposes no output-path argument. To keep the
 * checkout unchanged, the pre-run content of every file in `opts.files` is
 * captured first and written back in `finally`, whether or not generation
 * succeeded.
 *
 * NOTE(review): only `opts.files` are restored; if the generator rewrites
 * other files, those stay modified — confirm.
 *
 * @param opts.respectDetection pass `--respect-detection` to the generator
 * @param opts.tmpHome directory exported to the child as GSTACK_HOME
 * @param opts.files repo-relative paths to snapshot and restore
 * @returns map of repo-relative path → regenerated content
 * @throws whatever execFileSync throws (non-zero exit or the 30s timeout),
 *   or a read error if a listed file does not exist
 */
function regenAndSnapshot(opts: {
  respectDetection: boolean;
  tmpHome: string;
  files: string[];
}): Map<string, string> {
  // Read the current on-disk text of every tracked file.
  const readTracked = (): Map<string, string> =>
    new Map(
      opts.files.map((rel): [string, string] => [rel, readFileSync(join(REPO_ROOT, rel), 'utf-8')]),
    );

  const before = readTracked();
  const args = [
    'run',
    'scripts/gen-skill-docs.ts',
    '--host',
    'claude',
    ...(opts.respectDetection ? ['--respect-detection'] : []),
  ];

  try {
    execFileSync('bun', args, {
      cwd: REPO_ROOT,
      env: { ...process.env, GSTACK_HOME: opts.tmpHome },
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 30_000,
    });
    return readTracked();
  } finally {
    // Put the pre-run content back even when generation threw.
    before.forEach((content, rel) => {
      writeFileSync(join(REPO_ROOT, rel), content);
    });
  }
}
describe('gbrain detection override → gen-skill-docs', () => {
  // Single skill probe is enough to assert the override pipeline. The
  // resolver unit test (test/resolvers-gbrain-save-results.test.ts) covers
  // per-skill metadata correctness already.
  const PROBE = 'office-hours/SKILL.md';
  const PROBE_FILES = [PROBE];

  // Each test runs the generator as a subprocess that is allowed up to 30s.
  // bun:test's default per-test timeout is 5s, so give every test an explicit
  // budget above the subprocess limit.
  const REGEN_TEST_TIMEOUT_MS = 60_000;

  const DETECTED_OK = JSON.stringify({
    gbrain_local_status: 'ok',
    gbrain_on_path: true,
    gbrain_version: 'test-0.41.0',
  });
  const DETECTED_NO_CLI = JSON.stringify({
    gbrain_local_status: 'no-cli',
    gbrain_on_path: false,
    gbrain_version: null,
  });

  /**
   * Generate against a throwaway GSTACK_HOME seeded with `detectionJson`
   * (null = no detection file) and return the regenerated probe SKILL.md.
   * The temp home is always removed afterwards.
   */
  function regenProbe(detectionJson: string | null, respectDetection: boolean): string {
    const { tmpHome, cleanup } = makeFixture(detectionJson);
    try {
      const snap = regenAndSnapshot({ respectDetection, tmpHome, files: PROBE_FILES });
      const content = snap.get(PROBE);
      if (content === undefined) {
        throw new Error(`regenAndSnapshot returned no content for ${PROBE}`);
      }
      return content;
    } finally {
      cleanup();
    }
  }

  test(
    'with detected:true, Claude-host SKILL.md gains brain-aware blocks',
    () => {
      const content = regenProbe(DETECTED_OK, true);
      // GBRAIN_SAVE_RESULTS un-suppressed → resolver output rendered.
      expect(content).toContain('## Save Results to Brain');
      expect(content).toContain('gbrain put "office-hours/');
      expect(content).toContain('Skip this entire section if `gbrain` is not on PATH');
      // GBRAIN_CONTEXT_LOAD also un-suppressed (D6 bundling).
      expect(content).toContain('## Brain Context Load');
    },
    REGEN_TEST_TIMEOUT_MS,
  );

  test(
    'with detected:false (status != "ok"), brain blocks stay suppressed',
    () => {
      const content = regenProbe(DETECTED_NO_CLI, true);
      // GBRAIN_SAVE_RESULTS suppressed → no rendered block, no gbrain put line,
      // and no section header (the resolver returns "").
      expect(content).not.toContain('gbrain put "office-hours/');
      expect(content).not.toContain('## Save Results to Brain');
      // The BRAIN_CACHE_REFRESH and BRAIN_WRITE_BACK resolvers are NOT gated
      // by detection (host-agnostic), so other "Brain ..." sections may still
      // appear. Only SAVE_RESULTS-specific markers are asserted absent.
    },
    REGEN_TEST_TIMEOUT_MS,
  );

  test(
    'with NO detection file, brain blocks stay suppressed (same as detected:false)',
    () => {
      const content = regenProbe(null, true);
      expect(content).not.toContain('gbrain put "office-hours/');
      expect(content).not.toContain('## Save Results to Brain');
    },
    REGEN_TEST_TIMEOUT_MS,
  );

  test(
    'without --respect-detection flag, detection file is IGNORED (CI canonical path)',
    () => {
      // Even if a detection file exists with detected:true, the default
      // `bun run gen:skill-docs` (CI) must produce no-gbrain output so the
      // committed SKILL.md stays reproducible regardless of any developer's
      // local gbrain install state.
      const content = regenProbe(DETECTED_OK, false);
      expect(content).not.toContain('gbrain put "office-hours/');
      expect(content).not.toContain('## Save Results to Brain');
    },
    REGEN_TEST_TIMEOUT_MS,
  );
});
+8 -1
View File
@@ -323,10 +323,17 @@ describe('gen-skill-docs', () => {
// Ratcheted 36500 → 39000 in the contributor wave when #1205 added the
// \\u-escape CJK rule (rule 12 + self-check item) to the AskUserQuestion
// preamble.
// Ratcheted 39000 → 40000 in plan-tune cathedral T14: question-tuning
// resolver gained the <gstack-qid:...> marker convention + the
// (recommended) label requirement (D2 + D18 — both load-bearing for
// hook enforcement). Adds ~700 bytes.
// Ratcheted 40000 → 60000 in v1.52.0.0 cap audit: ~20K headroom so
// future preamble adds don't trip the gate on each PR. Real runaway
// (preamble doubling) still trips; normal scope growth doesn't.
// Enforce the single current cap of 60_000 bytes on every review skill's
// preamble. The superseded 39_000 assertion is removed: it was stricter than
// the new cap, so keeping both made the ratchet described above a no-op.
for (const skill of reviewSkills) {
  const content = fs.readFileSync(skill.path, 'utf-8');
  const preamble = extractPreambleBeforeWorkflow(content, skill.markers);
  // Measure UTF-8 bytes, not string length.
  expect(Buffer.byteLength(preamble, 'utf-8')).toBeLessThan(60_000);
}
});
+206
View File
@@ -0,0 +1,206 @@
/**
* gstack-codex-session-import — backfill question-log from Codex JSONL.
*
* Plan-tune cathedral T9. Verifies the structured-file parser (D5) handles
* the two-tier recovery strategy from docs/spikes/codex-session-format.md:
* - Marker-first: <gstack-qid:foo-bar> → source=codex-import-marker.
* - Pattern fallback: D-numbered brief → source=codex-import-pattern,
* hash-only question_id.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
// Parent of this file's directory, and the importer binary under test.
const ROOT = path.resolve(import.meta.dir, '..');
const BIN = path.join(ROOT, 'bin', 'gstack-codex-session-import');

// Per-test sandbox; all three are (re)assigned in beforeEach.
let stateRoot: string;
let fixtureCwd: string;
let cwdSlug: string;

beforeEach(() => {
  const sandboxPrefix = path.join(os.tmpdir(), 'gstack-cdximp-');
  stateRoot = fs.mkdtempSync(sandboxPrefix);
  cwdSlug = 'codex-fixture-slug';
  // The fake session cwd lives inside the sandbox, named after the slug.
  fixtureCwd = path.join(stateRoot, cwdSlug);
  fs.mkdirSync(fixtureCwd, { recursive: true });
});

afterEach(() => {
  // Remove the whole sandbox; `force` tolerates an already-missing directory.
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
/**
 * Write a JSONL session fixture into the per-test state root.
 *
 * The first line is a `session_meta` record carrying `sessionId` and the
 * fixture cwd; each entry of `events` follows on its own line. The file ends
 * with a trailing newline.
 *
 * @param events    Event records to append after the meta line.
 * @param sessionId Id stored in the meta payload.
 * @returns Absolute path of the file written.
 */
function writeSessionFile(events: Array<Record<string, unknown>>, sessionId = 'sess-fixture'): string {
  const sessionPath = path.join(stateRoot, 'rollout-fixture.jsonl');
  const metaLine = JSON.stringify({
    timestamp: new Date().toISOString(),
    type: 'session_meta',
    payload: { id: sessionId, cwd: fixtureCwd },
  });
  let contents = metaLine + '\n';
  for (const event of events) {
    contents += JSON.stringify(event) + '\n';
  }
  fs.writeFileSync(sessionPath, contents);
  return sessionPath;
}
/**
 * Build an `event_msg` record whose payload is an `agent_message` carrying
 * `text`, stamped with the current time in ISO-8601 form.
 */
function agentMessage(text: string): Record<string, unknown> {
  const timestamp = new Date().toISOString();
  const payload = { type: 'agent_message', message: text };
  return { timestamp, type: 'event_msg', payload };
}
/**
 * Build an `event_msg` record whose payload is a `user_message` carrying
 * `text`, stamped with the current time in ISO-8601 form.
 */
function userMessage(text: string): Record<string, unknown> {
  const timestamp = new Date().toISOString();
  const payload = { type: 'user_message', message: text };
  return { timestamp, type: 'event_msg', payload };
}
/**
 * Run the importer binary on `sessionPath` with state redirected to the
 * per-test sandbox.
 *
 * The child inherits the current environment, except that GSTACK_STATE_ROOT
 * points at the sandbox, GSTACK_QUESTION_LOG_NO_DERIVE is "1", and
 * GSTACK_HOME is removed.
 *
 * @returns Captured stdout/stderr (empty string when absent) and the exit
 *          status (-1 when the child reported none).
 */
function runImport(sessionPath: string): { stdout: string; stderr: string; status: number } {
  const inherited = Object.entries(process.env).filter(
    (pair): pair is [string, string] => pair[1] !== undefined,
  );
  const env: Record<string, string> = {
    ...Object.fromEntries(inherited),
    GSTACK_STATE_ROOT: stateRoot,
    GSTACK_QUESTION_LOG_NO_DERIVE: '1',
  };
  delete env.GSTACK_HOME;
  const child = spawnSync(BIN, [sessionPath], { env, encoding: 'utf-8', cwd: ROOT });
  const stdout = child.stdout ?? '';
  const stderr = child.stderr ?? '';
  const status = child.status ?? -1;
  return { stdout, stderr, status };
}
/**
 * Read back the question log written for the fixture slug.
 *
 * @returns One parsed object per non-empty line of
 *          `<stateRoot>/projects/<cwdSlug>/question-log.jsonl`, or an empty
 *          array when that file does not exist.
 */
function readImportedEvents(): Array<Record<string, unknown>> {
  const logFile = path.join(stateRoot, 'projects', cwdSlug, 'question-log.jsonl');
  if (!fs.existsSync(logFile)) {
    return [];
  }
  const rawLines = fs.readFileSync(logFile, 'utf-8').trim().split('\n');
  const parsed: Array<Record<string, unknown>> = [];
  for (const rawLine of rawLines) {
    // Skip blank lines (an empty file yields a single empty string).
    if (rawLine) parsed.push(JSON.parse(rawLine));
  }
  return parsed;
}
// ----------------------------------------------------------------------
// Marker-first path
// ----------------------------------------------------------------------
describe('marker-first import (source=codex-import-marker)', () => {
  test('extracts marker id from agent_message and pairs with next user_message', () => {
    // Full brief carrying an explicit <gstack-qid:...> marker.
    const brief = [
      'D1 — Test',
      'ELI10: blah',
      '<gstack-qid:ship-test-failure-triage> Tests failed.',
      'Recommendation: A',
      'A) Fix now (recommended)',
      'B) Investigate',
      'C) Ack and ship',
    ].join('\n');
    const sessionPath = writeSessionFile([agentMessage(brief), userMessage('A')]);

    const { status, stdout } = runImport(sessionPath);
    expect(status).toBe(0);
    expect(stdout).toContain('IMPORTED: 1');

    const imported = readImportedEvents();
    expect(imported.length).toBe(1);
    const first = imported[0];
    expect(first.source).toBe('codex-import-marker');
    expect(first.question_id).toBe('ship-test-failure-triage');
    // Both the user's answer and the recommendation resolve to option A's label.
    expect(first.user_choice).toContain('Fix now');
    expect(first.recommended).toContain('Fix now');
  });
});
// ----------------------------------------------------------------------
// Pattern fallback
// ----------------------------------------------------------------------
describe('pattern fallback (source=codex-import-pattern)', () => {
  test('D-numbered brief without marker → hash id + source=codex-import-pattern', () => {
    // No <gstack-qid:...> marker in the brief, only the D-numbered heading.
    const brief = ['D2 — Unmarked brief', 'A) Foo (recommended)', 'B) Bar'].join('\n');
    const sessionPath = writeSessionFile([agentMessage(brief), userMessage('A')]);

    const outcome = runImport(sessionPath);
    expect(outcome.status).toBe(0);

    const imported = readImportedEvents();
    expect(imported.length).toBe(1);
    const first = imported[0];
    expect(first.source).toBe('codex-import-pattern');
    const questionId = first.question_id as string;
    expect(questionId.startsWith('hook-')).toBe(true);
    expect(first.user_choice).toContain('Foo');
  });
});
// ----------------------------------------------------------------------
// Edge cases
// ----------------------------------------------------------------------
describe('edge cases', () => {
  test('no AUQ-shaped events → 0 imported, exit 0', () => {
    const sessionPath = writeSessionFile([
      agentMessage('Just doing some work, nothing to ask.'),
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    expect(r.stdout).toContain('IMPORTED: 0');
  });

  test('agent_message with marker but no following user_message → skipped', () => {
    // The brief is never answered, so there is nothing to pair it with.
    const sessionPath = writeSessionFile([
      agentMessage('<gstack-qid:test-q> D1 — Q\nA) Foo\nB) Bar'),
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    expect(readImportedEvents().length).toBe(0);
  });

  test('two D-briefs in sequence → both imported', () => {
    const sessionPath = writeSessionFile([
      agentMessage('D1 — First <gstack-qid:q1>\nA) Foo (recommended)\nB) Bar'),
      userMessage('A'),
      agentMessage('D2 — Second <gstack-qid:q2>\nA) Baz (recommended)\nB) Qux'),
      userMessage('B'),
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    const events = readImportedEvents();
    expect(events.length).toBe(2);
    // Events are expected in session order.
    expect(events[0].question_id).toBe('q1');
    expect(events[1].question_id).toBe('q2');
  });

  // Previously titled "numeric user response also resolves to letter index",
  // but the fixture reply is a letter followed by free text. The title now
  // matches what is exercised, and the exit status is checked like in the
  // sibling tests so a crashing importer cannot pass on partial output.
  test('letter response with trailing commentary resolves to the chosen option', () => {
    const sessionPath = writeSessionFile([
      agentMessage('D1 — Test <gstack-qid:numeric-q>\nA) Foo\nB) Bar\nC) Baz'),
      userMessage('B - I think B is right'),
    ]);
    const r = runImport(sessionPath);
    expect(r.status).toBe(0);
    const events = readImportedEvents();
    expect(events.length).toBe(1);
    expect(events[0].user_choice).toContain('Bar');
  });
});
// ----------------------------------------------------------------------
// Default-mode (latest session) behavior
// ----------------------------------------------------------------------
describe('default mode (no args → latest)', () => {
  test('returns NO_SESSIONS when sessions dir is empty', () => {
    // A fresh, empty directory stands in for the Codex sessions root.
    const emptySessionsRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-empty-cdx-'));
    try {
      const inherited = Object.entries(process.env).filter(
        (pair): pair is [string, string] => pair[1] !== undefined,
      );
      const env: Record<string, string> = {
        ...Object.fromEntries(inherited),
        GSTACK_STATE_ROOT: stateRoot,
        CODEX_SESSIONS_ROOT: emptySessionsRoot,
      };
      // No positional argument: the binary picks the session itself.
      const outcome = spawnSync(BIN, [], { env, encoding: 'utf-8', cwd: ROOT });
      expect(outcome.status).toBe(0);
      expect(outcome.stdout).toMatch(/NO_SESSIONS/);
    } finally {
      fs.rmSync(emptySessionsRoot, { force: true, recursive: true });
    }
  });
});
+150
View File
@@ -0,0 +1,150 @@
/**
* gstack-core@1.0.0 schema pack validation (T1).
*
* Asserts the schema pack is well-formed and matches the v1.48 plan:
* - Exactly 8 page types (7 entities + 1 take)
* - Frontmatter shape is internally consistent
* - Retention policies match SKILL_RUN_RETENTION_DAYS spec
* - Link verbs only reference declared verbs
* - JSON payload shape is acceptable to mcp__gbrain__schema_apply_mutations
*
* Gate-tier, free, pure import + assertion.
*/
import { describe, test, expect } from 'bun:test';
import {
GSTACK_CORE_SCHEMA_PACK,
getSchemaPackMutationPayload,
getSchemaPackTypeNames,
getRetentionPolicy,
} from '../scripts/gstack-schema-pack';
import {
GSTACK_SCHEMA_PACK_NAME,
GSTACK_SCHEMA_PACK_VERSION,
} from '../scripts/brain-cache-spec';
describe('gstack-core schema pack', () => {
  // Look up a page-type definition by its `type` name (evaluated at call time).
  const findPageType = (typeName: string) =>
    GSTACK_CORE_SCHEMA_PACK.page_types.find((t) => t.type === typeName);

  test('identity matches brain-cache-spec constants', () => {
    const { name, version } = GSTACK_CORE_SCHEMA_PACK;
    expect(name).toBe(GSTACK_SCHEMA_PACK_NAME);
    expect(version).toBe(GSTACK_SCHEMA_PACK_VERSION);
  });

  test('declares exactly 8 page types (7 entities + gstack/take)', () => {
    const pageTypeCount = GSTACK_CORE_SCHEMA_PACK.page_types.length;
    expect(pageTypeCount).toBe(8);
  });

  test('all 7 brain-cache entities have a matching schema page type', () => {
    const declaredNames = getSchemaPackTypeNames();
    const entityTypes = [
      'gstack/user-profile',
      'gstack/product',
      'gstack/goal',
      'gstack/developer-persona',
      'gstack/brand',
      'gstack/competitive-intel',
      'gstack/skill-run',
    ];
    entityTypes.forEach((entityType) => {
      expect(declaredNames).toContain(entityType);
    });
  });

  test('gstack/take exists with kind=bet supported (Phase 2 / E5)', () => {
    const take = findPageType('gstack/take');
    expect(take).toBeDefined();
    const kindField = take!.fields.find((f) => f.name === 'kind');
    for (const allowed of ['bet', 'fact']) {
      expect(kindField?.values).toContain(allowed);
    }
  });

  test('every page type has a required type + slug field', () => {
    for (const def of GSTACK_CORE_SCHEMA_PACK.page_types) {
      for (const mandatory of ['type', 'slug']) {
        const match = def.fields.find((f) => f.name === mandatory);
        expect(match?.required).toBe(true);
      }
    }
  });

  test('enum fields declare their values', () => {
    // Gather every enum-typed field across all page types, in declaration order.
    const enumFields = GSTACK_CORE_SCHEMA_PACK.page_types.flatMap((def) =>
      def.fields.filter((field) => field.type === 'enum'),
    );
    for (const enumField of enumFields) {
      expect(enumField.values).toBeDefined();
      expect(enumField.values!.length).toBeGreaterThan(0);
    }
  });

  test('skill-run is the only archive-after-90d type', () => {
    const archivedTypes: unknown[] = [];
    for (const t of GSTACK_CORE_SCHEMA_PACK.page_types) {
      if (t.retention === 'archive-after-90d') archivedTypes.push(t.type);
    }
    expect(archivedTypes).toEqual(['gstack/skill-run']);
  });

  test('gstack/take is never-archive (calibration scorecard preservation)', () => {
    const policy = getRetentionPolicy('gstack/take');
    expect(policy).toBe('never-archive');
  });

  test('getRetentionPolicy throws on unknown type (defensive)', () => {
    const lookupUnknown = () => getRetentionPolicy('gstack/nonexistent');
    expect(lookupUnknown).toThrow();
  });

  test('link verbs declared on emits_links are also in pack.link_verbs', () => {
    const knownVerbs = new Set(GSTACK_CORE_SCHEMA_PACK.link_verbs);
    const allLinks = GSTACK_CORE_SCHEMA_PACK.page_types.flatMap((def) => def.emits_links ?? []);
    for (const link of allLinks) {
      expect(knownVerbs.has(link.verb)).toBe(true);
    }
  });

  test('link verbs only target declared gstack/ page types', () => {
    const knownTypes = new Set(getSchemaPackTypeNames());
    const allLinks = GSTACK_CORE_SCHEMA_PACK.page_types.flatMap((def) => def.emits_links ?? []);
    for (const link of allLinks) {
      expect(knownTypes.has(link.target_type)).toBe(true);
    }
  });

  test('mutation payload is well-formed JSON', () => {
    const mutation = getSchemaPackMutationPayload();
    expect(mutation.schema_version).toBe(1);
    expect(mutation.schema_pack).toBeDefined();
    expect(typeof mutation.schema_pack.name).toBe('string');
    expect(Array.isArray(mutation.schema_pack.page_types)).toBe(true);
    // A stringify → parse round-trip surfaces values JSON cannot carry
    // (functions, undefined, etc.).
    const roundTripped = JSON.parse(JSON.stringify(mutation));
    expect(roundTripped.schema_pack.name).toBe(mutation.schema_pack.name);
  });

  test('gstack/product has expected emits_links graph (product → goal/persona/brand/etc.)', () => {
    const product = findPageType('gstack/product')!;
    // Flatten each link to "verb:target_type" for easy membership checks.
    const edges = (product.emits_links ?? []).map((l) => `${l.verb}:${l.target_type}`);
    const expectedEdges = [
      'targets:gstack/goal',
      'observed_by:gstack/developer-persona',
      'has_brand:gstack/brand',
      'competes_with:gstack/competitive-intel',
    ];
    for (const edge of expectedEdges) {
      expect(edges).toContain(edge);
    }
  });

  test('gstack/goal has lifecycle status enum (active/resolved/expired/archived)', () => {
    const goal = findPageType('gstack/goal')!;
    const statusField = goal.fields.find((f) => f.name === 'status');
    expect(statusField?.values).toEqual(['active', 'resolved', 'expired', 'archived']);
  });

  test('gstack/skill-run records the bet count for calibration coverage', () => {
    const skillRun = findPageType('gstack/skill-run')!;
    const takesWritten = skillRun.fields.find((f) => f.name === 'takes_written');
    expect(takesWritten).toBeDefined();
    expect(takesWritten?.type).toBe('number');
  });

  test('gstack/user-profile is never-archive (cross-project, long-lived)', () => {
    const policy = getRetentionPolicy('gstack/user-profile');
    expect(policy).toBe('never-archive');
  });
});
@@ -0,0 +1,302 @@
/**
* gstack-settings-hook schema-aware surface (T3 plan-tune cathedral).
*
* Verifies add-event / remove-source / diff-event / rollback / list-sources
* for PreToolUse + PostToolUse registration. Existing team-mode.test.ts
* covers the legacy `add <cmd>` / `remove <cmd>` shape; this file only
* covers the new surface introduced for the plan-tune cathedral.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { execSync } from 'child_process';
// Parent of this file's directory, and the settings-hook binary under test.
const ROOT = path.resolve(import.meta.dir, '..');
const SETTINGS_HOOK = path.join(ROOT, 'bin', 'gstack-settings-hook');

// Per-test scratch directory and the settings.json path inside it.
let tmpDir: string;
let settingsFile: string;

beforeEach(() => {
  const scratchPrefix = path.join(os.tmpdir(), 'gstack-shsa-');
  tmpDir = fs.mkdtempSync(scratchPrefix);
  // Only the path is computed here; the file itself is not created.
  settingsFile = path.join(tmpDir, 'settings.json');
});

afterEach(() => {
  fs.rmSync(tmpDir, { force: true, recursive: true });
});
/**
 * Run gstack-settings-hook through the shell with GSTACK_SETTINGS_FILE
 * pointed at the per-test settings file.
 *
 * Every argv element (including the binary path) is single-quoted for the
 * shell. Embedded single quotes are escaped POSIX-style, so a checkout path
 * or argument containing `'` no longer produces a broken command line.
 *
 * @param args Arguments passed to the binary, in order.
 * @returns stdout/stderr/exitCode. On success stderr is reported as '' and
 *          exitCode as 0. On failure the fields come from the thrown error,
 *          with exitCode falling back to 1 when the error carries no status.
 */
function run(args: string[]): { stdout: string; stderr: string; exitCode: number } {
  // Close the quote, emit an escaped quote, reopen: abc'def -> 'abc'\''def'.
  const shellQuote = (s: string): string => `'${s.replace(/'/g, `'\\''`)}'`;
  const commandLine = [SETTINGS_HOOK, ...args].map(shellQuote).join(' ');
  try {
    const stdout = execSync(commandLine, {
      env: { ...process.env, GSTACK_SETTINGS_FILE: settingsFile },
      encoding: 'utf-8',
      timeout: 10000,
    });
    return { stdout, stderr: '', exitCode: 0 };
  } catch (e: any) {
    // execSync throws on non-zero exit; the error carries the captured streams.
    return { stdout: e.stdout || '', stderr: e.stderr || '', exitCode: e.status ?? 1 };
  }
}
/** Read the per-test settings file and return its parsed JSON content. */
function settings(): any {
  const raw = fs.readFileSync(settingsFile, 'utf-8');
  return JSON.parse(raw);
}
// ----------------------------------------------------------------------
// add-event
// ----------------------------------------------------------------------
describe('add-event', () => {
  const SOURCE_TAG = 'plan-tune-cathedral';

  // argv for `add-event` with the AskUserQuestion matcher and SOURCE_TAG.
  const addEventArgs = (event: string, command: string): string[] => [
    'add-event',
    '--event', event,
    '--matcher', 'AskUserQuestion',
    '--command', command,
    '--source', SOURCE_TAG,
  ];

  test('registers a PreToolUse hook with matcher + source tag', () => {
    const matcher = '(AskUserQuestion|mcp__.*__AskUserQuestion)';
    const hookCommand = '/abs/path/to/question-preference-hook';
    const outcome = run([
      'add-event',
      '--event', 'PreToolUse',
      '--matcher', matcher,
      '--command', hookCommand,
      '--source', SOURCE_TAG,
      '--timeout', '5',
    ]);
    expect(outcome.exitCode).toBe(0);

    const preHooks = settings().hooks.PreToolUse;
    expect(preHooks).toHaveLength(1);
    const entry = preHooks[0];
    expect(entry.matcher).toBe(matcher);
    expect(entry._gstack_source).toBe(SOURCE_TAG);
    expect(entry.hooks[0].command).toBe(hookCommand);
    // The CLI receives '5' as a string; the stored value is the number 5.
    expect(entry.hooks[0].timeout).toBe(5);
  });

  test('registers a PostToolUse hook independently of PreToolUse', () => {
    run(addEventArgs('PreToolUse', '/pre'));
    const outcome = run(addEventArgs('PostToolUse', '/post'));
    expect(outcome.exitCode).toBe(0);

    const { hooks } = settings();
    expect(hooks.PreToolUse).toHaveLength(1);
    expect(hooks.PostToolUse).toHaveLength(1);
    expect(hooks.PreToolUse[0].hooks[0].command).toBe('/pre');
    expect(hooks.PostToolUse[0].hooks[0].command).toBe('/post');
  });

  test('idempotent: re-adding same (event, matcher, source) updates in place', () => {
    // Same key twice, different command: the second call must replace, not append.
    for (const command of ['/v1', '/v2']) {
      run(addEventArgs('PreToolUse', command));
    }
    const preHooks = settings().hooks.PreToolUse;
    expect(preHooks).toHaveLength(1);
    expect(preHooks[0].hooks[0].command).toBe('/v2');
  });

  test('preserves unrelated existing hooks', () => {
    const userOwned = {
      hooks: {
        PreToolUse: [
          {
            matcher: 'Bash',
            hooks: [{ type: 'command', command: '/user-own-hook' }],
          },
        ],
      },
    };
    fs.writeFileSync(settingsFile, JSON.stringify(userOwned, null, 2));

    run(addEventArgs('PreToolUse', '/gstack-hook'));

    const preHooks = settings().hooks.PreToolUse;
    expect(preHooks).toHaveLength(2);
    // The user's own Bash hook must survive untouched.
    const bashEntry = preHooks.find((e: any) => e.matcher === 'Bash');
    expect(bashEntry).toBeDefined();
    expect(bashEntry.hooks[0].command).toBe('/user-own-hook');
  });

  test('writes a timestamped backup before mutating', () => {
    fs.writeFileSync(settingsFile, JSON.stringify({ existing: 'value' }));
    run(addEventArgs('PreToolUse', '/gstack'));

    const backupNames = fs
      .readdirSync(tmpDir)
      .filter((f) => f.startsWith('settings.json.bak.'));
    expect(backupNames.length).toBeGreaterThanOrEqual(1);

    // The backup holds the pre-mutation content: original key, no hooks yet.
    const backupPath = path.join(tmpDir, backupNames[0]);
    const snapshot = JSON.parse(fs.readFileSync(backupPath, 'utf-8'));
    expect(snapshot.existing).toBe('value');
    expect(snapshot.hooks).toBeUndefined();
  });

  test('rejects invalid --event', () => {
    const outcome = run([
      'add-event',
      '--event', 'NotAnEvent',
      '--command', '/x',
      '--source', 'plan-tune',
    ]);
    expect(outcome.exitCode).not.toBe(0);
    expect(outcome.stderr).toMatch(/invalid --event/);
  });
});
// ----------------------------------------------------------------------
// remove-source
// ----------------------------------------------------------------------
describe('remove-source', () => {
  const REMOVE_ARGS = ['remove-source', '--source', 'plan-tune-cathedral'];

  test('removes all entries with a given source tag, leaves others alone', () => {
    // Seed an untagged hook that must survive the removal.
    const seeded = {
      hooks: {
        PreToolUse: [
          { matcher: 'Bash', hooks: [{ command: '/keep-me' }] },
        ],
      },
    };
    fs.writeFileSync(settingsFile, JSON.stringify(seeded));

    // Register one tagged hook per event.
    const tagged: Array<[string, string]> = [
      ['PreToolUse', '/a'],
      ['PostToolUse', '/b'],
    ];
    for (const [event, command] of tagged) {
      run([
        'add-event',
        '--event', event,
        '--matcher', 'AskUserQuestion',
        '--command', command,
        '--source', 'plan-tune-cathedral',
      ]);
    }

    const outcome = run(REMOVE_ARGS);
    expect(outcome.exitCode).toBe(0);
    expect(outcome.stdout).toMatch(/removed 2 hook/);

    const { hooks } = settings();
    // PostToolUse held only the tagged entry, so the key disappears entirely.
    expect(hooks.PostToolUse).toBeUndefined();
    expect(hooks.PreToolUse).toHaveLength(1);
    expect(hooks.PreToolUse[0].hooks[0].command).toBe('/keep-me');
  });

  test('safely no-ops when settings.json missing', () => {
    const outcome = run(REMOVE_ARGS);
    expect(outcome.exitCode).toBe(0);
  });
});
// ----------------------------------------------------------------------
// diff-event
// ----------------------------------------------------------------------
describe('diff-event', () => {
  test('emits BEFORE + AFTER without mutating settings.json', () => {
    const initial = { existing: 'value' };
    fs.writeFileSync(settingsFile, JSON.stringify(initial));

    const { exitCode, stdout } = run([
      'diff-event',
      '--event', 'PreToolUse',
      '--matcher', 'AskUserQuestion',
      '--command', '/gstack',
      '--source', 'plan-tune-cathedral',
    ]);
    expect(exitCode).toBe(0);
    for (const fragment of ['--- BEFORE', '--- AFTER', 'plan-tune-cathedral']) {
      expect(stdout).toContain(fragment);
    }

    // The preview must leave the file on disk exactly as it was.
    const onDisk = JSON.parse(fs.readFileSync(settingsFile, 'utf-8'));
    expect(onDisk).toEqual({ existing: 'value' });
  });
});
// ----------------------------------------------------------------------
// rollback
// ----------------------------------------------------------------------
describe('rollback', () => {
  test('restores latest backup', () => {
    fs.writeFileSync(settingsFile, JSON.stringify({ original: true }));
    const mutateArgs = [
      'add-event',
      '--event', 'PreToolUse',
      '--matcher', 'AskUserQuestion',
      '--command', '/gstack',
      '--source', 'plan-tune-cathedral',
    ];
    run(mutateArgs);
    // Sanity check: the mutation really landed before it is rolled back.
    expect(settings().hooks).toBeDefined();

    const outcome = run(['rollback']);
    expect(outcome.exitCode).toBe(0);

    const restored = settings();
    expect(restored.original).toBe(true);
    expect(restored.hooks).toBeUndefined();
  });

  test('fails clearly when no backup pointer exists', () => {
    const { exitCode, stderr } = run(['rollback']);
    expect(exitCode).not.toBe(0);
    expect(stderr).toMatch(/no backup pointer/);
  });
});
// ----------------------------------------------------------------------
// list-sources
// ----------------------------------------------------------------------
describe('list-sources', () => {
  test('shows source-tagged hooks across all events', () => {
    // Register one tagged hook per event, PreToolUse first.
    const registrations: Array<[string, string]> = [
      ['PreToolUse', '/pre'],
      ['PostToolUse', '/post'],
    ];
    for (const [event, command] of registrations) {
      run([
        'add-event',
        '--event', event,
        '--matcher', 'AskUserQuestion',
        '--command', command,
        '--source', 'plan-tune-cathedral',
      ]);
    }

    const { exitCode, stdout } = run(['list-sources']);
    expect(exitCode).toBe(0);
    for (const fragment of ['PreToolUse', 'PostToolUse', 'plan-tune-cathedral']) {
      expect(stdout).toContain(fragment);
    }
  });

  test('empty when no settings file', () => {
    const { exitCode, stdout } = run(['list-sources']);
    expect(exitCode).toBe(0);
    expect(stdout).toMatch(/no settings file/);
  });
});
+159
View File
@@ -0,0 +1,159 @@
/**
* GSTACK_STATE_ROOT override — verifies the 3 plan-tune bins honor
* GSTACK_STATE_ROOT as a higher-priority override over GSTACK_HOME.
*
* Surfaced by plan-tune cathedral D16 (Codex outside voice): tests can't
* isolate from real ~/.gstack today because the bins ignore STATE_ROOT.
* Without this override, the cathedral's E2E + integration tests would
* silently pollute the user's real profile.
*
* Contract:
* - GSTACK_STATE_ROOT set → bins write under STATE_ROOT (HOME ignored).
* - Only GSTACK_HOME set → bins write under HOME (existing behavior).
* - Neither set → falls back to $HOME/.gstack (existing behavior).
* - Both set → STATE_ROOT wins.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
// Parent of this file's directory, and the three binaries under test.
const ROOT = path.resolve(import.meta.dir, '..');
const BIN_LOG = path.join(ROOT, 'bin', 'gstack-question-log');
const BIN_PREF = path.join(ROOT, 'bin', 'gstack-question-preference');
const BIN_DEV = path.join(ROOT, 'bin', 'gstack-developer-profile');

// Two independent scratch roots, so a write can be attributed to one or the other.
let stateRoot: string;
let homeRoot: string;

beforeEach(() => {
  const scratchBase = os.tmpdir();
  stateRoot = fs.mkdtempSync(path.join(scratchBase, 'gstack-state-'));
  homeRoot = fs.mkdtempSync(path.join(scratchBase, 'gstack-home-'));
});

afterEach(() => {
  for (const scratchRoot of [stateRoot, homeRoot]) {
    fs.rmSync(scratchRoot, { recursive: true, force: true });
  }
});
/**
 * Spawn `bin` with `args` from the repo root under a controlled environment.
 *
 * The child environment is the current process environment overlaid with
 * `env`; entries whose value is `undefined` are dropped. GSTACK_STATE_ROOT
 * and GSTACK_HOME reach the child only when `env` gives them a defined
 * value, so a value inherited from the parent can never leak into the
 * override matrix.
 *
 * @returns Captured stdout/stderr (empty string when absent) and the exit
 *          status (-1 when the child reported none).
 */
function runBin(
  bin: string,
  args: string[],
  env: Record<string, string | undefined>,
): { stdout: string; stderr: string; status: number } {
  const overlay: Record<string, string | undefined> = { ...process.env, ...env };
  const cleaned: Record<string, string> = {};
  Object.keys(overlay).forEach((key) => {
    const value = overlay[key];
    if (value !== undefined) cleaned[key] = value;
  });
  // Drop inherited values for the two variables under test unless the caller
  // set them explicitly.
  for (const name of ['GSTACK_STATE_ROOT', 'GSTACK_HOME']) {
    if (env[name] === undefined) delete cleaned[name];
  }
  const child = spawnSync(bin, args, { env: cleaned, encoding: 'utf-8', cwd: ROOT });
  const stdout = child.stdout ?? '';
  const stderr = child.stderr ?? '';
  const status = child.status ?? -1;
  return { stdout, stderr, status };
}
// Fixture question-log event. The gstack-question-log tests below serialize
// it with JSON.stringify and pass it as the single CLI argument.
const SAMPLE_LOG = {
skill: 'plan-tune',
question_id: 'state-root-test',
question_summary: 'Test STATE_ROOT honoring',
category: 'clarification',
door_type: 'two-way',
options_count: 2,
user_choice: 'a',
recommended: 'a',
session_id: 'state-root-test-session',
};
describe('gstack-question-log honors GSTACK_STATE_ROOT', () => {
  // Log the sample event under the given environment overrides.
  const logSample = (env: Record<string, string | undefined>) =>
    runBin(BIN_LOG, [JSON.stringify(SAMPLE_LOG)], env);

  // Names of the project directories under `<root>/projects`.
  const listProjects = (root: string): string[] => fs.readdirSync(path.join(root, 'projects'));

  // `<root>/projects` must be absent, or present but empty.
  const expectNoProjects = (root: string): void => {
    const projectsPath = path.join(root, 'projects');
    if (fs.existsSync(projectsPath)) {
      expect(fs.readdirSync(projectsPath).length).toBe(0);
    }
  };

  test('STATE_ROOT set, HOME unset → writes under STATE_ROOT', () => {
    const outcome = logSample({ GSTACK_STATE_ROOT: stateRoot, GSTACK_HOME: undefined });
    expect(outcome.status).toBe(0);
    // The project slug is derived from cwd, so just require that at least one
    // project directory exists and holds a log file.
    const slugs = listProjects(stateRoot);
    expect(slugs.length).toBeGreaterThanOrEqual(1);
    const logFile = path.join(stateRoot, 'projects', slugs[0], 'question-log.jsonl');
    expect(fs.existsSync(logFile)).toBe(true);
  });

  test('STATE_ROOT wins over HOME when both set', () => {
    const outcome = logSample({ GSTACK_STATE_ROOT: stateRoot, GSTACK_HOME: homeRoot });
    expect(outcome.status).toBe(0);
    // The write must land under STATE_ROOT...
    expect(listProjects(stateRoot).length).toBeGreaterThanOrEqual(1);
    // ...and nothing may land under HOME.
    expectNoProjects(homeRoot);
  });

  test('only HOME set → preserves existing behavior (writes under HOME)', () => {
    const outcome = logSample({ GSTACK_STATE_ROOT: undefined, GSTACK_HOME: homeRoot });
    expect(outcome.status).toBe(0);
    expect(listProjects(homeRoot).length).toBeGreaterThanOrEqual(1);
    // The unused STATE_ROOT scratch directory must stay untouched.
    expectNoProjects(stateRoot);
  });
});
describe('gstack-question-preference honors GSTACK_STATE_ROOT', () => {
  test('STATE_ROOT set → preferences file lives under STATE_ROOT', () => {
    const preferenceJson = JSON.stringify({
      question_id: 'state-root-pref-test',
      preference: 'never-ask',
      source: 'plan-tune',
    });
    const outcome = runBin(BIN_PREF, ['--write', preferenceJson], {
      GSTACK_STATE_ROOT: stateRoot,
      GSTACK_HOME: undefined,
    });
    expect(outcome.status).toBe(0);

    const projectsDir = path.join(stateRoot, 'projects');
    const slugs = fs.readdirSync(projectsDir);
    expect(slugs.length).toBeGreaterThanOrEqual(1);

    const prefFile = path.join(projectsDir, slugs[0], 'question-preferences.json');
    expect(fs.existsSync(prefFile)).toBe(true);
    // The stored map is keyed by question id.
    const stored = JSON.parse(fs.readFileSync(prefFile, 'utf-8'));
    expect(stored['state-root-pref-test']).toBe('never-ask');
  });
});
describe('gstack-developer-profile honors GSTACK_STATE_ROOT', () => {
  test('STATE_ROOT set → profile file lives under STATE_ROOT, not HOME', () => {
    // `--read` creates a stub profile when none exists yet.
    const outcome = runBin(BIN_DEV, ['--read'], {
      GSTACK_STATE_ROOT: stateRoot,
      GSTACK_HOME: homeRoot,
    });
    expect(outcome.status).toBe(0);

    const profileName = 'developer-profile.json';
    const underState = fs.existsSync(path.join(stateRoot, profileName));
    expect(underState).toBe(true);
    const underHome = fs.existsSync(path.join(homeRoot, profileName));
    expect(underHome).toBe(false);
  });
});
+50
View File
@@ -191,6 +191,13 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
// /plan-tune (v1 observational)
'plan-tune-inspect': ['plan-tune/**', 'scripts/question-registry.ts', 'scripts/psychographic-signals.ts', 'scripts/one-way-doors.ts', 'bin/gstack-question-log', 'bin/gstack-question-preference', 'bin/gstack-developer-profile'],
// /plan-tune cathedral (T16 — 5 E2E scenarios, all gate per D12)
'plan-tune-hook-capture': ['hosts/claude/hooks/**', 'bin/gstack-question-log', 'bin/gstack-developer-profile', 'plan-tune/**'],
'plan-tune-enforcement': ['hosts/claude/hooks/**', 'bin/gstack-question-preference', 'scripts/question-registry.ts'],
'plan-tune-annotation': ['hosts/claude/hooks/**', 'scripts/declared-annotation.ts', 'scripts/psychographic-signals.ts', 'scripts/question-registry.ts'],
'plan-tune-codex-import': ['bin/gstack-codex-session-import', 'bin/gstack-question-log', 'docs/spikes/codex-session-format.md'],
'plan-tune-dream-cycle': ['bin/gstack-distill-free-text', 'bin/gstack-distill-apply', 'hosts/claude/hooks/**', 'plan-tune/**'],
// Codex offering verification
'codex-offered-office-hours': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
'codex-offered-ceo-review': ['plan-ceo-review/**', 'scripts/gen-skill-docs.ts'],
@@ -378,6 +385,35 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
// /spec end-to-end via PTY — exercises the full Phase 1→5 pipeline
// including --execute spawn. Periodic-tier — paid + non-deterministic.
'spec-execute': ['spec/**', 'test/skill-e2e-spec-execute.test.ts'],
// /office-hours brain-writeback path under fake gbrain CLI (v1.50.0.0
// T7). Drives /office-hours with a regenerated SKILL.md that has the
// compressed GBRAIN_SAVE_RESULTS block + a fake gbrain on PATH; asserts
// the agent calls `gbrain put office-hours/<slug>` with valid YAML
// frontmatter. Touched by anything that changes resolver output, gen
// pipeline, detection helper, refresh subcommand, or the on-demand
// docs the resolver points to.
'office-hours-brain-writeback': [
'scripts/resolvers/gbrain.ts',
'scripts/gen-skill-docs.ts',
'bin/gstack-gbrain-detect',
'bin/gstack-config',
'office-hours/SKILL.md.tmpl',
'docs/gbrain-write-surfaces.md',
'test/fixtures/office-hours-brain-writeback/**',
'test/skill-e2e-office-hours-brain-writeback.test.ts',
],
// gbrain CLI real round-trip against a local PGLite store (v1.50.0.0
// T11). Proves the gbrain CLI persistence contract gstack relies on —
// a `gbrain put` followed by `gbrain get` returns the body. Skips if
// VOYAGE_API_KEY is unset OR gbrain CLI not on PATH. Touched by the
// resolver (which emits the CLI shape) and the test itself.
'gbrain-roundtrip-local': [
'scripts/resolvers/gbrain.ts',
'test/skill-e2e-gbrain-roundtrip-local.test.ts',
],
};
/**
@@ -425,6 +461,13 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
// Office Hours
'office-hours-spec-review': 'gate',
// Brain-writeback E2E — periodic per cost (claude -p) + non-deterministic
// (model interprets the gbrain instruction). Matches nearby
// setup-gbrain-path4-* tier classification.
'office-hours-brain-writeback': 'periodic',
// GBrain CLI round-trip — periodic per Voyage embedding cost (~$0.001/run)
// and external-API-dependency (skips cleanly if VOYAGE_API_KEY unset).
'gbrain-roundtrip-local': 'periodic',
'office-hours-forcing-energy': 'gate', // V1.1 mode-posture regression gate (Sonnet generator)
// 'office-hours-builder-wildness' retiered to periodic in v1.32 contributor
// wave: this is an LLM-judge creativity score (axis_a ≥4 on a "wildness"
@@ -528,6 +571,13 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
// /plan-tune — gate (core v1 DX promise: plain-English intent routing)
'plan-tune-inspect': 'gate',
// /plan-tune cathedral (T16 per D12 — all gate)
'plan-tune-hook-capture': 'gate',
'plan-tune-enforcement': 'gate',
'plan-tune-annotation': 'gate',
'plan-tune-codex-import': 'gate',
'plan-tune-dream-cycle': 'gate',
// Codex offering verification
'codex-offered-office-hours': 'gate',
'codex-offered-ceo-review': 'gate',
+220
View File
@@ -0,0 +1,220 @@
/**
* Layer 8 memory cache + injection (plan-tune cathedral T12).
*
* Verifies the PreToolUse hook reads ~/.gstack/free-text-memory.json and
* surfaces matching nuggets via additionalContext on the hook response.
* Cache: per-session memory-cache.json populated on first read, sub-1ms
* thereafter (D13 perf).
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
// Repository root: one directory above the directory containing this test file.
const ROOT = path.resolve(import.meta.dir, '..');
// Path of the hook executable under test; runHook() spawns it as a child process.
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-preference-hook');
// Per-test scratch locations, (re)assigned in beforeEach.
let stateRoot: string;
let fixtureCwd: string;
let cwdSlug: string;
// Create a fresh temp state root for every test, plus a fixture working
// directory inside it whose basename is cwdSlug.
beforeEach(() => {
stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-memcache-'));
cwdSlug = 'memcache-fixture';
fixtureCwd = path.join(stateRoot, cwdSlug);
fs.mkdirSync(fixtureCwd, { recursive: true });
});
// Remove the temp state root and everything under it; force:true keeps this
// from throwing if the directory is already gone.
afterEach(() => {
fs.rmSync(stateRoot, { recursive: true, force: true });
});
/**
 * Writes the given nuggets to `free-text-memory.json` directly under the
 * per-test state root, wrapped as `{ nuggets: [...] }`.
 *
 * @param nuggets Entries to serialize; `applied_at` is optional per entry.
 */
function writeMemory(nuggets: Array<{ nugget: string; applies_to_signal_keys: string[]; applied_at?: string }>) {
  const memoryFile = path.join(stateRoot, 'free-text-memory.json');
  const serialized = JSON.stringify({ nuggets });
  fs.writeFileSync(memoryFile, serialized);
}
/**
 * Spawns the hook synchronously, feeding it `stdin` (plus `cwd: fixtureCwd`)
 * as a JSON payload on standard input.
 *
 * The child environment is a copy of the current one with GSTACK_STATE_ROOT
 * pointed at the per-test state root, GSTACK_QUESTION_LOG_NO_DERIVE set to
 * '1', and GSTACK_HOME removed.
 *
 * @param stdin Hook payload fields; `cwd` is always overwritten with fixtureCwd.
 * @returns Captured stdout/stderr (empty string when absent), the exit status
 *          (-1 when absent), and stdout parsed as JSON (`null` if parsing
 *          fails; `{}` when stdout is empty).
 */
function runHook(stdin: object): { stdout: string; stderr: string; status: number; parsed: any } {
  const env: Record<string, string> = {};
  Object.entries(process.env).forEach(([key, value]) => {
    if (value !== undefined) env[key] = value;
  });
  env.GSTACK_STATE_ROOT = stateRoot;
  env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
  delete env.GSTACK_HOME;

  const payload = JSON.stringify({ ...stdin, cwd: fixtureCwd });
  const child = spawnSync(HOOK, [], {
    env,
    input: payload,
    encoding: 'utf-8',
    cwd: ROOT,
  });

  let parsed: any = null;
  try {
    parsed = JSON.parse(child.stdout || '{}');
  } catch {
    // Non-JSON stdout: leave parsed as null so callers' optional chains yield undefined.
  }

  const stdout = child.stdout ?? '';
  const stderr = child.stderr ?? '';
  const status = child.status ?? -1;
  return { stdout, stderr, status, parsed };
}
// ----------------------------------------------------------------------
// Injection behavior
// ----------------------------------------------------------------------
describe('memory injection', () => {
  /**
   * Fires the hook with the AskUserQuestion payload shared by every test in
   * this group; only the session and tool-use ids vary.
   * ship-todos-reorganize has signal_key 'detail-preference' per registry.
   */
  const askReorganize = (sessionId: string, toolUseId: string) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:ship-todos-reorganize> Reorganize?',
            options: ['A) Accept (recommended)', 'B) Skip'],
          },
        ],
      },
    });

  test('injects matching nugget into additionalContext on defer', () => {
    writeMemory([
      {
        nugget: 'User prefers verbose explanations with tradeoffs',
        applies_to_signal_keys: ['detail-preference'],
        applied_at: '2026-05-01T00:00:00Z',
      },
    ]);
    const r = askReorganize('s1', 'tu-1');
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(r.parsed?.hookSpecificOutput?.additionalContext).toContain('verbose explanations');
  });

  test('does not inject when no nugget matches the signal_key', () => {
    writeMemory([
      {
        nugget: 'Unrelated nugget',
        applies_to_signal_keys: ['totally-different-key'],
      },
    ]);
    const r = askReorganize('s2', 'tu-2');
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(r.parsed?.hookSpecificOutput?.additionalContext).toBeUndefined();
  });

  test('caps to 3 most-recent nuggets when many match', () => {
    writeMemory([
      { nugget: 'old-1', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-01-01T00:00:00Z' },
      { nugget: 'old-2', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-02-01T00:00:00Z' },
      { nugget: 'old-3', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-03-01T00:00:00Z' },
      { nugget: 'old-4', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-04-01T00:00:00Z' },
      { nugget: 'newest', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-05-01T00:00:00Z' },
    ]);
    const r = askReorganize('s3', 'tu-3');
    const ctx = r.parsed?.hookSpecificOutput?.additionalContext || '';
    // The three most recent entries must be present...
    expect(ctx).toContain('newest');
    expect(ctx).toContain('old-4');
    expect(ctx).toContain('old-3');
    // ...and BOTH older entries must be absent. Checking only 'old-1' would
    // let a cap of 4 slip through unnoticed.
    expect(ctx).not.toContain('old-2');
    expect(ctx).not.toContain('old-1');
  });

  test('memory injection works alongside deny enforcement', () => {
    writeMemory([
      {
        nugget: 'User prefers reorganizing for clarity',
        applies_to_signal_keys: ['detail-preference'],
        applied_at: '2026-05-01T00:00:00Z',
      },
    ]);
    // Set a never-ask preference and check both deny AND memory are surfaced.
    const projectDir = path.join(stateRoot, 'projects', cwdSlug);
    fs.mkdirSync(projectDir, { recursive: true });
    fs.writeFileSync(
      path.join(projectDir, 'question-preferences.json'),
      JSON.stringify({ 'ship-todos-reorganize': 'never-ask' }),
    );
    const r = askReorganize('s4', 'tu-4');
    // ship-todos-reorganize is two-way per registry — enforcement should fire.
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('deny');
    expect(r.parsed?.hookSpecificOutput?.permissionDecisionReason).toContain('plan-tune auto-decide');
    // Memory context isn't injected on deny path (it's already in the reason),
    // but the deny reason should mention the auto-decision clearly.
  });
});
// ----------------------------------------------------------------------
// Cache behavior
// ----------------------------------------------------------------------
describe('per-session memory cache', () => {
  // Minimal AskUserQuestion payload for the given session / tool-use ids.
  const ask = (sessionId: string, toolUseId: string) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          { question: '<gstack-qid:ship-todos-reorganize> Q', options: ['A', 'B'] },
        ],
      },
    });

  // NOTE(review): only the "first read writes cache" half of the title is
  // asserted here; nothing below exercises a second read.
  test('first read writes cache; subsequent reads use cache', () => {
    writeMemory([
      { nugget: 'cached nugget', applies_to_signal_keys: ['detail-preference'] },
    ]);
    ask('cache-test', 'tu-c1');

    const cacheFile = path.join(stateRoot, 'sessions', 'cache-test', 'memory-cache.json');
    expect(fs.existsSync(cacheFile)).toBe(true);

    const { nuggets } = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
    expect(nuggets).toHaveLength(1);
    expect(nuggets[0].nugget).toBe('cached nugget');
  });

  // No writeMemory() call: the canonical memory file does not exist for this test.
  test('cache miss when canonical file empty/missing → empty nuggets', () => {
    const outcome = ask('empty', 'tu-e');
    const hookOutput = outcome.parsed?.hookSpecificOutput;
    expect(hookOutput?.permissionDecision).toBe('defer');
    expect(hookOutput?.additionalContext).toBeUndefined();
  });
});
+212
View File
@@ -0,0 +1,212 @@
/**
* Plan-tune v1.49 gate regression tests.
*
* v1.49 shipped two prose-driven implicit gates inside plan-tune/SKILL.md.tmpl
* Step 0:
* - Consent gate: question_tuning=false AND ~/.gstack/.question-tuning-prompted missing
* → run "Consent + opt-in".
* - Setup gate: question_tuning=true AND declared empty AND
* ~/.gstack/.declared-setup-prompted missing → run "5-Q setup".
*
* The gates are evaluated by the agent reading the template's bash + prose.
* The cathedral (T5/T6) replaces enforcement with hooks, but it must NOT break
* these v1.49 gates — they're the only path from "feature off" to "feature on"
* for first-time users.
*
* Three regression tests (plus a static-template check), all FREE tier, IRON RULE (no opt-out):
* 1. consent-gate fires under the right conditions and stops re-firing after marker.
* 2. setup-gate fires under the right conditions and stops re-firing after marker.
* 3. marker idempotency: re-invoking after either decision produces zero re-prompts.
*
* Strategy: exercise the helpers the gates depend on (gstack-config get,
* developer-profile.json schema, marker file paths). If those break, the
* gates break. Plus a static-template assertion so the gate language can't
* be silently deleted from the template.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
// Repository root: one directory above the directory containing this test file.
const ROOT = path.resolve(import.meta.dir, '..');
// Executables and template the gate checks depend on, resolved inside the repo.
const BIN_CONFIG = path.join(ROOT, 'bin', 'gstack-config');
const BIN_DEV = path.join(ROOT, 'bin', 'gstack-developer-profile');
const SKILL_TMPL = path.join(ROOT, 'plan-tune', 'SKILL.md.tmpl');
// Per-test temp state root, (re)assigned in beforeEach and passed to child
// processes via GSTACK_STATE_ROOT in runBin().
let stateRoot: string;
// Fresh, empty state root for every test.
beforeEach(() => {
stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-gate-'));
});
// Remove the temp state root; force:true keeps this from throwing if it is already gone.
afterEach(() => {
fs.rmSync(stateRoot, { recursive: true, force: true });
});
/**
 * Runs a repo binary synchronously from the repository root.
 *
 * The child environment is a copy of the current one with GSTACK_STATE_ROOT
 * pointed at the per-test state root and GSTACK_HOME removed.
 *
 * @param bin  Path of the executable to spawn.
 * @param args Command-line arguments for the executable.
 * @returns Captured stdout/stderr (empty string when absent) and the exit
 *          status (-1 when absent).
 */
function runBin(
  bin: string,
  args: string[],
): { stdout: string; stderr: string; status: number } {
  const env: Record<string, string> = {};
  Object.entries(process.env).forEach(([key, value]) => {
    if (value !== undefined) env[key] = value;
  });
  env.GSTACK_STATE_ROOT = stateRoot;
  delete env.GSTACK_HOME;

  const { stdout, stderr, status } = spawnSync(bin, args, { env, encoding: 'utf-8', cwd: ROOT });
  return {
    stdout: stdout ?? '',
    stderr: stderr ?? '',
    status: status ?? -1,
  };
}
/**
 * Test-side model of the consent gate described in
 * plan-tune/SKILL.md.tmpl §"Implicit gates run first" → "Consent gate."
 *
 * @returns true when `gstack-config get question_tuning` reports 'false'
 *          (empty output is treated as 'false') AND the
 *          `.question-tuning-prompted` marker is absent from the state root.
 */
function evaluateConsentGate(): boolean {
  const { stdout } = runBin(BIN_CONFIG, ['get', 'question_tuning']);
  const tuning = stdout.trim() || 'false';
  if (tuning !== 'false') return false;
  const marker = path.join(stateRoot, '.question-tuning-prompted');
  return !fs.existsSync(marker);
}
/**
 * Test-side model of the setup gate described in
 * plan-tune/SKILL.md.tmpl §"Setup gate."
 *
 * @returns true when question_tuning reads 'true', the developer profile is
 *          missing or has no `declared` keys, AND the
 *          `.declared-setup-prompted` marker is absent from the state root.
 */
function evaluateSetupGate(): boolean {
  const tuning = runBin(BIN_CONFIG, ['get', 'question_tuning']).stdout.trim() || 'false';

  // A missing profile counts as "declared empty". The profile is read before
  // the tuning value is consulted, so a malformed file throws either way.
  const profileFile = path.join(stateRoot, 'developer-profile.json');
  let declaredEmpty = true;
  if (fs.existsSync(profileFile)) {
    const { declared } = JSON.parse(fs.readFileSync(profileFile, 'utf-8'));
    declaredEmpty = !declared || Object.keys(declared).length === 0;
  }

  if (tuning !== 'true' || !declaredEmpty) return false;
  const marker = path.join(stateRoot, '.declared-setup-prompted');
  return !fs.existsSync(marker);
}
// ---------------------------------------------------------------
// Test 1: consent gate fires + idempotent on marker write
// ---------------------------------------------------------------
describe('v1.49 consent gate', () => {
  // Persist question_tuning through the real config binary.
  const setTuning = (value: 'true' | 'false') =>
    runBin(BIN_CONFIG, ['set', 'question_tuning', value]);
  // Drop the empty marker file that records "user was already prompted".
  const writeConsentMarker = () =>
    fs.writeFileSync(path.join(stateRoot, '.question-tuning-prompted'), '');

  test('fires when question_tuning=false AND no marker', () => {
    setTuning('false');
    expect(evaluateConsentGate()).toBe(true);
  });

  test('does NOT fire after marker is written (decline path)', () => {
    setTuning('false');
    writeConsentMarker();
    expect(evaluateConsentGate()).toBe(false);
  });

  test('does NOT fire after question_tuning flipped to true (accept path)', () => {
    setTuning('true');
    expect(evaluateConsentGate()).toBe(false);
  });
});
// ---------------------------------------------------------------
// Test 2: setup gate fires + idempotent on marker write
// ---------------------------------------------------------------
describe('v1.49 setup gate', () => {
  // Persist question_tuning through the real config binary.
  const setTuning = (value: 'true' | 'false') =>
    runBin(BIN_CONFIG, ['set', 'question_tuning', value]);
  // --read creates a stub profile with empty declared.
  const seedStubProfile = () => runBin(BIN_DEV, ['--read']);

  test('fires when question_tuning=true AND declared empty AND no marker', () => {
    setTuning('true');
    seedStubProfile();
    expect(evaluateSetupGate()).toBe(true);
  });

  test('does NOT fire after declared populated (post-setup)', () => {
    setTuning('true');
    seedStubProfile();
    // Simulate setup completion: populate declared.
    const profileFile = path.join(stateRoot, 'developer-profile.json');
    const stored = JSON.parse(fs.readFileSync(profileFile, 'utf-8'));
    stored.declared = {
      scope_appetite: 0.85,
      risk_tolerance: 0.7,
      detail_preference: 0.5,
      autonomy: 0.5,
      architecture_care: 0.85,
    };
    fs.writeFileSync(profileFile, JSON.stringify(stored, null, 2));
    expect(evaluateSetupGate()).toBe(false);
  });

  test('does NOT fire after marker is written even if declared still empty (bail path)', () => {
    setTuning('true');
    seedStubProfile();
    const setupMarker = path.join(stateRoot, '.declared-setup-prompted');
    fs.writeFileSync(setupMarker, '');
    expect(evaluateSetupGate()).toBe(false);
  });

  test('does NOT fire when question_tuning still false (consent comes first)', () => {
    setTuning('false');
    seedStubProfile();
    expect(evaluateSetupGate()).toBe(false);
  });
});
// ---------------------------------------------------------------
// Test 3: marker idempotency across re-invocations
// ---------------------------------------------------------------
describe('v1.49 marker idempotency', () => {
  const RE_INVOCATIONS = 5;
  // Create an empty marker file with the given name under the state root.
  const touchMarker = (name: string) => fs.writeFileSync(path.join(stateRoot, name), '');

  test('consent gate stays silent across 5 re-invocations after one decline', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'false']);
    touchMarker('.question-tuning-prompted');
    let attempt = 0;
    while (attempt < RE_INVOCATIONS) {
      expect(evaluateConsentGate()).toBe(false);
      attempt += 1;
    }
  });

  test('setup gate stays silent across 5 re-invocations after one bail', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
    runBin(BIN_DEV, ['--read']);
    touchMarker('.declared-setup-prompted');
    let attempt = 0;
    while (attempt < RE_INVOCATIONS) {
      expect(evaluateSetupGate()).toBe(false);
      attempt += 1;
    }
  });

  test('both markers honored independently', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
    runBin(BIN_DEV, ['--read']);
    // Touch consent marker only; setup gate should still fire.
    touchMarker('.question-tuning-prompted');
    expect(evaluateConsentGate()).toBe(false);
    expect(evaluateSetupGate()).toBe(true);
  });
});
// ---------------------------------------------------------------
// Test 4: static-template assertion (catches accidental deletion of gate prose)
// ---------------------------------------------------------------
describe('v1.49 gate prose survives in skill template', () => {
  // Read once, while the describe body is evaluated.
  const tmpl = fs.readFileSync(SKILL_TMPL, 'utf-8');

  // Assert each pattern in order; the first miss fails the test.
  const expectTemplateMatches = (patterns: RegExp[]) => {
    for (const pattern of patterns) {
      expect(tmpl).toMatch(pattern);
    }
  };

  test('Consent gate condition is present', () => {
    expectTemplateMatches([
      /Consent gate/i,
      /question-tuning-prompted/,
      /question_tuning.*false/,
    ]);
  });

  test('Setup gate condition is present', () => {
    expectTemplateMatches([
      /Setup gate/i,
      /declared-setup-prompted/,
      /declared.*empty/i,
    ]);
  });

  test('marker writes documented for both gates', () => {
    expectTemplateMatches([
      /touch.*question-tuning-prompted/,
      /touch.*declared-setup-prompted/,
    ]);
  });
});
+285
View File
@@ -0,0 +1,285 @@
/**
* PostToolUse hook (plan-tune cathedral T5) — unit tests.
*
* Feeds the hook synthetic Claude Code hook payloads via stdin and asserts
* the resulting question-log.jsonl reflects the right schema. Covers:
* - Marker-first question_id (D18 progressive markers)
* - Hash fallback when no marker
* - source=hook tagging
* - source=auq-other when free_text present
* - Dedup on (source, tool_use_id) composite (D3)
* - Hook exits 0 even on malformed input (never blocks user session)
* - mcp__*__AskUserQuestion matcher acceptance
* - "(recommended)" label parse → recommended field populated
* - Refuse-on-ambiguous: two (recommended) labels → recommended omitted
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
// Repository root: one directory above the directory containing this test file.
const ROOT = path.resolve(import.meta.dir, '..');
// Path of the PostToolUse hook executable under test; runHook() spawns it.
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-log-hook');
// Per-test temp state root, (re)assigned in beforeEach.
let stateRoot: string;
beforeEach(() => {
stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-hooklog-'));
// NOTE: no project directory is pre-created here. readLog() scans whatever
// directories end up under <stateRoot>/projects after the hook runs.
});
// Remove the temp state root; force:true keeps this from throwing if it is already gone.
afterEach(() => {
fs.rmSync(stateRoot, { recursive: true, force: true });
});
/**
 * Spawns the PostToolUse hook synchronously from the repository root, feeding
 * it `stdin` serialized as JSON on standard input.
 *
 * The child environment is a copy of the current one with GSTACK_STATE_ROOT
 * pointed at the per-test state root, GSTACK_HOME removed, and
 * GSTACK_QUESTION_LOG_NO_DERIVE set to '1'.
 *
 * @param stdin Hook payload to serialize.
 * @returns Captured stdout/stderr (empty string when absent) and the exit
 *          status (-1 when absent).
 */
function runHook(stdin: object): { stdout: string; stderr: string; status: number } {
  const env: Record<string, string> = {};
  Object.entries(process.env).forEach(([key, value]) => {
    if (value !== undefined) env[key] = value;
  });
  env.GSTACK_STATE_ROOT = stateRoot;
  delete env.GSTACK_HOME;
  env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';

  const payload = JSON.stringify(stdin);
  const { stdout, stderr, status } = spawnSync(HOOK, [], {
    env,
    input: payload,
    encoding: 'utf-8',
    cwd: ROOT,
  });
  return {
    stdout: stdout ?? '',
    stderr: stderr ?? '',
    status: status ?? -1,
  };
}
/**
 * Collects every parseable event from `question-log.jsonl` across all
 * directories under `<stateRoot>/projects`.
 *
 * @returns Parsed JSONL entries in directory-listing then file order. Blank
 *          and malformed lines are skipped; an absent `projects` directory
 *          yields an empty array.
 */
function readLog(): Array<Record<string, unknown>> {
  const projectsRoot = path.join(stateRoot, 'projects');
  if (!fs.existsSync(projectsRoot)) return [];

  const events: Array<Record<string, unknown>> = [];
  for (const project of fs.readdirSync(projectsRoot)) {
    const logFile = path.join(projectsRoot, project, 'question-log.jsonl');
    if (!fs.existsSync(logFile)) continue;

    const rawLines = fs.readFileSync(logFile, 'utf-8').trim().split('\n');
    for (const raw of rawLines) {
      if (!raw) continue;
      try {
        events.push(JSON.parse(raw));
      } catch {
        // skip malformed
      }
    }
  }
  return events;
}
// ----------------------------------------------------------------------
// Native AskUserQuestion capture
// ----------------------------------------------------------------------
describe('PostToolUse hook (native AskUserQuestion)', () => {
  test('captures one event per question with source=hook and tool_use_id', () => {
    const acceptLabel = 'A) Accept (recommended)';
    const result = runHook({
      session_id: 'sess1',
      hook_event_name: 'PostToolUse',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-1',
      tool_input: {
        questions: [
          {
            question: 'D1 — Test capture\nRecommendation: A',
            options: [acceptLabel, 'B) Reject'],
            multiSelect: false,
          },
        ],
      },
      tool_response: {
        answers: [{ option_label: acceptLabel }],
      },
      cwd: ROOT,
    });
    expect(result.status).toBe(0);

    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.source).toBe('hook');
    expect(event.tool_use_id).toBe('tu-1');
    expect(event.session_id).toBe('sess1');
    // No marker in the question text, so the id is expected to carry the 'hook-' prefix.
    expect(typeof event.question_id).toBe('string');
    expect((event.question_id as string).startsWith('hook-')).toBe(true);
    expect(event.user_choice).toContain('Accept');
    // Recommended parsed from (recommended) label
    expect(event.recommended).toContain('Accept');
  });

  test('marker-first question_id when <gstack-qid:foo> present', () => {
    const fixNowLabel = 'A) Fix now (recommended)';
    runHook({
      session_id: 'sess2',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-2',
      tool_input: {
        questions: [
          {
            question: 'D2 — Marker test <gstack-qid:ship-test-failure-triage>\nRecommendation: A',
            options: [fixNowLabel, 'B) Investigate', 'C) Ack and ship'],
          },
        ],
      },
      tool_response: { answers: [{ option_label: fixNowLabel }] },
      cwd: ROOT,
    });

    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.question_id).toBe('ship-test-failure-triage');
    // Marker stripped from summary
    expect((event.question_summary as string).includes('<gstack-qid:')).toBe(false);
  });
});
// ----------------------------------------------------------------------
// MCP AskUserQuestion variant (Conductor)
// ----------------------------------------------------------------------
describe('PostToolUse hook (mcp__*__AskUserQuestion variant)', () => {
  test('accepts mcp__conductor__AskUserQuestion tool_name', () => {
    const { status } = runHook({
      session_id: 'sess3',
      tool_name: 'mcp__conductor__AskUserQuestion',
      tool_use_id: 'tu-3',
      tool_input: {
        questions: [{ question: 'Test', options: ['A', 'B'] }],
      },
      tool_response: { answers: [{ option_label: 'A' }] },
      cwd: ROOT,
    });
    expect(status).toBe(0);
    const logged = readLog();
    expect(logged.length).toBe(1);
  });

  // A tool the hook is not meant to capture must leave the log empty.
  test('ignores unrelated tool_name (defensive)', () => {
    const { status } = runHook({
      session_id: 'sess4',
      tool_name: 'Bash',
      tool_use_id: 'tu-4',
      tool_input: {},
      cwd: ROOT,
    });
    expect(status).toBe(0);
    const logged = readLog();
    expect(logged.length).toBe(0);
  });
});
// ----------------------------------------------------------------------
// Free-text capture (Layer 8 dream cycle)
// ----------------------------------------------------------------------
describe('PostToolUse hook (free-text "Other" responses)', () => {
  test('source=auq-other and free_text populated when user types free text', () => {
    // The answer carries both the 'Other' label and the typed text.
    const typedAnswer = {
      option_label: 'Other',
      free_text: 'I always include tests with new features',
    };
    runHook({
      session_id: 'sess5',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-5',
      tool_input: {
        questions: [{ question: 'D5 — Other test', options: ['A', 'B'] }],
      },
      tool_response: {
        answers: [typedAnswer],
      },
      cwd: ROOT,
    });

    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.source).toBe('auq-other');
    expect(event.free_text).toContain('always include tests');
  });
});
// ----------------------------------------------------------------------
// Dedup
// ----------------------------------------------------------------------
describe('PostToolUse hook (dedup on source + tool_use_id)', () => {
  test('second fire with same (source, tool_use_id) is dropped', () => {
    // Deliver the exact same payload twice; only one log entry may result.
    const fireOnce = () =>
      runHook({
        session_id: 'sess6',
        tool_name: 'AskUserQuestion',
        tool_use_id: 'tu-6',
        tool_input: { questions: [{ question: 'Dedup test', options: ['A'] }] },
        tool_response: { answers: [{ option_label: 'A' }] },
        cwd: ROOT,
      });
    fireOnce();
    fireOnce();

    const logged = readLog();
    expect(logged.length).toBe(1);
  });
});
// ----------------------------------------------------------------------
// Refuse-on-ambiguous (D2 safety)
// ----------------------------------------------------------------------
describe('PostToolUse hook (recommended parser safety)', () => {
  test('two (recommended) labels → recommended field omitted', () => {
    // Both options claim to be recommended, so no single one can be picked.
    const conflictingOptions = ['A) Foo (recommended)', 'B) Bar (recommended)'];
    runHook({
      session_id: 'sess7',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-7',
      tool_input: {
        questions: [
          {
            question: 'Ambiguous test',
            options: conflictingOptions,
          },
        ],
      },
      tool_response: { answers: [{ option_label: 'A) Foo (recommended)' }] },
      cwd: ROOT,
    });

    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.recommended).toBeUndefined();
  });
});
// ----------------------------------------------------------------------
// Crash safety
// ----------------------------------------------------------------------
describe('PostToolUse hook (crash safety)', () => {
  /**
   * Spawns the hook with raw stdin, bypassing runHook()'s JSON.stringify so
   * that empty and malformed input can be delivered.
   *
   * The environment is prepared the same way runHook() does it, including
   * removal of GSTACK_HOME, so an inherited value cannot influence where the
   * hook writes its state (the error-log assertion below looks only under
   * stateRoot).
   */
  const spawnWithRawStdin = (input: string) => {
    const env: Record<string, string> = {};
    for (const [k, v] of Object.entries(process.env)) {
      if (v !== undefined) env[k] = v;
    }
    env.GSTACK_STATE_ROOT = stateRoot;
    delete env.GSTACK_HOME;
    env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
    return spawnSync(HOOK, [], { env, input, encoding: 'utf-8' });
  };

  test('exits 0 on empty stdin', () => {
    const res = spawnWithRawStdin('');
    expect(res.status).toBe(0);
  });

  test('exits 0 on malformed JSON', () => {
    const res = spawnWithRawStdin('not json');
    expect(res.status).toBe(0);
    // Error logged to hook-errors.log
    const errLog = path.join(stateRoot, 'hook-errors.log');
    expect(fs.existsSync(errLog)).toBe(true);
    expect(fs.readFileSync(errLog, 'utf-8')).toContain('stdin parse failed');
  });
});
+385
View File
@@ -0,0 +1,385 @@
/**
* PreToolUse enforcement hook (plan-tune cathedral T6) — unit tests.
*
* Covers:
* - never-ask + marker + two-way + clean recommendation → deny+reason
* - never-ask + no marker → defer (D18 marker gate)
* - never-ask + one-way → defer (safety override)
* - never-ask + ambiguous recommendation → defer (D2 refuse-on-ambiguous)
* - always-ask → defer
* - no preference → defer
* - project preference wins over global (D8 precedence)
* - global preference applies when no project preference set
* - mcp__*__AskUserQuestion matcher accepted
* - empty stdin → defer (crash safety)
* - auto-decided event logged via gstack-question-log (PostToolUse won't fire)
* - auto-decided marker written to ~/.gstack/sessions/<id>/.auto-decided-<tool_use_id>
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
// Repository root: one directory above the directory containing this test file.
const ROOT = path.resolve(import.meta.dir, '..');
// Path of the PreToolUse hook executable under test; runHook() spawns it.
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-preference-hook');
// Per-test scratch locations, (re)assigned in beforeEach.
let stateRoot: string;
let cwdSlug: string;
let fixtureCwd: string;
// Fresh temp state root per test, with the project directory that the
// preference/log helpers write into already created.
beforeEach(() => {
stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-prefhook-'));
cwdSlug = 'fixture-slug';
fs.mkdirSync(path.join(stateRoot, 'projects', cwdSlug), { recursive: true });
// Real directory that the hook can chdir() into. gstack-slug derives the
// slug from the basename of this cwd (no .git => basename fallback path).
fixtureCwd = path.join(stateRoot, cwdSlug);
fs.mkdirSync(fixtureCwd, { recursive: true });
});
// Remove the temp state root; force:true keeps this from throwing if it is already gone.
afterEach(() => {
fs.rmSync(stateRoot, { recursive: true, force: true });
});
/**
 * Sets one entry in the fixture project's `question-preferences.json`,
 * keeping any entries already stored there.
 *
 * @param questionId Key to set.
 * @param preference Value to store under that key (e.g. 'never-ask').
 */
function writeProjectPref(questionId: string, preference: string): void {
  const prefsFile = path.join(stateRoot, 'projects', cwdSlug, 'question-preferences.json');
  const stored: Record<string, string> = fs.existsSync(prefsFile)
    ? JSON.parse(fs.readFileSync(prefsFile, 'utf-8'))
    : {};
  stored[questionId] = preference;
  fs.writeFileSync(prefsFile, JSON.stringify(stored, null, 2));
}
/**
 * Sets one entry in `global-question-preferences.json` at the top of the
 * state root, keeping any entries already stored there.
 *
 * @param questionId Key to set.
 * @param preference Value to store under that key (e.g. 'never-ask').
 */
function writeGlobalPref(questionId: string, preference: string): void {
  const prefsFile = path.join(stateRoot, 'global-question-preferences.json');
  const stored: Record<string, string> = fs.existsSync(prefsFile)
    ? JSON.parse(fs.readFileSync(prefsFile, 'utf-8'))
    : {};
  stored[questionId] = preference;
  fs.writeFileSync(prefsFile, JSON.stringify(stored, null, 2));
}
/**
 * Spawns the PreToolUse hook synchronously from the repository root, feeding
 * it `stdin` plus a `cwd` field as JSON on standard input.
 *
 * The child environment is a copy of the current one with GSTACK_STATE_ROOT
 * pointed at the per-test state root, GSTACK_HOME removed, and
 * GSTACK_QUESTION_LOG_NO_DERIVE set to '1'.
 *
 * @param stdin Hook payload fields; its `cwd` (if any) is overwritten.
 * @param cwd   Value for the payload's `cwd`; falls back to fixtureCwd when
 *              omitted or empty.
 * @returns Captured stdout/stderr (empty string when absent), the exit status
 *          (-1 when absent), and stdout parsed as JSON (`null` if parsing
 *          fails; `{}` when stdout is empty).
 */
function runHook(stdin: object, cwd?: string): {
  stdout: string;
  stderr: string;
  status: number;
  parsed: any;
} {
  const env: Record<string, string> = {};
  Object.entries(process.env).forEach(([key, value]) => {
    if (value !== undefined) env[key] = value;
  });
  env.GSTACK_STATE_ROOT = stateRoot;
  delete env.GSTACK_HOME;
  env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';

  const payload = JSON.stringify({ ...stdin, cwd: cwd || fixtureCwd });
  const child = spawnSync(HOOK, [], {
    env,
    input: payload,
    encoding: 'utf-8',
    cwd: ROOT,
  });

  let parsed: any = null;
  try {
    parsed = JSON.parse(child.stdout || '{}');
  } catch {
    // Non-JSON stdout: leave parsed as null so callers' optional chains yield undefined.
  }

  const stdout = child.stdout ?? '';
  const stderr = child.stderr ?? '';
  const status = child.status ?? -1;
  return { stdout, stderr, status, parsed };
}
/**
 * Reads the fixture project's `question-log.jsonl` and returns only the
 * entries whose `source` is 'auto-decided'.
 *
 * @returns Matching events in file order, or an empty array when the log file
 *          does not exist. Every non-blank line is parsed before filtering,
 *          so a malformed line throws.
 */
function autoDecidedEvents(): Array<Record<string, unknown>> {
  const logFile = path.join(stateRoot, 'projects', cwdSlug, 'question-log.jsonl');
  if (!fs.existsSync(logFile)) return [];

  const rawLines = fs.readFileSync(logFile, 'utf-8').trim().split('\n');
  const parsedEvents = rawLines
    .filter((line) => Boolean(line))
    .map((line) => JSON.parse(line));
  return parsedEvents.filter((event) => event.source === 'auto-decided');
}
// ----------------------------------------------------------------------
// Defer paths
// ----------------------------------------------------------------------
describe('defers (no enforcement)', () => {
  /**
   * Fires the hook with a single yes/no AskUserQuestion whose first option is
   * labelled "(recommended)"; only the ids and question text vary.
   */
  const askYesNo = (sessionId: string, toolUseId: string, question: string) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          { question, options: ['A) Yes (recommended)', 'B) No'] },
        ],
      },
    });

  test('no preference set → defer', () => {
    const r = askYesNo('s1', 'tu-1', '<gstack-qid:test-q> Need approval?');
    expect(r.status).toBe(0);
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('marker missing → defer (D18)', () => {
    writeProjectPref('test-q', 'never-ask');
    const r = askYesNo('s2', 'tu-2', 'No marker here');
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('always-ask preference → defer', () => {
    writeProjectPref('test-q', 'always-ask');
    const r = askYesNo('s3', 'tu-3', '<gstack-qid:test-q> Yes?');
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('empty stdin → defer (crash safety)', () => {
    // runHook() always serializes a JSON object, so truly empty stdin has to
    // be delivered with a hand-built spawn.
    const env: Record<string, string> = {};
    for (const [k, v] of Object.entries(process.env)) {
      if (v !== undefined) env[k] = v;
    }
    env.GSTACK_STATE_ROOT = stateRoot;
    // Same isolation as runHook(): an inherited GSTACK_HOME must not reach
    // the child process.
    delete env.GSTACK_HOME;
    const res = spawnSync(HOOK, [], { env, input: '', encoding: 'utf-8' });
    expect(res.status).toBe(0);
    const parsed = JSON.parse(res.stdout || '{}');
    expect(parsed.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('non-AUQ tool_name → defer (defensive)', () => {
    writeProjectPref('test-q', 'never-ask');
    const r = runHook({ session_id: 's4', tool_name: 'Bash', tool_use_id: 'tu-4', tool_input: {} });
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });
});
// ----------------------------------------------------------------------
// Enforcement paths (deny+reason)
// ----------------------------------------------------------------------
describe('enforces never-ask preferences', () => {
  // Fires the hook with one AskUserQuestion built from the given text and options.
  const ask = (sessionId: string, toolUseId: string, question: string, options: string[]) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          {
            question,
            options,
          },
        ],
      },
    });

  test('marker + never-ask + two-way + clean recommendation → deny', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    const outcome = ask(
      's5',
      'tu-5',
      '<gstack-qid:ship-pre-landing-review-fix> Pre-landing review flagged issue.',
      ['A) Fix now (recommended)', 'B) Skip'],
    );
    const hookOutput = outcome.parsed?.hookSpecificOutput;
    expect(hookOutput?.permissionDecision).toBe('deny');
    // The deny reason names the mechanism and the option that was auto-picked.
    expect(hookOutput?.permissionDecisionReason).toContain('plan-tune auto-decide');
    expect(hookOutput?.permissionDecisionReason).toContain('Fix now');
  });

  test('one-way door → defer even with never-ask (safety override)', () => {
    writeProjectPref('ship-test-failure-triage', 'never-ask');
    const outcome = ask(
      's6',
      'tu-6',
      '<gstack-qid:ship-test-failure-triage> Tests failed.',
      ['A) Fix now (recommended)', 'B) Investigate', 'C) Ack and ship'],
    );
    expect(outcome.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('ambiguous recommendation (two labels) → defer (D2 refuse-on-ambiguous)', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    const outcome = ask(
      's7',
      'tu-7',
      '<gstack-qid:ship-pre-landing-review-fix> Ambiguous',
      ['A) Fix now (recommended)', 'B) Skip (recommended)'],
    );
    expect(outcome.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });

  test('no recommendation marker AND no prose match → defer', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    const outcome = ask(
      's8',
      'tu-8',
      '<gstack-qid:ship-pre-landing-review-fix> No rec',
      ['A) Foo', 'B) Bar'],
    );
    expect(outcome.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });
});
// ----------------------------------------------------------------------
// Precedence (D8)
// ----------------------------------------------------------------------
describe('precedence: project wins over global (D8)', () => {
  const QUESTION_ID = 'ship-pre-landing-review-fix';

  // Every test asks the same marked question; only the ids differ.
  const askMarked = (sessionId: string, toolUseId: string) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:ship-pre-landing-review-fix> P?',
            options: ['A) Fix (recommended)', 'B) Skip'],
          },
        ],
      },
    });

  test('project never-ask + global always-ask → enforce never-ask', () => {
    writeProjectPref(QUESTION_ID, 'never-ask');
    writeGlobalPref(QUESTION_ID, 'always-ask');
    const outcome = askMarked('s9', 'tu-9');
    expect(outcome.parsed?.hookSpecificOutput?.permissionDecision).toBe('deny');
  });

  test('only global never-ask → enforce (fallback path)', () => {
    writeGlobalPref(QUESTION_ID, 'never-ask');
    const outcome = askMarked('s10', 'tu-10');
    expect(outcome.parsed?.hookSpecificOutput?.permissionDecision).toBe('deny');
  });

  test('project always-ask + global never-ask → defer (project wins)', () => {
    writeProjectPref(QUESTION_ID, 'always-ask');
    writeGlobalPref(QUESTION_ID, 'never-ask');
    const outcome = askMarked('s11', 'tu-11');
    expect(outcome.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
  });
});
// ----------------------------------------------------------------------
// MCP matcher acceptance
// ----------------------------------------------------------------------
describe('MCP variant', () => {
  // Same never-ask scenario as the plain AskUserQuestion tests, but the tool
  // is addressed by its MCP-prefixed name; the expected decision is still 'deny'.
  test('mcp__conductor__AskUserQuestion accepted and enforced', () => {
    writeProjectPref('ship-pre-landing-review-fix', 'never-ask');
    const hookResult = runHook({
      session_id: 's12',
      tool_name: 'mcp__conductor__AskUserQuestion',
      tool_use_id: 'tu-12',
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:ship-pre-landing-review-fix> P?',
            options: ['A) Fix (recommended)', 'B) Skip'],
          },
        ],
      },
    });
    const decision = hookResult.parsed?.hookSpecificOutput?.permissionDecision;
    expect(decision).toBe('deny');
  });
});
// ----------------------------------------------------------------------
// Auto-decided event logging (since PostToolUse never fires on deny)
// ----------------------------------------------------------------------
describe('auto-decided event tagging', () => {
  const QID = 'ship-pre-landing-review-fix';

  // Hook input for one AskUserQuestion whose option A is marked "(recommended)".
  const recommendedFixInput = (session_id: string, tool_use_id: string) => ({
    session_id,
    tool_name: 'AskUserQuestion' as const,
    tool_use_id,
    tool_input: {
      questions: [
        {
          question: '<gstack-qid:ship-pre-landing-review-fix> P?',
          options: ['A) Fix (recommended)', 'B) Skip'],
        },
      ],
    },
  });

  test('logs source=auto-decided event when enforcing', () => {
    writeProjectPref(QID, 'never-ask');
    runHook(recommendedFixInput('s13', 'tu-13'), fixtureCwd);

    const logged = autoDecidedEvents();
    expect(logged.length).toBe(1);

    const [event] = logged;
    expect(event.question_id).toBe('ship-pre-landing-review-fix');
    expect(event.user_choice).toContain('Fix');
    expect(event.tool_use_id).toBe('tu-13');
  });

  test('writes .auto-decided-<tool_use_id> marker for PostToolUse coordination', () => {
    writeProjectPref(QID, 'never-ask');
    runHook(recommendedFixInput('s14', 'tu-14'));

    // The marker is looked up under <stateRoot>/sessions/<session_id>/.
    const sessionDir = path.join(stateRoot, 'sessions', 's14');
    expect(fs.existsSync(path.join(sessionDir, '.auto-decided-tu-14'))).toBe(true);
  });
});
+10 -3
View File
@@ -35,11 +35,18 @@ function listTrackedSkillMd(): string[] {
return out.split("\n").filter((line) => line.trim().length > 0);
}
describe("scripts/resolvers/gbrain.ts — no put_page in emitted instructions (regression for #1346)", () => {
it("resolver source ships only `gbrain put` instructions, not the renamed `put_page`", () => {
describe("scripts/resolvers/gbrain.ts — no `gbrain put_page` CLI subcommand in emitted instructions (regression for #1346)", () => {
it("resolver source ships only `gbrain put` CLI instructions, not the renamed `gbrain put_page`", () => {
// We're guarding against the v0.18 CLI subcommand rename
// (`gbrain put_page <slug>` → `gbrain put <slug>`). The MCP op
// `mcp__gbrain__put_page` is a legitimately separate identifier (the
// MCP-layer write op, unrelated to the CLI rename) and may still
// appear in resolver output as a fallback reference for the
// calibration-take write-back path. So check the CLI subcommand
// shape specifically: `gbrain put_page` with a space.
const src = readFileSync(RESOLVER_PATH, "utf-8");
const stripped = stripComments(src);
expect(stripped).not.toContain("put_page");
expect(stripped).not.toContain("gbrain put_page");
});
it("every tracked SKILL.md file is free of the renamed gbrain put_page subcommand", () => {
+137
View File
@@ -0,0 +1,137 @@
/**
* Resolver regression pin for generateGBrainSaveResults +
* generateGBrainContextLoad (compressed in v1.50.0.0).
*
* Two coverage stories:
* 1. **Wiring symmetry**: all 5 planning skills (office-hours, plan-ceo-review,
* plan-eng-review, plan-design-review, plan-devex-review) get the correct
* slug prefix + tag in the emitted save instructions.
* 2. **Token-budget pin**: post-compression, each block stays under a chars
* ceiling so a future "let me just add one more line" refactor doesn't
* silently re-inflate the prompt cost back toward the ~1000-token
* naive-un-suppression baseline.
*
* Gate-tier, free, pure import + render — no host generation, no claude -p.
*/
import { describe, test, expect } from 'bun:test';
import {
generateGBrainContextLoad,
generateGBrainSaveResults,
} from '../scripts/resolvers/gbrain';
import { HOST_PATHS } from '../scripts/resolvers/types';
import type { TemplateContext } from '../scripts/resolvers/types';
/**
 * Builds a minimal TemplateContext for rendering a resolver block as the
 * `claude` host.
 *
 * @param skillName skill directory name; also used to derive a placeholder
 *   template path under /tmp.
 */
function buildCtx(skillName: string): TemplateContext {
  const tmplPath = `/tmp/${skillName}/SKILL.md.tmpl`;
  const ctx: TemplateContext = {
    skillName,
    tmplPath,
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return ctx;
}
/** One row of expected save-block metadata for a planning skill. */
interface PlanningSkillCase {
  skill: string;
  slugPrefix: string;
  tag: string;
  title: string;
}

// Expected slug prefix, tag and title prefix per planning skill. When adding a
// planning skill, add it here AND in scripts/resolvers/gbrain.ts skillSaveMap.
// Renaming one makes the tests below fail loudly — that is the regression pin
// doing its job.
const PLANNING_SKILLS: PlanningSkillCase[] = [
  {
    skill: 'office-hours',
    slugPrefix: 'office-hours/',
    tag: 'design-doc',
    title: 'Office Hours',
  },
  {
    skill: 'plan-ceo-review',
    slugPrefix: 'ceo-plans/',
    tag: 'ceo-plan',
    title: 'CEO Plan',
  },
  {
    skill: 'plan-eng-review',
    slugPrefix: 'eng-reviews/',
    tag: 'eng-review',
    title: 'Eng Review',
  },
  {
    skill: 'plan-design-review',
    slugPrefix: 'design-reviews/',
    tag: 'design-review',
    title: 'Design Review',
  },
  {
    skill: 'plan-devex-review',
    slugPrefix: 'devex-reviews/',
    tag: 'devex-review',
    title: 'Devex Review',
  },
];
describe('generateGBrainSaveResults — wiring + compression pin', () => {
  test.each(PLANNING_SKILLS)(
    '$skill emits gbrain put $slugPrefix... with $tag tag',
    ({ skill, slugPrefix, tag, title }) => {
      const rendered = generateGBrainSaveResults(buildCtx(skill));

      // The save block must use the `gbrain put` CLI subcommand and must not
      // mention `put_page` in any form. This is stricter than only rejecting
      // the renamed `gbrain put_page` subcommand.
      expect(rendered).toContain('gbrain put');
      expect(rendered).not.toContain('put_page');

      const requiredSnippets = [
        // Per-skill slug prefix is exactly what skillSaveMap declares.
        `"${slugPrefix}<feature-slug>"`,
        // Title prefix + tag match the metadata.
        `title: "${title}:`,
        `tags: [${tag},`,
        // Skip-header lets the agent short-circuit when gbrain is absent.
        'Skip this entire section if `gbrain` is not on PATH',
        // Compact block points at the doc that holds the full template.
        'docs/gbrain-write-surfaces.md',
      ];
      for (const snippet of requiredSnippets) {
        expect(rendered).toContain(snippet);
      }
    },
  );

  test('all 5 planning skills produce output under ~600 chars (~150 tokens)', () => {
    // Token-budget pin. Naive un-suppression would emit ~1000 tokens (~4000 chars)
    // per skill; the compressed target is ~150 tokens (~600 chars). The enforced
    // ceiling is a more generous 750 chars, leaving room for the heredoc
    // structure without inviting gradual re-inflation of the prose.
    const CEILING_CHARS = 750;
    for (const { skill } of PLANNING_SKILLS) {
      const rendered = generateGBrainSaveResults(buildCtx(skill));
      if (rendered.length <= CEILING_CHARS) continue;
      throw new Error(
        `generateGBrainSaveResults('${skill}') emitted ${rendered.length} chars (~${Math.round(rendered.length / 4)} tokens), ` +
          `exceeds ceiling of ${CEILING_CHARS} chars (~${Math.round(CEILING_CHARS / 4)} tokens). ` +
          `If you added necessary content, move the verbose prose into ` +
          `docs/gbrain-write-surfaces.md §Save Template (which the agent reads on demand) and ` +
          `keep the inline block as a short pointer + per-skill metadata. ` +
          `See gbrain.ts T4/v1.50.0.0 compression rationale.`,
      );
    }
  });

  test('unmapped skill name falls through to compact generic template', () => {
    const rendered = generateGBrainSaveResults(buildCtx('no-such-skill'));

    // The generic fallback still emits gbrain put + skip-header + docs pointer.
    expect(rendered).toContain('gbrain put');
    expect(rendered).toContain('Skip this entire section if `gbrain` is not on PATH');
    expect(rendered).toContain('docs/gbrain-write-surfaces.md');

    // No per-skill slug prefix from the map may appear: that would mean an
    // unmapped skill accidentally took the per-skill path.
    PLANNING_SKILLS.forEach(({ slugPrefix }) => {
      expect(rendered).not.toContain(`"${slugPrefix}<feature-slug>"`);
    });
  });
});
describe('generateGBrainContextLoad — compression pin', () => {
  test('emits skip-header and docs pointer, stays under ~500 chars', () => {
    // Same compression discipline as SAVE_RESULTS. Context load was ~350-450
    // tokens before compression; the target is ~80 tokens (~320 chars). The
    // 500-char ceiling is generous to leave room for skill-specific suffixes.
    const CEILING_CHARS = 500;
    const rendered = generateGBrainContextLoad(buildCtx('plan-ceo-review'));

    const requiredSnippets = [
      'Skip this entire section if `gbrain` is not on PATH',
      'docs/gbrain-write-surfaces.md',
      'gbrain search',
      'gbrain get_page',
    ];
    for (const snippet of requiredSnippets) {
      expect(rendered).toContain(snippet);
    }

    if (rendered.length > CEILING_CHARS) {
      throw new Error(
        `generateGBrainContextLoad emitted ${rendered.length} chars (~${Math.round(rendered.length / 4)} tokens), ` +
          `exceeds ceiling of 500 chars (~125 tokens). ` +
          `Move verbose prose to docs/gbrain-write-surfaces.md §Context Load.`,
      );
    }
  });

  test('/investigate gets the data-research routing suffix', () => {
    const rendered = generateGBrainContextLoad(buildCtx('investigate'));
    expect(rendered).toContain('data-research');
  });

  test('non-investigate skills do NOT get the data-research suffix', () => {
    PLANNING_SKILLS.forEach(({ skill }) => {
      const rendered = generateGBrainContextLoad(buildCtx(skill));
      expect(rendered).not.toContain('data-research');
    });
  });
});
+95
View File
@@ -0,0 +1,95 @@
/**
* D9 salience privacy gate (T17).
*
* Verifies that fetchSalience strips entries whose slugs don't match the
* allowlist prefixes BEFORE writing the digest to disk. Sensitive content
* (family, therapy, reflection) is never persisted into the cache.
*
* Gate-tier, free.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { SALIENCE_DEFAULT_ALLOWLIST } from '../scripts/brain-cache-spec';
const ORIGINAL_ENV = process.env.GSTACK_SALIENCE_ALLOWLIST;
// Evict the cache module from require.cache before every test so the next
// import re-evaluates it (presumably so per-test env changes are seen — confirm).
beforeEach(() => {
  const cacheModuleId = require.resolve('../bin/gstack-brain-cache');
  delete require.cache[cacheModuleId];
});
// Restore GSTACK_SALIENCE_ALLOWLIST to its pre-suite state after every test.
afterEach(() => {
  // Compare against undefined rather than truthiness: an originally-empty
  // value ('') must be restored as-is, not deleted.
  if (ORIGINAL_ENV !== undefined) process.env.GSTACK_SALIENCE_ALLOWLIST = ORIGINAL_ENV;
  else delete process.env.GSTACK_SALIENCE_ALLOWLIST;
});
/** Dynamically imports the brain-cache module and resolves to its namespace. */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const cacheModule: typeof import('../bin/gstack-brain-cache') = await import('../bin/gstack-brain-cache');
  return cacheModule;
}
describe('salience allowlist gate', () => {
  test('default allowlist permits projects/ + gstack/ + concepts/', async () => {
    const cache = await importCache();
    const permitted = ['projects/myrepo', 'gstack/product/helsinki', 'concepts/some-idea'];
    for (const slug of permitted) {
      expect(cache.isSalienceSlugAllowed(slug, SALIENCE_DEFAULT_ALLOWLIST)).toBe(true);
    }
  });

  test('default allowlist BLOCKS personal/ + family/ + therapy/ + reflections', async () => {
    const cache = await importCache();
    const blocked = [
      'personal/reflection-2026-05',
      'family/in-laws/ngo-kim-shing',
      'therapy-session/2026-05-15',
      'reflection/notes',
    ];
    for (const slug of blocked) {
      expect(cache.isSalienceSlugAllowed(slug, SALIENCE_DEFAULT_ALLOWLIST)).toBe(false);
    }
  });

  test('isSalienceSlugAllowed handles empty allowlist (blocks everything)', async () => {
    const cache = await importCache();
    const verdict = cache.isSalienceSlugAllowed('anything/at-all', []);
    expect(verdict).toBe(false);
  });

  test('isSalienceSlugAllowed handles arbitrary prefixes', async () => {
    const cache = await importCache();
    const customOnly = ['custom/'];
    expect(cache.isSalienceSlugAllowed('custom/scope', customOnly)).toBe(true);
    expect(cache.isSalienceSlugAllowed('other/scope', customOnly)).toBe(false);
  });

  test('getSalienceAllowlist returns default when env unset and config silent', async () => {
    delete process.env.GSTACK_SALIENCE_ALLOWLIST;
    const cache = await importCache();
    const resolved = cache.getSalienceAllowlist();
    expect(Array.isArray(resolved)).toBe(true);
    expect(resolved.length).toBeGreaterThan(0);
    // At minimum the curated defaults must be present.
    expect(resolved).toContain('projects/');
    expect(resolved).toContain('gstack/');
  });

  test('GSTACK_SALIENCE_ALLOWLIST env override is honored', async () => {
    process.env.GSTACK_SALIENCE_ALLOWLIST = 'custom-a/,custom-b/,custom-c/';
    const cache = await importCache();
    expect(cache.getSalienceAllowlist()).toEqual(['custom-a/', 'custom-b/', 'custom-c/']);
  });

  test('GSTACK_SALIENCE_ALLOWLIST with whitespace is trimmed', async () => {
    process.env.GSTACK_SALIENCE_ALLOWLIST = ' projects/ , gstack/ , concepts/ ';
    const cache = await importCache();
    expect(cache.getSalienceAllowlist()).toEqual(['projects/', 'gstack/', 'concepts/']);
  });

  test('empty env value falls through to default (not empty list)', async () => {
    process.env.GSTACK_SALIENCE_ALLOWLIST = '';
    const cache = await importCache();
    const resolved = cache.getSalienceAllowlist();
    expect(resolved.length).toBeGreaterThan(0);
  });

  test('default allowlist contains nothing sensitive', async () => {
    // No default allowlist entry may begin with any of these name stems.
    const sensitiveStems = ['personal', 'family', 'therapy', 'reflection', 'private', 'medical', 'health'];
    sensitiveStems.forEach((stem) => {
      const hit = SALIENCE_DEFAULT_ALLOWLIST.some((entry) => entry.startsWith(stem));
      expect(hit).toBe(false);
    });
  });
});
+108
View File
@@ -0,0 +1,108 @@
/**
* Schema-version cache migration (D4 A4 / T19).
*
* When gstack-core@1.x.y bumps and the cached _meta.json records an older
* schema_version, the cache layer triggers a FULL rebuild for the affected
* scope (not just delete-the-stale-file). Verifies the rebuild path is
* invoked AND the cache files for that scope are wiped before refresh.
*
* Gate-tier, free, ~50ms.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
// Per-test timeout: the schema-mismatch path triggers a full-scope rebuild,
// which fans out to refreshEntity for each of 7 per-project entities. Each
// refresh shells out to gbrain with a 10s internal timeout, so the worst case
// is ~70s. The budget has to sit above that worst case (with headroom for a
// slow brain); a lower value lets the runner fail a rebuild that is still
// within its own per-entity limits.
const SLOW_TIMEOUT = 90_000;
import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { GSTACK_SCHEMA_PACK_VERSION } from '../scripts/brain-cache-spec';
let TMP_HOME: string;
const ORIGINAL_HOME = process.env.GSTACK_HOME;
// Give every test its own scratch GSTACK_HOME and evict the cache module from
// require.cache so the next import re-evaluates it.
beforeEach(() => {
  const scratchPrefix = join(tmpdir(), 'gstack-schema-test-');
  TMP_HOME = mkdtempSync(scratchPrefix);
  process.env.GSTACK_HOME = TMP_HOME;

  const cacheModuleId = require.resolve('../bin/gstack-brain-cache');
  delete require.cache[cacheModuleId];
});
// Restore GSTACK_HOME to its pre-suite state and remove the per-test scratch dir.
afterEach(() => {
  // Compare against undefined rather than truthiness: an originally-empty
  // value ('') must be restored as-is, not deleted.
  if (ORIGINAL_HOME !== undefined) process.env.GSTACK_HOME = ORIGINAL_HOME;
  else delete process.env.GSTACK_HOME;
  // Cleanup is best effort; a failed removal must not fail the test.
  try { rmSync(TMP_HOME, { recursive: true, force: true }); } catch { /* best effort */ }
});
/** Dynamically imports the brain-cache module and resolves to its namespace. */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const cacheModule: typeof import('../bin/gstack-brain-cache') = await import('../bin/gstack-brain-cache');
  return cacheModule;
}
describe('schema-version cache migration (D4 A4)', () => {
  // Creates (and returns) the per-project cache dir for the 'helsinki' fixture
  // project under the current test's TMP_HOME.
  const makeCacheDir = (): string => {
    const dir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
    mkdirSync(dir, { recursive: true });
    return dir;
  };

  // Serializes `meta` into <dir>/_meta.json.
  const writeMeta = (dir: string, meta: Record<string, unknown>): void => {
    writeFileSync(join(dir, '_meta.json'), JSON.stringify(meta));
  };

  test('cache file with mismatched schema_version triggers wipe-and-rebuild attempt', { timeout: SLOW_TIMEOUT }, async () => {
    const cache = await importCache();
    const cacheDir = makeCacheDir();
    const stalePath = join(cacheDir, 'product.md');
    writeFileSync(stalePath, '# stale-from-old-schema\n');
    writeMeta(cacheDir, {
      schema_version: '0.5.0', // old version
      endpoint_hash: 'local',
      last_refresh: { product: Date.now() }, // fresh by TTL
      last_attempt: {},
    });

    // cmdGet should notice the schema mismatch and attempt a rebuild. The
    // brain is unreachable in the test env, so the rebuild itself fails; the
    // stale file is still expected to be wiped as part of the attempt.
    cache.cmdGet('product', 'helsinki'); // triggers wipe-and-rebuild attempt

    // Afterwards the stale file is gone and _meta.json carries the current
    // schema_version.
    expect(existsSync(stalePath)).toBe(false);
    const rewrittenMeta = JSON.parse(readFileSync(join(cacheDir, '_meta.json'), 'utf-8'));
    expect(rewrittenMeta.schema_version).toBe(GSTACK_SCHEMA_PACK_VERSION);
  });

  test('matching schema_version + fresh TTL is warm hit (no rebuild)', { timeout: SLOW_TIMEOUT }, async () => {
    const cache = await importCache();
    const cacheDir = makeCacheDir();
    writeFileSync(join(cacheDir, 'product.md'), '# fresh content\n');
    writeMeta(cacheDir, {
      schema_version: GSTACK_SCHEMA_PACK_VERSION,
      endpoint_hash: cache.detectEndpointHash(),
      last_refresh: { product: Date.now() },
      last_attempt: {},
    });

    const hit = cache.cmdGet('product', 'helsinki');
    expect(hit.state).toBe('warm');
    expect(readFileSync(hit.path, 'utf-8')).toBe('# fresh content\n');
  });

  test('rebuild wipes ALL files in scope, not just the one being read', { timeout: SLOW_TIMEOUT }, async () => {
    const cache = await importCache();
    const cacheDir = makeCacheDir();
    const productFile = join(cacheDir, 'product.md');
    const brandFile = join(cacheDir, 'brand.md');
    const personaFile = join(cacheDir, 'developer-persona.md');
    writeFileSync(productFile, '# stale product\n');
    writeFileSync(brandFile, '# stale brand\n');
    writeFileSync(personaFile, '# stale persona\n');
    writeMeta(cacheDir, {
      schema_version: '0.5.0',
      endpoint_hash: 'local',
      last_refresh: { product: Date.now(), brand: Date.now(), 'developer-persona': Date.now() },
      last_attempt: {},
    });

    cache.cmdGet('product', 'helsinki'); // triggers wipe-and-rebuild attempt

    // Every per-project file is gone: the rebuild attempt cleared the scope.
    for (const file of [productFile, brandFile, personaFile]) {
      expect(existsSync(file)).toBe(false);
    }
  });
});
+13 -9
View File
@@ -41,20 +41,24 @@ import { logBudgetOverride } from './helpers/budget-override';
* v1.45.0.0 T5 — hard eval cost cap.
*
* Per-tier defaults (override via env):
* EVALS_BUDGET_HARD_CAP_GATE default $25/run
* EVALS_BUDGET_HARD_CAP_PERIODIC default $70/run
* EVALS_BUDGET_HARD_CAP umbrella cap if a tier-specific isn't set; default $30
* EVALS_BUDGET_HARD_CAP_GATE default $200/run
* EVALS_BUDGET_HARD_CAP_PERIODIC default $500/run
* EVALS_BUDGET_HARD_CAP umbrella cap if a tier-specific isn't set; default $300
* EVALS_BUDGET_OVERRIDE_REASON if set, override fires AND audit-logs to
* ~/.gstack/analytics/spend-overrides.jsonl
*
* Caps are dollars-per-run, not dollars-per-test. A test that legitimately
* gets more expensive should bake into the baseline; a runaway eval (infinite
* retry, model price change) gets stopped here.
* Caps are dollars-per-run, not dollars-per-test. The cap exists to catch
* runaway evals (infinite retry, model price change, prompt-blowup bug),
* NOT to gate legitimate scope growth. Set high enough that real growth
* never trips it — only obvious-bug territory does. Adjusted v1.52.0.0
* (cathedral cap audit): $25 → $200 gate, $70 → $500 periodic. Prior
* defaults tripped on normal-scope expansion; new ceilings are 8× the
* historical worst-case eval run.
*/
const DEFAULT_HARD_CAP_USD = Number(process.env.EVALS_BUDGET_HARD_CAP) || 30;
const DEFAULT_HARD_CAP_USD = Number(process.env.EVALS_BUDGET_HARD_CAP) || 300;
const TIER_CAPS: Record<'e2e' | 'llm-judge', number> = {
e2e: Number(process.env.EVALS_BUDGET_HARD_CAP_GATE) || DEFAULT_HARD_CAP_USD,
'llm-judge': Number(process.env.EVALS_BUDGET_HARD_CAP_PERIODIC) || Math.max(70, DEFAULT_HARD_CAP_USD),
e2e: Number(process.env.EVALS_BUDGET_HARD_CAP_GATE) || Math.min(200, DEFAULT_HARD_CAP_USD),
'llm-judge': Number(process.env.EVALS_BUDGET_HARD_CAP_PERIODIC) || Math.max(500, DEFAULT_HARD_CAP_USD),
};
function currentGitBranch(): string {
@@ -0,0 +1,162 @@
/**
* E2E: real gbrain CLI round-trip against a local PGLite engine.
*
* Replaces the manual local probe documented in earlier drafts of
* docs/gbrain-write-surfaces.md. This is the matched-pair check the user
* asked for in v1.50.0.0: "is the data we hope to save actually being saved?"
*
* What this proves:
* - The gbrain CLI subcommand shape gstack ships (`gbrain put <slug>
* --content "<markdown with frontmatter>"`) actually persists to a
* real PGLite store.
* - The page is retrievable via `gbrain get <slug>` with body + title
* intact (frontmatter is allowed to be reformatted by gbrain — we
* check semantic fields, not byte-exact YAML).
* - The `office-hours/<slug>` slug namespace works (no rejection,
* no auto-rewrite).
*
* What this does NOT prove (out of scope, owned elsewhere):
* - Agent obedience to the resolver instructions — that's the
* fake-CLI E2E (test/skill-e2e-office-hours-brain-writeback.test.ts).
* - Remote-MCP persistence — that's the write-shape E2E
* (test/skill-e2e-gbrain-roundtrip-remote.test.ts).
* - gbrain's own internal correctness — gbrain has its own test suite;
* this is a contract smoke test, not gbrain validation.
*
* Periodic tier. Real gbrain init + put triggers one Voyage embedding
* call (~$0.001/run). Skips when VOYAGE_API_KEY is unset OR gbrain is
* not on PATH, so CI without secrets degrades gracefully.
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { execFileSync } from 'child_process';
import { mkdtempSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
import {
describeIfSelected,
testConcurrentIfSelected,
runId,
createEvalCollector,
} from './helpers/e2e-helpers';
const evalCollector = createEvalCollector('e2e-gbrain-roundtrip-local');
/**
 * Reports whether a `gbrain` executable can be launched from PATH.
 *
 * Runs `gbrain --version` with a 5s timeout; any failure (missing binary,
 * non-zero exit, timeout) is treated as "not available".
 */
function gbrainOnPath(): boolean {
  let available = true;
  try {
    execFileSync('gbrain', ['--version'], { stdio: 'pipe', timeout: 5_000 });
  } catch {
    available = false;
  }
  return available;
}
// True only when both prerequisites for the real round-trip are present.
// The env var is checked first so that, when the key is unset, importing this
// file does not spawn a `gbrain --version` subprocess; the value is unchanged.
const SHOULD_RUN_GUARDS_OK =
  !!process.env.VOYAGE_API_KEY && gbrainOnPath();
// Suite: put one page into a freshly initialized PGLite gbrain living in an
// isolated temp HOME, read it back, and check that body, title and tags
// survived. All real work is gated on SHOULD_RUN_GUARDS_OK.
describeIfSelected(
'GBrain local PGLite round-trip E2E',
['gbrain-roundtrip-local'],
() => {
// Temp HOME handed to every gbrain invocation; '' when the guards fail.
let tmpHome: string;
// Timestamped slug so repeated runs do not collide on the same page.
const slug = `office-hours/roundtrip-test-${Date.now()}`;
// Page body that must come back line-for-line from `gbrain get`.
const body = `# Roundtrip test
This is a deterministic round-trip test page used by the gstack v1.50.0.0
brain-writeback verification. Generated at ${new Date().toISOString()}.
If gbrain persisted this correctly, you should see this exact body when
you run \`gbrain get "${slug}"\`.`;
beforeAll(() => {
if (!SHOULD_RUN_GUARDS_OK) {
// The test body returns early on the same guard; nothing to set up.
// The empty string also makes afterAll skip its cleanup.
tmpHome = '';
return;
}
tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-roundtrip-'));
// Initialize a real PGLite gbrain in the isolated temp HOME. Explicit
// --embedding-model required because the local env has multiple
// providers ready (voyage + zeroentropyai); gbrain refuses to guess.
execFileSync(
'gbrain',
['init', '--pglite', '--embedding-model', 'voyage:voyage-code-3'],
{
env: { ...process.env, HOME: tmpHome },
stdio: ['ignore', 'pipe', 'pipe'],
timeout: 60_000,
},
);
});
afterAll(() => {
// Only remove the temp HOME when one was actually created.
if (tmpHome) {
try {
rmSync(tmpHome, { recursive: true, force: true });
} catch {
// best effort
}
}
});
testConcurrentIfSelected(
'gbrain-roundtrip-local',
async () => {
if (!SHOULD_RUN_GUARDS_OK) {
// NOTE(review): this path logs and returns, so the test is reported as
// passing without asserting anything — confirm that is the intended
// "graceful degrade" behavior rather than an explicit skip.
console.log(
'[skip] gbrain CLI not on PATH or VOYAGE_API_KEY unset; ' +
'this E2E proves the gbrain CLI persistence contract gstack relies on. ' +
'Run locally with `VOYAGE_API_KEY=... bun test ...` to verify before shipping.',
);
return;
}
// Full page payload: YAML frontmatter (title + tags) followed by the body.
const content = `---
title: "Office Hours: Roundtrip Test"
tags: [design-doc, roundtrip-test]
---
${body}`;
// PUT the page.
execFileSync('gbrain', ['put', slug, '--content', content], {
env: { ...process.env, HOME: tmpHome },
stdio: ['ignore', 'pipe', 'pipe'],
timeout: 30_000,
});
// GET it back.
const retrieved = execFileSync('gbrain', ['get', slug], {
env: { ...process.env, HOME: tmpHome },
encoding: 'utf-8',
stdio: ['ignore', 'pipe', 'pipe'],
timeout: 10_000,
});
// The body MUST survive verbatim — every line of what we wrote
// must appear in what we got back. (Frontmatter reformatting is
// gbrain's prerogative; body text is data we own.) Blank lines are
// skipped because they carry no content to match on.
for (const line of body.split('\n')) {
if (line.trim()) {
expect(retrieved).toContain(line);
}
}
// The title lives in the frontmatter. Only the distinctive part of the
// title text is asserted, because the `title:` key formatting and the
// quoting may vary in gbrain's output.
expect(retrieved).toContain('Roundtrip Test');
// Both tags survived.
expect(retrieved).toContain('design-doc');
expect(retrieved).toContain('roundtrip-test');
// Sanity: the doc isn't empty or a 404 error.
expect(retrieved.length).toBeGreaterThan(body.length);
expect(retrieved).not.toContain('page_not_found');
expect(retrieved).not.toContain('Page not found');
},
120_000,
);
},
);
@@ -0,0 +1,306 @@
/**
* E2E: /office-hours brain-writeback path under fake gbrain CLI.
*
* The matched-pair check for v1.50.0.0's "brain-aware planning actually
* works under Claude Code" headline: prove that when a user runs
* /office-hours with gbrain on PATH, the agent actually calls
* `gbrain put office-hours/<slug>` with valid frontmatter.
*
* Approach:
* 1. Regenerate office-hours/SKILL.md with --respect-detection against
* a temp GSTACK_HOME that has detected:true. Snapshot the rendered
* content (which now contains the compressed SAVE_RESULTS block),
* then restore the canonical no-gbrain version so the working tree
* stays clean.
* 2. Write the snapshot into a temp workdir's office-hours/SKILL.md.
* Also write docs/gbrain-write-surfaces.md so the agent can read the
* template on demand (the compact block points to it).
* 3. Write a fake `gbrain` shell script into workdir/bin/ with robust
* argv quoting (printf %q) so heredoc payloads in --content survive
* shell-to-shell. The fake logs every invocation + writes payloads
* to a per-slug file for inspection.
* 4. Run /office-hours via runSkillTest with workdir/bin/ first on PATH.
* Feed a deterministic founder pitch + auto-decide instructions.
* 5. Assert the argv log contains `gbrain put office-hours/<slug>`, the
* payload file exists with valid YAML frontmatter, and entity stubs
* were created.
*
* Periodic tier (~$0.50-1/run via claude -p, matches nearby
* setup-gbrain-path4-* tests at touchfiles.ts:496-498).
*
* NOT verified by this test (out of scope, owned by docs/gbrain-write-surfaces.md):
* - That gbrain itself persists what `gbrain put` is told (gbrain's
* own contract)
* - That `.gbrain-source` doesn't re-route writes (gbrain's contract)
* - Source-targeting (no way to fake source resolution in a stub CLI)
*/
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { execFileSync, spawnSync } from 'child_process';
import {
chmodSync,
copyFileSync,
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
readdirSync,
rmSync,
writeFileSync,
} from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
import { runSkillTest } from './helpers/session-runner';
import {
ROOT,
runId,
describeIfSelected,
testConcurrentIfSelected,
logCost,
recordE2E,
createEvalCollector,
} from './helpers/e2e-helpers';
const evalCollector = createEvalCollector('e2e-office-hours-brain-writeback');
describeIfSelected(
'Office Hours Brain Writeback E2E',
['office-hours-brain-writeback'],
() => {
let workDir: string;
let callsLogPath: string;
let payloadDir: string;
// One-time fixture setup: a throwaway git repo containing the founder pitch,
// a brain-aware office-hours/SKILL.md, the write-surfaces doc, and a fake
// `gbrain` executable that records every invocation and --content payload.
beforeAll(() => {
workDir = mkdtempSync(join(tmpdir(), 'skill-e2e-brain-writeback-'));
// Runs a command inside workDir. The spawnSync result is not inspected, so a
// failing git command here is silent.
const run = (cmd: string, args: string[]) =>
spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
run('git', ['init', '-b', 'main']);
run('git', ['config', 'user.email', 'test@test.com']);
run('git', ['config', 'user.name', 'Test']);
// Copy the founder pitch fixture into the workdir.
const briefSrc = join(
ROOT,
'test',
'fixtures',
'office-hours-brain-writeback',
'brief.md',
);
copyFileSync(briefSrc, join(workDir, 'pitch.md'));
// Generate a brain-aware office-hours/SKILL.md (with --respect-detection
// against a temp GSTACK_HOME). Snapshot the content, restore the
// canonical version, write the snapshot into the workdir.
const tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-detect-home-'));
// Detection file read by the generator; these three keys are the only
// content written here.
writeFileSync(
join(tmpHome, 'gbrain-detection.json'),
JSON.stringify({
gbrain_local_status: 'ok',
gbrain_on_path: true,
gbrain_version: 'test-0.41.0',
}),
);
const skillPath = join(ROOT, 'office-hours', 'SKILL.md');
const originalSkill = readFileSync(skillPath, 'utf-8');
try {
// NOTE(review): the generator runs with cwd ROOT for the whole `claude`
// host, but only office-hours/SKILL.md is snapshotted and restored below.
// If it rewrites other SKILL.md files under ROOT, those would stay
// modified after this test — confirm.
// NOTE(review): the real office-hours/SKILL.md is temporarily overwritten
// in the repo; anything reading it concurrently could see the brain-aware
// variant — confirm this cannot race with other tests.
execFileSync(
'bun',
[
'run',
'scripts/gen-skill-docs.ts',
'--host',
'claude',
'--respect-detection',
],
{
cwd: ROOT,
env: { ...process.env, GSTACK_HOME: tmpHome },
stdio: ['ignore', 'pipe', 'pipe'],
timeout: 60_000,
},
);
const brainAwareSkill = readFileSync(skillPath, 'utf-8');
// Fail fast if the regenerated skill lacks the save block: the rest of the
// test would otherwise exercise a SKILL.md with nothing to verify.
if (!brainAwareSkill.includes('gbrain put "office-hours/')) {
throw new Error(
'Regenerated office-hours/SKILL.md does not contain gbrain put block. ' +
'Detection override may be broken — see test/gbrain-detection-override.test.ts.',
);
}
mkdirSync(join(workDir, 'office-hours'), { recursive: true });
writeFileSync(join(workDir, 'office-hours', 'SKILL.md'), brainAwareSkill);
} finally {
// Always restore the canonical SKILL.md so the working tree stays clean.
writeFileSync(skillPath, originalSkill);
rmSync(tmpHome, { recursive: true, force: true });
}
// Copy docs/gbrain-write-surfaces.md so the compact resolver block's
// on-demand reference resolves (the agent may read it for the full
// template; we don't require this read but make it available).
const docsSrc = join(ROOT, 'docs', 'gbrain-write-surfaces.md');
const docsDst = join(workDir, 'docs', 'gbrain-write-surfaces.md');
mkdirSync(join(workDir, 'docs'), { recursive: true });
copyFileSync(docsSrc, docsDst);
// Set up the fake gbrain CLI with robust argv quoting + payload capture.
callsLogPath = join(workDir, 'gbrain-calls.log');
payloadDir = join(workDir, 'gbrain-payloads');
mkdirSync(payloadDir, { recursive: true });
const binDir = join(workDir, 'bin');
mkdirSync(binDir, { recursive: true });
// The script text below is runtime data. callsLogPath and payloadDir are
// interpolated into it by this template literal; every `$…` that is not
// `${…}` is left for bash to expand.
// NOTE(review): the `put` arg loop stops at the first empty argument
// (`[ -n "$1" ]`), so an empty-string arg placed before --content would
// prevent the payload from being captured — confirm this is acceptable.
const fakeGbrain = `#!/bin/bash
# Fake gbrain CLI for E2E test. Logs every invocation with shell-safe quoting
# (printf %q) so --content "$(cat <<'EOF' ... EOF)" payloads survive intact.
{ printf 'gbrain'; for a in "$@"; do printf ' %q' "$a"; done; printf '\\n'; } \\
>> "${callsLogPath}"
case "$1" in
--version) echo "gbrain test-0.41.0"; exit 0 ;;
search) echo "[]"; exit 0 ;;
get_page) echo ""; exit 0 ;;
put)
SLUG="$2"
shift 2
while [ -n "$1" ]; do
if [ "$1" = "--content" ]; then
PAYLOAD_DIR="${payloadDir}"
mkdir -p "$PAYLOAD_DIR/$(dirname "$SLUG")"
printf '%s' "$2" > "$PAYLOAD_DIR/$SLUG.md"
break
fi
shift
done
exit 0
;;
esac
exit 0
`;
const fakePath = join(binDir, 'gbrain');
writeFileSync(fakePath, fakeGbrain);
// Mark the fake CLI executable so it can be invoked via PATH.
chmodSync(fakePath, 0o755);
// Commit the assembled fixture so the agent starts from a clean git tree.
run('git', ['add', '.']);
run('git', ['commit', '-m', 'fixture']);
});
// Remove the temp workdir once the suite is done. Cleanup is best effort: a
// failed removal must not turn into a suite failure.
afterAll(() => {
  try {
    rmSync(workDir, { force: true, recursive: true });
  } catch {
    /* best effort */
  }
});
testConcurrentIfSelected(
  'office-hours-brain-writeback',
  async () => {
    // Prompt handed to the agent. The continuation lines are part of the
    // template literal, so they must stay flush-left.
    const prompt = `Read office-hours/SKILL.md for the workflow.
Read pitch.md — that's a founder pitch coming to office hours. Select Startup Mode. Skip any AskUserQuestion — this is non-interactive; auto-decide the recommended option for any question.
For the diagnostic, assume the founder confirmed Q1 (strongest evidence = "230 from a single tweet + 51 paying creators in 6 weeks"), Q2 (status quo = "creators write ad-hoc checks or use opaque Patreon-style platforms"), and Q3 (forcing question already asked).
Generate the design doc per Phase 5. The feature-slug value to substitute into the SAVE_RESULTS template's \`<feature-slug>\` placeholder is exactly 'pixel-fund' (no path prefix — the template already provides the prefix). The \`gbrain\` binary is on PATH at ${workDir}/bin/gbrain. Apply the SAVE_RESULTS template literally: the slug should land at \`<prefix>/pixel-fund\` per the resolver shape, with the actual design doc markdown body in the --content payload. Then enrich entity stubs for any named people or companies mentioned in the pitch.
This is a test of the brain-writeback path. Do NOT skip the gbrain save step under any circumstance — the runtime guard ("skip if gbrain not on PATH") does NOT apply here because gbrain IS available. Do NOT explore gbrain --help; follow the SAVE_RESULTS template's exact CLI shape. If you encounter any AskUserQuestion, auto-decide recommended.`;

    const skillRun = await runSkillTest({
      prompt,
      workingDirectory: workDir,
      maxTurns: 12,
      timeout: 360_000,
      testName: 'office-hours-brain-writeback',
      runId,
      model: 'claude-sonnet-4-6',
      extraEnv: {
        // Fake gbrain first on PATH so the agent picks it up.
        PATH: `${join(workDir, 'bin')}:${process.env.PATH || ''}`,
      },
    });

    logCost('/office-hours (BRAIN WRITEBACK)', skillRun);
    recordE2E(
      evalCollector,
      '/office-hours-brain-writeback',
      'Office Hours Brain Writeback E2E',
      skillRun,
      {
        passed: ['success', 'error_max_turns'].includes(skillRun.exitReason),
      },
    );
    expect(['success', 'error_max_turns']).toContain(skillRun.exitReason);

    // Headline check: the fake CLI appends one line per invocation, so a
    // missing log file means gbrain was never run.
    if (!existsSync(callsLogPath)) {
      throw new Error(
        `No gbrain calls log at ${callsLogPath}. ` +
          `Agent likely did NOT invoke gbrain at all. ` +
          `Check that office-hours/SKILL.md in the workdir contains the gbrain put block.`,
      );
    }
    const invocationLog = readFileSync(callsLogPath, 'utf-8');
    console.log('--- gbrain calls log ---');
    console.log(invocationLog);
    console.log('--- end calls log ---');
    expect(invocationLog).toContain('gbrain put');

    // Agent obedience only: some `gbrain put` line must mention
    // 'pixel-fund'. The strict office-hours/<slug> shape is pinned by the
    // resolver unit test (test/resolvers-gbrain-save-results.test.ts).
    expect(invocationLog).toMatch(/gbrain put .*pixel-fund/);

    // Depth-first search of the payload tree for the first file whose NAME
    // contains 'pixel-fund'. Both office-hours/pixel-fund.md and a bare
    // pixel-fund.md are accepted; the frontmatter checks below are the
    // actual contract.
    const locatePayload = (root: string): string | null => {
      if (!existsSync(root)) return null;
      for (const dirent of readdirSync(root, { withFileTypes: true })) {
        const candidate = join(root, dirent.name);
        if (!dirent.isDirectory()) {
          if (dirent.name.includes('pixel-fund')) return candidate;
          continue;
        }
        const hit = locatePayload(candidate);
        if (hit) return hit;
      }
      return null;
    };

    const payloadPath = locatePayload(payloadDir);
    if (!payloadPath) {
      throw new Error(
        `Agent called gbrain put but no payload file with 'pixel-fund' ` +
          `in name was written to ${payloadDir}. Check the fake gbrain ` +
          `--content parser for argv quoting issues.`,
      );
    }

    // The captured --content must open with YAML frontmatter and carry a
    // non-trivial body.
    const body = readFileSync(payloadPath, 'utf-8');
    expect(body).toMatch(/^---\s*\n/);
    expect(body).toContain('title:');
    expect(body).toContain('tags:');
    expect(body.length).toBeGreaterThan(200);

    // Entity stubs are best-effort. Both 'entities/<name>' and
    // 'entity/<name>' slugs are counted; absence only warns.
    const entityPuts = invocationLog.match(/gbrain put entit(?:y|ies)\//g) || [];
    if (entityPuts.length > 0) {
      console.log(
        `Entity stub calls observed: ${entityPuts.length}`,
      );
    } else {
      console.warn(
        'No entity stub calls in gbrain calls log. Resolver instructs ' +
          'entity extraction but it is best-effort.',
      );
    }
  },
  420_000,
);
},
);
+458
View File
@@ -0,0 +1,458 @@
/**
* /plan-tune cathedral E2E (T16) — 5 scenarios, all gate tier per D12.
*
* Each scenario verifies that the cathedral's substrate works end-to-end
* against a real `claude -p` invocation. Unit tests in test/{question-log-hook,
* question-preference-hook, declared-annotation, distill-*}.test.ts cover
* deterministic plumbing; this file proves the agent obeys the hook
* contracts in a live session.
*
* Touchfile registration in test/helpers/touchfiles.ts:
* - plan-tune-hook-capture
* - plan-tune-enforcement
* - plan-tune-annotation
* - plan-tune-codex-import
* - plan-tune-dream-cycle
*
* Each scenario uses GSTACK_STATE_ROOT to isolate from the user's real
* ~/.gstack (per cathedral T1 + Codex D16 fix). Cost budget ~$3-4/scenario.
*/
import { beforeAll, afterAll, expect } from 'bun:test';
import {
ROOT,
describeIfSelected,
testConcurrentIfSelected,
copyDirSync,
createEvalCollector,
finalizeEvalCollector,
} from './helpers/e2e-helpers';
import { spawnSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// File-level eval collector, finalized once from a top-level afterAll hook.
// NOTE(review): none of the scenarios below pass `collector` to a recording
// helper — confirm whether finalizing an empty collector is intended.
const collector = createEvalCollector('e2e-plan-tune-cathedral');
afterAll(() => {
finalizeEvalCollector(collector);
});
/**
 * Create a throwaway fixture project for one cathedral scenario.
 *
 * Layout produced under a fresh temp directory:
 *   - `.gstack-state/`        isolated state root (returned as `stateRoot`)
 *   - a git repo with one commit (README.md)
 *   - `bin/`                  copies of the gstack bins that exist in ROOT/bin
 *   - `scripts/`              copies of the scripts those bins import
 *   - `hosts/claude/hooks/`   copy of the hooks directory
 *
 * Bins and scripts missing from ROOT are skipped silently. Exit codes of
 * the git commands are not checked.
 *
 * @param prefix  Prefix passed to mkdtemp for the fixture directory name.
 * @returns The fixture directory, its state root, and a slug derived from
 *          the directory's basename with characters outside
 *          `[a-zA-Z0-9._-]` removed.
 */
function scaffoldFixture(prefix: string): { workDir: string; stateRoot: string; slug: string } {
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
  const stateRoot = path.join(workDir, '.gstack-state');
  fs.mkdirSync(stateRoot, { recursive: true });

  // A committed git repo so gstack-slug resolves a deterministic slug.
  const git = (...gitArgs: string[]) =>
    spawnSync('git', gitArgs, { cwd: workDir, stdio: 'pipe' });
  git('init', '-b', 'main');
  git('config', 'user.email', 't@t.com');
  git('config', 'user.name', 'T');
  fs.writeFileSync(path.join(workDir, 'README.md'), '# cathedral fixture\n');
  git('add', '.');
  git('commit', '-m', 'init');

  // Executables the scenarios spawn directly.
  const binNames = [
    'gstack-slug',
    'gstack-config',
    'gstack-paths',
    'gstack-question-log',
    'gstack-question-preference',
    'gstack-developer-profile',
    'gstack-codex-session-import',
    'gstack-distill-free-text',
    'gstack-distill-apply',
  ];
  const binDir = path.join(workDir, 'bin');
  fs.mkdirSync(binDir, { recursive: true });
  for (const name of binNames) {
    const from = path.join(ROOT, 'bin', name);
    if (!fs.existsSync(from)) continue;
    const to = path.join(binDir, name);
    fs.copyFileSync(from, to);
    fs.chmodSync(to, 0o755);
  }

  // Modules the bins import at runtime.
  const scriptNames = [
    'question-registry.ts',
    'psychographic-signals.ts',
    'archetypes.ts',
    'one-way-doors.ts',
    'declared-annotation.ts',
  ];
  const scriptsDir = path.join(workDir, 'scripts');
  fs.mkdirSync(scriptsDir, { recursive: true });
  for (const name of scriptNames) {
    const from = path.join(ROOT, 'scripts', name);
    if (!fs.existsSync(from)) continue;
    fs.copyFileSync(from, path.join(scriptsDir, name));
  }

  // Hook entry points.
  const hooksRelative = ['hosts', 'claude', 'hooks'];
  copyDirSync(path.join(ROOT, ...hooksRelative), path.join(workDir, ...hooksRelative));

  const slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
  return { workDir, stateRoot, slug };
}
/**
 * Recursively delete a fixture directory.
 *
 * Best-effort: any error raised by the removal is swallowed so teardown can
 * never fail a suite.
 *
 * @param workDir  Root of the fixture tree to remove.
 */
function cleanupFixture(workDir: string): void {
  const removal = { force: true, recursive: true };
  try {
    fs.rmSync(workDir, removal);
  } catch {
    // deliberately ignored
  }
}
// ---------------------------------------------------------------------------
// Scenario 1: Hook capture — PostToolUse hook writes to question-log.jsonl
// ---------------------------------------------------------------------------
describeIfSelected('PlanTune cathedral E2E: hook capture', ['plan-tune-hook-capture'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-cap-');
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('hook directly invoked → log fills', async () => {
    // Direct hook invocation simulates Claude Code's PostToolUse delivery.
    // E2E verifies the hook + bin chain works against real bins on disk
    // (the unit test exercises this with mocks).
    const hookPath = path.join(fixture.workDir, 'hosts', 'claude', 'hooks', 'question-log-hook');
    const payload = {
      session_id: 'cathedral-e2e-cap',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-cap-1',
      tool_input: {
        questions: [
          {
            question:
              'D1 — Cathedral E2E capture <gstack-qid:ship-test-failure-triage>\nRecommendation: A',
            options: ['A) Fix now (recommended)', 'B) Investigate'],
          },
        ],
      },
      tool_response: { answers: [{ option_label: 'A) Fix now (recommended)' }] },
      cwd: fixture.workDir,
    };

    // The payload is delivered on stdin; state is redirected to the
    // fixture's private state root.
    const res = spawnSync(hookPath, [], {
      env: {
        ...process.env,
        GSTACK_STATE_ROOT: fixture.stateRoot,
        GSTACK_QUESTION_LOG_NO_DERIVE: '1',
      },
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(res.status).toBe(0);

    const logPath = path.join(fixture.stateRoot, 'projects', fixture.slug, 'question-log.jsonl');
    expect(fs.existsSync(logPath)).toBe(true);

    // Drop blank lines before counting: ''.split('\n') yields [''], which
    // would let the length assertion pass on an empty log and defer the
    // failure to an opaque JSON.parse error.
    const lines = fs
      .readFileSync(logPath, 'utf-8')
      .trim()
      .split('\n')
      .filter(Boolean);
    expect(lines.length).toBeGreaterThanOrEqual(1);

    const evt = JSON.parse(lines[0]);
    expect(evt.source).toBe('hook');
    expect(evt.question_id).toBe('ship-test-failure-triage');
  });
});
// ---------------------------------------------------------------------------
// Scenario 2: Enforcement — never-ask preference + marker + 2-way → deny
// ---------------------------------------------------------------------------
describeIfSelected('PlanTune cathedral E2E: enforcement', ['plan-tune-enforcement'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-enf-');
    // Seed a never-ask preference for the question id used in the test.
    const projectDir = path.join(fixture.stateRoot, 'projects', fixture.slug);
    fs.mkdirSync(projectDir, { recursive: true });
    const preferences = { 'ship-changelog-voice-polish': 'never-ask' };
    fs.writeFileSync(
      path.join(projectDir, 'question-preferences.json'),
      JSON.stringify(preferences),
    );
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('PreToolUse hook denies + logs auto-decided event', async () => {
    const { workDir, stateRoot, slug } = fixture;
    const hookPath = path.join(workDir, 'hosts', 'claude', 'hooks', 'question-preference-hook');

    const question = {
      question: '<gstack-qid:ship-changelog-voice-polish> Polish CHANGELOG entry?',
      options: ['A) Accept (recommended)', 'B) Skip'],
    };
    const payload = {
      session_id: 'cathedral-e2e-enf',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-enf-1',
      tool_input: { questions: [question] },
      cwd: workDir,
    };

    const res = spawnSync(hookPath, [], {
      env: {
        ...process.env,
        GSTACK_STATE_ROOT: stateRoot,
        GSTACK_QUESTION_LOG_NO_DERIVE: '1',
      },
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(res.status).toBe(0);

    // The hook replies on stdout with a deny decision whose reason names
    // the recommended option.
    const decision = JSON.parse(res.stdout || '{}');
    expect(decision.hookSpecificOutput?.permissionDecision).toBe('deny');
    expect(decision.hookSpecificOutput?.permissionDecisionReason).toContain('Accept');

    // Exactly one auto-decided event must have been appended to the log.
    const logPath = path.join(stateRoot, 'projects', slug, 'question-log.jsonl');
    expect(fs.existsSync(logPath)).toBe(true);
    const rawLog = fs.readFileSync(logPath, 'utf-8');
    const events = rawLog
      .trim()
      .split('\n')
      .filter(Boolean)
      .map((line) => JSON.parse(line));
    const autoDecided = events.filter((event) => event.source === 'auto-decided');
    expect(autoDecided.length).toBe(1);
    expect(autoDecided[0].question_id).toBe('ship-changelog-voice-polish');
  });
});
// ---------------------------------------------------------------------------
// Scenario 3: Annotation — declared profile injected via additionalContext
// ---------------------------------------------------------------------------
describeIfSelected('PlanTune cathedral E2E: annotation', ['plan-tune-annotation'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-ann-');

    // Declared profile with a strong detail_preference value.
    const profile = { declared: { detail_preference: 0.9 } };
    fs.writeFileSync(
      path.join(fixture.stateRoot, 'developer-profile.json'),
      JSON.stringify(profile),
    );

    // One memory nugget tagged with the 'detail-preference' signal key.
    // NOTE(review): the test relies on ship-todos-reorganize mapping to that
    // signal key in the question registry — not visible from this file.
    const memory = {
      nuggets: [
        {
          nugget: 'User prefers verbose explanations with tradeoffs',
          applies_to_signal_keys: ['detail-preference'],
          applied_at: new Date().toISOString(),
        },
      ],
    };
    fs.writeFileSync(
      path.join(fixture.stateRoot, 'free-text-memory.json'),
      JSON.stringify(memory),
    );
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('PreToolUse hook surfaces memory nugget on defer', async () => {
    const { workDir, stateRoot } = fixture;
    const hookPath = path.join(workDir, 'hosts', 'claude', 'hooks', 'question-preference-hook');

    const payload = {
      session_id: 'cathedral-e2e-ann',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-ann-1',
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:ship-todos-reorganize> Reorganize TODOs?',
            options: ['A) Accept (recommended)', 'B) Skip'],
          },
        ],
      },
      cwd: workDir,
    };

    const res = spawnSync(hookPath, [], {
      env: {
        ...process.env,
        GSTACK_STATE_ROOT: stateRoot,
        GSTACK_QUESTION_LOG_NO_DERIVE: '1',
      },
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(res.status).toBe(0);

    // Expect a defer decision carrying the seeded nugget text.
    const hookOutput = JSON.parse(res.stdout || '{}').hookSpecificOutput;
    expect(hookOutput?.permissionDecision).toBe('defer');
    expect(hookOutput?.additionalContext).toContain('verbose explanations');
  });
});
// ---------------------------------------------------------------------------
// Scenario 4: Codex import — JSONL session → import bin → log fills
// ---------------------------------------------------------------------------
describeIfSelected('PlanTune cathedral E2E: codex import', ['plan-tune-codex-import'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;
  let sessionFile: string;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-cdx-');
    sessionFile = path.join(fixture.workDir, 'rollout-cathedral.jsonl');

    // Three-record JSONL session: metadata, an agent question carrying a
    // gstack-qid marker, and the user's one-letter answer.
    const sessionMeta = {
      type: 'session_meta',
      payload: { id: 'cathedral-sess-1', cwd: fixture.workDir },
    };
    const agentQuestion = {
      timestamp: new Date().toISOString(),
      type: 'event_msg',
      payload: {
        type: 'agent_message',
        message:
          'D1 — Cathedral import <gstack-qid:plan-eng-review-scope-reduce>\nRecommendation: A\nA) Reduce (recommended)\nB) Keep',
      },
    };
    const userAnswer = {
      timestamp: new Date().toISOString(),
      type: 'event_msg',
      payload: { type: 'user_message', message: 'A' },
    };
    const records = [sessionMeta, agentQuestion, userAnswer].map((record) => JSON.stringify(record));
    fs.writeFileSync(sessionFile, records.join('\n') + '\n');
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('importer extracts events with codex-import-marker source', async () => {
    const { workDir, stateRoot, slug } = fixture;
    const importer = path.join(workDir, 'bin', 'gstack-codex-session-import');

    const res = spawnSync(importer, [sessionFile], {
      env: {
        ...process.env,
        GSTACK_STATE_ROOT: stateRoot,
        GSTACK_QUESTION_LOG_NO_DERIVE: '1',
      },
      encoding: 'utf-8',
      cwd: workDir,
    });
    expect(res.status).toBe(0);
    expect(res.stdout).toContain('IMPORTED: 1');

    // The single imported event must land in the project's question log.
    const logPath = path.join(stateRoot, 'projects', slug, 'question-log.jsonl');
    expect(fs.existsSync(logPath)).toBe(true);
    const rawLog = fs.readFileSync(logPath, 'utf-8');
    const events = rawLog
      .trim()
      .split('\n')
      .filter(Boolean)
      .map((line) => JSON.parse(line));
    expect(events.length).toBe(1);

    const [imported] = events;
    expect(imported.source).toBe('codex-import-marker');
    expect(imported.question_id).toBe('plan-eng-review-scope-reduce');
  });
});
// ---------------------------------------------------------------------------
// Scenario 5: Dream cycle round-trip — capture → distill (mocked) → apply →
// re-fire → memory injection
// ---------------------------------------------------------------------------
describeIfSelected('PlanTune cathedral E2E: dream cycle', ['plan-tune-dream-cycle'], () => {
  let fixture: ReturnType<typeof scaffoldFixture>;

  beforeAll(() => {
    fixture = scaffoldFixture('cathedral-dream-');

    // The proposals file is seeded directly instead of being produced by a
    // distill run; this scenario only covers apply → re-fire on top of a
    // known proposal shape.
    const projectDir = path.join(fixture.stateRoot, 'projects', fixture.slug);
    fs.mkdirSync(projectDir, { recursive: true });
    const seeded = {
      generated_at: new Date().toISOString(),
      source_event_count: 1,
      proposals: [
        {
          kind: 'memory-nugget',
          confidence: 0.95,
          nugget: 'User wants every fix tested before shipping',
          applies_to_signal_keys: ['test-discipline'],
          source_quotes: ['always add tests for any fix'],
        },
      ],
    };
    fs.writeFileSync(
      path.join(projectDir, 'distillation-proposals.json'),
      JSON.stringify(seeded),
    );
  });

  afterAll(() => {
    cleanupFixture(fixture.workDir);
  });

  testConcurrentIfSelected('apply → re-fire → memory injected via additionalContext', async () => {
    const { workDir, stateRoot } = fixture;

    // Step 1: apply proposal #0 with gstack-distill-apply.
    const applyBin = path.join(workDir, 'bin', 'gstack-distill-apply');
    const applyRes = spawnSync(applyBin, ['--proposal', '0'], {
      env: { ...process.env, GSTACK_STATE_ROOT: stateRoot },
      encoding: 'utf-8',
      cwd: workDir,
    });
    expect(applyRes.status).toBe(0);

    // The memory file must now hold exactly the applied nugget.
    const memPath = path.join(stateRoot, 'free-text-memory.json');
    expect(fs.existsSync(memPath)).toBe(true);
    const memory = JSON.parse(fs.readFileSync(memPath, 'utf-8'));
    expect(memory.nuggets.length).toBe(1);

    // Step 2: re-fire a question through the PreToolUse hook and expect
    // the nugget text to appear in additionalContext.
    // NOTE(review): assumes plan-eng-review-test-gap maps to the
    // 'test-discipline' signal key — not visible from this file.
    const hookPath = path.join(workDir, 'hosts', 'claude', 'hooks', 'question-preference-hook');
    const payload = {
      session_id: 'cathedral-e2e-dream',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-dream-1',
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:plan-eng-review-test-gap> Add tests for this gap?',
            options: ['A) Add (recommended)', 'B) Skip'],
          },
        ],
      },
      cwd: workDir,
    };
    const hookRes = spawnSync(hookPath, [], {
      env: {
        ...process.env,
        GSTACK_STATE_ROOT: stateRoot,
        GSTACK_QUESTION_LOG_NO_DERIVE: '1',
      },
      input: JSON.stringify(payload),
      encoding: 'utf-8',
    });
    expect(hookRes.status).toBe(0);

    const hookOutput = JSON.parse(hookRes.stdout || '{}').hookSpecificOutput;
    expect(hookOutput?.additionalContext).toContain('User wants every fix tested');
  });
});
+96
View File
@@ -0,0 +1,96 @@
/**
* Per-skill brain preflight token budget enforcement (T21 / T19).
*
* Asserts that the GENERATED BRAIN_PREFLIGHT block per skill stays within
* its per-skill byte budget (SKILL_PREFLIGHT_BUDGET_BYTES from
* brain-cache-spec). Also asserts the autoplan-wide total stays under
* AUTOPLAN_PREFLIGHT_BUDGET_BYTES.
*
* What's being measured: the SIZE OF THE INSTRUCTIONS injected into the
* skill's SKILL.md by the resolver, NOT the size of the cache digests at
* runtime. Runtime digest budgets are enforced separately by the cache
* CLI's truncateToBudget. This test catches resolver-side bloat: if
* generateBrainPreflight grows verbose, the instructions themselves eat
* the skill's context budget.
*
* Gate-tier, free.
*/
import { describe, test, expect } from 'bun:test';
import { generateBrainPreflight, generateBrainCacheRefresh, generateBrainWriteBack } from '../scripts/resolvers/gbrain';
import {
SKILL_DIGEST_SUBSETS,
SKILL_PREFLIGHT_BUDGET_BYTES,
AUTOPLAN_PREFLIGHT_BUDGET_BYTES,
} from '../scripts/brain-cache-spec';
import { HOST_PATHS } from '../scripts/resolvers/types';
import type { TemplateContext } from '../scripts/resolvers/types';
/**
 * Build the TemplateContext handed to the resolver generators for a skill,
 * always targeting the 'claude' host.
 *
 * @param skillName  Skill identifier, e.g. 'plan-ceo-review'.
 * @returns A fresh context object; `tmplPath` is a synthetic /tmp path.
 */
function buildCtx(skillName: string): TemplateContext {
  const ctx: TemplateContext = {
    skillName,
    tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`,
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return ctx;
}
/**
 * UTF-8 byte size of all brain instruction text emitted for a skill:
 * preflight + cache-refresh + write-back output, concatenated in that order.
 *
 * @param skillName  Skill identifier passed through to `buildCtx`.
 * @returns Byte length of the concatenated resolver output.
 */
function totalBrainBytes(skillName: string): number {
  // Each generator receives its own freshly built context.
  const generators = [generateBrainPreflight, generateBrainCacheRefresh, generateBrainWriteBack];
  const combined = generators.reduce((text, generate) => text + generate(buildCtx(skillName)), '');
  return Buffer.byteLength(combined, 'utf-8');
}
describe('per-skill preflight token budget', () => {
  test('every preflight skill stays under per-skill BRAIN_* budget (3x cap, instructions vs runtime data)', () => {
    // SKILL_PREFLIGHT_BUDGET_BYTES governs runtime digest data. Instruction
    // text (resolver output) is allowed 3x that figure; anything larger is
    // treated as instruction bloat.
    const INSTRUCTION_MULTIPLIER = 3;
    Object.entries(SKILL_PREFLIGHT_BUDGET_BYTES).forEach(([skill, budget]) => {
      const instructionBytes = totalBrainBytes(skill);
      expect(instructionBytes).toBeLessThanOrEqual(budget * INSTRUCTION_MULTIPLIER);
    });
  });

  test('autoplan: sum across 4 plan-* skills stays under AUTOPLAN_PREFLIGHT_BUDGET_BYTES × 3 (instructions)', () => {
    // Same 3x headroom, applied to the combined output of the four
    // autoplan review skills.
    let combinedBytes = 0;
    for (const skill of ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review']) {
      combinedBytes += totalBrainBytes(skill);
    }
    expect(combinedBytes).toBeLessThanOrEqual(AUTOPLAN_PREFLIGHT_BUDGET_BYTES * 3);
  });

  test('non-preflight skills emit zero brain bytes', () => {
    ['ship', 'qa', 'investigate', 'retro', 'design-review'].forEach((skill) => {
      expect(totalBrainBytes(skill)).toBe(0);
    });
  });

  test('preflight bytes are positive for every registered preflight skill', () => {
    Object.keys(SKILL_DIGEST_SUBSETS).forEach((skill) => {
      expect(totalBrainBytes(skill)).toBeGreaterThan(0);
    });
  });
});
describe('autoplan total preflight budget (T21 / D7)', () => {
  test('autoplan total under 25 KB instruction cap × 3 (75 KB instruction budget)', () => {
    // Hard-coded 75 KB ceiling on instruction text across the four autoplan
    // skills (strictly less than, unlike the constant-based check).
    // NOTE(review): presumably 3 × a 25 KB AUTOPLAN_PREFLIGHT_BUDGET_BYTES —
    // confirm against brain-cache-spec.
    const INSTRUCTION_CEILING_BYTES = 75 * 1024;
    let combinedBytes = 0;
    for (const skill of ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review']) {
      combinedBytes += totalBrainBytes(skill);
    }
    expect(combinedBytes).toBeLessThan(INSTRUCTION_CEILING_BYTES);
  });

  test('per-skill subset emits its expected entity references in the preflight block', () => {
    // Every entity in a skill's digest subset must appear as a
    // `gstack-brain-cache get <entity>` instruction in its preflight text.
    Object.entries(SKILL_DIGEST_SUBSETS).forEach(([skill, subset]) => {
      const preflightText = generateBrainPreflight(buildCtx(skill));
      for (const entity of subset) {
        expect(preflightText).toContain(`gstack-brain-cache get ${entity}`);
      }
    });
  });
});
+8 -7
View File
@@ -37,13 +37,14 @@ import { logBudgetOverride } from './helpers/budget-override';
const REPO_ROOT = path.resolve(import.meta.dir, '..');
const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.47.0.0.json');
// Default per-skill ratio is 1.05 (5% growth tolerance). T4 catalog trim
// MOVES text from frontmatter (always-loaded catalog) to a body section
// ("## When to invoke"), so small skills with already-short descriptions
// see a tiny body growth from the section header itself (~20 bytes). The
// 5% per-skill tolerance accommodates that while still catching real bloat;
// the always-loaded catalog cost is enforced separately with a hard ceiling.
const DEFAULT_RATIO = 1.05;
// Default per-skill ratio is 1.50 (50% growth tolerance). Adjusted v1.52.0.0
// (cathedral cap audit) from 1.05 → 1.50: a 5% ratio tripped on legitimate
// feature additions (e.g., plan-tune cathedral T13 grew SKILL.md ×1.24
// adding load-bearing Dream cycle + Audit unmarked + Recent auto-decisions
// surfaces). Real bloat is 2-3×; this catches that while not tripping on
// normal feature scope. The always-loaded catalog cost is enforced
// separately with a hard ceiling.
const DEFAULT_RATIO = 1.50;
const RATIO = Number(process.env.GSTACK_SIZE_BUDGET_RATIO) || DEFAULT_RATIO;
interface Regression {
+87
View File
@@ -0,0 +1,87 @@
/**
* Phase 2 calibration write-back fence-block fallback (T19).
*
* The BRAIN_WRITE_BACK resolver output describes two paths:
* 1. Preferred: mcp__gbrain__takes_add op (upstream gbrain v0.42+, T8)
* 2. Fallback: mcp__gbrain__put_page with a gstack:takes fence block
*
* Until T8 ships, the fallback is the only path. Verify the resolver output
* mentions the fence-block fallback explicitly so the agent knows what to
* do when takes_add returns MCPMethodNotFound.
*
* Gate-tier, free, pure import + render.
*/
import { describe, test, expect } from 'bun:test';
import { generateBrainWriteBack } from '../scripts/resolvers/gbrain';
import { SKILL_DIGEST_SUBSETS, SKILL_CALIBRATION_WEIGHTS } from '../scripts/brain-cache-spec';
import { HOST_PATHS } from '../scripts/resolvers/types';
import type { TemplateContext } from '../scripts/resolvers/types';
/**
 * Build the TemplateContext passed to `generateBrainWriteBack` for a skill,
 * always targeting the 'claude' host.
 *
 * @param skillName  Skill identifier, e.g. 'plan-ceo-review'.
 * @returns A fresh context object; `tmplPath` is a synthetic /tmp path.
 */
function buildCtx(skillName: string): TemplateContext {
  const ctx: TemplateContext = {
    skillName,
    tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`,
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return ctx;
}
describe('Phase 2 write-back fence-block fallback', () => {
  // Render the write-back block for one skill.
  const render = (skill: string) => generateBrainWriteBack(buildCtx(skill));
  const preflightSkills = () => Object.keys(SKILL_DIGEST_SUBSETS);

  test('every preflight skill emits write-back with fallback path documented', () => {
    preflightSkills().forEach((skill) => {
      const block = render(skill);
      // Preferred path.
      expect(block).toContain('takes_add');
      // Fallback path.
      expect(block).toContain('put_page');
      // Fence-block syntax.
      // NOTE(review): 'takes' is a substring of 'takes_add', so this check
      // cannot fail once the first one passes — consider a more specific
      // fence marker.
      expect(block).toContain('takes');
    });
  });

  test('write-back guidance gates on BRAIN_CALIBRATION_WRITEBACK feature flag', () => {
    preflightSkills().forEach((skill) => {
      expect(render(skill)).toContain('BRAIN_CALIBRATION_WRITEBACK');
    });
  });

  test('write-back guidance gates on brain_trust_policy == personal', () => {
    preflightSkills().forEach((skill) => {
      const block = render(skill);
      expect(block).toContain('personal');
      expect(block).toContain('brain_trust_policy');
    });
  });

  test('write-back emits the kind=bet take frontmatter shape', () => {
    const block = render('plan-ceo-review');
    const expectedFragments = [
      'kind: bet',
      'holder:',
      'claim:',
      'weight:',
      'since_date:',
      'expected_resolution:',
      'source_skill:',
    ];
    for (const fragment of expectedFragments) {
      expect(block).toContain(fragment);
    }
  });

  test('per-skill weight matches SKILL_CALIBRATION_WEIGHTS', () => {
    preflightSkills().forEach((skill) => {
      const weight = SKILL_CALIBRATION_WEIGHTS[skill];
      // Skills without a configured weight are not checked.
      if (weight == null) return;
      expect(render(skill)).toContain(`weight: ${weight}`);
    });
  });

  test('write-back invalidates affected cache digests after write', () => {
    expect(render('plan-ceo-review')).toContain('gstack-brain-cache invalidate');
  });

  test('non-preflight skill gets empty write-back (no Phase 2 path)', () => {
    for (const skill of ['ship', 'qa']) {
      expect(render(skill)).toBe('');
    }
  });
});
+161
View File
@@ -0,0 +1,161 @@
/**
* User-slug identity resolution chain (T16 / D4 A3).
*
* Verifies the gstack-config resolve-user-slug subcommand walks the
* documented fallback chain:
* 1. mcp__gbrain__whoami.client_name (skipped when gbrain not on PATH)
* 2. $USER env var
* 3. sha8($(git config user.email))
* 4. anonymous-<sha8(hostname)>
*
* Result is persisted under user_slug_at_<endpoint-hash> for stability.
* Test isolation via a per-test GSTACK_HOME override (fresh tmp dir). HOME,
* GSTACK_HOME, and USER are snapshotted up front and restored after each test.
*
* Gate-tier, free, ~50ms.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { mkdtempSync, existsSync, readFileSync, writeFileSync, rmSync, mkdirSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { spawnSync } from 'child_process';
// The binary under test is resolved relative to the current working
// directory — assumes the suite is launched from the repository root.
const REPO_ROOT = process.cwd();
const CONFIG_BIN = join(REPO_ROOT, 'bin', 'gstack-config');
// Per-test temp directory used as GSTACK_HOME; assigned in beforeEach.
let TMP_HOME: string;
// Snapshot of the env vars that afterEach restores (or deletes when they
// were originally unset).
const ORIGINAL = {
HOME: process.env.HOME,
GSTACK_HOME: process.env.GSTACK_HOME,
USER: process.env.USER,
};
/**
 * Run the gstack-config binary synchronously and capture its result.
 *
 * @param args      Argument vector passed to the binary.
 * @param extraEnv  Variables layered on top of the current process env.
 * @returns `stdout`/`stderr` as strings (empty when absent) and the exit
 *          `status`, or -1 when the process produced no exit code (failed
 *          to spawn, was killed, or hit the 5 s timeout). When the spawn
 *          itself failed, the error message is appended to `stderr` so a
 *          failing assertion shows why instead of a bare -1.
 */
function runConfig(args: string[], extraEnv: Record<string, string> = {}): { stdout: string; status: number; stderr: string } {
  const result = spawnSync(CONFIG_BIN, args, {
    encoding: 'utf-8',
    env: {
      ...process.env,
      ...extraEnv,
    },
    timeout: 5000,
  });

  let stderr = result.stderr || '';
  if (result.error) {
    // spawnSync reports ENOENT / ETIMEDOUT etc. via `error`, not stderr.
    const note = `[runConfig] spawn failed: ${result.error.message}`;
    stderr = stderr ? `${stderr}\n${note}` : note;
  }

  return { stdout: result.stdout || '', status: result.status ?? -1, stderr };
}
// Each test gets a fresh temp directory, exported as GSTACK_HOME.
beforeEach(() => {
  const freshHome = mkdtempSync(join(tmpdir(), 'gstack-user-slug-test-'));
  TMP_HOME = freshHome;
  process.env.GSTACK_HOME = freshHome;
});

// Put the snapshotted env vars back (deleting those that were originally
// unset), then remove the temp directory on a best-effort basis.
afterEach(() => {
  Object.entries(ORIGINAL).forEach(([name, original]) => {
    if (original === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = original;
    }
  });
  try {
    rmSync(TMP_HOME, { force: true, recursive: true });
  } catch {
    /* best effort */
  }
});
describe('endpoint-hash subcommand', () => {
  test('returns deterministic 8-char hex or literal "local"', () => {
    const { status, stdout } = runConfig(['endpoint-hash'], { GSTACK_HOME: TMP_HOME });
    expect(status).toBe(0);
    // Accepted outputs: the literal 'local', or a lowercase hex digest of
    // length 8 or 16.
    const hash = stdout.trim();
    const accepted = /^(?:local|[a-f0-9]{8}|[a-f0-9]{16})$/.test(hash);
    expect(accepted).toBe(true);
  });
});
describe('resolve-user-slug fallback chain', () => {
  // Run `resolve-user-slug` against the per-test GSTACK_HOME with a given
  // $USER value.
  const resolveAs = (user: string) =>
    runConfig(['resolve-user-slug'], { GSTACK_HOME: TMP_HOME, USER: user });

  test('uses $USER when set (layer 2)', () => {
    const { status, stdout } = resolveAs('alice-test');
    expect(status).toBe(0);
    expect(stdout.trim()).toBe('alice-test');
  });

  test('lowercases + dash-normalizes $USER', () => {
    const { status, stdout } = resolveAs('Alice Test');
    expect(status).toBe(0);
    // Spaces must become dashes.
    // NOTE(review): the `i` flag means this does not actually verify
    // lowercasing — 'Alice-Test' would also pass.
    expect(stdout.trim()).toMatch(/^alice-test$/i);
  });

  test('falls through past empty $USER to git email or anonymous', () => {
    const { status, stdout } = resolveAs('');
    expect(status).toBe(0);
    const slug = stdout.trim();
    expect(slug.length).toBeGreaterThan(0);
    // Intended shape is email-<sha8> or anonymous-<sha8>.
    // NOTE(review): the second alternative accepts any alphanumeric/dash
    // string, so the prefix requirement is not enforced.
    expect(slug).toMatch(/^(email-|anonymous-)[a-f0-9]+$|^[a-zA-Z0-9-]+$/);
  });

  test('persists resolution to user_slug_at_<hash> on first call', () => {
    resolveAs('persisttest');
    const configFile = join(TMP_HOME, 'config.yaml');
    expect(existsSync(configFile)).toBe(true);
    const yaml = readFileSync(configFile, 'utf-8');
    expect(yaml).toMatch(/^user_slug_at_[a-f0-9]+:\s+persisttest/m);
  });

  test('subsequent calls return same slug (stable across sessions)', () => {
    const firstCall = resolveAs('stabletest');
    const secondCall = resolveAs('changed-after');
    // The persisted slug wins over the new $USER on the second call.
    expect(firstCall.stdout.trim()).toBe('stabletest');
    expect(secondCall.stdout.trim()).toBe('stabletest');
  });
});
describe('brain_trust_policy@<hash> namespace', () => {
  // Thin wrappers over `get` / `set` for a brain_trust_policy@<hash> key,
  // always scoped to the per-test GSTACK_HOME.
  const getPolicy = (hash: string) =>
    runConfig(['get', `brain_trust_policy@${hash}`], { GSTACK_HOME: TMP_HOME });
  const setPolicy = (hash: string, value: string) =>
    runConfig(['set', `brain_trust_policy@${hash}`, value], { GSTACK_HOME: TMP_HOME });

  test('default value is "unset"', () => {
    const { status, stdout } = getPolicy('deadbeef');
    expect(status).toBe(0);
    expect(stdout).toBe('unset');
  });

  test('set + get roundtrip works', () => {
    expect(setPolicy('deadbeef', 'personal').status).toBe(0);
    expect(getPolicy('deadbeef').stdout).toBe('personal');
  });

  test('invalid value falls back to unset with warning', () => {
    // An unrecognized value still exits 0, warns on stderr, and leaves the
    // stored policy at 'unset'.
    const rejected = setPolicy('deadbeef', 'invalid-value');
    expect(rejected.status).toBe(0);
    expect(rejected.stderr).toContain('not recognized');
    expect(getPolicy('deadbeef').stdout).toBe('unset');
  });

  test('shared value accepted', () => {
    setPolicy('deadbeef', 'shared');
    expect(getPolicy('deadbeef').stdout).toBe('shared');
  });

  test('per-endpoint policies dont collide', () => {
    setPolicy('aaaaaaaa', 'personal');
    setPolicy('bbbbbbbb', 'shared');
    const policyA = getPolicy('aaaaaaaa');
    const policyB = getPolicy('bbbbbbbb');
    expect(policyA.stdout).toBe('personal');
    expect(policyB.stdout).toBe('shared');
  });
});
describe('key validation', () => {
  // `get <key>` against the per-test GSTACK_HOME.
  const getKey = (key: string) => runConfig(['get', key], { GSTACK_HOME: TMP_HOME });

  test('rejects keys with disallowed characters', () => {
    const { status, stderr } = getKey('bad-key');
    expect(status).not.toBe(0);
    expect(stderr).toContain('alphanumeric');
  });

  test('accepts plain alphanumeric/underscore keys', () => {
    expect(getKey('proactive').status).toBe(0);
  });

  test('accepts @<hex-hash> suffix on key', () => {
    expect(getKey('brain_trust_policy@abc123ff').status).toBe(0);
  });
});