mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-19 08:10:08 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/pdf-emoji-font-fallback
# Conflicts: # CHANGELOG.md # VERSION # package.json
This commit is contained in:
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
* brain-cache roundtrip integration tests (T2a / T19).
|
||||
*
|
||||
* Exercises the non-MCP-dependent parts of the cache layer:
|
||||
* - Path resolution per scope (cross-project vs per-project)
|
||||
* - Atomic _meta.json write/read
|
||||
* - TTL staleness detection
|
||||
* - Invalidate clears last_refresh
|
||||
* - Schema-version mismatch triggers rebuild attempt (D4 A4)
|
||||
* - Endpoint switch triggers rebuild attempt
|
||||
*
|
||||
* The brain-reachable refresh path (MCP fetch + compress) is tested
|
||||
* separately in brain-cache-stale-but-usable.test.ts using a mocked
|
||||
* spawnGbrain. T2a focuses on the cache-state machine.
|
||||
*
|
||||
* Uses tmp GSTACK_HOME per-test to avoid polluting the real ~/.gstack/.
|
||||
* Gate-tier, free, ~50ms.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync, readdirSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
|
||||
/** Per-test scratch directory exported as GSTACK_HOME; (re)assigned in beforeEach. */
let TMP_HOME: string;

/** GSTACK_HOME as it was when this file loaded (`undefined` when it was not set). */
const ORIGINAL_HOME = process.env.GSTACK_HOME;

beforeEach(() => {
  TMP_HOME = mkdtempSync(join(tmpdir(), 'gstack-cache-test-'));
  process.env.GSTACK_HOME = TMP_HOME;
  // Reload the cache module fresh per test so it picks up the new HOME.
  delete require.cache[require.resolve('../bin/gstack-brain-cache')];
});

afterEach(() => {
  // Compare against undefined rather than truthiness: an originally empty
  // GSTACK_HOME ('') must be restored, not silently unset.
  if (ORIGINAL_HOME !== undefined) {
    process.env.GSTACK_HOME = ORIGINAL_HOME;
  } else {
    delete process.env.GSTACK_HOME;
  }
  try {
    rmSync(TMP_HOME, { recursive: true, force: true });
  } catch {
    /* best effort */
  }
});
|
||||
|
||||
/**
 * Dynamically imports the brain-cache module. Called inside each test, i.e.
 * after beforeEach has pointed GSTACK_HOME at the temp directory.
 */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const cacheModule: typeof import('../bin/gstack-brain-cache') = await import('../bin/gstack-brain-cache');
  return cacheModule;
}
|
||||
|
||||
describe('brain-cache paths', () => {
  // Cross-project digests resolve directly under <GSTACK_HOME>/brain-cache.
  test('cross-project entity (user-profile) lives in ~/.gstack/brain-cache/', async () => {
    const cache = await importCache();
    const resolved = cache.entityPath('user-profile', null);
    const expected = join(TMP_HOME, 'brain-cache', 'user-profile.md');
    expect(resolved).toBe(expected);
  });

  // Per-project digests resolve under <GSTACK_HOME>/projects/<slug>/brain-cache.
  test('per-project entity (product) lives in ~/.gstack/projects/<slug>/brain-cache/', async () => {
    const cache = await importCache();
    const resolved = cache.entityPath('product', 'helsinki');
    const expected = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache', 'product.md');
    expect(resolved).toBe(expected);
  });

  test('throws on unknown entity', async () => {
    const cache = await importCache();
    const resolveUnknown = () => cache.entityPath('not-an-entity', null);
    expect(resolveUnknown).toThrow();
  });

  // A per-project entity cannot be resolved without a project slug.
  test('per-project entity without slug throws', async () => {
    const cache = await importCache();
    const resolveWithoutSlug = () => cache.entityPath('product', null);
    expect(resolveWithoutSlug).toThrow();
  });
});
|
||||
|
||||
describe('brain-cache meta lifecycle', () => {
  test('cmdMeta on empty cache returns valid fresh meta', async () => {
    const cache = await importCache();
    const freshMeta = cache.cmdMeta('helsinki');

    const semverPattern = /^\d+\.\d+\.\d+$/;
    const endpointHashPattern = /^[a-f0-9]{1,8}$|^local$/;
    expect(freshMeta.schema_version).toMatch(semverPattern);
    expect(freshMeta.endpoint_hash).toMatch(endpointHashPattern);
    expect(freshMeta.last_refresh).toEqual({});
  });

  test('cmdInvalidate writes meta even if no prior refresh', async () => {
    const cache = await importCache();
    const metaFile = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache', '_meta.json');

    cache.cmdInvalidate('product', 'helsinki');
    const metaAfter = cache.cmdMeta('helsinki');

    // Invalidating an entity that was never refreshed leaves last_refresh
    // without a `product` key, yet the meta file must now exist on disk.
    expect(metaAfter.last_refresh.product).toBeUndefined();
    const metaWritten = existsSync(metaFile);
    expect(metaWritten).toBe(true);
  });
});
|
||||
|
||||
describe('brain-cache endpoint detection', () => {
  test('detectEndpointHash returns "local" when no ~/.claude.json gbrain MCP', async () => {
    // The temp env never writes a ~/.claude.json, so detection is expected to
    // fall through to "local". The developer's real ~/.claude.json may still
    // configure an MCP server, in which case a real sha8 comes back instead —
    // so only "non-empty string" is asserted here.
    const cache = await importCache();
    const endpointHash = cache.detectEndpointHash();

    expect(typeof endpointHash).toBe('string');
    expect(endpointHash.length).toBeGreaterThan(0);
  });
});
|
||||
|
||||
describe('brain-cache schema mismatch behavior', () => {
  test('schema-version mismatch in meta triggers full-rebuild attempt on next get', async () => {
    const cache = await importCache();

    // Seed a digest that is fresh by TTL (refreshed "now") but whose meta
    // carries an old schema version.
    const projectCacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
    mkdirSync(projectCacheDir, { recursive: true });
    writeFileSync(join(projectCacheDir, 'product.md'), '# stale-from-old-schema\n');

    const outdatedMeta = {
      schema_version: '0.0.1',
      endpoint_hash: cache.detectEndpointHash(),
      last_refresh: { product: Date.now() },
      last_attempt: {},
    };
    writeFileSync(join(projectCacheDir, '_meta.json'), JSON.stringify(outdatedMeta));

    const outcome = cache.cmdGet('product', 'helsinki');

    // No gbrain mock is installed, so the brain is unreachable and the refresh
    // is expected to fail. The resulting state depends on what the rebuild
    // step leaves on disk, so any of these three states is accepted.
    const acceptableStates = ['missing', 'cold-refreshed', 'stale-fallback'];
    expect(acceptableStates).toContain(outcome.state);
  });
});
|
||||
|
||||
describe('brain-cache state machine', () => {
  /**
   * Writes product.md plus a matching _meta.json for project "helsinki".
   * `refreshedAt` becomes last_refresh.product (epoch milliseconds).
   */
  const seedProductCache = (
    cache: Awaited<ReturnType<typeof importCache>>,
    body: string,
    refreshedAt: number,
  ): void => {
    const dir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
    mkdirSync(dir, { recursive: true });
    writeFileSync(join(dir, 'product.md'), body);
    const meta = {
      schema_version: '1.0.0', // matches GSTACK_SCHEMA_PACK_VERSION
      endpoint_hash: cache.detectEndpointHash(),
      last_refresh: { product: refreshedAt },
      last_attempt: {},
    };
    writeFileSync(join(dir, '_meta.json'), JSON.stringify(meta));
  };

  test('warm: pre-seeded fresh cache returns warm without touching brain', async () => {
    const cache = await importCache();
    const productContent = '# Product: helsinki\n\nA test product.\n';
    seedProductCache(cache, productContent, Date.now()); // refreshed just now => fresh

    const outcome = cache.cmdGet('product', 'helsinki');

    expect(outcome.state).toBe('warm');
    const served = readFileSync(outcome.path, 'utf-8');
    expect(served).toBe(productContent);
  });

  test('missing: no cache + no brain returns missing state', async () => {
    const cache = await importCache();
    const outcome = cache.cmdGet('brand', 'helsinki');
    expect(outcome.state).toBe('missing');
  });

  test('stale-fallback: stale cache with unreachable brain returns stale-fallback', async () => {
    const cache = await importCache();
    // last_refresh at the epoch start is far older than the product TTL (1d).
    seedProductCache(cache, '# stale\n', 0);

    const outcome = cache.cmdGet('product', 'helsinki');

    // Brain unreachable → cold refresh fails → stale-but-usable fallback
    expect(outcome.state).toBe('stale-fallback');
  });
});
|
||||
@@ -0,0 +1,169 @@
|
||||
/**
|
||||
* Brain cache spec internal-consistency invariants (T14 / D2).
|
||||
*
|
||||
* Asserts that scripts/brain-cache-spec.ts is self-consistent:
|
||||
* - Every skill's subset only references entities that exist.
|
||||
* - Per-skill budget cap is achievable given per-entity caps.
|
||||
* - Cross-project entities are clearly distinguished from per-project.
|
||||
* - Invalidation graph has no dangling skill references.
|
||||
* - Helper functions throw on unknown names (defensive).
|
||||
*
|
||||
* Gate-tier, free, pure import + assertion. Runs in <100ms.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import {
|
||||
BRAIN_CACHE_ENTITIES,
|
||||
SKILL_DIGEST_SUBSETS,
|
||||
SKILL_PREFLIGHT_BUDGET_BYTES,
|
||||
AUTOPLAN_PREFLIGHT_BUDGET_BYTES,
|
||||
SALIENCE_DEFAULT_ALLOWLIST,
|
||||
SKILL_CALIBRATION_WEIGHTS,
|
||||
TRANSPORT_DEFAULT_POLICY,
|
||||
USER_SLUG_RESOLUTION_ORDER,
|
||||
GSTACK_SCHEMA_PACK_NAME,
|
||||
GSTACK_SCHEMA_PACK_VERSION,
|
||||
CACHE_REFRESH_LOCK_TIMEOUT_MS,
|
||||
SKILL_RUN_RETENTION_DAYS,
|
||||
getCacheFile,
|
||||
getSkillSubset,
|
||||
getSkillBudget,
|
||||
getInvalidationTargets,
|
||||
getPreflightSkills,
|
||||
getMaxSubsetBytes,
|
||||
} from '../scripts/brain-cache-spec';
|
||||
|
||||
describe('brain-cache-spec internal consistency', () => {
  test('every skill subset references only known entities', () => {
    const entityNames = new Set(Object.keys(BRAIN_CACHE_ENTITIES));
    for (const subset of Object.values(SKILL_DIGEST_SUBSETS)) {
      for (const name of subset) {
        expect(entityNames.has(name)).toBe(true);
      }
    }
  });

  test('every skill with a subset has a budget', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      expect(SKILL_PREFLIGHT_BUDGET_BYTES[skill]).toBeGreaterThan(0);
    }
  });

  test('per-skill budget is achievable given per-entity budgets', () => {
    // Per-entity budgets are hard ceilings on each digest's own file size.
    // Per-skill budget is enforced by the compressor on the SUM injected into
    // the skill's preflight context — the same entity may be sampled (top-N)
    // rather than verbatim. So the sum may legitimately exceed the skill
    // budget; the compressor trims at write time. We allow up to 3x as a
    // sanity ceiling (test/skill-preflight-budget.test.ts enforces the real cap).
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const maxBytes = getMaxSubsetBytes(skill);
      const skillBudget = getSkillBudget(skill);
      expect(maxBytes).toBeLessThanOrEqual(skillBudget * 3);
    }
  });

  test('autoplan total budget covers the 4 plan-* skills (excluding office-hours)', () => {
    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    const sum = autoplanSkills.reduce((acc, s) => acc + getSkillBudget(s), 0);
    expect(sum).toBeLessThanOrEqual(AUTOPLAN_PREFLIGHT_BUDGET_BYTES);
  });

  test('every entity has a positive TTL and a positive budget', () => {
    for (const entity of Object.values(BRAIN_CACHE_ENTITIES)) {
      expect(entity.ttl_ms).toBeGreaterThan(0);
      expect(entity.budget_bytes).toBeGreaterThan(0);
      expect(entity.file).toMatch(/\.md$/);
      expect(['cross-project', 'per-project']).toContain(entity.scope);
    }
  });

  test('user-profile is the only cross-project entity', () => {
    const crossProject = Object.entries(BRAIN_CACHE_ENTITIES)
      .filter(([, e]) => e.scope === 'cross-project')
      .map(([n]) => n);
    expect(crossProject).toEqual(['user-profile']);
  });

  test('salience entity has shortest TTL (changes hourly)', () => {
    const ttls = Object.values(BRAIN_CACHE_ENTITIES).map((e) => e.ttl_ms);
    expect(BRAIN_CACHE_ENTITIES.salience.ttl_ms).toBe(Math.min(...ttls));
  });

  test('salience allowlist has sane defaults (no personal/family/therapy)', () => {
    const blocked = ['personal/', 'family/', 'therapy/', 'reflection'];
    for (const prefix of blocked) {
      expect(SALIENCE_DEFAULT_ALLOWLIST.some((p) => p.startsWith(prefix))).toBe(false);
    }
    // Must contain at least projects/ + gstack/ (work-flow surfaces)
    expect(SALIENCE_DEFAULT_ALLOWLIST).toContain('projects/');
    expect(SALIENCE_DEFAULT_ALLOWLIST).toContain('gstack/');
  });

  test('calibration weights are bounded 0-1 and present for all preflight skills', () => {
    for (const skill of getPreflightSkills()) {
      const weight = SKILL_CALIBRATION_WEIGHTS[skill];
      expect(weight).toBeGreaterThan(0);
      expect(weight).toBeLessThanOrEqual(1);
    }
  });

  test('transport policy defaults exist for all transport modes', () => {
    const required = ['local-pglite', 'local-stdio', 'remote-http-single-tenant', 'remote-http-ambiguous'];
    for (const transport of required) {
      expect(TRANSPORT_DEFAULT_POLICY[transport]).toBeDefined();
    }
    // Local transports must default personal (D4 / Phase 1.5 default rule)
    expect(TRANSPORT_DEFAULT_POLICY['local-pglite']).toBe('personal');
    expect(TRANSPORT_DEFAULT_POLICY['local-stdio']).toBe('personal');
    // Ambiguous remote MUST require explicit ask (never silent default)
    expect(TRANSPORT_DEFAULT_POLICY['remote-http-ambiguous']).toBe('unset');
  });

  test('user-slug resolution chain has 4 deterministic fallbacks ending in non-empty', () => {
    expect(USER_SLUG_RESOLUTION_ORDER.length).toBe(4);
    expect(USER_SLUG_RESOLUTION_ORDER[USER_SLUG_RESOLUTION_ORDER.length - 1]).toBe('anonymous_hostname_sha8');
  });

  test('schema pack identity is stable strings', () => {
    expect(GSTACK_SCHEMA_PACK_NAME).toBe('gstack-core');
    expect(GSTACK_SCHEMA_PACK_VERSION).toMatch(/^\d+\.\d+\.\d+$/);
  });

  test('refresh lock timeout matches /sync-gbrain convention (5 min)', () => {
    expect(CACHE_REFRESH_LOCK_TIMEOUT_MS).toBe(5 * 60_000);
  });

  test('skill-run retention is 90 days per D10 lifecycle policy', () => {
    expect(SKILL_RUN_RETENTION_DAYS).toBe(90);
  });

  test('invalidation graph: every "skill-run-write" target also depends on it', () => {
    // recent-decisions invalidates on skill-run-write — verify the contract holds
    const targets = getInvalidationTargets('skill-run-write');
    expect(targets).toContain('recent-decisions');
  });

  test('invalidation graph: /plan-ceo-review invalidates product + goals + recent-decisions chain', () => {
    // Only product and goals are pinned directly here.
    const targets = getInvalidationTargets('/plan-ceo-review');
    expect(targets).toContain('product');
    expect(targets).toContain('goals');
  });

  test('helpers throw on unknown names (defensive)', () => {
    expect(() => getCacheFile('nonsense-entity')).toThrow();
    expect(() => getSkillSubset('not-a-skill')).toThrow();
    expect(() => getSkillBudget('not-a-skill')).toThrow();
  });

  test('helpers return correct values for known names', () => {
    expect(getCacheFile('product')).toBe('product.md');
    expect(getSkillSubset('plan-eng-review')).toEqual(['product', 'recent-decisions']);
    expect(getSkillBudget('office-hours')).toBe(5120);
  });

  test('all 5 preflight skills are real planning-skill names', () => {
    const expected = ['office-hours', 'plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    // Array.prototype.sort sorts in place. Sort copies so the comparison can
    // never reorder an array owned by the spec module (should
    // getPreflightSkills hand back a shared reference).
    const actualSorted = [...getPreflightSkills()].sort();
    const expectedSorted = [...expected].sort();
    expect(actualSorted).toEqual(expectedSorted);
  });
});
|
||||
@@ -0,0 +1,166 @@
|
||||
/**
|
||||
* Brain-aware planning resolver tests (T4 / T19).
|
||||
*
|
||||
* Verifies the three resolvers in scripts/resolvers/gbrain.ts:
|
||||
* - generateBrainPreflight — fires for preflight skills, empty for others
|
||||
* - generateBrainCacheRefresh — same gating
|
||||
* - generateBrainWriteBack — same gating; only weighted skills emit
|
||||
*
|
||||
* Gate-tier, free, pure import + render.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import {
|
||||
generateBrainPreflight,
|
||||
generateBrainCacheRefresh,
|
||||
generateBrainWriteBack,
|
||||
} from '../scripts/resolvers/gbrain';
|
||||
import { SKILL_DIGEST_SUBSETS } from '../scripts/brain-cache-spec';
|
||||
import { HOST_PATHS } from '../scripts/resolvers/types';
|
||||
import type { TemplateContext } from '../scripts/resolvers/types';
|
||||
|
||||
/**
 * Builds a TemplateContext for `skillName`, pinned to the 'claude' host and
 * its HOST_PATHS entry, with a template path under /tmp/<skillName>/.
 */
function buildCtx(skillName: string): TemplateContext {
  const ctx: TemplateContext = {
    skillName,
    tmplPath: '/tmp/' + skillName + '/SKILL.md.tmpl',
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return ctx;
}
|
||||
|
||||
describe('generateBrainPreflight', () => {
  test('emits content for every registered preflight skill', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const out = generateBrainPreflight(buildCtx(skill));
      expect(out.length).toBeGreaterThan(0);
      expect(out).toContain('## Brain Context');
      expect(out).toContain('gstack-brain-cache get');
    }
  });

  test('emits empty string for non-preflight skills (no behavior)', () => {
    const nonPlanning = ['ship', 'qa', 'investigate', 'retro', 'design-review'];
    for (const skill of nonPlanning) {
      expect(generateBrainPreflight(buildCtx(skill))).toBe('');
    }
  });

  test('includes per-skill subset entities (office-hours loads 5 digests)', () => {
    const out = generateBrainPreflight(buildCtx('office-hours'));
    // office-hours loads: product, goals, user-profile, recent-decisions, salience
    for (const entity of ['product', 'goals', 'user-profile', 'recent-decisions', 'salience']) {
      expect(out).toContain(entity);
    }
  });

  test('plan-eng-review loads minimal subset (2 digests)', () => {
    const out = generateBrainPreflight(buildCtx('plan-eng-review'));
    expect(out).toContain('product');
    expect(out).toContain('recent-decisions');
    // Should NOT load brand or developer-persona
    expect(out).not.toContain('gstack-brain-cache get brand');
    expect(out).not.toContain('gstack-brain-cache get developer-persona');
  });

  test('mentions D9 salience privacy in the prose (transparency)', () => {
    const lowered = generateBrainPreflight(buildCtx('office-hours')).toLowerCase();
    expect(lowered).toContain('privacy');
    expect(lowered).toContain('allowlist');
  });

  test('user-profile is loaded WITHOUT --project flag (cross-project)', () => {
    const out = generateBrainPreflight(buildCtx('office-hours'));
    // user-profile is cross-project; its get call should NOT carry --project.
    const getLine = out
      .split('\n')
      .find((line) => line.includes('gstack-brain-cache get user-profile'));
    // Require the line to exist first. Falling back to '' when it is missing
    // would make the not.toContain check below pass vacuously.
    expect(getLine).toBeDefined();
    expect(getLine ?? '').not.toContain('--project');
  });

  test('per-project entities are loaded WITH --project "$SLUG"', () => {
    const out = generateBrainPreflight(buildCtx('plan-eng-review'));
    expect(out).toContain('--project "$SLUG"');
  });
});
|
||||
|
||||
describe('generateBrainCacheRefresh', () => {
  test('emits refresh hook for preflight skills', () => {
    const rendered = generateBrainCacheRefresh(buildCtx('plan-ceo-review'));
    for (const marker of ['Background Refresh', 'gstack-brain-cache refresh']) {
      expect(rendered).toContain(marker);
    }
  });

  test('empty for non-preflight skills', () => {
    const rendered = generateBrainCacheRefresh(buildCtx('ship'));
    expect(rendered).toBe('');
  });

  test('uses background backgrounding (does not block user)', () => {
    const rendered = generateBrainCacheRefresh(buildCtx('plan-ceo-review'));
    // The rendered hook is expected to background the refresh command; this
    // only checks that an ampersand appears somewhere in the output.
    expect(rendered).toContain('&');
  });
});
|
||||
|
||||
describe('generateBrainWriteBack', () => {
  /** Renders the write-back block for one skill. */
  const render = (skill: string): string => generateBrainWriteBack(buildCtx(skill));

  test('emits write-back block for all 5 weighted preflight skills', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const rendered = render(skill);
      expect(rendered.length).toBeGreaterThan(0);
      expect(rendered).toContain('Calibration Write-Back');
      expect(rendered).toContain('BRAIN_CALIBRATION_WRITEBACK');
    }
  });

  test('empty for non-preflight skills', () => {
    expect(render('ship')).toBe('');
  });

  test('includes per-skill calibration weight (E5)', () => {
    // Expected markers mirror SKILL_CALIBRATION_WEIGHTS.
    const expectedMarkers: Array<[string, string]> = [
      ['plan-ceo-review', 'weight: 0.8'],
      ['office-hours', 'weight: 0.9'], // strongest calibration weight
      ['plan-design-review', 'weight: 0.5'], // weakest (design predictions are noisy)
    ];
    for (const [skill, marker] of expectedMarkers) {
      expect(render(skill)).toContain(marker);
    }
  });

  test('mentions personal trust policy gate (D11 codex tension)', () => {
    const rendered = render('plan-ceo-review');
    expect(rendered.toLowerCase()).toContain('personal');
    expect(rendered).toContain('brain_trust_policy');
  });

  test('mentions fallback path when takes_add MCP op unavailable (upstream T8)', () => {
    const rendered = render('plan-ceo-review');
    expect(rendered).toContain('put_page');
    expect(rendered).toContain('takes');
  });

  test('emits invalidation bash for affected cache digests', () => {
    const rendered = render('plan-ceo-review');
    // plan-ceo-review invalidates: product, goals, competitive-intel
    expect(rendered).toContain('gstack-brain-cache invalidate');
  });
});
|
||||
|
||||
describe('resolver registration in index.ts', () => {
  // Every brain placeholder must be registered as a callable resolver.
  // Previously only BRAIN_PREFLIGHT was checked to be a function; the other
  // two now get the same check, so a non-function registration cannot slip by.
  const placeholders = ['BRAIN_PREFLIGHT', 'BRAIN_CACHE_REFRESH', 'BRAIN_WRITE_BACK'] as const;

  for (const placeholder of placeholders) {
    test(`${placeholder} placeholder is registered`, async () => {
      const { RESOLVERS } = await import('../scripts/resolvers/index');
      expect(RESOLVERS[placeholder]).toBeDefined();
      expect(typeof RESOLVERS[placeholder]).toBe('function');
    });
  }
});
|
||||
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Concurrent-refresh lockfile dedup (T15 / D3).
|
||||
*
|
||||
* When autoplan dispatches 4 planning skills back-to-back and they all hit a
|
||||
* cold-miss on the same digest, only ONE should actually fetch from the brain;
|
||||
* the rest dedup via the project-scoped lockfile at
|
||||
* ~/.gstack/projects/<slug>/brain-cache/.refresh.lock. Stale locks (process
|
||||
* dead, or older than CACHE_REFRESH_LOCK_TIMEOUT_MS) are taken over.
|
||||
*
|
||||
* Gate-tier, free, pure file-IO. Uses tmp GSTACK_HOME.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync, unlinkSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir, hostname } from 'os';
|
||||
|
||||
/** Per-test scratch directory exported as GSTACK_HOME; (re)assigned in beforeEach. */
let TMP_HOME: string;

/** GSTACK_HOME as it was when this file loaded (`undefined` when it was not set). */
const ORIGINAL_HOME = process.env.GSTACK_HOME;

beforeEach(() => {
  TMP_HOME = mkdtempSync(join(tmpdir(), 'gstack-lock-test-'));
  process.env.GSTACK_HOME = TMP_HOME;
  // Drop the cached module entry so the next import happens after GSTACK_HOME
  // has been repointed.
  delete require.cache[require.resolve('../bin/gstack-brain-cache')];
});

afterEach(() => {
  // Compare against undefined rather than truthiness: an originally empty
  // GSTACK_HOME ('') must be restored, not silently unset.
  if (ORIGINAL_HOME !== undefined) {
    process.env.GSTACK_HOME = ORIGINAL_HOME;
  } else {
    delete process.env.GSTACK_HOME;
  }
  try {
    rmSync(TMP_HOME, { recursive: true, force: true });
  } catch {
    /* best effort */
  }
});
|
||||
|
||||
/**
 * Dynamically imports the brain-cache module. Called inside each test, i.e.
 * after beforeEach has pointed GSTACK_HOME at the temp directory.
 */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const cacheModule: typeof import('../bin/gstack-brain-cache') = await import('../bin/gstack-brain-cache');
  return cacheModule;
}
|
||||
|
||||
describe('concurrent-refresh lockfile dedup', () => {
  /**
   * Creates the "helsinki" project cache dir and returns the path of its
   * refresh lock file. Evaluated lazily because TMP_HOME changes per test.
   */
  const prepareProjectLock = (): string => {
    const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
    // Pre-create dirs to avoid a race on first use.
    mkdirSync(cacheDir, { recursive: true });
    return join(cacheDir, '.refresh.lock');
  };

  test('first caller acquires lock; second concurrent caller deduplicates', async () => {
    const mod = await importCache();
    const lockFile = prepareProjectLock();

    // First caller: the lock is free, so the callback runs exactly once and
    // its return value is passed through. withRefreshLock is synchronous, so
    // this call has fully completed (and released the lock) before continuing.
    let callbackRan = 0;
    const firstResult = mod.withRefreshLock('helsinki', () => {
      callbackRan++;
      return 'first';
    });
    expect(firstResult).toBe('first');
    expect(callbackRan).toBe(1);

    // Second caller: a synchronous callback cannot be held open from the same
    // thread, so stand up an artificial fresh lock owned by another host to
    // simulate a concurrent in-flight refresh.
    writeFileSync(lockFile, JSON.stringify({
      pid: 999999, // unlikely-to-exist pid on host
      host: 'some-other-host',
      ts: Date.now(),
    }));
    try {
      let innerRan = false;
      const innerResult = mod.withRefreshLock('helsinki', () => {
        innerRan = true;
        return 'inner';
      });
      expect(innerResult).toBe('dedup');
      // A deduplicated caller must not have done the work itself.
      expect(innerRan).toBe(false);
    } finally {
      // Cleanup
      try { unlinkSync(lockFile); } catch { /* best effort */ }
    }
  });

  test('stale lock (older than timeout) is taken over', async () => {
    const mod = await importCache();
    const lockFile = prepareProjectLock();
    // Lock is 10 minutes old — way past the 5-min timeout.
    writeFileSync(lockFile, JSON.stringify({
      pid: 999999,
      host: 'some-other-host',
      ts: Date.now() - 10 * 60_000,
    }));
    const result = mod.withRefreshLock('helsinki', () => 'took-over');
    expect(result).toBe('took-over');
  });

  test('lock from same host with dead PID is taken over', async () => {
    const mod = await importCache();
    const lockFile = prepareProjectLock();
    // Same host, but PID 999999 which is unlikely to exist.
    writeFileSync(lockFile, JSON.stringify({
      pid: 999999,
      host: hostname(),
      ts: Date.now(),
    }));
    const result = mod.withRefreshLock('helsinki', () => 'took-over-dead-pid');
    expect(result).toBe('took-over-dead-pid');
  });

  test('lock is released after callback runs', async () => {
    const mod = await importCache();
    const lockFile = prepareProjectLock();

    // Check the lock exists while the callback runs, so the "released"
    // assertion below cannot pass merely because no lock was ever created.
    let heldDuringCallback = false;
    mod.withRefreshLock('helsinki', () => {
      heldDuringCallback = existsSync(lockFile);
      return 'done';
    });

    expect(heldDuringCallback).toBe(true);
    expect(existsSync(lockFile)).toBe(false);
  });

  test('lock is released even when callback throws', async () => {
    const mod = await importCache();
    const lockFile = prepareProjectLock();

    expect(() => {
      mod.withRefreshLock('helsinki', () => {
        throw new Error('callback failed');
      });
    }).toThrow();

    expect(existsSync(lockFile)).toBe(false);
  });

  test('corrupt lock file is taken over (defensive)', async () => {
    const mod = await importCache();
    const lockFile = prepareProjectLock();
    writeFileSync(lockFile, 'not valid json {{{');

    const result = mod.withRefreshLock('helsinki', () => 'recovered');
    expect(result).toBe('recovered');
  });

  test('cross-project lock uses ~/.gstack/brain-cache/.refresh.lock', async () => {
    const mod = await importCache();
    mkdirSync(join(TMP_HOME, 'brain-cache'), { recursive: true });
    const lockFile = join(TMP_HOME, 'brain-cache', '.refresh.lock');

    // Check the path while the lock is held; checking only afterwards would
    // also pass if the lock lived somewhere else.
    let heldDuringCallback = false;
    mod.withRefreshLock(null, () => {
      heldDuringCallback = existsSync(lockFile);
      return 'cross-project';
    });

    expect(heldDuringCallback).toBe(true); // created at the cross-project path
    expect(existsSync(lockFile)).toBe(false); // released
  });
});
|
||||
@@ -0,0 +1,30 @@
|
||||
# Founder pitch — pixel.fund
|
||||
|
||||
Founder: Maya Chen (CEO, ex-Stripe), co-founder Aria Patel (CTO,
|
||||
ex-Robinhood). YC W26.
|
||||
|
||||
## What
|
||||
|
||||
A donation-budget tool for solo creators. Set a monthly $ floor for
|
||||
causes you care about, pixel.fund auto-allocates each dollar across your
|
||||
chosen orgs (Direct Relief, GiveDirectly, etc.) the moment a Stripe
|
||||
payout lands. One-line embeddable receipt. 1% platform fee.
|
||||
|
||||
## Traction
|
||||
|
||||
- 2026-04-01 launched private beta with 14 creators from her newsletter
|
||||
- 2026-05-15 hit 51 paying creators, $4,200 MRR
|
||||
- Waitlist of 230 from a single tweet by a tech-Twitter influencer
|
||||
- Two creators asked about a "team plan" (multi-seat) unprompted
|
||||
|
||||
## Status quo
|
||||
|
||||
Creators today either (a) write checks ad-hoc and forget about it, or
|
||||
(b) use Patreon-style platforms where the "cause" is opaque (general
|
||||
fund). Maya talked to 40 creators in YC interviews — 31 said they "want
|
||||
to give more but it's mental overhead."
|
||||
|
||||
## What Maya wants from office hours
|
||||
|
||||
Should she chase the team-plan signal, or go deeper on the solo flow
|
||||
first? She's two weeks from running out of YC dorm food.
|
||||
@@ -0,0 +1,193 @@
|
||||
/**
|
||||
* Regression pin for the setup-time gbrain detection → gen-skill-docs
|
||||
* override (T2 / v1.50.0.0).
|
||||
*
|
||||
* The override mechanism lives in scripts/gen-skill-docs.ts: when invoked
|
||||
* with --respect-detection, it reads ~/.gstack/gbrain-detection.json and
|
||||
* un-suppresses GBRAIN_CONTEXT_LOAD + GBRAIN_SAVE_RESULTS for hosts that
|
||||
* statically list them in suppressedResolvers (claude, codex, slate,
|
||||
* factory, opencode, openclaw, cursor, kiro).
|
||||
*
|
||||
* Tests drive gen-skill-docs as a subprocess against a temp GSTACK_HOME
|
||||
* with each detection state, then assert what landed in the generated
|
||||
* Claude-host SKILL.md. This is end-to-end through the actual override
|
||||
* pipeline — no mocking — so it catches regressions in either the loader
|
||||
* or the suppressedResolvers filter.
|
||||
*
|
||||
* Gate-tier, free, ~3-5s per test (gen-skill-docs runs the full skill
|
||||
* generation against the real repo; --host claude scopes to one host).
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { execFileSync } from 'child_process';
|
||||
import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
|
||||
const REPO_ROOT = join(import.meta.dir, '..');
|
||||
|
||||
/** A throwaway GSTACK_HOME directory plus the callback that removes it. */
interface FixtureEnv {
  tmpHome: string;
  cleanup: () => void;
}

/**
 * Creates a fresh temp directory to act as GSTACK_HOME.
 *
 * @param detectionJson Raw text written to `gbrain-detection.json` inside the
 *   directory, or `null` to leave the detection file absent.
 * @returns The directory path and a best-effort `cleanup` that deletes it.
 */
function makeFixture(detectionJson: string | null): FixtureEnv {
  const home = mkdtempSync(join(tmpdir(), 'gbrain-detect-test-'));
  if (detectionJson !== null) {
    const detectionPath = join(home, 'gbrain-detection.json');
    writeFileSync(detectionPath, detectionJson);
  }

  const cleanup = (): void => {
    try {
      rmSync(home, { recursive: true, force: true });
    } catch {
      // best effort
    }
  };

  return { tmpHome: home, cleanup };
}
|
||||
|
||||
/**
 * Lists tracked files under REPO_ROOT whose working-tree content differs from
 * the git index (paths relative to REPO_ROOT).
 *
 * @returns The set of modified paths, or `null` when git could not be queried
 *   (not a checkout, git missing, ...). Callers must treat `null` as "unknown"
 *   and never as "nothing modified".
 */
function listModifiedTrackedFiles(): Set<string> | null {
  try {
    const out = execFileSync('git', ['diff', '--name-only', '--relative', '-z'], {
      cwd: REPO_ROOT,
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    return new Set(out.split('\0').filter((p) => p.length > 0));
  } catch {
    return null;
  }
}

/**
 * Best-effort revert of tracked files that were clean before the generator ran
 * and are modified now. Restoring from the index is exact for these files
 * because "clean before" means their prior content equalled the index.
 * Files that were already modified beforehand are never touched.
 */
function restoreCollateralChanges(modifiedBefore: Set<string> | null): void {
  if (modifiedBefore === null) return; // unknown baseline: do nothing rather than risk clobbering work
  const modifiedAfter = listModifiedTrackedFiles();
  if (modifiedAfter === null) return;

  const collateral = Array.from(modifiedAfter).filter((f) => !modifiedBefore.has(f));
  if (collateral.length === 0) return;
  try {
    execFileSync('git', ['checkout', '--', ...collateral], { cwd: REPO_ROOT, stdio: 'ignore' });
  } catch {
    // best effort
  }
}

/**
 * Runs gen-skill-docs for the Claude host with an isolated GSTACK_HOME and
 * returns the regenerated content of `opts.files`.
 *
 * gen-skill-docs exposes no output-path argument and --dry-run writes nothing,
 * so the generator runs against the real repo: the files are regenerated in
 * place, read back, and then restored.
 *
 * Restoration happens in two layers:
 *   1. `opts.files` are rewritten from the content captured before the run.
 *   2. Any other tracked file that was clean before the run and is modified
 *      afterwards is reverted via git. The generator rebuilds more than the
 *      probed files, so without this step a test run in isolation, or one
 *      that aborts midway, leaves unrelated generated files dirty.
 *
 * NOTE(review): untracked files created by the generator are not removed.
 *
 * @param opts.respectDetection Pass `--respect-detection` to the generator.
 * @param opts.tmpHome Directory exported to the subprocess as GSTACK_HOME.
 * @param opts.files Repo-relative paths to snapshot.
 * @returns Map from each path in `opts.files` to its regenerated content.
 * @throws Whatever `execFileSync` throws when the generator exits non-zero or
 *   exceeds the 30s timeout; restoration still runs in that case.
 */
function regenAndSnapshot(opts: {
  respectDetection: boolean;
  tmpHome: string;
  files: string[];
}): Map<string, string> {
  // Capture the pre-run state so it can be put back after snapshotting.
  const original = new Map<string, string>();
  for (const f of opts.files) {
    original.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8'));
  }
  const modifiedBefore = listModifiedTrackedFiles();

  const args = ['run', 'scripts/gen-skill-docs.ts', '--host', 'claude'];
  if (opts.respectDetection) args.push('--respect-detection');

  try {
    execFileSync('bun', args, {
      cwd: REPO_ROOT,
      env: { ...process.env, GSTACK_HOME: opts.tmpHome },
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 30_000,
    });

    // Snapshot the regenerated content.
    const snapshot = new Map<string, string>();
    for (const f of opts.files) {
      snapshot.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8'));
    }
    return snapshot;
  } finally {
    // Always restore so the test leaves the working tree clean.
    try {
      for (const [f, content] of original) {
        writeFileSync(join(REPO_ROOT, f), content);
      }
    } finally {
      restoreCollateralChanges(modifiedBefore);
    }
  }
}
|
||||
|
||||
describe('gbrain detection override → gen-skill-docs', () => {
  // A single skill is enough to probe the override pipeline; per-skill
  // metadata correctness is covered by the resolver unit test
  // (test/resolvers-gbrain-save-results.test.ts).
  const PROBE = 'office-hours/SKILL.md';

  const DETECTION_OK = JSON.stringify({ gbrain_local_status: 'ok', gbrain_on_path: true, gbrain_version: 'test-0.41.0' });
  const DETECTION_NO_CLI = JSON.stringify({ gbrain_local_status: 'no-cli', gbrain_on_path: false, gbrain_version: null });

  /**
   * Regenerates the probe skill against a throwaway GSTACK_HOME seeded with
   * `detectionJson` (null = no detection file) and returns the rendered text.
   * The temp home is removed whether or not generation succeeds.
   */
  function renderProbe(detectionJson: string | null, respectDetection: boolean): string {
    const fixture = makeFixture(detectionJson);
    try {
      const rendered = regenAndSnapshot({
        respectDetection,
        tmpHome: fixture.tmpHome,
        files: [PROBE],
      });
      return rendered.get(PROBE)!;
    } finally {
      fixture.cleanup();
    }
  }

  test('with detected:true, Claude-host SKILL.md gains brain-aware blocks', () => {
    const content = renderProbe(DETECTION_OK, true);

    // GBRAIN_SAVE_RESULTS is un-suppressed, so the resolver output is rendered.
    expect(content).toContain('## Save Results to Brain');
    expect(content).toContain('gbrain put "office-hours/');
    expect(content).toContain('Skip this entire section if `gbrain` is not on PATH');

    // GBRAIN_CONTEXT_LOAD is un-suppressed alongside it (D6 bundling).
    expect(content).toContain('## Brain Context Load');
  });

  test('with detected:false (status != "ok"), brain blocks stay suppressed', () => {
    const content = renderProbe(DETECTION_NO_CLI, true);

    // Only the SAVE_RESULTS-specific marker is asserted absent. The
    // BRAIN_CACHE_REFRESH and BRAIN_WRITE_BACK resolvers are host-agnostic and
    // not gated by detection, so other "Brain ..." sections may still appear.
    expect(content).not.toContain('gbrain put "office-hours/');
  });

  test('with NO detection file, brain blocks stay suppressed (same as detected:false)', () => {
    const content = renderProbe(null, true);
    expect(content).not.toContain('gbrain put "office-hours/');
  });

  test('without --respect-detection flag, detection file is IGNORED (CI canonical path)', () => {
    // The default `bun run gen:skill-docs` (CI) must emit no-gbrain output even
    // when a detected:true file exists, so the committed SKILL.md stays
    // reproducible regardless of any developer's local gbrain install.
    const content = renderProbe(DETECTION_OK, false);
    expect(content).not.toContain('gbrain put "office-hours/');
    expect(content).not.toContain('## Save Results to Brain');
  });
});
|
||||
@@ -0,0 +1,150 @@
|
||||
/**
|
||||
* gstack-core@1.0.0 schema pack validation (T1).
|
||||
*
|
||||
* Asserts the schema pack is well-formed and matches the v1.48 plan:
|
||||
* - Exactly 8 page types (7 entities + 1 take)
|
||||
* - Frontmatter shape is internally consistent
|
||||
* - Retention policies match SKILL_RUN_RETENTION_DAYS spec
|
||||
* - Link verbs only reference declared verbs
|
||||
* - JSON payload shape is acceptable to mcp__gbrain__schema_apply_mutations
|
||||
*
|
||||
* Gate-tier, free, pure import + assertion.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import {
|
||||
GSTACK_CORE_SCHEMA_PACK,
|
||||
getSchemaPackMutationPayload,
|
||||
getSchemaPackTypeNames,
|
||||
getRetentionPolicy,
|
||||
} from '../scripts/gstack-schema-pack';
|
||||
import {
|
||||
GSTACK_SCHEMA_PACK_NAME,
|
||||
GSTACK_SCHEMA_PACK_VERSION,
|
||||
} from '../scripts/brain-cache-spec';
|
||||
|
||||
describe('gstack-core schema pack', () => {
  // Accessors are evaluated lazily, inside each test body.
  const pageTypes = () => GSTACK_CORE_SCHEMA_PACK.page_types;
  const pageType = (name: string) => pageTypes().find((t) => t.type === name);
  const allLinks = () => pageTypes().flatMap((def) => def.emits_links ?? []);

  test('identity matches brain-cache-spec constants', () => {
    const { name, version } = GSTACK_CORE_SCHEMA_PACK;
    expect(name).toBe(GSTACK_SCHEMA_PACK_NAME);
    expect(version).toBe(GSTACK_SCHEMA_PACK_VERSION);
  });

  test('declares exactly 8 page types (7 entities + gstack/take)', () => {
    expect(pageTypes().length).toBe(8);
  });

  test('all 7 brain-cache entities have a matching schema page type', () => {
    const declared = getSchemaPackTypeNames();
    [
      'gstack/user-profile',
      'gstack/product',
      'gstack/goal',
      'gstack/developer-persona',
      'gstack/brand',
      'gstack/competitive-intel',
      'gstack/skill-run',
    ].forEach((entity) => {
      expect(declared).toContain(entity);
    });
  });

  test('gstack/take exists with kind=bet supported (Phase 2 / E5)', () => {
    const take = pageType('gstack/take');
    expect(take).toBeDefined();
    const kindValues = take!.fields.find((f) => f.name === 'kind')?.values;
    expect(kindValues).toContain('bet');
    expect(kindValues).toContain('fact');
  });

  test('every page type has a required type + slug field', () => {
    pageTypes().forEach((def) => {
      const byName = (wanted: string) => def.fields.find((f) => f.name === wanted);
      expect(byName('type')?.required).toBe(true);
      expect(byName('slug')?.required).toBe(true);
    });
  });

  test('enum fields declare their values', () => {
    const enumFields = pageTypes().flatMap((def) => def.fields.filter((f) => f.type === 'enum'));
    for (const field of enumFields) {
      expect(field.values).toBeDefined();
      expect(field.values!.length).toBeGreaterThan(0);
    }
  });

  test('skill-run is the only archive-after-90d type', () => {
    const archived: string[] = [];
    for (const def of pageTypes()) {
      if (def.retention === 'archive-after-90d') archived.push(def.type);
    }
    expect(archived).toEqual(['gstack/skill-run']);
  });

  test('gstack/take is never-archive (calibration scorecard preservation)', () => {
    const policy = getRetentionPolicy('gstack/take');
    expect(policy).toBe('never-archive');
  });

  test('getRetentionPolicy throws on unknown type (defensive)', () => {
    const lookupUnknown = () => getRetentionPolicy('gstack/nonexistent');
    expect(lookupUnknown).toThrow();
  });

  test('link verbs declared on emits_links are also in pack.link_verbs', () => {
    const knownVerbs = new Set(GSTACK_CORE_SCHEMA_PACK.link_verbs);
    allLinks().forEach((link) => {
      expect(knownVerbs.has(link.verb)).toBe(true);
    });
  });

  test('link verbs only target declared gstack/ page types', () => {
    const knownTypes = new Set(getSchemaPackTypeNames());
    allLinks().forEach((link) => {
      expect(knownTypes.has(link.target_type)).toBe(true);
    });
  });

  test('mutation payload is well-formed JSON', () => {
    const payload = getSchemaPackMutationPayload();
    const pack = payload.schema_pack;
    expect(payload.schema_version).toBe(1);
    expect(pack).toBeDefined();
    expect(typeof payload.schema_pack.name).toBe('string');
    expect(Array.isArray(payload.schema_pack.page_types)).toBe(true);

    // A JSON round-trip surfaces values that cannot be serialized
    // (functions, undefined, ...).
    const roundTripped = JSON.parse(JSON.stringify(payload));
    expect(roundTripped.schema_pack.name).toBe(payload.schema_pack.name);
  });

  test('gstack/product has expected emits_links graph (product → goal/persona/brand/etc.)', () => {
    const product = pageType('gstack/product')!;
    const edges = (product.emits_links ?? []).map((l) => `${l.verb}:${l.target_type}`);
    for (const edge of [
      'targets:gstack/goal',
      'observed_by:gstack/developer-persona',
      'has_brand:gstack/brand',
      'competes_with:gstack/competitive-intel',
    ]) {
      expect(edges).toContain(edge);
    }
  });

  test('gstack/goal has lifecycle status enum (active/resolved/expired/archived)', () => {
    const goal = pageType('gstack/goal')!;
    const statusField = goal.fields.find((f) => f.name === 'status');
    expect(statusField?.values).toEqual(['active', 'resolved', 'expired', 'archived']);
  });

  test('gstack/skill-run records the bet count for calibration coverage', () => {
    const skillRun = pageType('gstack/skill-run')!;
    const takesWritten = skillRun.fields.find((f) => f.name === 'takes_written');
    expect(takesWritten).toBeDefined();
    expect(takesWritten?.type).toBe('number');
  });

  test('gstack/user-profile is never-archive (cross-project, long-lived)', () => {
    const policy = getRetentionPolicy('gstack/user-profile');
    expect(policy).toBe('never-archive');
  });
});
|
||||
@@ -385,6 +385,35 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
// /spec end-to-end via PTY — exercises the full Phase 1→5 pipeline
|
||||
// including --execute spawn. Periodic-tier — paid + non-deterministic.
|
||||
'spec-execute': ['spec/**', 'test/skill-e2e-spec-execute.test.ts'],
|
||||
|
||||
// /office-hours brain-writeback path under fake gbrain CLI (v1.50.0.0
|
||||
// T7). Drives /office-hours with a regenerated SKILL.md that has the
|
||||
// compressed GBRAIN_SAVE_RESULTS block + a fake gbrain on PATH; asserts
|
||||
// the agent calls `gbrain put office-hours/<slug>` with valid YAML
|
||||
// frontmatter. Touched by anything that changes resolver output, gen
|
||||
// pipeline, detection helper, refresh subcommand, or the on-demand
|
||||
// docs the resolver points to.
|
||||
'office-hours-brain-writeback': [
|
||||
'scripts/resolvers/gbrain.ts',
|
||||
'scripts/gen-skill-docs.ts',
|
||||
'bin/gstack-gbrain-detect',
|
||||
'bin/gstack-config',
|
||||
'office-hours/SKILL.md.tmpl',
|
||||
'docs/gbrain-write-surfaces.md',
|
||||
'test/fixtures/office-hours-brain-writeback/**',
|
||||
'test/skill-e2e-office-hours-brain-writeback.test.ts',
|
||||
],
|
||||
|
||||
// gbrain CLI real round-trip against a local PGLite store (v1.50.0.0
|
||||
// T11). Proves the gbrain CLI persistence contract gstack relies on —
|
||||
// a `gbrain put` followed by `gbrain get` returns the body. Skips if
|
||||
// VOYAGE_API_KEY is unset OR gbrain CLI not on PATH. Touched by the
|
||||
// resolver (which emits the CLI shape) and the test itself.
|
||||
'gbrain-roundtrip-local': [
|
||||
'scripts/resolvers/gbrain.ts',
|
||||
'test/skill-e2e-gbrain-roundtrip-local.test.ts',
|
||||
],
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -432,6 +461,13 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
|
||||
// Office Hours
|
||||
'office-hours-spec-review': 'gate',
|
||||
// Brain-writeback E2E — periodic per cost (claude -p) + non-deterministic
|
||||
// (model interprets the gbrain instruction). Matches nearby
|
||||
// setup-gbrain-path4-* tier classification.
|
||||
'office-hours-brain-writeback': 'periodic',
|
||||
// GBrain CLI round-trip — periodic per Voyage embedding cost (~$0.001/run)
|
||||
// and external-API-dependency (skips cleanly if VOYAGE_API_KEY unset).
|
||||
'gbrain-roundtrip-local': 'periodic',
|
||||
'office-hours-forcing-energy': 'gate', // V1.1 mode-posture regression gate (Sonnet generator)
|
||||
// 'office-hours-builder-wildness' retiered to periodic in v1.32 contributor
|
||||
// wave: this is an LLM-judge creativity score (axis_a ≥4 on a "wildness"
|
||||
|
||||
@@ -35,11 +35,18 @@ function listTrackedSkillMd(): string[] {
|
||||
return out.split("\n").filter((line) => line.trim().length > 0);
|
||||
}
|
||||
|
||||
describe("scripts/resolvers/gbrain.ts — no put_page in emitted instructions (regression for #1346)", () => {
|
||||
it("resolver source ships only `gbrain put` instructions, not the renamed `put_page`", () => {
|
||||
describe("scripts/resolvers/gbrain.ts — no `gbrain put_page` CLI subcommand in emitted instructions (regression for #1346)", () => {
|
||||
it("resolver source ships only `gbrain put` CLI instructions, not the renamed `gbrain put_page`", () => {
|
||||
// We're guarding against the v0.18 CLI subcommand rename
|
||||
// (`gbrain put_page <slug>` → `gbrain put <slug>`). The MCP op
|
||||
// `mcp__gbrain__put_page` is a legitimately separate identifier (the
|
||||
// MCP-layer write op, unrelated to the CLI rename) and may still
|
||||
// appear in resolver output as a fallback reference for the
|
||||
// calibration-take write-back path. So check the CLI subcommand
|
||||
// shape specifically: `gbrain put_page` with a space.
|
||||
const src = readFileSync(RESOLVER_PATH, "utf-8");
|
||||
const stripped = stripComments(src);
|
||||
expect(stripped).not.toContain("put_page");
|
||||
expect(stripped).not.toContain("gbrain put_page");
|
||||
});
|
||||
|
||||
it("every tracked SKILL.md file is free of the renamed gbrain put_page subcommand", () => {
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* Resolver regression pin for generateGBrainSaveResults +
|
||||
* generateGBrainContextLoad (compressed in v1.50.0.0).
|
||||
*
|
||||
* Two coverage stories:
|
||||
* 1. **Wiring symmetry**: all 5 planning skills (office-hours, plan-ceo-review,
|
||||
* plan-eng-review, plan-design-review, plan-devex-review) get the correct
|
||||
* slug prefix + tag in the emitted save instructions.
|
||||
* 2. **Token-budget pin**: post-compression, each block stays under a chars
|
||||
* ceiling so a future "let me just add one more line" refactor doesn't
|
||||
* silently re-inflate the prompt cost back toward the ~1000-token
|
||||
* naive-un-suppression baseline.
|
||||
*
|
||||
* Gate-tier, free, pure import + render — no host generation, no claude -p.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import {
|
||||
generateGBrainContextLoad,
|
||||
generateGBrainSaveResults,
|
||||
} from '../scripts/resolvers/gbrain';
|
||||
import { HOST_PATHS } from '../scripts/resolvers/types';
|
||||
import type { TemplateContext } from '../scripts/resolvers/types';
|
||||
|
||||
/**
 * Builds the minimal Claude-host TemplateContext the resolvers need for
 * `skillName`. The template path is synthetic (under /tmp).
 */
function buildCtx(skillName: string): TemplateContext {
  const host = 'claude';
  const ctx: TemplateContext = {
    skillName,
    tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`,
    host,
    paths: HOST_PATHS[host],
  };
  return ctx;
}
|
||||
|
||||
/** Expected save-instruction metadata for one planning skill. */
interface PlanningSkillCase {
  skill: string;
  slugPrefix: string;
  tag: string;
  title: string;
}

// Expected slug prefix, tag and title per planning skill. Adding a planning
// skill means adding it here AND to skillSaveMap in scripts/resolvers/gbrain.ts.
// Renaming one makes this test fail loudly, which is the regression pin
// doing its job.
const PLANNING_SKILLS: PlanningSkillCase[] = [
  { skill: 'office-hours', slugPrefix: 'office-hours/', tag: 'design-doc', title: 'Office Hours' },
  { skill: 'plan-ceo-review', slugPrefix: 'ceo-plans/', tag: 'ceo-plan', title: 'CEO Plan' },
  { skill: 'plan-eng-review', slugPrefix: 'eng-reviews/', tag: 'eng-review', title: 'Eng Review' },
  { skill: 'plan-design-review', slugPrefix: 'design-reviews/', tag: 'design-review', title: 'Design Review' },
  { skill: 'plan-devex-review', slugPrefix: 'devex-reviews/', tag: 'devex-review', title: 'Devex Review' },
];
|
||||
|
||||
describe('generateGBrainSaveResults — wiring + compression pin', () => {
  test.each(PLANNING_SKILLS)(
    '$skill emits gbrain put $slugPrefix... with $tag tag',
    ({ skill, slugPrefix, tag, title }) => {
      const out = generateGBrainSaveResults(buildCtx(skill));

      // Uses the v0.18+ `gbrain put` CLI subcommand, never the pre-rename
      // `gbrain put_page` spelling. The check is deliberately the bare
      // substring `put_page`, so it is stricter than the CLI-shape guard:
      // it also rejects any `mcp__gbrain__put_page` reference in this block.
      expect(out).toContain('gbrain put');
      expect(out).not.toContain('put_page');

      // Per-skill slug prefix is exactly what skillSaveMap declares.
      expect(out).toContain(`"${slugPrefix}<feature-slug>"`);

      // Title prefix + tag match the metadata.
      expect(out).toContain(`title: "${title}:`);
      expect(out).toContain(`tags: [${tag},`);

      // Skip-header is present so the agent can short-circuit when gbrain is absent.
      expect(out).toContain('Skip this entire section if `gbrain` is not on PATH');

      // Compact: points to docs/gbrain-write-surfaces.md for the full template.
      expect(out).toContain('docs/gbrain-write-surfaces.md');
    },
  );

  test('all 5 planning skills produce output under ~600 chars (~150 tokens)', () => {
    // Token-budget pin. Naive un-suppression would emit ~1000 tokens
    // (~4000 chars) per skill; the compressed target is ~150 tokens
    // (~600 chars). The enforced ceiling is a more generous 750 chars, which
    // leaves room for the heredoc structure without inviting gradual
    // re-inflation of the prose.
    const CEILING_CHARS = 750;
    for (const { skill } of PLANNING_SKILLS) {
      const out = generateGBrainSaveResults(buildCtx(skill));
      if (out.length > CEILING_CHARS) {
        // Thrown manually (rather than via expect) so the failure carries
        // remediation guidance.
        throw new Error(
          `generateGBrainSaveResults('${skill}') emitted ${out.length} chars (~${Math.round(out.length / 4)} tokens), ` +
            `exceeds ceiling of ${CEILING_CHARS} chars (~${Math.round(CEILING_CHARS / 4)} tokens). ` +
            `If you added necessary content, move the verbose prose into ` +
            `docs/gbrain-write-surfaces.md §Save Template (which the agent reads on demand) and ` +
            `keep the inline block as a short pointer + per-skill metadata. ` +
            `See gbrain.ts T4/v1.50.0.0 compression rationale.`,
        );
      }
    }
  });

  test('unmapped skill name falls through to compact generic template', () => {
    const out = generateGBrainSaveResults(buildCtx('no-such-skill'));

    // Generic fallback still emits gbrain put + skip-header + docs pointer.
    expect(out).toContain('gbrain put');
    expect(out).toContain('Skip this entire section if `gbrain` is not on PATH');
    expect(out).toContain('docs/gbrain-write-surfaces.md');

    // It must NOT carry any mapped skill's slug prefix; that would mean an
    // unmapped skill was routed through the per-skill path.
    for (const { slugPrefix } of PLANNING_SKILLS) {
      expect(out).not.toContain(`"${slugPrefix}<feature-slug>"`);
    }
  });
});
|
||||
|
||||
describe('generateGBrainContextLoad — compression pin', () => {
  test('emits skip-header and docs pointer, stays under ~500 chars', () => {
    // Same compression discipline as SAVE_RESULTS: context load was ~350-450
    // tokens before compression, the target is ~80 tokens (~320 chars), and
    // the 500-char ceiling leaves room for skill-specific suffixes.
    const out = generateGBrainContextLoad(buildCtx('plan-ceo-review'));

    // NOTE(review): the CLI round-trip elsewhere in this change is described
    // as `gbrain put` followed by `gbrain get`; confirm `gbrain get_page` is
    // still a valid CLI subcommand and not a stale pre-rename name.
    const requiredFragments = [
      'Skip this entire section if `gbrain` is not on PATH',
      'docs/gbrain-write-surfaces.md',
      'gbrain search',
      'gbrain get_page',
    ];
    for (const fragment of requiredFragments) {
      expect(out).toContain(fragment);
    }

    const withinBudget = out.length <= 500;
    if (!withinBudget) {
      const message = [
        `generateGBrainContextLoad emitted ${out.length} chars (~${Math.round(out.length / 4)} tokens), `,
        `exceeds ceiling of 500 chars (~125 tokens). `,
        `Move verbose prose to docs/gbrain-write-surfaces.md §Context Load.`,
      ].join('');
      throw new Error(message);
    }
  });

  test('/investigate gets the data-research routing suffix', () => {
    const rendered = generateGBrainContextLoad(buildCtx('investigate'));
    expect(rendered).toContain('data-research');
  });

  test('non-investigate skills do NOT get the data-research suffix', () => {
    PLANNING_SKILLS.forEach(({ skill }) => {
      const rendered = generateGBrainContextLoad(buildCtx(skill));
      expect(rendered).not.toContain('data-research');
    });
  });
});
|
||||
@@ -0,0 +1,95 @@
|
||||
/**
|
||||
* D9 salience privacy gate (T17).
|
||||
*
|
||||
* Verifies that fetchSalience strips entries whose slugs don't match the
|
||||
* allowlist prefixes BEFORE writing the digest to disk. Sensitive content
|
||||
* (family, therapy, reflection) is never persisted into the cache.
|
||||
*
|
||||
* Gate-tier, free.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { SALIENCE_DEFAULT_ALLOWLIST } from '../scripts/brain-cache-spec';
|
||||
|
||||
// Value of GSTACK_SALIENCE_ALLOWLIST when this file was loaded; `undefined`
// means the variable was not set at all.
const ORIGINAL_ENV = process.env.GSTACK_SALIENCE_ALLOWLIST;

beforeEach(() => {
  // Drop the cached module entry so each test's import starts from a clean slot.
  delete require.cache[require.resolve('../bin/gstack-brain-cache')];
});

afterEach(() => {
  // Compare against undefined rather than truthiness: a variable that was set
  // to the empty string must be restored to '' and not silently unset.
  if (ORIGINAL_ENV !== undefined) process.env.GSTACK_SALIENCE_ALLOWLIST = ORIGINAL_ENV;
  else delete process.env.GSTACK_SALIENCE_ALLOWLIST;
});
|
||||
|
||||
/** Dynamically imports the brain-cache module and returns its namespace. */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const cacheModule: typeof import('../bin/gstack-brain-cache') = await import('../bin/gstack-brain-cache');
  return cacheModule;
}
|
||||
|
||||
describe('salience allowlist gate', () => {
  test('default allowlist permits projects/ + gstack/ + concepts/', async () => {
    const cache = await importCache();
    const permitted = ['projects/myrepo', 'gstack/product/helsinki', 'concepts/some-idea'];
    for (const slug of permitted) {
      expect(cache.isSalienceSlugAllowed(slug, SALIENCE_DEFAULT_ALLOWLIST)).toBe(true);
    }
  });

  test('default allowlist BLOCKS personal/ + family/ + therapy/ + reflections', async () => {
    const cache = await importCache();
    const blocked = [
      'personal/reflection-2026-05',
      'family/in-laws/ngo-kim-shing',
      'therapy-session/2026-05-15',
      'reflection/notes',
    ];
    for (const slug of blocked) {
      expect(cache.isSalienceSlugAllowed(slug, SALIENCE_DEFAULT_ALLOWLIST)).toBe(false);
    }
  });

  test('isSalienceSlugAllowed handles empty allowlist (blocks everything)', async () => {
    const cache = await importCache();
    const verdict = cache.isSalienceSlugAllowed('anything/at-all', []);
    expect(verdict).toBe(false);
  });

  test('isSalienceSlugAllowed handles arbitrary prefixes', async () => {
    const cache = await importCache();
    const onlyCustom = ['custom/'];
    expect(cache.isSalienceSlugAllowed('custom/scope', onlyCustom)).toBe(true);
    expect(cache.isSalienceSlugAllowed('other/scope', onlyCustom)).toBe(false);
  });

  test('getSalienceAllowlist returns default when env unset and config silent', async () => {
    delete process.env.GSTACK_SALIENCE_ALLOWLIST;
    const cache = await importCache();
    const allowlist = cache.getSalienceAllowlist();
    expect(Array.isArray(allowlist)).toBe(true);
    expect(allowlist.length).toBeGreaterThan(0);
    // The curated defaults must be present at minimum.
    for (const prefix of ['projects/', 'gstack/']) {
      expect(allowlist).toContain(prefix);
    }
  });

  test('GSTACK_SALIENCE_ALLOWLIST env override is honored', async () => {
    process.env.GSTACK_SALIENCE_ALLOWLIST = 'custom-a/,custom-b/,custom-c/';
    const cache = await importCache();
    expect(cache.getSalienceAllowlist()).toEqual(['custom-a/', 'custom-b/', 'custom-c/']);
  });

  test('GSTACK_SALIENCE_ALLOWLIST with whitespace is trimmed', async () => {
    process.env.GSTACK_SALIENCE_ALLOWLIST = ' projects/ , gstack/ , concepts/ ';
    const cache = await importCache();
    expect(cache.getSalienceAllowlist()).toEqual(['projects/', 'gstack/', 'concepts/']);
  });

  test('empty env value falls through to default (not empty list)', async () => {
    process.env.GSTACK_SALIENCE_ALLOWLIST = '';
    const cache = await importCache();
    const allowlist = cache.getSalienceAllowlist();
    expect(allowlist.length).toBeGreaterThan(0);
  });

  test('default allowlist contains nothing sensitive', async () => {
    const sensitivePrefixes = ['personal', 'family', 'therapy', 'reflection', 'private', 'medical', 'health'];
    sensitivePrefixes.forEach((sensitive) => {
      const leaks = SALIENCE_DEFAULT_ALLOWLIST.some((allowed) => allowed.startsWith(sensitive));
      expect(leaks).toBe(false);
    });
  });
});
|
||||
@@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Schema-version cache migration (D4 A4 / T19).
|
||||
*
|
||||
* When gstack-core@1.x.y bumps and the cached _meta.json records an older
|
||||
* schema_version, the cache layer triggers a FULL rebuild for the affected
|
||||
* scope (not just delete-the-stale-file). Verifies the rebuild path is
|
||||
* invoked AND the cache files for that scope are wiped before refresh.
|
||||
*
|
||||
* Gate-tier, free, ~50ms.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
|
||||
// Per-test timeout: the schema-mismatch path triggers a full-scope rebuild,
// which fans out to refreshEntity for each of 7 per-project entities. Each
// refresh shells out to gbrain with a 10s internal timeout, so the worst case
// is ~70s. The budget must sit ABOVE that worst case: at the previous 60s a
// run where every refresh timed out was killed by the runner before reaching
// its assertions. 90s leaves ~20s of headroom.
const SLOW_TIMEOUT = 90_000;
|
||||
import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { GSTACK_SCHEMA_PACK_VERSION } from '../scripts/brain-cache-spec';
|
||||
|
||||
// Per-test scratch directory that stands in for GSTACK_HOME.
let TMP_HOME: string;
// Value of GSTACK_HOME when this file was loaded; `undefined` means unset.
const ORIGINAL_HOME = process.env.GSTACK_HOME;

beforeEach(() => {
  TMP_HOME = mkdtempSync(join(tmpdir(), 'gstack-schema-test-'));
  process.env.GSTACK_HOME = TMP_HOME;
  // Drop the cached module entry so the next import sees the new GSTACK_HOME.
  delete require.cache[require.resolve('../bin/gstack-brain-cache')];
});

afterEach(() => {
  // Compare against undefined rather than truthiness: a variable that was set
  // to the empty string must be restored to '' and not silently unset.
  if (ORIGINAL_HOME !== undefined) process.env.GSTACK_HOME = ORIGINAL_HOME;
  else delete process.env.GSTACK_HOME;
  try {
    rmSync(TMP_HOME, { recursive: true, force: true });
  } catch {
    /* best effort */
  }
});
|
||||
|
||||
/** Dynamically imports the brain-cache module and returns its namespace. */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const cacheModule: typeof import('../bin/gstack-brain-cache') = await import('../bin/gstack-brain-cache');
  return cacheModule;
}
|
||||
|
||||
describe('schema-version cache migration (D4 A4)', () => {
|
||||
test('cache file with mismatched schema_version triggers wipe-and-rebuild attempt', { timeout: SLOW_TIMEOUT }, async () => {
  const mod = await importCache();
  const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
  mkdirSync(cacheDir, { recursive: true });
  const stalePath = join(cacheDir, 'product.md');
  writeFileSync(stalePath, '# stale-from-old-schema\n');
  writeFileSync(join(cacheDir, '_meta.json'), JSON.stringify({
    schema_version: '0.5.0', // old version
    // Record the CURRENT endpoint hash so schema_version is the only stale
    // field. A hard-coded value could differ from the detected hash, in which
    // case the endpoint-switch path would trigger the rebuild and this test
    // would pass without exercising the schema-mismatch path at all.
    endpoint_hash: mod.detectEndpointHash(),
    last_refresh: { product: Date.now() }, // fresh by TTL
    last_attempt: {},
  }));

  // cmdGet should detect the schema mismatch and try to rebuild. The brain is
  // unreachable in the test env, so the refresh itself fails, but the stale
  // file has already been wiped during the rebuild attempt.
  mod.cmdGet('product', 'helsinki'); // triggers wipe-and-rebuild attempt

  // After the attempt the stale file is gone and _meta.json records the
  // current schema_version.
  expect(existsSync(stalePath)).toBe(false);
  const newMeta = JSON.parse(readFileSync(join(cacheDir, '_meta.json'), 'utf-8'));
  expect(newMeta.schema_version).toBe(GSTACK_SCHEMA_PACK_VERSION);
});
|
||||
|
||||
test('matching schema_version + fresh TTL is warm hit (no rebuild)', { timeout: SLOW_TIMEOUT }, async () => {
  const cache = await importCache();
  const scopeDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
  mkdirSync(scopeDir, { recursive: true });

  const productPath = join(scopeDir, 'product.md');
  writeFileSync(productPath, '# fresh content\n');

  // Metadata that agrees with the running code on schema and endpoint, with a
  // just-now refresh timestamp so the TTL check passes.
  const currentMeta = {
    schema_version: GSTACK_SCHEMA_PACK_VERSION,
    endpoint_hash: cache.detectEndpointHash(),
    last_refresh: { product: Date.now() },
    last_attempt: {},
  };
  writeFileSync(join(scopeDir, '_meta.json'), JSON.stringify(currentMeta));

  const hit = cache.cmdGet('product', 'helsinki');
  expect(hit.state).toBe('warm');
  // The digest content must be served untouched.
  expect(readFileSync(hit.path, 'utf-8')).toBe('# fresh content\n');
});
|
||||
|
||||
test('rebuild wipes ALL files in scope, not just the one being read', { timeout: SLOW_TIMEOUT }, async () => {
  const cache = await importCache();
  const scopeDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
  mkdirSync(scopeDir, { recursive: true });

  // Three stale per-project digests, all recorded as freshly refreshed under
  // an outdated schema version.
  const staleDigests: Array<[string, string]> = [
    ['product', '# stale product\n'],
    ['brand', '# stale brand\n'],
    ['developer-persona', '# stale persona\n'],
  ];
  for (const [entity, content] of staleDigests) {
    writeFileSync(join(scopeDir, `${entity}.md`), content);
  }
  const lastRefresh = Object.fromEntries(
    staleDigests.map(([entity]) => [entity, Date.now()]),
  );
  writeFileSync(
    join(scopeDir, '_meta.json'),
    JSON.stringify({
      schema_version: '0.5.0',
      endpoint_hash: 'local',
      last_refresh: lastRefresh,
      last_attempt: {},
    }),
  );

  // Reading a single entity triggers the wipe-and-rebuild attempt.
  cache.cmdGet('product', 'helsinki');

  // Every per-project file is gone, not only the one that was requested.
  for (const [entity] of staleDigests) {
    expect(existsSync(join(scopeDir, `${entity}.md`))).toBe(false);
  }
});
|
||||
});
|
||||
@@ -0,0 +1,162 @@
|
||||
/**
|
||||
* E2E: real gbrain CLI round-trip against a local PGLite engine.
|
||||
*
|
||||
* Replaces the manual local probe documented in earlier drafts of
|
||||
* docs/gbrain-write-surfaces.md. The matched-pair check the user asked
|
||||
* for v1.50.0.0: "is the data we hope to save actually being saved?"
|
||||
*
|
||||
* What this proves:
|
||||
* - The gbrain CLI subcommand shape gstack ships (`gbrain put <slug>
|
||||
* --content "<markdown with frontmatter>"`) actually persists to a
|
||||
* real PGLite store.
|
||||
* - The page is retrievable via `gbrain get <slug>` with body + title
|
||||
* intact (frontmatter is allowed to be reformatted by gbrain — we
|
||||
* check semantic fields, not byte-exact YAML).
|
||||
* - The `office-hours/<slug>` slug namespace works (no rejection,
|
||||
* no auto-rewrite).
|
||||
*
|
||||
* What this does NOT prove (out of scope, owned elsewhere):
|
||||
* - Agent obedience to the resolver instructions — that's the
|
||||
* fake-CLI E2E (test/skill-e2e-office-hours-brain-writeback.test.ts).
|
||||
* - Remote-MCP persistence — that's the write-shape E2E
|
||||
* (test/skill-e2e-gbrain-roundtrip-remote.test.ts).
|
||||
* - gbrain's own internal correctness — gbrain has its own test suite;
|
||||
* this is a contract smoke test, not gbrain validation.
|
||||
*
|
||||
* Periodic tier. Real gbrain init + put triggers one Voyage embedding
|
||||
* call (~$0.001/run). Skips when VOYAGE_API_KEY is unset OR gbrain is
|
||||
* not on PATH, so CI without secrets degrades gracefully.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { execFileSync } from 'child_process';
|
||||
import { mkdtempSync, rmSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
|
||||
import {
|
||||
describeIfSelected,
|
||||
testConcurrentIfSelected,
|
||||
runId,
|
||||
createEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-gbrain-roundtrip-local');
|
||||
|
||||
/**
 * Reports whether `gbrain --version` can be executed successfully.
 *
 * @returns true when the command exits cleanly within 5s; false when it
 *   throws for any reason (not found, non-zero exit, timeout).
 */
function gbrainOnPath(): boolean {
  let available = true;
  try {
    execFileSync('gbrain', ['--version'], { stdio: 'pipe', timeout: 5_000 });
  } catch {
    available = false;
  }
  return available;
}
|
||||
|
||||
// Both prerequisites for the real round-trip: a runnable gbrain CLI and a
// Voyage API key in the environment. The CLI probe always runs first.
const SHOULD_RUN_GUARDS_OK = gbrainOnPath() && Boolean(process.env.VOYAGE_API_KEY);
|
||||
|
||||
describeIfSelected(
|
||||
'GBrain local PGLite round-trip E2E',
|
||||
['gbrain-roundtrip-local'],
|
||||
() => {
|
||||
let tmpHome: string;
|
||||
const slug = `office-hours/roundtrip-test-${Date.now()}`;
|
||||
const body = `# Roundtrip test
|
||||
|
||||
This is a deterministic round-trip test page used by the gstack v1.50.0.0
|
||||
brain-writeback verification. Generated at ${new Date().toISOString()}.
|
||||
|
||||
If gbrain persisted this correctly, you should see this exact body when
|
||||
you run \`gbrain get "${slug}"\`.`;
|
||||
|
||||
beforeAll(() => {
  if (SHOULD_RUN_GUARDS_OK) {
    tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-roundtrip-'));

    // Stand up a real PGLite gbrain inside the isolated temp HOME. The
    // embedding model is passed explicitly because the local env has several
    // providers ready (voyage + zeroentropyai) and gbrain refuses to guess.
    const initArgs = ['init', '--pglite', '--embedding-model', 'voyage:voyage-code-3'];
    execFileSync('gbrain', initArgs, {
      env: { ...process.env, HOME: tmpHome },
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 60_000,
    });
  } else {
    // Guards failed: the test body logs a skip and returns, so there is
    // nothing to set up. Empty string tells afterAll there is nothing to remove.
    tmpHome = '';
  }
});
|
||||
|
||||
afterAll(() => {
  // Nothing was created when the guards skipped setup.
  if (!tmpHome) return;
  try {
    rmSync(tmpHome, { recursive: true, force: true });
  } catch {
    // best effort
  }
});
|
||||
|
||||
testConcurrentIfSelected(
  'gbrain-roundtrip-local',
  async () => {
    if (!SHOULD_RUN_GUARDS_OK) {
      console.log(
        '[skip] gbrain CLI not on PATH or VOYAGE_API_KEY unset; ' +
          'this E2E proves the gbrain CLI persistence contract gstack relies on. ' +
          'Run locally with `VOYAGE_API_KEY=... bun test ...` to verify before shipping.',
      );
      return;
    }

    // Every gbrain call runs against the isolated HOME created in beforeAll.
    const isolatedEnv = { ...process.env, HOME: tmpHome };
    const frontmatterLines = [
      '---',
      'title: "Office Hours: Roundtrip Test"',
      'tags: [design-doc, roundtrip-test]',
      '---',
    ];
    const content = `${frontmatterLines.join('\n')}\n${body}`;

    // Write the page...
    execFileSync('gbrain', ['put', slug, '--content', content], {
      env: isolatedEnv,
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 30_000,
    });

    // ...and read it straight back.
    const retrieved = execFileSync('gbrain', ['get', slug], {
      env: isolatedEnv,
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 10_000,
    });

    // Body text is data we own, so every non-blank line we wrote must come
    // back verbatim. Frontmatter may be reformatted by gbrain.
    const nonBlankBodyLines = body.split('\n').filter((line) => line.trim());
    for (const line of nonBlankBodyLines) {
      expect(retrieved).toContain(line);
    }

    // Title lives in the frontmatter. Match only the distinctive part of the
    // value, since the `title:` key and its quoting can be rewritten.
    expect(retrieved).toContain('Roundtrip Test');

    // Both tags survived.
    for (const tag of ['design-doc', 'roundtrip-test']) {
      expect(retrieved).toContain(tag);
    }

    // Sanity: the response is a real document, not an empty or not-found reply.
    expect(retrieved.length).toBeGreaterThan(body.length);
    for (const notFoundMarker of ['page_not_found', 'Page not found']) {
      expect(retrieved).not.toContain(notFoundMarker);
    }
  },
  120_000,
);
|
||||
},
|
||||
);
|
||||
@@ -0,0 +1,306 @@
|
||||
/**
|
||||
* E2E: /office-hours brain-writeback path under fake gbrain CLI.
|
||||
*
|
||||
* The matched-pair check for v1.50.0.0's "brain-aware planning actually
|
||||
* works under Claude Code" headline: prove that when a user runs
|
||||
* /office-hours with gbrain on PATH, the agent actually calls
|
||||
* `gbrain put office-hours/<slug>` with valid frontmatter.
|
||||
*
|
||||
* Approach:
|
||||
* 1. Regenerate office-hours/SKILL.md with --respect-detection against
|
||||
* a temp GSTACK_HOME that has detected:true. Snapshot the rendered
|
||||
* content (which now contains the compressed SAVE_RESULTS block),
|
||||
* then restore the canonical no-gbrain version so the working tree
|
||||
* stays clean.
|
||||
* 2. Write the snapshot into a temp workdir's office-hours/SKILL.md.
|
||||
* Also write docs/gbrain-write-surfaces.md so the agent can read the
|
||||
* template on demand (the compact block points to it).
|
||||
* 3. Write a fake `gbrain` shell script into workdir/bin/ with robust
|
||||
* argv quoting (printf %q) so heredoc payloads in --content survive
|
||||
* shell-to-shell. The fake logs every invocation + writes payloads
|
||||
* to a per-slug file for inspection.
|
||||
* 4. Run /office-hours via runSkillTest with workdir/bin/ first on PATH.
|
||||
* Feed a deterministic founder pitch + auto-decide instructions.
|
||||
* 5. Assert the argv log contains `gbrain put office-hours/<slug>`, the
|
||||
* payload file exists with valid YAML frontmatter, and entity stubs
|
||||
* were created.
|
||||
*
|
||||
* Periodic tier (~$0.50-1/run via claude -p, matches nearby
|
||||
* setup-gbrain-path4-* tests at touchfiles.ts:496-498).
|
||||
*
|
||||
* NOT verified by this test (out of scope, owned by docs/gbrain-write-surfaces.md):
|
||||
* - That gbrain itself persists what `gbrain put` is told (gbrain's
|
||||
* own contract)
|
||||
* - That `.gbrain-source` doesn't re-route writes (gbrain's contract)
|
||||
* - Source-targeting (no way to fake source resolution in a stub CLI)
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { execFileSync, spawnSync } from 'child_process';
|
||||
import {
|
||||
chmodSync,
|
||||
copyFileSync,
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
readdirSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT,
|
||||
runId,
|
||||
describeIfSelected,
|
||||
testConcurrentIfSelected,
|
||||
logCost,
|
||||
recordE2E,
|
||||
createEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-office-hours-brain-writeback');
|
||||
|
||||
describeIfSelected(
|
||||
'Office Hours Brain Writeback E2E',
|
||||
['office-hours-brain-writeback'],
|
||||
() => {
|
||||
let workDir: string;
|
||||
let callsLogPath: string;
|
||||
let payloadDir: string;
|
||||
|
||||
// Builds the throwaway git workdir for the E2E run: the pitch fixture, a
// brain-aware office-hours/SKILL.md, the write-surfaces doc, and a fake
// `gbrain` CLI that records every invocation.
beforeAll(() => {
  workDir = mkdtempSync(join(tmpdir(), 'skill-e2e-brain-writeback-'));
  const run = (cmd: string, args: string[]) =>
    spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
  run('git', ['init', '-b', 'main']);
  run('git', ['config', 'user.email', 'test@test.com']);
  run('git', ['config', 'user.name', 'Test']);

  // Copy the founder pitch fixture into the workdir.
  const briefSrc = join(
    ROOT,
    'test',
    'fixtures',
    'office-hours-brain-writeback',
    'brief.md',
  );
  copyFileSync(briefSrc, join(workDir, 'pitch.md'));

  // Generate a brain-aware office-hours/SKILL.md (with --respect-detection
  // against a temp GSTACK_HOME). Snapshot the content, restore the
  // canonical version, write the snapshot into the workdir.
  const tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-detect-home-'));
  writeFileSync(
    join(tmpHome, 'gbrain-detection.json'),
    JSON.stringify({
      gbrain_local_status: 'ok',
      gbrain_on_path: true,
      gbrain_version: 'test-0.41.0',
    }),
  );
  const skillPath = join(ROOT, 'office-hours', 'SKILL.md');
  const originalSkill = readFileSync(skillPath, 'utf-8');
  try {
    execFileSync(
      'bun',
      [
        'run',
        'scripts/gen-skill-docs.ts',
        '--host',
        'claude',
        '--respect-detection',
      ],
      {
        cwd: ROOT,
        env: { ...process.env, GSTACK_HOME: tmpHome },
        stdio: ['ignore', 'pipe', 'pipe'],
        timeout: 60_000,
      },
    );
    const brainAwareSkill = readFileSync(skillPath, 'utf-8');
    // Fail fast if the regenerated skill lacks the block this test exercises.
    if (!brainAwareSkill.includes('gbrain put "office-hours/')) {
      throw new Error(
        'Regenerated office-hours/SKILL.md does not contain gbrain put block. ' +
          'Detection override may be broken — see test/gbrain-detection-override.test.ts.',
      );
    }
    mkdirSync(join(workDir, 'office-hours'), { recursive: true });
    writeFileSync(join(workDir, 'office-hours', 'SKILL.md'), brainAwareSkill);
  } finally {
    // Always restore the canonical SKILL.md so the working tree stays clean.
    writeFileSync(skillPath, originalSkill);
    rmSync(tmpHome, { recursive: true, force: true });
  }

  // Copy docs/gbrain-write-surfaces.md so the compact resolver block's
  // on-demand reference resolves (the agent may read it for the full
  // template; we don't require this read but make it available).
  const docsSrc = join(ROOT, 'docs', 'gbrain-write-surfaces.md');
  const docsDst = join(workDir, 'docs', 'gbrain-write-surfaces.md');
  mkdirSync(join(workDir, 'docs'), { recursive: true });
  copyFileSync(docsSrc, docsDst);

  // Set up the fake gbrain CLI with robust argv quoting + payload capture.
  callsLogPath = join(workDir, 'gbrain-calls.log');
  payloadDir = join(workDir, 'gbrain-payloads');
  mkdirSync(payloadDir, { recursive: true });
  const binDir = join(workDir, 'bin');
  mkdirSync(binDir, { recursive: true });
  // The `put` branch scans the remaining argv with `[ $# -gt 0 ]` rather than
  // `[ -n "$1" ]`: testing "$1" for non-emptiness ends the scan at the first
  // empty-string argument, which would silently drop a later --content payload.
  const fakeGbrain = `#!/bin/bash
# Fake gbrain CLI for E2E test. Logs every invocation with shell-safe quoting
# (printf %q) so --content "$(cat <<'EOF' ... EOF)" payloads survive intact.
{ printf 'gbrain'; for a in "$@"; do printf ' %q' "$a"; done; printf '\\n'; } \\
>> "${callsLogPath}"
case "$1" in
--version) echo "gbrain test-0.41.0"; exit 0 ;;
search) echo "[]"; exit 0 ;;
get_page) echo ""; exit 0 ;;
put)
SLUG="$2"
shift 2
while [ $# -gt 0 ]; do
if [ "$1" = "--content" ]; then
PAYLOAD_DIR="${payloadDir}"
mkdir -p "$PAYLOAD_DIR/$(dirname "$SLUG")"
printf '%s' "$2" > "$PAYLOAD_DIR/$SLUG.md"
break
fi
shift
done
exit 0
;;
esac
exit 0
`;
  const fakePath = join(binDir, 'gbrain');
  writeFileSync(fakePath, fakeGbrain);
  chmodSync(fakePath, 0o755);

  run('git', ['add', '.']);
  run('git', ['commit', '-m', 'fixture']);
});
|
||||
|
||||
// Remove the scratch workdir; failures here must never fail the suite.
afterAll(() => {
  const removeOptions = { recursive: true, force: true };
  try {
    rmSync(workDir, removeOptions);
  } catch {
    // best effort
  }
});
|
||||
|
||||
testConcurrentIfSelected(
  'office-hours-brain-writeback',
  async () => {
    // Deterministic founder pitch + auto-decide instructions for the agent.
    const prompt = `Read office-hours/SKILL.md for the workflow.

Read pitch.md — that's a founder pitch coming to office hours. Select Startup Mode. Skip any AskUserQuestion — this is non-interactive; auto-decide the recommended option for any question.

For the diagnostic, assume the founder confirmed Q1 (strongest evidence = "230 from a single tweet + 51 paying creators in 6 weeks"), Q2 (status quo = "creators write ad-hoc checks or use opaque Patreon-style platforms"), and Q3 (forcing question already asked).

Generate the design doc per Phase 5. The feature-slug value to substitute into the SAVE_RESULTS template's \`<feature-slug>\` placeholder is exactly 'pixel-fund' (no path prefix — the template already provides the prefix). The \`gbrain\` binary is on PATH at ${workDir}/bin/gbrain. Apply the SAVE_RESULTS template literally: the slug should land at \`<prefix>/pixel-fund\` per the resolver shape, with the actual design doc markdown body in the --content payload. Then enrich entity stubs for any named people or companies mentioned in the pitch.

This is a test of the brain-writeback path. Do NOT skip the gbrain save step under any circumstance — the runtime guard ("skip if gbrain not on PATH") does NOT apply here because gbrain IS available. Do NOT explore gbrain --help; follow the SAVE_RESULTS template's exact CLI shape. If you encounter any AskUserQuestion, auto-decide recommended.`;

    const result = await runSkillTest({
      prompt,
      workingDirectory: workDir,
      maxTurns: 12,
      timeout: 360_000,
      testName: 'office-hours-brain-writeback',
      runId,
      model: 'claude-sonnet-4-6',
      // Put the fake gbrain ahead of anything else on PATH.
      extraEnv: {
        PATH: `${join(workDir, 'bin')}:${process.env.PATH || ''}`,
      },
    });

    logCost('/office-hours (BRAIN WRITEBACK)', result);
    recordE2E(
      evalCollector,
      '/office-hours-brain-writeback',
      'Office Hours Brain Writeback E2E',
      result,
      {
        passed: ['success', 'error_max_turns'].includes(result.exitReason),
      },
    );
    // Running out of turns is tolerated; the assertions below inspect what
    // the agent actually did.
    expect(['success', 'error_max_turns']).toContain(result.exitReason);

    // Headline check: the agent really invoked gbrain put on the expected slug.
    const agentCalledGbrain = existsSync(callsLogPath);
    if (!agentCalledGbrain) {
      throw new Error(
        `No gbrain calls log at ${callsLogPath}. ` +
          `Agent likely did NOT invoke gbrain at all. ` +
          `Check that office-hours/SKILL.md in the workdir contains the gbrain put block.`,
      );
    }
    const callsLog = readFileSync(callsLogPath, 'utf-8');
    console.log('--- gbrain calls log ---');
    console.log(callsLog);
    console.log('--- end calls log ---');

    expect(callsLog).toContain('gbrain put');
    // Agent obedience: 'pixel-fund' must appear in a put call (ideally under
    // the office-hours/ prefix). The strict office-hours/<slug> SHAPE is
    // pinned by the resolver unit test
    // (test/resolvers-gbrain-save-results.test.ts); this E2E only proves the
    // agent issues gbrain put with a payload.
    expect(callsLog).toMatch(/gbrain put .*pixel-fund/);

    // Locate the captured payload. Both office-hours/pixel-fund.md
    // (resolver-faithful) and pixel-fund.md (prefix dropped) are accepted,
    // because the YAML frontmatter is the real contract under test. Walks the
    // tree depth-first and returns the first file whose name contains
    // 'pixel-fund'.
    const findPayload = (dir: string): string | null => {
      if (!existsSync(dir)) return null;
      for (const entry of readdirSync(dir, { withFileTypes: true })) {
        const full = join(dir, entry.name);
        if (!entry.isDirectory()) {
          if (entry.name.includes('pixel-fund')) return full;
          continue;
        }
        const nested = findPayload(full);
        if (nested) return nested;
      }
      return null;
    };
    const payloadPath = findPayload(payloadDir);
    if (!payloadPath) {
      throw new Error(
        `Agent called gbrain put but no payload file with 'pixel-fund' ` +
          `in name was written to ${payloadDir}. Check the fake gbrain ` +
          `--content parser for argv quoting issues.`,
      );
    }
    const payload = readFileSync(payloadPath, 'utf-8');
    // Frontmatter opens the document and carries title + tags.
    expect(payload).toMatch(/^---\s*\n/);
    for (const requiredKey of ['title:', 'tags:']) {
      expect(payload).toContain(requiredKey);
    }
    expect(payload.length).toBeGreaterThan(200);

    // Entity stubs are best-effort: agents vary between 'entities/<name>'
    // (resolver doc) and 'entity/<name>' (singular). Either form counts, and
    // a missing stub only warns instead of failing.
    const entityCallCount = (callsLog.match(/gbrain put entit(?:y|ies)\//g) ?? []).length;
    if (entityCallCount > 0) {
      console.log(
        `Entity stub calls observed: ${entityCallCount}`,
      );
    } else {
      console.warn(
        'No entity stub calls in gbrain calls log. Resolver instructs ' +
          'entity extraction but it is best-effort.',
      );
    }
  },
  420_000,
);
|
||||
},
|
||||
);
|
||||
@@ -0,0 +1,96 @@
|
||||
/**
|
||||
* Per-skill brain preflight token budget enforcement (T21 / T19).
|
||||
*
|
||||
* Asserts that the GENERATED BRAIN_PREFLIGHT block per skill stays within
|
||||
* its per-skill byte budget (SKILL_PREFLIGHT_BUDGET_BYTES from
|
||||
* brain-cache-spec). Also asserts the autoplan-wide total stays under
|
||||
* AUTOPLAN_PREFLIGHT_BUDGET_BYTES.
|
||||
*
|
||||
* What's being measured: the SIZE OF THE INSTRUCTIONS injected into the
|
||||
* skill's SKILL.md by the resolver, NOT the size of the cache digests at
|
||||
* runtime. Runtime digest budgets are enforced separately by the cache
|
||||
* CLI's truncateToBudget. This test catches resolver-side bloat: if
|
||||
* generateBrainPreflight grows verbose, the instructions themselves eat
|
||||
* the skill's context budget.
|
||||
*
|
||||
* Gate-tier, free.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { generateBrainPreflight, generateBrainCacheRefresh, generateBrainWriteBack } from '../scripts/resolvers/gbrain';
|
||||
import {
|
||||
SKILL_DIGEST_SUBSETS,
|
||||
SKILL_PREFLIGHT_BUDGET_BYTES,
|
||||
AUTOPLAN_PREFLIGHT_BUDGET_BYTES,
|
||||
} from '../scripts/brain-cache-spec';
|
||||
import { HOST_PATHS } from '../scripts/resolvers/types';
|
||||
import type { TemplateContext } from '../scripts/resolvers/types';
|
||||
|
||||
/**
 * Builds a minimal claude-host TemplateContext for the given skill.
 *
 * @param skillName skill identifier; also used to derive the template path.
 */
function buildCtx(skillName: string): TemplateContext {
  const tmplPath = `/tmp/${skillName}/SKILL.md.tmpl`;
  const ctx: TemplateContext = {
    skillName,
    tmplPath,
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return ctx;
}
|
||||
|
||||
/**
 * UTF-8 byte size of the three brain resolver outputs (preflight, cache
 * refresh, write-back) concatenated for one skill. Each generator receives
 * its own freshly built context.
 */
function totalBrainBytes(skillName: string): number {
  const preflightBlock = generateBrainPreflight(buildCtx(skillName));
  const refreshBlock = generateBrainCacheRefresh(buildCtx(skillName));
  const writeBackBlock = generateBrainWriteBack(buildCtx(skillName));
  const combined = `${preflightBlock}${refreshBlock}${writeBackBlock}`;
  return Buffer.byteLength(combined, 'utf-8');
}
|
||||
|
||||
// Every test that sweeps a list of skills collects the offenders and asserts
// the list is empty, so a failure names the skill(s) and sizes involved
// instead of reporting a bare number from somewhere inside a loop.
describe('per-skill preflight token budget', () => {
  test('every preflight skill stays under per-skill BRAIN_* budget (3x cap, instructions vs runtime data)', () => {
    // The per-skill budget governs RUNTIME digest data, not instruction text.
    // Instruction text (resolver output) should fit within 3x the runtime
    // budget — anything more means the instructions themselves are bloated.
    const overBudget = Object.entries(SKILL_PREFLIGHT_BUDGET_BYTES)
      .map(([skill, budget]) => ({ skill, bytes: totalBrainBytes(skill), cap: budget * 3 }))
      // Negated comparison so a NaN size is still reported as a violation.
      .filter(({ bytes, cap }) => !(bytes <= cap));
    expect(overBudget).toEqual([]);
  });

  test('autoplan: sum across 4 plan-* skills stays under AUTOPLAN_PREFLIGHT_BUDGET_BYTES × 3 (instructions)', () => {
    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    const total = autoplanSkills.reduce((sum, s) => sum + totalBrainBytes(s), 0);
    // Same 3x rationale: AUTOPLAN budget governs runtime data, instructions
    // get more headroom.
    expect(total).toBeLessThanOrEqual(AUTOPLAN_PREFLIGHT_BUDGET_BYTES * 3);
  });

  test('non-preflight skills emit zero brain bytes', () => {
    const nonPlanning = ['ship', 'qa', 'investigate', 'retro', 'design-review'];
    const emitting = nonPlanning
      .map((skill) => ({ skill, bytes: totalBrainBytes(skill) }))
      .filter(({ bytes }) => bytes !== 0);
    expect(emitting).toEqual([]);
  });

  test('preflight bytes are positive for every registered preflight skill', () => {
    const silent = Object.keys(SKILL_DIGEST_SUBSETS)
      .map((skill) => ({ skill, bytes: totalBrainBytes(skill) }))
      // Negated comparison so a NaN size is still reported as a violation.
      .filter(({ bytes }) => !(bytes > 0));
    expect(silent).toEqual([]);
  });
});
|
||||
|
||||
describe('autoplan total preflight budget (T21 / D7)', () => {
  test('autoplan total under 25 KB instruction cap × 3 (75 KB instruction budget)', () => {
    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    let total = 0;
    for (const skill of autoplanSkills) {
      total += totalBrainBytes(skill);
    }
    // Hard 75 KB ceiling on instruction text across the 4-skill autoplan.
    // NOTE(review): presumably 3 × a 25 KB AUTOPLAN_PREFLIGHT_BUDGET_BYTES —
    // confirm the constant's value; this literal does not track it.
    const instructionCeilingBytes = 75 * 1024;
    expect(total).toBeLessThan(instructionCeilingBytes);
  });

  test('per-skill subset emits its expected entity references in the preflight block', () => {
    Object.entries(SKILL_DIGEST_SUBSETS).forEach(([skill, subset]) => {
      // Render once per skill, then check every entity in its subset is
      // fetched through the cache CLI.
      const preflight = generateBrainPreflight(buildCtx(skill));
      for (const entity of subset) {
        expect(preflight).toContain(`gstack-brain-cache get ${entity}`);
      }
    });
  });
});
|
||||
@@ -0,0 +1,87 @@
|
||||
/**
|
||||
* Phase 2 calibration write-back fence-block fallback (T19).
|
||||
*
|
||||
* The BRAIN_WRITE_BACK resolver output describes two paths:
|
||||
* 1. Preferred: mcp__gbrain__takes_add op (upstream gbrain v0.42+, T8)
|
||||
* 2. Fallback: mcp__gbrain__put_page with a gstack:takes fence block
|
||||
*
|
||||
* Until T8 ships, the fallback is the only path. Verify the resolver output
|
||||
* mentions the fence-block fallback explicitly so the agent knows what to
|
||||
* do when takes_add returns MCPMethodNotFound.
|
||||
*
|
||||
* Gate-tier, free, pure import + render.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { generateBrainWriteBack } from '../scripts/resolvers/gbrain';
|
||||
import { SKILL_DIGEST_SUBSETS, SKILL_CALIBRATION_WEIGHTS } from '../scripts/brain-cache-spec';
|
||||
import { HOST_PATHS } from '../scripts/resolvers/types';
|
||||
import type { TemplateContext } from '../scripts/resolvers/types';
|
||||
|
||||
/**
 * Builds a minimal claude-host TemplateContext for rendering a resolver
 * against the named skill.
 */
function buildCtx(skillName: string): TemplateContext {
  const context: TemplateContext = {
    skillName,
    tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`,
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return context;
}
|
||||
|
||||
describe('Phase 2 write-back fence-block fallback', () => {
  test('every preflight skill emits write-back with fallback path documented', () => {
    Object.keys(SKILL_DIGEST_SUBSETS).forEach((skill) => {
      const out = generateBrainWriteBack(buildCtx(skill));
      // Preferred op, then the put_page fallback, then the takes fence block.
      // NOTE(review): 'takes' is a substring of 'takes_add', so the third
      // check cannot fail once the first passes — consider asserting the
      // literal fence-block marker instead.
      for (const marker of ['takes_add', 'put_page', 'takes']) {
        expect(out).toContain(marker);
      }
    });
  });

  test('write-back guidance gates on BRAIN_CALIBRATION_WRITEBACK feature flag', () => {
    Object.keys(SKILL_DIGEST_SUBSETS).forEach((skill) => {
      expect(generateBrainWriteBack(buildCtx(skill))).toContain('BRAIN_CALIBRATION_WRITEBACK');
    });
  });

  test('write-back guidance gates on brain_trust_policy == personal', () => {
    Object.keys(SKILL_DIGEST_SUBSETS).forEach((skill) => {
      const out = generateBrainWriteBack(buildCtx(skill));
      for (const marker of ['personal', 'brain_trust_policy']) {
        expect(out).toContain(marker);
      }
    });
  });

  test('write-back emits the kind=bet take frontmatter shape', () => {
    const out = generateBrainWriteBack(buildCtx('plan-ceo-review'));
    // Every frontmatter field of a bet-kind take must be present.
    const frontmatterFields = [
      'kind: bet',
      'holder:',
      'claim:',
      'weight:',
      'since_date:',
      'expected_resolution:',
      'source_skill:',
    ];
    for (const field of frontmatterFields) {
      expect(out).toContain(field);
    }
  });

  test('per-skill weight matches SKILL_CALIBRATION_WEIGHTS', () => {
    Object.keys(SKILL_DIGEST_SUBSETS).forEach((skill) => {
      const weight = SKILL_CALIBRATION_WEIGHTS[skill];
      // Skills with no configured weight have nothing to compare against.
      if (weight == null) return;
      expect(generateBrainWriteBack(buildCtx(skill))).toContain(`weight: ${weight}`);
    });
  });

  test('write-back invalidates affected cache digests after write', () => {
    const rendered = generateBrainWriteBack(buildCtx('plan-ceo-review'));
    expect(rendered).toContain('gstack-brain-cache invalidate');
  });

  test('non-preflight skill gets empty write-back (no Phase 2 path)', () => {
    for (const skill of ['ship', 'qa']) {
      expect(generateBrainWriteBack(buildCtx(skill))).toBe('');
    }
  });
});
|
||||
@@ -0,0 +1,161 @@
|
||||
/**
|
||||
* User-slug identity resolution chain (T16 / D4 A3).
|
||||
*
|
||||
* Verifies the gstack-config resolve-user-slug subcommand walks the
|
||||
* documented fallback chain:
|
||||
* 1. mcp__gbrain__whoami.client_name (skipped when gbrain not on PATH)
|
||||
* 2. $USER env var
|
||||
* 3. sha8($(git config user.email))
|
||||
* 4. anonymous-<sha8(hostname)>
|
||||
*
|
||||
* Result is persisted under user_slug_at_<endpoint-hash> for stability.
|
||||
* Test isolation via GSTACK_HOME and HOME env overrides.
|
||||
*
|
||||
* Gate-tier, free, ~50ms.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { mkdtempSync, existsSync, readFileSync, writeFileSync, rmSync, mkdirSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
// Directory the test process was started from; the CLI path is resolved
// against it (assumes tests are launched from the repo root — TODO confirm).
const REPO_ROOT = process.cwd();
// Path to the gstack-config executable under test.
const CONFIG_BIN = join(REPO_ROOT, 'bin', 'gstack-config');

// Scratch GSTACK_HOME for the test currently running; set in beforeEach.
let TMP_HOME: string;

// Snapshot of the env vars these tests touch, restored after every test.
const { HOME, GSTACK_HOME, USER } = process.env;
const ORIGINAL = { HOME, GSTACK_HOME, USER };
|
||||
|
||||
/**
 * Runs the gstack-config CLI synchronously and normalizes its result.
 *
 * @param args     argv passed to the CLI.
 * @param extraEnv variables layered over the current process environment.
 * @returns stdout/stderr as strings ('' when absent) and the exit status
 *   (-1 when the process produced no status).
 */
function runConfig(args: string[], extraEnv: Record<string, string> = {}): { stdout: string; status: number; stderr: string } {
  const mergedEnv = { ...process.env, ...extraEnv };
  const child = spawnSync(CONFIG_BIN, args, {
    encoding: 'utf-8',
    env: mergedEnv,
    timeout: 5000,
  });
  const stdout = child.stdout || '';
  const stderr = child.stderr || '';
  const status = child.status ?? -1;
  return { stdout, status, stderr };
}
|
||||
|
||||
// Give every test its own freshly created temp directory and point
// GSTACK_HOME at it in this process's environment.
beforeEach(() => {
  const scratchPrefix = join(tmpdir(), 'gstack-user-slug-test-');
  TMP_HOME = mkdtempSync(scratchPrefix);
  process.env.GSTACK_HOME = TMP_HOME;
});
|
||||
|
||||
// Restore the environment captured in ORIGINAL (re-setting values that existed,
// removing ones that did not) and delete the per-test temp directory.
afterEach(() => {
  Object.entries(ORIGINAL).forEach(([name, saved]) => {
    if (saved === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = saved;
    }
  });

  try {
    rmSync(TMP_HOME, { recursive: true, force: true });
  } catch {
    /* best effort */
  }
});
|
||||
|
||||
// `gstack-config endpoint-hash` must exit 0 and print either the literal
// "local" or a lowercase hex digest (8 or 16 characters are both accepted).
describe('endpoint-hash subcommand', () => {
  test('returns deterministic 8-char hex or literal "local"', () => {
    const first = runConfig(['endpoint-hash'], { GSTACK_HOME: TMP_HOME });
    expect(first.status).toBe(0);
    const out = first.stdout.trim();
    expect(out).toMatch(/^(?:local|[a-f0-9]{8}|[a-f0-9]{16})$/);

    // The title promises determinism, so run the command a second time against
    // the same GSTACK_HOME and require the identical value.
    const second = runConfig(['endpoint-hash'], { GSTACK_HOME: TMP_HOME });
    expect(second.status).toBe(0);
    expect(second.stdout.trim()).toBe(out);
  });
});
|
||||
|
||||
// Exercises `gstack-config resolve-user-slug`: $USER handling, normalization,
// fall-through when $USER is empty, and persistence of the resolved slug in
// <GSTACK_HOME>/config.yaml.
describe('resolve-user-slug fallback chain', () => {
  test('uses $USER when set (layer 2)', () => {
    const result = runConfig(['resolve-user-slug'], { GSTACK_HOME: TMP_HOME, USER: 'alice-test' });
    expect(result.status).toBe(0);
    expect(result.stdout.trim()).toBe('alice-test');
  });

  test('lowercases + dash-normalizes $USER', () => {
    const result = runConfig(['resolve-user-slug'], { GSTACK_HOME: TMP_HOME, USER: 'Alice Test' });
    expect(result.status).toBe(0);
    // Spaces become dashes, uppercase becomes lowercase. The comparison is
    // exact and case-sensitive: a case-insensitive match would also accept
    // "Alice-Test" and never verify the lowercasing this test is named for.
    expect(result.stdout.trim()).toBe('alice-test');
  });

  test('falls through past empty $USER to git email or anonymous', () => {
    const result = runConfig(['resolve-user-slug'], { GSTACK_HOME: TMP_HOME, USER: '' });
    expect(result.status).toBe(0);
    const slug = result.stdout.trim();
    expect(slug.length).toBeGreaterThan(0);
    // Which later layer answers depends on the machine running the test, so
    // only the slug alphabet (letters, digits, dashes) is pinned here.
    expect(slug).toMatch(/^[a-zA-Z0-9-]+$/);
  });

  test('persists resolution to user_slug_at_<hash> on first call', () => {
    const result = runConfig(['resolve-user-slug'], { GSTACK_HOME: TMP_HOME, USER: 'persisttest' });
    // Fail on the command itself rather than on a missing file further down.
    expect(result.status).toBe(0);
    const configFile = join(TMP_HOME, 'config.yaml');
    expect(existsSync(configFile)).toBe(true);
    const content = readFileSync(configFile, 'utf-8');
    expect(content).toMatch(/^user_slug_at_[a-f0-9]+:\s+persisttest/m);
  });

  test('subsequent calls return same slug (stable across sessions)', () => {
    const first = runConfig(['resolve-user-slug'], { GSTACK_HOME: TMP_HOME, USER: 'stabletest' });
    const second = runConfig(['resolve-user-slug'], { GSTACK_HOME: TMP_HOME, USER: 'changed-after' });
    expect(first.status).toBe(0);
    expect(second.status).toBe(0);
    // Second call ignores new $USER because the slug was already persisted.
    expect(first.stdout.trim()).toBe('stabletest');
    expect(second.stdout.trim()).toBe('stabletest');
  });
});
|
||||
|
||||
// Covers get/set of the per-endpoint `brain_trust_policy@<hash>` config keys:
// the default value, accepted values, rejection of unknown values, and
// independence of keys that differ only in their hash suffix.
describe('brain_trust_policy@<hash> namespace', () => {
  const KEY = 'brain_trust_policy@deadbeef';

  // Both helpers read TMP_HOME at call time, because it is reassigned before
  // every test by the file-level beforeEach.
  const readPolicy = (key: string) => runConfig(['get', key], { GSTACK_HOME: TMP_HOME });
  const writePolicy = (key: string, value: string) =>
    runConfig(['set', key, value], { GSTACK_HOME: TMP_HOME });

  test('default value is "unset"', () => {
    const { status, stdout } = readPolicy(KEY);
    expect(status).toBe(0);
    expect(stdout).toBe('unset');
  });

  test('set + get roundtrip works', () => {
    const written = writePolicy(KEY, 'personal');
    expect(written.status).toBe(0);
    const readBack = readPolicy(KEY);
    expect(readBack.stdout).toBe('personal');
  });

  test('invalid value falls back to unset with warning', () => {
    const rejected = writePolicy(KEY, 'invalid-value');
    expect(rejected.status).toBe(0);
    expect(rejected.stderr).toContain('not recognized');
    const readBack = readPolicy(KEY);
    expect(readBack.stdout).toBe('unset');
  });

  test('shared value accepted', () => {
    writePolicy(KEY, 'shared');
    const readBack = readPolicy(KEY);
    expect(readBack.stdout).toBe('shared');
  });

  test('per-endpoint policies dont collide', () => {
    writePolicy('brain_trust_policy@aaaaaaaa', 'personal');
    writePolicy('brain_trust_policy@bbbbbbbb', 'shared');
    const policyA = readPolicy('brain_trust_policy@aaaaaaaa');
    const policyB = readPolicy('brain_trust_policy@bbbbbbbb');
    expect(policyA.stdout).toBe('personal');
    expect(policyB.stdout).toBe('shared');
  });
});
|
||||
|
||||
// Checks which key spellings `gstack-config get` accepts: a key containing a
// dash must fail with a message mentioning "alphanumeric", while a plain key
// and a key with an @<hex> suffix must both exit 0.
describe('key validation', () => {
  // Reads TMP_HOME at call time; it is reassigned before every test.
  const getKey = (key: string) => runConfig(['get', key], { GSTACK_HOME: TMP_HOME });

  test('rejects keys with disallowed characters', () => {
    const { status, stderr } = getKey('bad-key');
    expect(status).not.toBe(0);
    expect(stderr).toContain('alphanumeric');
  });

  test('accepts plain alphanumeric/underscore keys', () => {
    const { status } = getKey('proactive');
    expect(status).toBe(0);
  });

  test('accepts @<hex-hash> suffix on key', () => {
    const { status } = getKey('brain_trust_policy@abc123ff');
    expect(status).toBe(0);
  });
});
|
||||
Reference in New Issue
Block a user