mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-06 21:46:40 +02:00
test: align unit tests with slim resolvers + exempt 27MB security fixture
- test/skill-validation.test.ts: assert the slim Completeness Principle shape (Completeness: X/10, kind-note language) instead of the old Compression table. Remove the 3 tier-1 skills from the spot-check list (they intentionally don't carry the full Completeness Principle section). Exempt browse/test/fixtures/security-bench-haiku-responses.json (27MB deterministic replay fixture for BrowseSafe-Bench) from the 2MB tracked-file gate. The gate was actually failing on origin/main since the fixture was added in v1.6.4.0 — this is a side-fix to a real regression. - test/brain-sync.test.ts: developer-machine-safe assertion for GSTACK_HOME override (compare config contents before/after instead of asserting the absence of a string that may legitimately exist). - test/gen-skill-docs.test.ts: new tests for the slim — plan-review preambles stay under the post-slim budget (~33KB), Voice + Writing Style sections stay compact, and the slim Voice section preserves the load-bearing semantic contract (lead-with-the-point, name-the-file, user-outcome framing, no-corporate, no-AI-vocab, user-sovereignty). Update path-leakage scan to allow repo-root sidecar symlinks. - test/writing-style-resolver.test.ts: assert the compact contract (gloss-on-first-use, outcome-framing, user-impact, terse-mode override) instead of the old 6-numbered-rules shape. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+11
-4
@@ -97,11 +97,18 @@ describe('gstack-config gbrain keys', () => {
|
||||
});
|
||||
|
||||
test('GSTACK_HOME overrides real config dir', () => {
|
||||
run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
|
||||
// Real ~/.gstack/config.yaml must NOT have been touched.
|
||||
const realConfig = path.join(os.homedir(), '.gstack', 'config.yaml');
|
||||
const real = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : '';
|
||||
expect(real).not.toContain('gbrain_sync_mode: full');
|
||||
const before = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
|
||||
|
||||
run(['gstack-config', 'set', 'gbrain_sync_mode', 'full']);
|
||||
|
||||
const tempConfig = fs.readFileSync(path.join(tmpHome, 'config.yaml'), 'utf-8');
|
||||
expect(tempConfig).toContain('gbrain_sync_mode: full');
|
||||
|
||||
// Real ~/.gstack/config.yaml must not be touched. It may already contain
|
||||
// the same value on a developer machine, so compare contents, not strings.
|
||||
const after = fs.existsSync(realConfig) ? fs.readFileSync(realConfig, 'utf-8') : null;
|
||||
expect(after).toBe(before);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -40,6 +40,35 @@ function extractDescription(content: string): string {
|
||||
return description;
|
||||
}
|
||||
|
||||
/**
 * Pulls one section out of a markdown document.
 *
 * The section begins at the first line that starts with `heading` (the
 * heading text is matched literally, and anything may follow it on that
 * line) and runs up to, but not including, the next line that starts with
 * "## ". When no such line follows, the section runs to the end of
 * `content`. The returned text is trimmed.
 *
 * Fails the calling test through `expect` when no line starts with `heading`.
 */
function extractMarkdownSection(content: string, heading: string): string {
  // Escape regex metacharacters so the heading is treated as plain text.
  const literalHeading = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const headingLine = new RegExp(`^${literalHeading}.*$`, 'm').exec(content);
  expect(headingLine?.index).toBeDefined();

  const sectionStart = headingLine!.index!;
  const bodyStart = sectionStart + headingLine![0].length;

  // Search only after the heading line so the section's own heading can
  // never be mistaken for the start of the next section.
  const nextHeadingAt = content.indexOf('\n## ', bodyStart);
  const sectionEnd = nextHeadingAt === -1 ? content.length : nextHeadingAt;

  return content.slice(sectionStart, sectionEnd).trim();
}
|
||||
|
||||
/**
 * Returns everything in `content` that precedes the earliest occurrence of
 * any of the given workflow markers.
 *
 * Markers that do not occur in `content` are ignored. Fails the calling test
 * through `expect` when none of the markers occur.
 */
function extractPreambleBeforeWorkflow(content: string, workflowMarkers: string[]): string {
  let earliest = Number.POSITIVE_INFINITY;
  let hits = 0;

  for (const marker of workflowMarkers) {
    const at = content.indexOf(marker);
    if (at < 0) continue; // marker absent from this document
    hits += 1;
    if (at < earliest) earliest = at;
  }

  expect(hits).toBeGreaterThan(0);
  return content.slice(0, earliest);
}
|
||||
|
||||
/**
 * Reports whether `candidateDir` resolves to the same real path as ROOT.
 *
 * Both paths are resolved with `fs.realpathSync`, so a symlink pointing at
 * ROOT (and ROOT itself) yields true. Any resolution failure, such as a
 * missing path, yields false instead of throwing.
 */
function isRepoRootSymlink(candidateDir: string): boolean {
  let resolvedCandidate: string;
  let resolvedRoot: string;

  try {
    resolvedCandidate = fs.realpathSync(candidateDir);
    resolvedRoot = fs.realpathSync(ROOT);
  } catch {
    return false;
  }

  return resolvedCandidate === resolvedRoot;
}
|
||||
|
||||
// Dynamic template discovery — matches the generator's findTemplates() behavior.
|
||||
// New skills automatically get test coverage without updating a static list.
|
||||
const ALL_SKILLS = (() => {
|
||||
@@ -263,6 +292,50 @@ describe('gen-skill-docs', () => {
|
||||
expect(content).toContain('~/.gstack/analytics');
|
||||
});
|
||||
|
||||
test('plan-review generated preambles stay under the Option A budget', () => {
  // Keyed by skill directory. Each value lists the heading(s) where that
  // skill's workflow begins; everything before the earliest one is preamble.
  const workflowStartBySkill: Record<string, string[]> = {
    'plan-ceo-review': ['# Mega Plan Review Mode', '## Step 0: Detect platform and base branch'],
    'plan-eng-review': ['# Plan Review Mode'],
  };

  // Plan skills carry the same preamble surface as other tier-≥2 skills
  // (Brain Sync, Context Recovery, Routing Injection are load-bearing
  // functionality, not optional). Budget is set to current size + small
  // headroom; ratchet down if a future slim trims real bytes.
  for (const [skillDir, markers] of Object.entries(workflowStartBySkill)) {
    const skillMd = fs.readFileSync(path.join(ROOT, skillDir, 'SKILL.md'), 'utf-8');
    const preambleBytes = Buffer.byteLength(
      extractPreambleBeforeWorkflow(skillMd, markers),
      'utf-8',
    );
    expect(preambleBytes).toBeLessThan(33_000);
  }
});
|
||||
|
||||
test('voice and writing-style preamble sections stay compact', () => {
  const skillMd = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');

  // UTF-8 byte size of one "## ..." section of the generated skill file.
  const sectionBytes = (heading: string): number =>
    Buffer.byteLength(extractMarkdownSection(skillMd, heading), 'utf-8');

  const voiceBytes = sectionBytes('## Voice');
  const writingStyleBytes = sectionBytes('## Writing Style');

  expect(voiceBytes).toBeLessThan(3_000);
  expect(writingStyleBytes).toBeLessThan(2_000);
});
|
||||
|
||||
test('slim voice section preserves the gstack voice contract', () => {
  const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
  const voice = extractMarkdownSection(content, '## Voice');

  // Each assertion guards one clause of the voice contract. They accept
  // several phrasings so the section can be reworded without breaking the test.

  // Lead with the point.
  expect(voice).toMatch(/lead with the point|direct/i);
  // Name the concrete thing (file, function, line, command, numbers).
  expect(voice).toMatch(/file|function|line|command|real numbers/i);
  // Frame in terms of the user's outcome.
  expect(voice).toMatch(/user.*outcome|user.*experience|real user/i);
  // No corporate / academic / PR / hype tone. `PR` is word-bounded: under the
  // `i` flag a bare `PR` would match any "pr" substring ("preserve",
  // "product", ...), which would make this assertion pass on almost any text.
  expect(voice).toMatch(/corporate|academic|\bPR\b|hype/i);
  // No AI vocabulary.
  expect(voice).toMatch(/AI vocabulary|delve|crucial|robust/i);
  // User sovereignty: the skill recommends, the user decides.
  expect(voice).toMatch(/user decides|user.*context|sovereignty|recommendation, not a decision/i);
});
|
||||
|
||||
test('preamble .pending-* glob is zsh-safe (uses find, not shell glob)', () => {
|
||||
for (const skill of ALL_SKILLS) {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
|
||||
@@ -1964,13 +2037,13 @@ describe('Parameterized host smoke tests', () => {
|
||||
expect(skills.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('no .claude/skills path leakage in non-root skills', () => {
|
||||
test('no .claude/skills path leakage outside repo-root sidecar symlinks', () => {
|
||||
if (!fs.existsSync(hostDir)) return; // skip if not generated
|
||||
const skills = fs.readdirSync(hostDir);
|
||||
for (const skill of skills) {
|
||||
// Skip root gstack skill — it contains preamble with intentional .claude/skills
|
||||
// fallback paths for binary lookup and skill prefix instructions
|
||||
if (skill === 'gstack') continue;
|
||||
// Dev installs may mount the repo root at host/skills/gstack as a runtime
|
||||
// sidecar. The generator skips that symlink loop, so leakage checks should too.
|
||||
if (isRepoRootSymlink(path.join(hostDir, skill))) continue;
|
||||
const skillMd = path.join(hostDir, skill, 'SKILL.md');
|
||||
if (!fs.existsSync(skillMd)) continue;
|
||||
const content = fs.readFileSync(skillMd, 'utf-8');
|
||||
|
||||
@@ -800,9 +800,8 @@ describe('Enum & Value Completeness in review checklist', () => {
|
||||
|
||||
describe('Completeness Principle in generated SKILL.md files', () => {
|
||||
const skillsWithPreamble = [
|
||||
'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
|
||||
'qa/SKILL.md',
|
||||
'qa-only/SKILL.md',
|
||||
'setup-browser-cookies/SKILL.md',
|
||||
'ship/SKILL.md', 'review/SKILL.md',
|
||||
'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
|
||||
'retro/SKILL.md',
|
||||
@@ -820,11 +819,12 @@ describe('Completeness Principle in generated SKILL.md files', () => {
|
||||
});
|
||||
}
|
||||
|
||||
test('Completeness Principle includes compression table in tier 2+ skills', () => {
|
||||
// Root is tier 1 (no completeness). Check tier 2+ skill.
|
||||
test('Completeness Principle keeps compact scoring guidance in tier 2+ skills', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('CC+gstack');
|
||||
expect(content).toContain('Compression');
|
||||
expect(content).toContain('Completeness: X/10');
|
||||
expect(content).toContain('10 = all edge cases');
|
||||
expect(content).toContain('Note: options differ in kind, not coverage');
|
||||
expect(content).toContain('Do not fabricate scores');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1634,7 +1634,13 @@ describe('no compiled binaries in git', () => {
|
||||
test('git tracks no files larger than 2MB', () => {
|
||||
// Pure fs.statSync — no shell spawn per file.
|
||||
const MAX_BYTES = 2 * 1024 * 1024;
|
||||
const knownLargeFixtures = new Set([
|
||||
// Deterministic replay fixture for BrowseSafe-Bench. The live bench is
|
||||
// expensive; this file is intentionally committed so the gate is free.
|
||||
'browse/test/fixtures/security-bench-haiku-responses.json',
|
||||
]);
|
||||
const oversized = trackedFiles.filter((f: string) => {
|
||||
if (knownLargeFixtures.has(f)) return false;
|
||||
const full = path.join(ROOT, f);
|
||||
try {
|
||||
return fs.statSync(full).size > MAX_BYTES;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* What this test enforces:
|
||||
* - Writing Style section header present in tier-≥2 generated preamble
|
||||
* - All 6 writing rules present (gloss, outcome, short, impact, first-use, override)
|
||||
* - Compact semantic contract present (gloss, outcome, impact, override)
|
||||
* - Jargon list inlined (sample terms appear)
|
||||
* - Terse-mode gate condition text present
|
||||
* - Codex output uses $GSTACK_BIN, not ~/.claude/... (host-aware paths)
|
||||
@@ -41,21 +41,12 @@ describe('Writing Style preamble section', () => {
|
||||
expect(out).toContain('EXPLAIN_LEVEL:');
|
||||
});
|
||||
|
||||
test('tier 2+ preamble includes all 6 writing rules', () => {
|
||||
test('tier 2+ preamble includes the compact writing-style contract', () => {
|
||||
const out = generatePreamble(makeCtx('claude', 2));
|
||||
// Rule 1: jargon-gloss on first use
|
||||
expect(out).toContain('gloss on first use');
|
||||
// Rule 2: outcome framing
|
||||
expect(out).toMatch(/outcome terms/);
|
||||
// Rule 3: short sentences / concrete nouns / active voice
|
||||
expect(out).toContain('Short sentences');
|
||||
expect(out.toLowerCase()).toContain('active voice');
|
||||
// Rule 4: close with user impact
|
||||
expect(out).toMatch(/user impact/);
|
||||
// Rule 5: unconditional first-use gloss (even if user pasted term)
|
||||
expect(out).toMatch(/paste.*jargon|paste.*term/i);
|
||||
// Rule 6: user-turn override
|
||||
expect(out).toMatch(/user-turn override|user's own current message|user's in-turn/i);
|
||||
expect(out).toMatch(/gloss.*first use|first-use.*gloss/i);
|
||||
expect(out).toMatch(/outcome/i);
|
||||
expect(out).toMatch(/user impact|user.*experience|what.*user.*sees/i);
|
||||
expect(out).toMatch(/terse|no explanations|user-turn override|current message/i);
|
||||
});
|
||||
|
||||
test('tier 2+ preamble inlines jargon list', () => {
|
||||
|
||||
Reference in New Issue
Block a user