mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-21 09:10:11 +02:00
Merge origin/main into /spec branch — retag v1.45.0.0 → v1.47.0.0
main moved to v1.46.0.0 (gstack v2 foundation, eval-first floor across 51 skills) while this branch was at v1.45.0.0. v1.46 also reserved v1.45.0.0 for the design daemon feature. Retag this branch's release v1.45.0.0 → v1.47.0.0 so it lands cleanly on top of main. Conflict resolutions: - VERSION: 1.47.0.0 (MINOR continues on top of main's 1.46.0.0; this branch is also a MINOR per scale-aware rules — new skill capability). - CHANGELOG: rewrite this branch's release header v1.45.0.0 → v1.47.0.0. Keep both main entries above main's older history. Adapts to main's eval-first floor (v1.46.0.0 test/skill-coverage-matrix.ts + test/skill-coverage-floor.test.ts): - Register /spec in SKILL_COVERAGE with 3 gate entries + 2 periodic. - Skill catalog grows 51 → 52. Floor 6/6 structural checks pass. - Catalog tokens: 4045 → 4116 (+71 for /spec, within v1.46's ≤7000 budget). - Trim spec frontmatter description to single-paragraph block form to respect v1.46's catalog-trim intent (was 14 lines / ~900 chars, now 5 lines / ~350 chars; routing prose stays in body sections). - 363/363 gate-tier tests pass across skill-coverage-floor (309) + skill-coverage-matrix (10) + skill-size-budget (3) + parity-suite (4) + spec-template-invariants (35) + spec-template-sync (2). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
// Static invariants guarding Windows artifact-sync (bin/gstack-brain-sync).
|
||||
//
|
||||
// These are deliberately static, not behavioral. The brain-sync integration
|
||||
// suite (test/brain-sync.test.ts) spawns the bin/ scripts directly, which
|
||||
// Node/Bun cannot exec on Windows (they are bash-shebang scripts), so that
|
||||
// suite is excluded from the Windows CI lane. Instead we assert the source
|
||||
// keeps the properties that make `--discover-new` and the `--once` drain work
|
||||
// on Windows. Each maps to a confirmed, separately-reproduced failure:
|
||||
//
|
||||
// 1. os.path.relpath yields BACKSLASH separators on Windows, which never
|
||||
// match the forward-slash allowlist globs (e.g. "projects/*/learnings.jsonl"),
|
||||
// so nested artifacts were silently never discovered.
|
||||
// 2. discover-new enqueued via subprocess.run([bash-shim]); Windows Python
|
||||
// cannot exec a shebang script, so it enqueued nothing even once matched.
|
||||
// 3. compute_paths_to_stage's python print() emits CRLF on Windows; the bash
|
||||
// `read -r` keeps the trailing \r, so `git add -- "path\r"` matches
|
||||
// nothing and the drain silently stages/commits nothing.
|
||||
//
|
||||
// Plus two robustness properties (independent codex review, both [P2]):
|
||||
// 4. the inline enqueue must append one atomic record at a time (O_APPEND),
|
||||
// or a concurrent writer-shim append can interleave mid-record and produce
|
||||
// a malformed queue line that the drain silently drops.
|
||||
// 5. the skip-list must be normalized to the same separator form as `rel`,
|
||||
// or a backslash entry in .brain-skip.txt stops matching and a file the
|
||||
// user explicitly skipped gets synced.
|
||||
// Repository root: one directory above the folder containing this test file.
const ROOT = path.resolve(import.meta.dir, '..');

// The sync script is read once, as text. Every invariant in this file is a
// substring check against it — the script is never executed.
const BRAIN_SYNC_SCRIPT = path.join(ROOT, 'bin', 'gstack-brain-sync');
const SRC = fs.readFileSync(BRAIN_SYNC_SCRIPT, 'utf-8');
|
||||
|
||||
describe('gstack-brain-sync — Windows path/exec invariants', () => {
  test('discover-new normalizes relpath separators before fnmatch (bug 1)', () => {
    // The relpath must be converted to forward slashes so it can match the
    // forward-slash allowlist globs.
    const normalizedRelpath = 'os.path.relpath(full, gstack_home).replace(os.sep, "/")';
    expect(SRC).toContain(normalizedRelpath);
  });

  test('no python subprocess exec — Windows cannot exec the bash shims (bug 2)', () => {
    // Deliberately broad: the word must not appear anywhere in the script, so
    // Python can never shell out to a bin/ bash script (the exec failure that
    // left discover enqueuing nothing on Windows).
    expect(SRC).not.toContain('subprocess');
  });

  test('drain loop strips trailing CR before git add (bug 3)', () => {
    const crStrip = "p=\"${p%$'\\r'}\"";
    expect(SRC).toContain(crStrip);

    // Ordering matters: if the staging call came first, the pathspec would
    // still carry the trailing \r.
    const stripAt = SRC.indexOf(crStrip);
    const stageAt = SRC.indexOf('add -f -- "$p"');
    expect(stripAt).toBeLessThan(stageAt);
  });

  test('inline enqueue appends one atomic record at a time (codex P2 #1)', () => {
    for (const required of ['os.O_APPEND', 'os.write(fd']) {
      expect(SRC).toContain(required);
    }
    // A buffered batch append to the queue is the interleave-corruption shape
    // and must not come back.
    expect(SRC).not.toContain('open(queue_path, "a"');
  });

  test('skip-list is normalized on BOTH discover and drain sides (codex P2 #2)', () => {
    // compute_paths_to_stage (the drain) is the real staging boundary. If it
    // does not normalize skip entries the same way discover_new does, a
    // backslash entry in .brain-skip.txt is honored at discovery but bypassed
    // at commit, and a file the user explicitly skipped gets synced.
    const NORM = 's.replace(os.sep, "/") for s in load_lines(skip_path)';
    const occurrences = SRC.split(NORM).length - 1;
    expect(occurrences).toBeGreaterThanOrEqual(2);
  });
});
|
||||
@@ -0,0 +1,118 @@
|
||||
/**
|
||||
* Gap B (v1.46.0.0): --catalog-mode=full opt-out behavior.
|
||||
*
|
||||
* The catalog trim is the default. The opt-out (`--catalog-mode=full`)
|
||||
* preserves v1.44 multi-line frontmatter descriptions for users / hosts
|
||||
* that depend on the legacy fat catalog. Without this test, someone could
|
||||
* break the conditional `if (host === 'claude' && CATALOG_MODE === 'trim')`
|
||||
* and silently turn the opt-out path into a no-op — users with the flag
|
||||
* still get trim'd output, the v1.44 behavior is gone.
|
||||
*
|
||||
* Two layers:
|
||||
* 1. Static: the CATALOG_MODE flag is wired into gen-skill-docs.ts and
|
||||
* the conditional gate is in the pipeline.
|
||||
* 2. Smoke: running with --catalog-mode=full produces a frontmatter
|
||||
* `description: |` block (multi-line) instead of the trim'd one-line
|
||||
* `description: ...(gstack)` form.
|
||||
*
|
||||
* The smoke test mutates the working tree mid-run. It restores the default
|
||||
* trim'd state in a finally block so a crash mid-test still leaves a clean
|
||||
* working tree.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
// Repository root: one directory above the folder containing this test file.
const REPO_ROOT = path.resolve(import.meta.dir, '..');

// Generator script whose source the static tests inspect.
const GEN_SKILL_DOCS = path.join(REPO_ROOT, 'scripts', 'gen-skill-docs.ts');

// Generated skill doc used as the sample output for the smoke test.
const SHIP_SKILL = path.join(REPO_ROOT, 'ship', 'SKILL.md');
|
||||
|
||||
describe('--catalog-mode=full opt-out wiring (static)', () => {
  // Read the generator source fresh for each test, so each test sees the file
  // as it is on disk when that test runs.
  const readGeneratorSource = () => fs.readFileSync(GEN_SKILL_DOCS, 'utf-8');

  test('CATALOG_MODE_ARG parsing is wired into gen-skill-docs.ts', () => {
    const generator = readGeneratorSource();
    expect(generator).toContain('CATALOG_MODE_ARG');
    expect(generator).toContain("a.startsWith('--catalog-mode')");
  });

  test('CATALOG_MODE accepts only "trim" or "full" — anything else throws', () => {
    const generator = readGeneratorSource();
    expect(generator).toMatch(/val !== 'trim' && val !== 'full'/);
    expect(generator).toContain('Unknown catalog mode');
  });

  test('catalog trim only fires when CATALOG_MODE === "trim"', () => {
    const generator = readGeneratorSource();
    // Checks that the CATALOG_MODE comparison and the applyCatalogTrim call
    // both still exist in the generator source.
    expect(generator).toMatch(/CATALOG_MODE === 'trim'/);
    expect(generator).toContain('applyCatalogTrim(content, skillName)');
  });

  test('default CATALOG_MODE is "trim" (opt-out, not opt-in)', () => {
    const generator = readGeneratorSource();
    // With no --catalog-mode argument the initializer must fall back to 'trim'.
    expect(generator).toMatch(/if \(!CATALOG_MODE_ARG\) return 'trim'/);
  });
});
|
||||
|
||||
describe('--catalog-mode=full opt-out behavior (smoke)', () => {
  // One-line trimmed frontmatter description of the ship skill. The regex has
  // no g/y flag, so sharing it across assertions carries no lastIndex state.
  const TRIMMED_SHIP_DESCRIPTION = /^description: Ship workflow:[^\n]*\(gstack\)\n/m;

  /**
   * Runs `bun run gen:skill-docs` from the repo root with the given extra CLI
   * arguments and returns the spawnSync result (stdout/stderr captured).
   */
  const runGenSkillDocs = (extraArgs: string[], timeout: number) =>
    spawnSync('bun', ['run', 'gen:skill-docs', ...extraArgs], {
      cwd: REPO_ROOT,
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout,
    });

  test('--catalog-mode=full produces multi-line description in frontmatter', () => {
    // Precondition: the working tree starts in the default trimmed state.
    const trimmedShip = fs.readFileSync(SHIP_SKILL, 'utf-8');
    expect(trimmedShip).toMatch(TRIMMED_SHIP_DESCRIPTION);

    let restoreStatus: number | null = null;
    try {
      // Run with --catalog-mode=full. Mutates the working tree.
      const result = runGenSkillDocs(['--catalog-mode=full'], 60_000);
      expect(result.status).toBe(0);

      // After --catalog-mode=full the frontmatter description is the legacy
      // multi-line YAML block scalar, not the trimmed one-line form.
      const fullShip = fs.readFileSync(SHIP_SKILL, 'utf-8');
      expect(fullShip).toMatch(/^description: \|\s*$/m);

      // Locate the closing frontmatter fence. Without this guard a missing
      // fence gives -1: slice(0, -1) would then treat almost the whole file as
      // frontmatter and slice(-1) would treat one character as the body, so
      // both assertions below would pass vacuously.
      const fmEnd = fullShip.indexOf('\n---', 4);
      expect(fmEnd).toBeGreaterThan(0);

      // Legacy multi-line content keeps "Use when asked to..." in the
      // frontmatter (in trim mode it lives in the body section).
      const fm = fullShip.slice(0, fmEnd);
      expect(fm).toMatch(/Use when asked to/i);

      // The "When to invoke" body section must NOT exist in full mode, because
      // the routing prose stayed in the frontmatter.
      const body = fullShip.slice(fmEnd);
      expect(body).not.toContain('## When to invoke this skill');
    } finally {
      // Restore the default trimmed state whatever happened above. There are
      // no assertions in here: a throw from `finally` would replace the
      // original failure and hide the real cause.
      const restore = runGenSkillDocs([], 60_000);
      restoreStatus = restore.status;
      if (restore.status !== 0) {
        // eslint-disable-next-line no-console
        console.error(
          'CRITICAL: failed to restore default trim state. Run `bun run gen:skill-docs` to clean up.',
          restore.stderr?.toString() ?? '',
        );
      }
    }

    // Reached only when the body above passed: confirm the restore worked and
    // the tree is back in the trimmed form seen at the start.
    expect(restoreStatus).toBe(0);
    const restoredShip = fs.readFileSync(SHIP_SKILL, 'utf-8');
    expect(restoredShip).toMatch(TRIMMED_SHIP_DESCRIPTION);
  }, 180_000);

  test('--catalog-mode=invalid throws a clear error', () => {
    const result = runGenSkillDocs(['--catalog-mode=invalid'], 30_000);
    expect(result.status).not.toBe(0);

    // The error must name both the problem and the rejected value.
    const stderr = result.stderr?.toString() ?? '';
    expect(stderr).toMatch(/Unknown catalog mode/);
    expect(stderr).toMatch(/invalid/);
  });
});
|
||||
@@ -0,0 +1,313 @@
|
||||
/**
|
||||
* Unit tests for catalog-trim helpers (gen-skill-docs.ts T4 functions).
|
||||
*
|
||||
* splitCatalogDescription, buildTrimmedDescription, buildWhenToInvokeSection,
|
||||
* applyCatalogTrim — these handle every skill's frontmatter rewrite at gen
|
||||
* time. Two bugs already shipped here:
|
||||
*
|
||||
* v1.45.0.0 design-consultation: when the first sentence exceeded 200 chars,
|
||||
* the routing-prose extraction lost the entire tail. design-consultation's
|
||||
* "Use when asked to..." silently disappeared from the body section.
|
||||
*
|
||||
* v1.45.0.0 CI freshness: the root-skill key leaked the checkout directory
|
||||
* name ("seville-v3" vs "gstack") and aggregate order was filesystem-
|
||||
* iteration order. Two machines produced two different JSON files.
|
||||
*
|
||||
* Both are regression-tested here. Future bugs in these functions surface as
|
||||
* unit-test failures before they hit CI or production.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import {
|
||||
splitCatalogDescription,
|
||||
buildTrimmedDescription,
|
||||
buildWhenToInvokeSection,
|
||||
applyCatalogTrim,
|
||||
} from '../scripts/gen-skill-docs';
|
||||
|
||||
describe('splitCatalogDescription', () => {
  test('extracts lead sentence + routing prose from simple multi-line description', () => {
    const desc = [
      'Pre-landing PR review. Analyzes diff against the base branch for SQL safety, LLM trust',
      'boundary violations, conditional side effects, and other structural issues. Use when',
      'asked to "review this PR", "code review", "pre-landing review", or "check my diff".',
      'Proactively suggest when the user is about to merge or land code changes. (gstack)',
    ].join('\n');

    const { lead, hasGstackTag, voiceLine, routingProse } = splitCatalogDescription(desc);

    expect(lead).toBe('Pre-landing PR review.');
    expect(hasGstackTag).toBe(true);
    expect(voiceLine).toBeNull();
    for (const fragment of ['Use when', 'Proactively suggest', 'Analyzes diff']) {
      expect(routingProse).toContain(fragment);
    }
    // The (gstack) tag must be stripped from the routing prose.
    expect(routingProse).not.toContain('(gstack)');
  });

  test('REGRESSION (design-consultation v1.45.0.0): >200 char first sentence keeps routing', () => {
    // The exact shape that broke: the first sentence (with embedded periods)
    // is 207 chars. In the original bug, routing extraction ran AFTER lead
    // truncation, so collapsed.indexOf(lead) returned -1 (the lead ended in
    // "...") and the whole "Use when..." + "Proactively..." tail became ''.
    const desc = [
      'Design consultation: understands your product, researches the landscape,',
      'proposes a complete design system (aesthetic, typography, color, layout,',
      'spacing, motion), and generates font+color preview pages.',
      "Creates DESIGN.md as your project's design source of truth.",
      'For existing sites, use /plan-design-review to infer the system instead.',
      'Use when asked to "design system", "brand guidelines", or "create DESIGN.md".',
      "Proactively suggest when starting a new project's UI with no existing",
      'design system or DESIGN.md. (gstack)',
    ].join(' ');

    const parts = splitCatalogDescription(desc);

    // The lead may be truncated with "..." because it exceeds 200 chars.
    expect(parts.lead.length).toBeLessThanOrEqual(205);
    // Critical: the routing prose must still carry the tail of the description.
    const mustSurvive = [
      'Use when asked to',
      'design system',
      'Proactively suggest',
      'Creates DESIGN.md',
    ];
    for (const fragment of mustSurvive) {
      expect(parts.routingProse).toContain(fragment);
    }
  });

  test('extracts voice-triggers line when present', () => {
    const desc = [
      'Quick fix. Use when asked to fix the bug.',
      'Voice triggers (speech-to-text aliases): "fix it", "patch this", "make it work".',
      '(gstack)',
    ].join(' ');

    const parts = splitCatalogDescription(desc);

    expect(parts.lead).toBe('Quick fix.');
    expect(parts.voiceLine).toContain('Voice triggers');
    expect(parts.voiceLine).toContain('"fix it"');
    expect(parts.routingProse).toContain('Use when asked to fix');
    // The voice line must not leak into the routing prose.
    expect(parts.routingProse).not.toContain('speech-to-text');
  });

  test('handles description without (gstack) tag', () => {
    const parts = splitCatalogDescription(
      'Single sentence description. With routing prose afterward.',
    );

    expect(parts.lead).toBe('Single sentence description.');
    expect(parts.hasGstackTag).toBe(false);
    expect(parts.routingProse).toBe('With routing prose afterward.');
  });

  test('embedded-period descriptions: known limitation falls back to first-20-words', () => {
    // KNOWN LIMITATION (recorded by the original author): the sentence regex
    // is `^([^.!?]*[.!?])(?:\s|$)`. It is anchored at the start and
    // `[^.!?]*` cannot consume a terminator, so only the FIRST `.`/`!`/`?` in
    // the string can end the lead. When that character is followed by
    // non-whitespace (the `.` in "DESIGN.md" or "v1.45.0.0") the regex fails
    // entirely, and the lead falls back to the first 20 words — for a short
    // input, roughly the whole description.
    //
    // Real-world impact is small: such tokens rarely appear in the middle of
    // the FIRST sentence. When they do, the lead becomes the full description
    // and no body section is generated, the same as a description with no
    // period. The trim CI gate still keeps the per-skill size budget honest.
    //
    // If this gap matters later, replace the regex with a sentence tokenizer
    // (compromise.js / Intl.Segmenter). Until then the fallback is accepted.
    const desc = [
      'Skill that mentions DESIGN.md and v1.45.0.0 in the lead.',
      'Use when asked to do something.',
    ].join(' ');

    const { lead } = splitCatalogDescription(desc);

    // routingProse may be empty when the fallback consumes everything, so it
    // is not asserted. This test exists to detect REGRESSIONS (a lead cut
    // oddly short, like "Skill that mentions DESIGN."), not ideal behavior.
    expect(lead.length).toBeGreaterThan(0);
    expect(lead).toContain('Skill that mentions');
  });

  test('description without a period uses first ~20 words as lead', () => {
    const desc = 'A long fragment with no sentence terminator drifting on and on across many words for an unusual frontmatter shape';

    const { lead } = splitCatalogDescription(desc);
    const wordCount = lead.split(/\s+/).length;

    expect(lead.length).toBeGreaterThan(0);
    expect(wordCount).toBeLessThanOrEqual(21);
  });

  test('idempotent: calling on already-trimmed output returns the same parts', () => {
    const firstPass = splitCatalogDescription('Already trimmed. (gstack)');
    const secondPass = splitCatalogDescription(buildTrimmedDescription(firstPass));

    // Re-splitting a one-line trimmed result keeps the lead and the tag, and
    // leaves the routing prose empty.
    expect(secondPass.lead).toBe(firstPass.lead);
    expect(secondPass.hasGstackTag).toBe(true);
    expect(secondPass.routingProse).toBe('');
  });
});
|
||||
|
||||
describe('buildTrimmedDescription', () => {
  test('appends (gstack) when hasGstackTag is true', () => {
    const tagged = {
      lead: 'Some lead.',
      routingProse: 'routing',
      voiceLine: null,
      hasGstackTag: true,
    };
    // Only the lead and the tag reach the one-line form; routing prose does not.
    expect(buildTrimmedDescription(tagged)).toBe('Some lead. (gstack)');
  });

  test('omits (gstack) when hasGstackTag is false', () => {
    const untagged = {
      lead: 'No tag.',
      routingProse: '',
      voiceLine: null,
      hasGstackTag: false,
    };
    expect(buildTrimmedDescription(untagged)).toBe('No tag.');
  });

  test('trims whitespace from lead', () => {
    const padded = {
      lead: '  Lead with whitespace.  ',
      routingProse: '',
      voiceLine: null,
      hasGstackTag: true,
    };
    expect(buildTrimmedDescription(padded)).toBe('Lead with whitespace. (gstack)');
  });
});
|
||||
|
||||
describe('buildWhenToInvokeSection', () => {
  test('produces markdown H2 with routing prose and voice line', () => {
    const section = buildWhenToInvokeSection({
      lead: 'Lead.',
      routingProse: 'Use when asked to ship.',
      voiceLine: 'Voice triggers (speech-to-text aliases): "ship it".',
      hasGstackTag: true,
    });

    const expectedFragments = [
      '## When to invoke this skill',
      'Use when asked to ship.',
      'Voice triggers',
    ];
    for (const fragment of expectedFragments) {
      expect(section).toContain(fragment);
    }
  });

  test('omits routing block when routingProse is empty', () => {
    const section = buildWhenToInvokeSection({
      lead: 'Lead.',
      routingProse: '',
      voiceLine: null,
      hasGstackTag: true,
    });

    // The heading is still emitted; only the routing text is absent.
    expect(section).toContain('## When to invoke this skill');
    expect(section).not.toContain('Use when');
  });

  test('emits even when only voice line is present', () => {
    const section = buildWhenToInvokeSection({
      lead: 'Lead.',
      routingProse: '',
      voiceLine: 'Voice triggers: x.',
      hasGstackTag: true,
    });

    expect(section).toContain('Voice triggers: x.');
  });
});
|
||||
|
||||
describe('applyCatalogTrim', () => {
  // Minimal generated-skill fixture: frontmatter with a multi-line YAML block
  // scalar description, the auto-generated banner, then a short body.
  // The continuation lines under `description: |` must be indented. Unindented
  // lines are not valid block-scalar content; YAML would read them as sibling
  // top-level entries rather than as the description.
  const minimalSkill = `---
name: example
description: |
  Example skill: this is the first sentence of the description, intended to be
  the lead displayed in the catalog. Use when asked to do an example task.
  Proactively suggest when the user mentions examples. (gstack)
preamble-tier: 2
---
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs -->

# Example body
Original body content here.
`;

  test('rewrites multi-line description into one-line + body section', () => {
    const result = applyCatalogTrim(minimalSkill, 'example');
    expect(result).not.toBeNull();
    const { content, parts } = result!;

    // The frontmatter description is now ONE line ending with (gstack).
    expect(content).toMatch(/^description: Example skill:[^\n]*\(gstack\)\n/m);

    // The routing prose moved into the "When to invoke" body section.
    expect(content).toContain('## When to invoke this skill');
    expect(content).toContain('Use when asked to do an example task.');
    expect(content).toContain('Proactively suggest when');

    // The original body is still present.
    expect(content).toContain('# Example body');
    expect(content).toContain('Original body content here.');

    // `parts` is populated for the aggregator.
    expect(parts.lead).toContain('Example skill');
    expect(parts.hasGstackTag).toBe(true);
  });

  test('returns null for already-short descriptions (no-op)', () => {
    // Swap the block-scalar description (everything up to `preamble-tier:`)
    // for a one-line description that needs no trimming.
    const shortSkill = minimalSkill.replace(
      /description: \|[\s\S]*?(?=preamble-tier:)/,
      'description: Already short. (gstack)\n',
    );

    expect(applyCatalogTrim(shortSkill, 'example')).toBeNull();
  });

  test('keeps the newline between description and next YAML field (no field collision)', () => {
    // Bug shape from the first v1.45.0.0 attempt: the output was
    // `description: ... (gstack)preamble-tier:` with no newline between fields.
    const result = applyCatalogTrim(minimalSkill, 'example');
    expect(result).not.toBeNull();

    const { content } = result!;
    expect(content).not.toMatch(/\(gstack\)preamble-tier/);
    expect(content).not.toMatch(/\(gstack\)allowed-tools/);
    expect(content).toMatch(/\(gstack\)\n[a-z-]+:/);
  });

  test('returns null on content without proper frontmatter', () => {
    expect(applyCatalogTrim('no frontmatter here', 'whatever')).toBeNull();
    expect(applyCatalogTrim('---\nincomplete frontmatter', 'whatever')).toBeNull();
  });
});
|
||||
|
||||
describe('proactive-suggestions.json determinism (regression for v1.45.0.0 CI freshness fail)', () => {
  /**
   * Parses the committed scripts/proactive-suggestions.json. It is called
   * inside each test, so the file is read when that test runs.
   */
  const loadSuggestions = () => {
    const fs = require('fs');
    const path = require('path');
    const jsonPath = path.join(__dirname, '..', 'scripts', 'proactive-suggestions.json');
    return JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));
  };

  test('committed JSON keys are alphabetically sorted', () => {
    // Verifies the sort order of the committed file, to catch a regression to
    // non-sorted output.
    const keys = Object.keys(loadSuggestions().skills);
    const sorted = [...keys].sort();
    expect(keys).toEqual(sorted);
  });

  test('root skill is keyed as "gstack" (not the checkout directory name)', () => {
    // Catches the bug where the root SKILL.md.tmpl's catalog parts were
    // registered under the directory basename ("seville-v3" in a Conductor
    // worktree, "gstack" on CI).
    const path = require('path');
    const { skills } = loadSuggestions();
    expect(skills).toHaveProperty('gstack');

    // The checkout directory name must NOT appear as a key. The check is
    // skipped when the checkout is itself named "gstack".
    const repoDir = path.basename(path.resolve(__dirname, '..'));
    if (repoDir !== 'gstack') {
      expect(skills).not.toHaveProperty(repoDir);
    }
  });

  test('schema + catalog_mode + note fields are stable', () => {
    const json = loadSuggestions();
    expect(json).toHaveProperty('$schema');
    expect(json.catalog_mode).toBe('trim');
    expect(typeof json.note).toBe('string');

    // No timestamp fields: they make the CI freshness check flap.
    for (const volatileField of ['generated_at', 'timestamp']) {
      expect(json).not.toHaveProperty(volatileField);
    }
  });
});
|
||||
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* cso security-guidance preservation test (v1.45.0.0 T6).
|
||||
*
|
||||
* The cso skill carries load-bearing security prose: OWASP Top 10 mappings,
|
||||
* STRIDE threat-model phrasing, "do not auto-fix without user approval"
|
||||
* gates. Codex 2nd-pass critique #9: "cso exemption too broad ... should
|
||||
* still get resolver dedup, catalog trim, sectioning if safe, and targeted
|
||||
* evals around must-not-miss checks."
|
||||
*
|
||||
* This test pins the must-not-miss checks. cso gets the same resolver gate
|
||||
* (T2), jargon dedup (T3), and catalog trim (T4) as every other skill — but
|
||||
* its security-guidance body content stays intact. Future compression work
|
||||
* that would strip this content fails CI here.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
// Repository root: one directory above the folder containing this test file.
const REPO_ROOT = path.resolve(import.meta.dir, '..');

// Generated cso skill document that every check below reads.
const CSO_SKILL = path.join(REPO_ROOT, 'cso', 'SKILL.md');

// Substrings that must appear somewhere in cso/SKILL.md. They are matched
// case-insensitively by the phrase test.
const MUST_PRESERVE_PHRASES = [
  // OWASP / STRIDE positioning
  'OWASP',
  'STRIDE',

  // Mode discipline
  'daily',
  'comprehensive',

  // Severity language
  'confidence',

  // Active verification requirement (codex critique: "active verification").
  // The stem covers "verify", "verification", and "verified".
  'verif',
];

// Headings that must appear verbatim (case-sensitive) in cso/SKILL.md.
const MUST_PRESERVE_HEADINGS = [
  '## Preamble', // from PREAMBLE resolver
];
|
||||
|
||||
describe('cso skill preserves load-bearing security guidance', () => {
  // Read cso/SKILL.md fresh inside each test.
  const readCsoSkill = () => fs.readFileSync(CSO_SKILL, 'utf-8');

  test('cso/SKILL.md exists and is non-trivial', () => {
    expect(fs.existsSync(CSO_SKILL)).toBe(true);

    // cso is a content-heavy security skill; a length under the 30_000
    // threshold suggests stripping went too far.
    const size = readCsoSkill().length;
    expect(size).toBeGreaterThan(30_000);
  });

  test('cso preserves required security phrases (case-insensitive)', () => {
    const haystack = readCsoSkill().toLowerCase();
    const missing = MUST_PRESERVE_PHRASES.filter(
      (phrase) => !haystack.includes(phrase.toLowerCase()),
    );

    // Report every missing phrase in one error rather than failing on the first.
    if (missing.length > 0) {
      throw new Error(
        [
          `cso/SKILL.md is missing required security phrases: ${missing.join(', ')}. `,
          `These are load-bearing for the skill's audit posture. If you intentionally `,
          `removed them, update this test with the new phrasing.`,
        ].join(''),
      );
    }
  });

  test('cso preserves required headings', () => {
    const content = readCsoSkill();
    MUST_PRESERVE_HEADINGS.forEach((heading) => {
      expect(content).toContain(heading);
    });
  });

  test('cso catalog trim landed (frontmatter description ≤ 200 chars)', () => {
    // Isolate the frontmatter between the opening and closing `---` fences.
    const frontmatter = readCsoSkill().match(/^---\n([\s\S]*?)\n---/);
    expect(frontmatter).not.toBeNull();

    // Take the value of the `description:` field.
    const descriptionField = frontmatter![1].match(/^description:\s+(.+)$/m);
    expect(descriptionField).not.toBeNull();

    const desc = descriptionField![1].trim();
    expect(desc.length).toBeLessThanOrEqual(200);
    expect(desc).toContain('(gstack)');
  });

  test('cso routing prose moved to "## When to invoke" body section', () => {
    expect(readCsoSkill()).toContain('## When to invoke this skill');
  });
});
|
||||
@@ -140,6 +140,12 @@ describe('gstack-diff-scope', () => {
|
||||
expect(scope.SCOPE_AUTH).toBe('true');
|
||||
});
|
||||
|
||||
test('detects config via bun.lock (Bun v1.2+ text lockfile)', () => {
  // A repo whose only file is bun.lock must set the config scope signal.
  const { SCOPE_CONFIG } = runScope(createRepo(['bun.lock']));
  expect(SCOPE_CONFIG).toBe('true');
});
|
||||
|
||||
test('returns false for all new signals when no matching files', () => {
|
||||
const dir = createRepo(['docs/readme.md', 'config.yml']);
|
||||
const scope = runScope(dir);
|
||||
|
||||
+10
-84
@@ -2,12 +2,7 @@
|
||||
name: ship
|
||||
preamble-tier: 4
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION,
|
||||
update CHANGELOG, commit, push, create PR. Use when asked to "ship", "deploy",
|
||||
"push to main", "create a PR", "merge and push", or "get it deployed".
|
||||
Proactively invoke this skill (do NOT push/PR directly) when the user says code
|
||||
is ready, asks about deploying, wants to push code up, or asks to create a PR. (gstack)
|
||||
description: Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION, update CHANGELOG, commit, push, create PR. (gstack)
|
||||
allowed-tools:
|
||||
- Bash
|
||||
- Read
|
||||
@@ -27,6 +22,14 @@ triggers:
|
||||
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
|
||||
<!-- Regenerate: bun run gen:skill-docs -->
|
||||
|
||||
|
||||
## When to invoke this skill
|
||||
|
||||
Use when asked to "ship", "deploy",
|
||||
"push to main", "create a PR", "merge and push", or "get it deployed".
|
||||
Proactively invoke this skill (do NOT push/PR directly) when the user says code
|
||||
is ready, asks about deploying, wants to push code up, or asks to create a PR.
|
||||
|
||||
## Preamble (run first)
|
||||
|
||||
```bash
|
||||
@@ -553,84 +556,7 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
|
||||
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
|
||||
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
|
||||
|
||||
Jargon list, gloss on first use if the term appears:
|
||||
- idempotent
|
||||
- idempotency
|
||||
- race condition
|
||||
- deadlock
|
||||
- cyclomatic complexity
|
||||
- N+1
|
||||
- N+1 query
|
||||
- backpressure
|
||||
- memoization
|
||||
- eventual consistency
|
||||
- CAP theorem
|
||||
- CORS
|
||||
- CSRF
|
||||
- XSS
|
||||
- SQL injection
|
||||
- prompt injection
|
||||
- DDoS
|
||||
- rate limit
|
||||
- throttle
|
||||
- circuit breaker
|
||||
- load balancer
|
||||
- reverse proxy
|
||||
- SSR
|
||||
- CSR
|
||||
- hydration
|
||||
- tree-shaking
|
||||
- bundle splitting
|
||||
- code splitting
|
||||
- hot reload
|
||||
- tombstone
|
||||
- soft delete
|
||||
- cascade delete
|
||||
- foreign key
|
||||
- composite index
|
||||
- covering index
|
||||
- OLTP
|
||||
- OLAP
|
||||
- sharding
|
||||
- replication lag
|
||||
- quorum
|
||||
- two-phase commit
|
||||
- saga
|
||||
- outbox pattern
|
||||
- inbox pattern
|
||||
- optimistic locking
|
||||
- pessimistic locking
|
||||
- thundering herd
|
||||
- cache stampede
|
||||
- bloom filter
|
||||
- consistent hashing
|
||||
- virtual DOM
|
||||
- reconciliation
|
||||
- closure
|
||||
- hoisting
|
||||
- tail call
|
||||
- GIL
|
||||
- zero-copy
|
||||
- mmap
|
||||
- cold start
|
||||
- warm start
|
||||
- blue-green deploy
|
||||
- canary deploy
|
||||
- feature flag
|
||||
- kill switch
|
||||
- dead letter queue
|
||||
- fan-out
|
||||
- fan-in
|
||||
- debounce
|
||||
- throttle (UI)
|
||||
- hydration mismatch
|
||||
- memory leak
|
||||
- GC pause
|
||||
- heap fragmentation
|
||||
- stack overflow
|
||||
- null pointer
|
||||
- dangling pointer
|
||||
- buffer overflow
|
||||
Curated jargon list lives at `~/.claude/skills/gstack/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases.
|
||||
|
||||
|
||||
## Completeness Principle — Boil the Lake
|
||||
|
||||
+1
-78
@@ -542,84 +542,7 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
|
||||
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
|
||||
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
|
||||
|
||||
Jargon list, gloss on first use if the term appears:
|
||||
- idempotent
|
||||
- idempotency
|
||||
- race condition
|
||||
- deadlock
|
||||
- cyclomatic complexity
|
||||
- N+1
|
||||
- N+1 query
|
||||
- backpressure
|
||||
- memoization
|
||||
- eventual consistency
|
||||
- CAP theorem
|
||||
- CORS
|
||||
- CSRF
|
||||
- XSS
|
||||
- SQL injection
|
||||
- prompt injection
|
||||
- DDoS
|
||||
- rate limit
|
||||
- throttle
|
||||
- circuit breaker
|
||||
- load balancer
|
||||
- reverse proxy
|
||||
- SSR
|
||||
- CSR
|
||||
- hydration
|
||||
- tree-shaking
|
||||
- bundle splitting
|
||||
- code splitting
|
||||
- hot reload
|
||||
- tombstone
|
||||
- soft delete
|
||||
- cascade delete
|
||||
- foreign key
|
||||
- composite index
|
||||
- covering index
|
||||
- OLTP
|
||||
- OLAP
|
||||
- sharding
|
||||
- replication lag
|
||||
- quorum
|
||||
- two-phase commit
|
||||
- saga
|
||||
- outbox pattern
|
||||
- inbox pattern
|
||||
- optimistic locking
|
||||
- pessimistic locking
|
||||
- thundering herd
|
||||
- cache stampede
|
||||
- bloom filter
|
||||
- consistent hashing
|
||||
- virtual DOM
|
||||
- reconciliation
|
||||
- closure
|
||||
- hoisting
|
||||
- tail call
|
||||
- GIL
|
||||
- zero-copy
|
||||
- mmap
|
||||
- cold start
|
||||
- warm start
|
||||
- blue-green deploy
|
||||
- canary deploy
|
||||
- feature flag
|
||||
- kill switch
|
||||
- dead letter queue
|
||||
- fan-out
|
||||
- fan-in
|
||||
- debounce
|
||||
- throttle (UI)
|
||||
- hydration mismatch
|
||||
- memory leak
|
||||
- GC pause
|
||||
- heap fragmentation
|
||||
- stack overflow
|
||||
- null pointer
|
||||
- dangling pointer
|
||||
- buffer overflow
|
||||
Curated jargon list lives at `$GSTACK_ROOT/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases.
|
||||
|
||||
|
||||
## Completeness Principle — Boil the Lake
|
||||
|
||||
+1
-78
@@ -544,84 +544,7 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i
|
||||
- User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section.
|
||||
- Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses.
|
||||
|
||||
Jargon list, gloss on first use if the term appears:
|
||||
- idempotent
|
||||
- idempotency
|
||||
- race condition
|
||||
- deadlock
|
||||
- cyclomatic complexity
|
||||
- N+1
|
||||
- N+1 query
|
||||
- backpressure
|
||||
- memoization
|
||||
- eventual consistency
|
||||
- CAP theorem
|
||||
- CORS
|
||||
- CSRF
|
||||
- XSS
|
||||
- SQL injection
|
||||
- prompt injection
|
||||
- DDoS
|
||||
- rate limit
|
||||
- throttle
|
||||
- circuit breaker
|
||||
- load balancer
|
||||
- reverse proxy
|
||||
- SSR
|
||||
- CSR
|
||||
- hydration
|
||||
- tree-shaking
|
||||
- bundle splitting
|
||||
- code splitting
|
||||
- hot reload
|
||||
- tombstone
|
||||
- soft delete
|
||||
- cascade delete
|
||||
- foreign key
|
||||
- composite index
|
||||
- covering index
|
||||
- OLTP
|
||||
- OLAP
|
||||
- sharding
|
||||
- replication lag
|
||||
- quorum
|
||||
- two-phase commit
|
||||
- saga
|
||||
- outbox pattern
|
||||
- inbox pattern
|
||||
- optimistic locking
|
||||
- pessimistic locking
|
||||
- thundering herd
|
||||
- cache stampede
|
||||
- bloom filter
|
||||
- consistent hashing
|
||||
- virtual DOM
|
||||
- reconciliation
|
||||
- closure
|
||||
- hoisting
|
||||
- tail call
|
||||
- GIL
|
||||
- zero-copy
|
||||
- mmap
|
||||
- cold start
|
||||
- warm start
|
||||
- blue-green deploy
|
||||
- canary deploy
|
||||
- feature flag
|
||||
- kill switch
|
||||
- dead letter queue
|
||||
- fan-out
|
||||
- fan-in
|
||||
- debounce
|
||||
- throttle (UI)
|
||||
- hydration mismatch
|
||||
- memory leak
|
||||
- GC pause
|
||||
- heap fragmentation
|
||||
- stack overflow
|
||||
- null pointer
|
||||
- dangling pointer
|
||||
- buffer overflow
|
||||
Curated jargon list lives at `$GSTACK_ROOT/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases.
|
||||
|
||||
|
||||
## Completeness Principle — Boil the Lake
|
||||
|
||||
+623
@@ -0,0 +1,623 @@
|
||||
{
|
||||
"tag": "v1.44.1",
|
||||
"capturedAt": "2026-05-26T03:29:32.568Z",
|
||||
"capturedFromCommit": "74bc8054",
|
||||
"capturedFromBranch": "garrytan/slim-skill-tokens",
|
||||
"totalSkills": 51,
|
||||
"totalCorpusBytes": 2915151,
|
||||
"estTotalCatalogTokens": 9319,
|
||||
"topHeaviest": [
|
||||
{
|
||||
"skill": "ship",
|
||||
"skillMdBytes": 163553,
|
||||
"skillMdLines": 3094,
|
||||
"estTokens": 40888,
|
||||
"tmplBytes": 48869,
|
||||
"descriptionLen": 557,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "plan-ceo-review",
|
||||
"skillMdBytes": 130891,
|
||||
"skillMdLines": 2224,
|
||||
"estTokens": 32723,
|
||||
"tmplBytes": 63393,
|
||||
"descriptionLen": 1326,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "office-hours",
|
||||
"skillMdBytes": 111088,
|
||||
"skillMdLines": 2090,
|
||||
"estTokens": 27772,
|
||||
"tmplBytes": 55466,
|
||||
"descriptionLen": 1579,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "plan-design-review",
|
||||
"skillMdBytes": 105592,
|
||||
"skillMdLines": 1944,
|
||||
"estTokens": 26398,
|
||||
"tmplBytes": 28624,
|
||||
"descriptionLen": 568,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "plan-devex-review",
|
||||
"skillMdBytes": 104571,
|
||||
"skillMdLines": 2145,
|
||||
"estTokens": 26143,
|
||||
"tmplBytes": 35680,
|
||||
"descriptionLen": 886,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "plan-eng-review",
|
||||
"skillMdBytes": 101409,
|
||||
"skillMdLines": 1788,
|
||||
"estTokens": 25352,
|
||||
"tmplBytes": 26234,
|
||||
"descriptionLen": 743,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "design-review",
|
||||
"skillMdBytes": 94055,
|
||||
"skillMdLines": 1960,
|
||||
"estTokens": 23514,
|
||||
"tmplBytes": 11674,
|
||||
"descriptionLen": 709,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "review",
|
||||
"skillMdBytes": 92443,
|
||||
"skillMdLines": 1789,
|
||||
"estTokens": 23111,
|
||||
"tmplBytes": 14099,
|
||||
"descriptionLen": 512,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "land-and-deploy",
|
||||
"skillMdBytes": 90281,
|
||||
"skillMdLines": 1883,
|
||||
"estTokens": 22570,
|
||||
"tmplBytes": 48624,
|
||||
"descriptionLen": 378,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "autoplan",
|
||||
"skillMdBytes": 89274,
|
||||
"skillMdLines": 1811,
|
||||
"estTokens": 22319,
|
||||
"tmplBytes": 45271,
|
||||
"descriptionLen": 857,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
}
|
||||
],
|
||||
"skills": {
|
||||
"autoplan": {
|
||||
"skill": "autoplan",
|
||||
"skillMdBytes": 89274,
|
||||
"skillMdLines": 1811,
|
||||
"estTokens": 22319,
|
||||
"tmplBytes": 45271,
|
||||
"descriptionLen": 857,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"benchmark": {
|
||||
"skill": "benchmark",
|
||||
"skillMdBytes": 32537,
|
||||
"skillMdLines": 728,
|
||||
"estTokens": 8134,
|
||||
"tmplBytes": 9378,
|
||||
"descriptionLen": 549,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"benchmark-models": {
|
||||
"skill": "benchmark-models",
|
||||
"skillMdBytes": 28606,
|
||||
"skillMdLines": 603,
|
||||
"estTokens": 7152,
|
||||
"tmplBytes": 6631,
|
||||
"descriptionLen": 740,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"browse": {
|
||||
"skill": "browse",
|
||||
"skillMdBytes": 47290,
|
||||
"skillMdLines": 911,
|
||||
"estTokens": 11823,
|
||||
"tmplBytes": 10805,
|
||||
"descriptionLen": 612,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"canary": {
|
||||
"skill": "canary",
|
||||
"skillMdBytes": 45502,
|
||||
"skillMdLines": 1017,
|
||||
"estTokens": 11376,
|
||||
"tmplBytes": 8033,
|
||||
"descriptionLen": 477,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"careful": {
|
||||
"skill": "careful",
|
||||
"skillMdBytes": 2531,
|
||||
"skillMdLines": 64,
|
||||
"estTokens": 633,
|
||||
"tmplBytes": 2435,
|
||||
"descriptionLen": 625,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"codex": {
|
||||
"skill": "codex",
|
||||
"skillMdBytes": 78018,
|
||||
"skillMdLines": 1545,
|
||||
"estTokens": 19505,
|
||||
"tmplBytes": 34143,
|
||||
"descriptionLen": 626,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"context-restore": {
|
||||
"skill": "context-restore",
|
||||
"skillMdBytes": 39894,
|
||||
"skillMdLines": 875,
|
||||
"estTokens": 9974,
|
||||
"tmplBytes": 5255,
|
||||
"descriptionLen": 636,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"context-save": {
|
||||
"skill": "context-save",
|
||||
"skillMdBytes": 44091,
|
||||
"skillMdLines": 994,
|
||||
"estTokens": 11023,
|
||||
"tmplBytes": 9293,
|
||||
"descriptionLen": 562,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"cso": {
|
||||
"skill": "cso",
|
||||
"skillMdBytes": 75797,
|
||||
"skillMdLines": 1477,
|
||||
"estTokens": 18949,
|
||||
"tmplBytes": 35158,
|
||||
"descriptionLen": 774,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-consultation": {
|
||||
"skill": "design-consultation",
|
||||
"skillMdBytes": 76963,
|
||||
"skillMdLines": 1578,
|
||||
"estTokens": 19241,
|
||||
"tmplBytes": 25899,
|
||||
"descriptionLen": 1201,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-html": {
|
||||
"skill": "design-html",
|
||||
"skillMdBytes": 64951,
|
||||
"skillMdLines": 1476,
|
||||
"estTokens": 16238,
|
||||
"tmplBytes": 22567,
|
||||
"descriptionLen": 870,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-review": {
|
||||
"skill": "design-review",
|
||||
"skillMdBytes": 94055,
|
||||
"skillMdLines": 1960,
|
||||
"estTokens": 23514,
|
||||
"tmplBytes": 11674,
|
||||
"descriptionLen": 709,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-shotgun": {
|
||||
"skill": "design-shotgun",
|
||||
"skillMdBytes": 60571,
|
||||
"skillMdLines": 1327,
|
||||
"estTokens": 15143,
|
||||
"tmplBytes": 13331,
|
||||
"descriptionLen": 1057,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"devex-review": {
|
||||
"skill": "devex-review",
|
||||
"skillMdBytes": 62815,
|
||||
"skillMdLines": 1259,
|
||||
"estTokens": 15704,
|
||||
"tmplBytes": 7984,
|
||||
"descriptionLen": 827,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"document-generate": {
|
||||
"skill": "document-generate",
|
||||
"skillMdBytes": 51386,
|
||||
"skillMdLines": 1204,
|
||||
"estTokens": 12847,
|
||||
"tmplBytes": 15093,
|
||||
"descriptionLen": 671,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"document-release": {
|
||||
"skill": "document-release",
|
||||
"skillMdBytes": 56652,
|
||||
"skillMdLines": 1262,
|
||||
"estTokens": 14163,
|
||||
"tmplBytes": 20362,
|
||||
"descriptionLen": 707,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"freeze": {
|
||||
"skill": "freeze",
|
||||
"skillMdBytes": 3134,
|
||||
"skillMdLines": 88,
|
||||
"estTokens": 784,
|
||||
"tmplBytes": 3038,
|
||||
"descriptionLen": 761,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"gstack-upgrade": {
|
||||
"skill": "gstack-upgrade",
|
||||
"skillMdBytes": 10794,
|
||||
"skillMdLines": 280,
|
||||
"estTokens": 2699,
|
||||
"tmplBytes": 10667,
|
||||
"descriptionLen": 439,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"guard": {
|
||||
"skill": "guard",
|
||||
"skillMdBytes": 3277,
|
||||
"skillMdLines": 88,
|
||||
"estTokens": 819,
|
||||
"tmplBytes": 3181,
|
||||
"descriptionLen": 968,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"health": {
|
||||
"skill": "health",
|
||||
"skillMdBytes": 46313,
|
||||
"skillMdLines": 1041,
|
||||
"estTokens": 11578,
|
||||
"tmplBytes": 11617,
|
||||
"descriptionLen": 463,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"investigate": {
|
||||
"skill": "investigate",
|
||||
"skillMdBytes": 48810,
|
||||
"skillMdLines": 1039,
|
||||
"estTokens": 12203,
|
||||
"tmplBytes": 11561,
|
||||
"descriptionLen": 1811,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-clean": {
|
||||
"skill": "ios-clean",
|
||||
"skillMdBytes": 39447,
|
||||
"skillMdLines": 840,
|
||||
"estTokens": 9862,
|
||||
"tmplBytes": 3851,
|
||||
"descriptionLen": 761,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-design-review": {
|
||||
"skill": "ios-design-review",
|
||||
"skillMdBytes": 40037,
|
||||
"skillMdLines": 841,
|
||||
"estTokens": 10009,
|
||||
"tmplBytes": 4417,
|
||||
"descriptionLen": 836,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-fix": {
|
||||
"skill": "ios-fix",
|
||||
"skillMdBytes": 39164,
|
||||
"skillMdLines": 837,
|
||||
"estTokens": 9791,
|
||||
"tmplBytes": 3574,
|
||||
"descriptionLen": 767,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-qa": {
|
||||
"skill": "ios-qa",
|
||||
"skillMdBytes": 45677,
|
||||
"skillMdLines": 957,
|
||||
"estTokens": 11419,
|
||||
"tmplBytes": 10090,
|
||||
"descriptionLen": 875,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-sync": {
|
||||
"skill": "ios-sync",
|
||||
"skillMdBytes": 39137,
|
||||
"skillMdLines": 831,
|
||||
"estTokens": 9784,
|
||||
"tmplBytes": 3544,
|
||||
"descriptionLen": 727,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"land-and-deploy": {
|
||||
"skill": "land-and-deploy",
|
||||
"skillMdBytes": 90281,
|
||||
"skillMdLines": 1883,
|
||||
"estTokens": 22570,
|
||||
"tmplBytes": 48624,
|
||||
"descriptionLen": 378,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"landing-report": {
|
||||
"skill": "landing-report",
|
||||
"skillMdBytes": 42382,
|
||||
"skillMdLines": 901,
|
||||
"estTokens": 10596,
|
||||
"tmplBytes": 6806,
|
||||
"descriptionLen": 512,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"learn": {
|
||||
"skill": "learn",
|
||||
"skillMdBytes": 40119,
|
||||
"skillMdLines": 918,
|
||||
"estTokens": 10030,
|
||||
"tmplBytes": 5594,
|
||||
"descriptionLen": 460,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"make-pdf": {
|
||||
"skill": "make-pdf",
|
||||
"skillMdBytes": 28721,
|
||||
"skillMdLines": 644,
|
||||
"estTokens": 7180,
|
||||
"tmplBytes": 5106,
|
||||
"descriptionLen": 698,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"office-hours": {
|
||||
"skill": "office-hours",
|
||||
"skillMdBytes": 111088,
|
||||
"skillMdLines": 2090,
|
||||
"estTokens": 27772,
|
||||
"tmplBytes": 55466,
|
||||
"descriptionLen": 1579,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"open-gstack-browser": {
|
||||
"skill": "open-gstack-browser",
|
||||
"skillMdBytes": 44529,
|
||||
"skillMdLines": 981,
|
||||
"estTokens": 11132,
|
||||
"tmplBytes": 7702,
|
||||
"descriptionLen": 586,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"pair-agent": {
|
||||
"skill": "pair-agent",
|
||||
"skillMdBytes": 45339,
|
||||
"skillMdLines": 1036,
|
||||
"estTokens": 11335,
|
||||
"tmplBytes": 8548,
|
||||
"descriptionLen": 709,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"plan-ceo-review": {
|
||||
"skill": "plan-ceo-review",
|
||||
"skillMdBytes": 130891,
|
||||
"skillMdLines": 2224,
|
||||
"estTokens": 32723,
|
||||
"tmplBytes": 63393,
|
||||
"descriptionLen": 1326,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-design-review": {
|
||||
"skill": "plan-design-review",
|
||||
"skillMdBytes": 105592,
|
||||
"skillMdLines": 1944,
|
||||
"estTokens": 26398,
|
||||
"tmplBytes": 28624,
|
||||
"descriptionLen": 568,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-devex-review": {
|
||||
"skill": "plan-devex-review",
|
||||
"skillMdBytes": 104571,
|
||||
"skillMdLines": 2145,
|
||||
"estTokens": 26143,
|
||||
"tmplBytes": 35680,
|
||||
"descriptionLen": 886,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-eng-review": {
|
||||
"skill": "plan-eng-review",
|
||||
"skillMdBytes": 101409,
|
||||
"skillMdLines": 1788,
|
||||
"estTokens": 25352,
|
||||
"tmplBytes": 26234,
|
||||
"descriptionLen": 743,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-tune": {
|
||||
"skill": "plan-tune",
|
||||
"skillMdBytes": 50123,
|
||||
"skillMdLines": 1105,
|
||||
"estTokens": 12531,
|
||||
"tmplBytes": 15586,
|
||||
"descriptionLen": 997,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"qa": {
|
||||
"skill": "qa",
|
||||
"skillMdBytes": 72267,
|
||||
"skillMdLines": 1648,
|
||||
"estTokens": 18067,
|
||||
"tmplBytes": 12701,
|
||||
"descriptionLen": 814,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"qa-only": {
|
||||
"skill": "qa-only",
|
||||
"skillMdBytes": 54819,
|
||||
"skillMdLines": 1220,
|
||||
"estTokens": 13705,
|
||||
"tmplBytes": 3851,
|
||||
"descriptionLen": 605,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"retro": {
|
||||
"skill": "retro",
|
||||
"skillMdBytes": 81286,
|
||||
"skillMdLines": 1777,
|
||||
"estTokens": 20322,
|
||||
"tmplBytes": 42427,
|
||||
"descriptionLen": 979,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"review": {
|
||||
"skill": "review",
|
||||
"skillMdBytes": 92443,
|
||||
"skillMdLines": 1789,
|
||||
"estTokens": 23111,
|
||||
"tmplBytes": 14099,
|
||||
"descriptionLen": 512,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"scrape": {
|
||||
"skill": "scrape",
|
||||
"skillMdBytes": 42040,
|
||||
"skillMdLines": 914,
|
||||
"estTokens": 10510,
|
||||
"tmplBytes": 5220,
|
||||
"descriptionLen": 519,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"setup-browser-cookies": {
|
||||
"skill": "setup-browser-cookies",
|
||||
"skillMdBytes": 25886,
|
||||
"skillMdLines": 577,
|
||||
"estTokens": 6472,
|
||||
"tmplBytes": 2724,
|
||||
"descriptionLen": 433,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"setup-deploy": {
|
||||
"skill": "setup-deploy",
|
||||
"skillMdBytes": 42326,
|
||||
"skillMdLines": 946,
|
||||
"estTokens": 10582,
|
||||
"tmplBytes": 7780,
|
||||
"descriptionLen": 564,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"setup-gbrain": {
|
||||
"skill": "setup-gbrain",
|
||||
"skillMdBytes": 76791,
|
||||
"skillMdLines": 1733,
|
||||
"estTokens": 19198,
|
||||
"tmplBytes": 42245,
|
||||
"descriptionLen": 512,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ship": {
|
||||
"skill": "ship",
|
||||
"skillMdBytes": 163553,
|
||||
"skillMdLines": 3094,
|
||||
"estTokens": 40888,
|
||||
"tmplBytes": 48869,
|
||||
"descriptionLen": 557,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"skillify": {
|
||||
"skill": "skillify",
|
||||
"skillMdBytes": 51935,
|
||||
"skillMdLines": 1196,
|
||||
"estTokens": 12984,
|
||||
"tmplBytes": 15107,
|
||||
"descriptionLen": 571,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"sync-gbrain": {
|
||||
"skill": "sync-gbrain",
|
||||
"skillMdBytes": 48555,
|
||||
"skillMdLines": 1057,
|
||||
"estTokens": 12139,
|
||||
"tmplBytes": 13996,
|
||||
"descriptionLen": 510,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"unfreeze": {
|
||||
"skill": "unfreeze",
|
||||
"skillMdBytes": 1482,
|
||||
"skillMdLines": 46,
|
||||
"estTokens": 371,
|
||||
"tmplBytes": 1386,
|
||||
"descriptionLen": 350,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
}
|
||||
}
|
||||
}
|
||||
+623
@@ -0,0 +1,623 @@
|
||||
{
|
||||
"tag": "v1.46.0.0",
|
||||
"capturedAt": "2026-05-26T04:17:57.247Z",
|
||||
"capturedFromCommit": "2aff29e9",
|
||||
"capturedFromBranch": "garrytan/slim-skill-tokens",
|
||||
"totalSkills": 51,
|
||||
"totalCorpusBytes": 2882468,
|
||||
"estTotalCatalogTokens": 4045,
|
||||
"topHeaviest": [
|
||||
{
|
||||
"skill": "ship",
|
||||
"skillMdBytes": 162702,
|
||||
"skillMdLines": 3020,
|
||||
"estTokens": 40676,
|
||||
"tmplBytes": 48869,
|
||||
"descriptionLen": 291,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "plan-ceo-review",
|
||||
"skillMdBytes": 130034,
|
||||
"skillMdLines": 2151,
|
||||
"estTokens": 32509,
|
||||
"tmplBytes": 63393,
|
||||
"descriptionLen": 794,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "office-hours",
|
||||
"skillMdBytes": 110388,
|
||||
"skillMdLines": 2020,
|
||||
"estTokens": 27597,
|
||||
"tmplBytes": 55466,
|
||||
"descriptionLen": 860,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "plan-design-review",
|
||||
"skillMdBytes": 105401,
|
||||
"skillMdLines": 1882,
|
||||
"estTokens": 26350,
|
||||
"tmplBytes": 28624,
|
||||
"descriptionLen": 218,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "plan-devex-review",
|
||||
"skillMdBytes": 103713,
|
||||
"skillMdLines": 2073,
|
||||
"estTokens": 25928,
|
||||
"tmplBytes": 35680,
|
||||
"descriptionLen": 250,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "plan-eng-review",
|
||||
"skillMdBytes": 100555,
|
||||
"skillMdLines": 1716,
|
||||
"estTokens": 25139,
|
||||
"tmplBytes": 26234,
|
||||
"descriptionLen": 231,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
{
|
||||
"skill": "design-review",
|
||||
"skillMdBytes": 93200,
|
||||
"skillMdLines": 1886,
|
||||
"estTokens": 23300,
|
||||
"tmplBytes": 11674,
|
||||
"descriptionLen": 304,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "review",
|
||||
"skillMdBytes": 91594,
|
||||
"skillMdLines": 1716,
|
||||
"estTokens": 22899,
|
||||
"tmplBytes": 14099,
|
||||
"descriptionLen": 205,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "land-and-deploy",
|
||||
"skillMdBytes": 89432,
|
||||
"skillMdLines": 1810,
|
||||
"estTokens": 22358,
|
||||
"tmplBytes": 48624,
|
||||
"descriptionLen": 160,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
{
|
||||
"skill": "autoplan",
|
||||
"skillMdBytes": 88416,
|
||||
"skillMdLines": 1738,
|
||||
"estTokens": 22104,
|
||||
"tmplBytes": 45271,
|
||||
"descriptionLen": 366,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
}
|
||||
],
|
||||
"skills": {
|
||||
"autoplan": {
|
||||
"skill": "autoplan",
|
||||
"skillMdBytes": 88416,
|
||||
"skillMdLines": 1738,
|
||||
"estTokens": 22104,
|
||||
"tmplBytes": 45271,
|
||||
"descriptionLen": 366,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"benchmark": {
|
||||
"skill": "benchmark",
|
||||
"skillMdBytes": 32556,
|
||||
"skillMdLines": 733,
|
||||
"estTokens": 8139,
|
||||
"tmplBytes": 9378,
|
||||
"descriptionLen": 213,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"benchmark-models": {
|
||||
"skill": "benchmark-models",
|
||||
"skillMdBytes": 28623,
|
||||
"skillMdLines": 608,
|
||||
"estTokens": 7156,
|
||||
"tmplBytes": 6631,
|
||||
"descriptionLen": 217,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"browse": {
|
||||
"skill": "browse",
|
||||
"skillMdBytes": 47308,
|
||||
"skillMdLines": 915,
|
||||
"estTokens": 11827,
|
||||
"tmplBytes": 10805,
|
||||
"descriptionLen": 181,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"canary": {
|
||||
"skill": "canary",
|
||||
"skillMdBytes": 44651,
|
||||
"skillMdLines": 944,
|
||||
"estTokens": 11163,
|
||||
"tmplBytes": 8033,
|
||||
"descriptionLen": 180,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"careful": {
|
||||
"skill": "careful",
|
||||
"skillMdBytes": 2551,
|
||||
"skillMdLines": 68,
|
||||
"estTokens": 638,
|
||||
"tmplBytes": 2435,
|
||||
"descriptionLen": 315,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"codex": {
|
||||
"skill": "codex",
|
||||
"skillMdBytes": 77166,
|
||||
"skillMdLines": 1473,
|
||||
"estTokens": 19292,
|
||||
"tmplBytes": 34143,
|
||||
"descriptionLen": 187,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"context-restore": {
|
||||
"skill": "context-restore",
|
||||
"skillMdBytes": 39039,
|
||||
"skillMdLines": 802,
|
||||
"estTokens": 9760,
|
||||
"tmplBytes": 5255,
|
||||
"descriptionLen": 238,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"context-save": {
|
||||
"skill": "context-save",
|
||||
"skillMdBytes": 43236,
|
||||
"skillMdLines": 920,
|
||||
"estTokens": 10809,
|
||||
"tmplBytes": 9293,
|
||||
"descriptionLen": 168,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"cso": {
|
||||
"skill": "cso",
|
||||
"skillMdBytes": 74943,
|
||||
"skillMdLines": 1405,
|
||||
"estTokens": 18736,
|
||||
"tmplBytes": 35158,
|
||||
"descriptionLen": 196,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-consultation": {
|
||||
"skill": "design-consultation",
|
||||
"skillMdBytes": 76768,
|
||||
"skillMdLines": 1515,
|
||||
"estTokens": 19192,
|
||||
"tmplBytes": 25899,
|
||||
"descriptionLen": 888,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-html": {
|
||||
"skill": "design-html",
|
||||
"skillMdBytes": 64093,
|
||||
"skillMdLines": 1403,
|
||||
"estTokens": 16023,
|
||||
"tmplBytes": 22567,
|
||||
"descriptionLen": 233,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-review": {
|
||||
"skill": "design-review",
|
||||
"skillMdBytes": 93200,
|
||||
"skillMdLines": 1886,
|
||||
"estTokens": 23300,
|
||||
"tmplBytes": 11674,
|
||||
"descriptionLen": 304,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"design-shotgun": {
|
||||
"skill": "design-shotgun",
|
||||
"skillMdBytes": 60382,
|
||||
"skillMdLines": 1265,
|
||||
"estTokens": 15096,
|
||||
"tmplBytes": 13331,
|
||||
"descriptionLen": 786,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"devex-review": {
|
||||
"skill": "devex-review",
|
||||
"skillMdBytes": 61959,
|
||||
"skillMdLines": 1187,
|
||||
"estTokens": 15490,
|
||||
"tmplBytes": 7984,
|
||||
"descriptionLen": 201,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"document-generate": {
|
||||
"skill": "document-generate",
|
||||
"skillMdBytes": 50533,
|
||||
"skillMdLines": 1130,
|
||||
"estTokens": 12633,
|
||||
"tmplBytes": 15093,
|
||||
"descriptionLen": 334,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"document-release": {
|
||||
"skill": "document-release",
|
||||
"skillMdBytes": 55797,
|
||||
"skillMdLines": 1189,
|
||||
"estTokens": 13949,
|
||||
"tmplBytes": 20362,
|
||||
"descriptionLen": 192,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"freeze": {
|
||||
"skill": "freeze",
|
||||
"skillMdBytes": 3154,
|
||||
"skillMdLines": 92,
|
||||
"estTokens": 789,
|
||||
"tmplBytes": 3038,
|
||||
"descriptionLen": 503,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"gstack-upgrade": {
|
||||
"skill": "gstack-upgrade",
|
||||
"skillMdBytes": 10817,
|
||||
"skillMdLines": 285,
|
||||
"estTokens": 2704,
|
||||
"tmplBytes": 10667,
|
||||
"descriptionLen": 163,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"guard": {
|
||||
"skill": "guard",
|
||||
"skillMdBytes": 3297,
|
||||
"skillMdLines": 91,
|
||||
"estTokens": 824,
|
||||
"tmplBytes": 3181,
|
||||
"descriptionLen": 686,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"health": {
|
||||
"skill": "health",
|
||||
"skillMdBytes": 45462,
|
||||
"skillMdLines": 968,
|
||||
"estTokens": 11366,
|
||||
"tmplBytes": 11617,
|
||||
"descriptionLen": 184,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"investigate": {
|
||||
"skill": "investigate",
|
||||
"skillMdBytes": 47955,
|
||||
"skillMdLines": 966,
|
||||
"estTokens": 11989,
|
||||
"tmplBytes": 11561,
|
||||
"descriptionLen": 1379,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-clean": {
|
||||
"skill": "ios-clean",
|
||||
"skillMdBytes": 38591,
|
||||
"skillMdLines": 767,
|
||||
"estTokens": 9648,
|
||||
"tmplBytes": 3851,
|
||||
"descriptionLen": 252,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-design-review": {
|
||||
"skill": "ios-design-review",
|
||||
"skillMdBytes": 39177,
|
||||
"skillMdLines": 769,
|
||||
"estTokens": 9794,
|
||||
"tmplBytes": 4417,
|
||||
"descriptionLen": 209,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-fix": {
|
||||
"skill": "ios-fix",
|
||||
"skillMdBytes": 38306,
|
||||
"skillMdLines": 765,
|
||||
"estTokens": 9577,
|
||||
"tmplBytes": 3574,
|
||||
"descriptionLen": 187,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-qa": {
|
||||
"skill": "ios-qa",
|
||||
"skillMdBytes": 44817,
|
||||
"skillMdLines": 885,
|
||||
"estTokens": 11204,
|
||||
"tmplBytes": 10090,
|
||||
"descriptionLen": 223,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ios-sync": {
|
||||
"skill": "ios-sync",
|
||||
"skillMdBytes": 38283,
|
||||
"skillMdLines": 758,
|
||||
"estTokens": 9571,
|
||||
"tmplBytes": 3544,
|
||||
"descriptionLen": 269,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"land-and-deploy": {
|
||||
"skill": "land-and-deploy",
|
||||
"skillMdBytes": 89432,
|
||||
"skillMdLines": 1810,
|
||||
"estTokens": 22358,
|
||||
"tmplBytes": 48624,
|
||||
"descriptionLen": 160,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"landing-report": {
|
||||
"skill": "landing-report",
|
||||
"skillMdBytes": 41531,
|
||||
"skillMdLines": 828,
|
||||
"estTokens": 10383,
|
||||
"tmplBytes": 6806,
|
||||
"descriptionLen": 195,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"learn": {
|
||||
"skill": "learn",
|
||||
"skillMdBytes": 39268,
|
||||
"skillMdLines": 845,
|
||||
"estTokens": 9817,
|
||||
"tmplBytes": 5594,
|
||||
"descriptionLen": 178,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"make-pdf": {
|
||||
"skill": "make-pdf",
|
||||
"skillMdBytes": 28740,
|
||||
"skillMdLines": 649,
|
||||
"estTokens": 7185,
|
||||
"tmplBytes": 5106,
|
||||
"descriptionLen": 177,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"office-hours": {
|
||||
"skill": "office-hours",
|
||||
"skillMdBytes": 110388,
|
||||
"skillMdLines": 2020,
|
||||
"estTokens": 27597,
|
||||
"tmplBytes": 55466,
|
||||
"descriptionLen": 860,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"open-gstack-browser": {
|
||||
"skill": "open-gstack-browser",
|
||||
"skillMdBytes": 43677,
|
||||
"skillMdLines": 908,
|
||||
"estTokens": 10919,
|
||||
"tmplBytes": 7702,
|
||||
"descriptionLen": 204,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"pair-agent": {
|
||||
"skill": "pair-agent",
|
||||
"skillMdBytes": 44485,
|
||||
"skillMdLines": 964,
|
||||
"estTokens": 11121,
|
||||
"tmplBytes": 8548,
|
||||
"descriptionLen": 167,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"plan-ceo-review": {
|
||||
"skill": "plan-ceo-review",
|
||||
"skillMdBytes": 130034,
|
||||
"skillMdLines": 2151,
|
||||
"estTokens": 32509,
|
||||
"tmplBytes": 63393,
|
||||
"descriptionLen": 794,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-design-review": {
|
||||
"skill": "plan-design-review",
|
||||
"skillMdBytes": 105401,
|
||||
"skillMdLines": 1882,
|
||||
"estTokens": 26350,
|
||||
"tmplBytes": 28624,
|
||||
"descriptionLen": 218,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-devex-review": {
|
||||
"skill": "plan-devex-review",
|
||||
"skillMdBytes": 103713,
|
||||
"skillMdLines": 2073,
|
||||
"estTokens": 25928,
|
||||
"tmplBytes": 35680,
|
||||
"descriptionLen": 250,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-eng-review": {
|
||||
"skill": "plan-eng-review",
|
||||
"skillMdBytes": 100555,
|
||||
"skillMdLines": 1716,
|
||||
"estTokens": 25139,
|
||||
"tmplBytes": 26234,
|
||||
"descriptionLen": 231,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"plan-tune": {
|
||||
"skill": "plan-tune",
|
||||
"skillMdBytes": 49263,
|
||||
"skillMdLines": 1031,
|
||||
"estTokens": 12316,
|
||||
"tmplBytes": 15586,
|
||||
"descriptionLen": 325,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"qa": {
|
||||
"skill": "qa",
|
||||
"skillMdBytes": 71409,
|
||||
"skillMdLines": 1576,
|
||||
"estTokens": 17852,
|
||||
"tmplBytes": 12701,
|
||||
"descriptionLen": 218,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"qa-only": {
|
||||
"skill": "qa-only",
|
||||
"skillMdBytes": 53967,
|
||||
"skillMdLines": 1148,
|
||||
"estTokens": 13492,
|
||||
"tmplBytes": 3851,
|
||||
"descriptionLen": 165,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"retro": {
|
||||
"skill": "retro",
|
||||
"skillMdBytes": 80435,
|
||||
"skillMdLines": 1704,
|
||||
"estTokens": 20109,
|
||||
"tmplBytes": 42427,
|
||||
"descriptionLen": 648,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"review": {
|
||||
"skill": "review",
|
||||
"skillMdBytes": 91594,
|
||||
"skillMdLines": 1716,
|
||||
"estTokens": 22899,
|
||||
"tmplBytes": 14099,
|
||||
"descriptionLen": 205,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"scrape": {
|
||||
"skill": "scrape",
|
||||
"skillMdBytes": 41187,
|
||||
"skillMdLines": 841,
|
||||
"estTokens": 10297,
|
||||
"tmplBytes": 5220,
|
||||
"descriptionLen": 167,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"setup-browser-cookies": {
|
||||
"skill": "setup-browser-cookies",
|
||||
"skillMdBytes": 25908,
|
||||
"skillMdLines": 580,
|
||||
"estTokens": 6477,
|
||||
"tmplBytes": 2724,
|
||||
"descriptionLen": 222,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"setup-deploy": {
|
||||
"skill": "setup-deploy",
|
||||
"skillMdBytes": 41473,
|
||||
"skillMdLines": 873,
|
||||
"estTokens": 10368,
|
||||
"tmplBytes": 7780,
|
||||
"descriptionLen": 197,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"setup-gbrain": {
|
||||
"skill": "setup-gbrain",
|
||||
"skillMdBytes": 75940,
|
||||
"skillMdLines": 1658,
|
||||
"estTokens": 18985,
|
||||
"tmplBytes": 42245,
|
||||
"descriptionLen": 323,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"ship": {
|
||||
"skill": "ship",
|
||||
"skillMdBytes": 162702,
|
||||
"skillMdLines": 3020,
|
||||
"estTokens": 40676,
|
||||
"tmplBytes": 48869,
|
||||
"descriptionLen": 291,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": true
|
||||
},
|
||||
"skillify": {
|
||||
"skill": "skillify",
|
||||
"skillMdBytes": 51080,
|
||||
"skillMdLines": 1122,
|
||||
"estTokens": 12770,
|
||||
"tmplBytes": 15107,
|
||||
"descriptionLen": 233,
|
||||
"hasGateEval": true,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"sync-gbrain": {
|
||||
"skill": "sync-gbrain",
|
||||
"skillMdBytes": 47702,
|
||||
"skillMdLines": 982,
|
||||
"estTokens": 11926,
|
||||
"tmplBytes": 13996,
|
||||
"descriptionLen": 299,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
},
|
||||
"unfreeze": {
|
||||
"skill": "unfreeze",
|
||||
"skillMdBytes": 1504,
|
||||
"skillMdLines": 49,
|
||||
"estTokens": 376,
|
||||
"tmplBytes": 1386,
|
||||
"descriptionLen": 199,
|
||||
"hasGateEval": false,
|
||||
"hasPeriodicEval": false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,159 @@
|
||||
/**
|
||||
* Idempotency test for gen-skill-docs (regression for v1.45.0.0 timestamp flap).
|
||||
*
|
||||
* Running `bun run gen:skill-docs` twice in a row must produce a no-op on
|
||||
* the second run: every output file is byte-identical to itself. Without
|
||||
* this gate, CI freshness checks flap whenever someone introduces a
|
||||
* timestamp, a random seed, or any other non-deterministic field into a
|
||||
* generated artifact.
|
||||
*
|
||||
* v1.45.0.0 shipped with a `generated_at` ISO timestamp in
|
||||
* scripts/proactive-suggestions.json that updated every run. CI freshness
|
||||
* checks failed because the committed file's timestamp never matched the
|
||||
* latest gen. Fixed in 43e18af4 — this test pins the contract going forward.
|
||||
*
|
||||
* The test pays a small cost (~2 gen-skill-docs invocations, ~3s total) but
|
||||
* catches a class of bugs that's invisible until CI fails.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
/** Files that gen-skill-docs writes and that must be byte-stable across runs. */
|
||||
const STABLE_OUTPUTS = [
|
||||
'scripts/proactive-suggestions.json',
|
||||
'SKILL.md',
|
||||
'ship/SKILL.md',
|
||||
'plan-ceo-review/SKILL.md',
|
||||
'office-hours/SKILL.md',
|
||||
'gstack/llms.txt',
|
||||
];
|
||||
|
||||
/**
 * Sampled outputs for the `--host all` run. That run touches .agents/,
 * .cursor/, .factory/, .gbrain/, .hermes/, .kiro/, .openclaw/, .opencode/,
 * .slate/ — picking one canonical file per host catches per-host
 * non-determinism without paying the cost of snapshotting hundreds of files.
 * NOTE: the list below currently samples only .agents, .cursor, .factory and
 * .gbrain; the remaining hosts are not covered until a file is added for them.
 */
|
||||
const STABLE_HOST_ALL_OUTPUTS = [
|
||||
'scripts/proactive-suggestions.json',
|
||||
'SKILL.md',
|
||||
'ship/SKILL.md',
|
||||
'.agents/skills/gstack-ship/SKILL.md',
|
||||
'.cursor/skills/gstack-ship/SKILL.md',
|
||||
'.factory/skills/gstack-ship/SKILL.md',
|
||||
'.gbrain/skills/gstack-ship/SKILL.md',
|
||||
];
|
||||
|
||||
/**
 * Run `bun run gen:skill-docs` synchronously from the repo root.
 *
 * @param extraArgs Extra CLI arguments appended after the script name
 *                  (for example `['--host', 'all']`).
 * @returns `exitCode` — the child's exit status, or -1 when spawnSync reports
 *          no status (spawn failure, timeout kill, or termination by signal);
 *          `stderr` — the captured stderr text, followed by a note describing
 *          the spawn error and/or terminating signal when either is present,
 *          so a -1 exit code is diagnosable instead of silent.
 */
function runGen(extraArgs: string[] = []): { exitCode: number; stderr: string } {
  const result = spawnSync('bun', ['run', 'gen:skill-docs', ...extraArgs], {
    cwd: REPO_ROOT,
    stdio: ['ignore', 'pipe', 'pipe'],
    timeout: 120_000,
  });

  const parts: string[] = [];
  const captured = result.stderr?.toString() ?? '';
  if (captured) parts.push(captured);
  // spawnSync does not throw on ENOENT/timeout; it reports them on the result.
  if (result.error) parts.push(`spawn error: ${result.error.message}`);
  if (result.signal) parts.push(`terminated by signal: ${result.signal}`);

  return {
    exitCode: result.status ?? -1,
    stderr: parts.join('\n'),
  };
}
|
||||
|
||||
/**
 * Capture the current text of each listed repo-relative file.
 *
 * Files that do not exist on disk are omitted from the result rather than
 * recorded as empty.
 *
 * @param files Repo-relative paths to read (defaults to STABLE_OUTPUTS).
 * @returns Map from repo-relative path to the file's UTF-8 contents.
 */
function snapshot(files: string[] = STABLE_OUTPUTS): Map<string, string> {
  const contents = new Map<string, string>();
  files.forEach((relPath) => {
    const absPath = path.join(REPO_ROOT, relPath);
    if (!fs.existsSync(absPath)) return;
    contents.set(relPath, fs.readFileSync(absPath, 'utf-8'));
  });
  return contents;
}
|
||||
|
||||
describe('gen-skill-docs idempotency', () => {
  /**
   * Names of files from `earlier` whose contents differ in `later`
   * (a file missing from `later` counts as changed).
   */
  const changedBetween = (
    earlier: Map<string, string>,
    later: Map<string, string>,
  ): string[] => [...earlier.keys()].filter(name => later.get(name) !== earlier.get(name));

  test('two consecutive runs produce byte-identical outputs (no flapping fields)', () => {
    expect(runGen().exitCode).toBe(0);
    const baseline = snapshot();
    expect(baseline.size).toBeGreaterThan(0);

    expect(runGen().exitCode).toBe(0);
    const rerun = snapshot();

    // Byte-for-byte comparison of every stable output.
    const flapping = changedBetween(baseline, rerun);
    if (flapping.length === 0) return;

    const listing = flapping.map(f => ` - ${f}`).join('\n');
    throw new Error(
      `${flapping.length} file(s) changed between two consecutive gen-skill-docs runs (flapping):\n` +
        listing +
        `\nLikely cause: a non-deterministic field (timestamp, random ID, ` +
        `filesystem-iteration order) leaked into the generated output. CI freshness ` +
        `checks (git diff --exit-code) will fail unpredictably until this is fixed.`,
    );
  }, 180_000); // ~2 min budget for two gen runs

  test('--dry-run after a fresh gen reports zero stale files', () => {
    // Relies on the working tree having just been regenerated by the test above.
    // A non-deterministic field would make this dry-run report STALE lines.
    const dryRun = spawnSync('bun', ['run', 'gen:skill-docs', '--dry-run'], {
      cwd: REPO_ROOT,
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 60_000,
    });
    expect(dryRun.status).toBe(0);

    // Each line prefixed with STALE: names a file that would change.
    const output = dryRun.stdout?.toString() ?? '';
    const staleLines = output.split('\n').filter(line => line.startsWith('STALE:'));
    if (staleLines.length === 0) return;

    const listing = staleLines.map(l => ` ${l}`).join('\n');
    throw new Error(
      `--dry-run reports ${staleLines.length} stale file(s) after a fresh gen:\n` +
        listing +
        `\nRun \`bun run gen:skill-docs\` and commit the result.`,
    );
  }, 90_000);

  test('--host all idempotency: every host output is byte-stable across two runs', () => {
    // Gap A: the default test above runs the Claude host only. Other hosts
    // write to their own output paths and could carry their own
    // non-deterministic fields, so the same two-run comparison is repeated
    // with --host all against the host-all sample list.
    const hostAll = ['--host', 'all'];

    expect(runGen(hostAll).exitCode).toBe(0);
    const baseline = snapshot(STABLE_HOST_ALL_OUTPUTS);
    expect(baseline.size).toBeGreaterThan(0);

    expect(runGen(hostAll).exitCode).toBe(0);
    const rerun = snapshot(STABLE_HOST_ALL_OUTPUTS);

    const flapping = changedBetween(baseline, rerun);
    if (flapping.length === 0) return;

    const listing = flapping.map(f => ` - ${f}`).join('\n');
    throw new Error(
      `${flapping.length} file(s) changed between two consecutive --host all gen runs:\n` +
        listing +
        `\nLikely cause: a non-deterministic field leaked into a non-Claude host adapter ` +
        `(scripts/host-adapters/*.ts). CI freshness checks for that host will flap.`,
    );
  }, 300_000); // ~5 min budget for two host-all runs
});
|
||||
@@ -439,3 +439,120 @@ describe('gstack-developer-profile errors', () => {
|
||||
expect(r.stderr).toContain('unknown subcommand');
|
||||
});
|
||||
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// --log-session — the #1671 fix: writer that matches the reader.
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
describe('gstack-developer-profile --log-session (#1671 fix)', () => {
  /** Serialize `entry` to JSON and pass it to the --log-session subcommand. */
  const logSession = (entry: Record<string, unknown>) =>
    runDev('--log-session', JSON.stringify(entry));

  /** Location of the profile file inside the per-test home directory. */
  const profileFile = () => path.join(tmpHome, 'developer-profile.json');

  test('regression: read-write-read sequence on fresh $HOME promotes to welcome_back', () => {
    // Initial --read on a fresh home: no sessions yet (the bug-shape on main).
    const firstRead = runDev('--read');
    for (const expected of ['SESSION_COUNT: 0', 'TIER: introduction']) {
      expect(firstRead.stdout).toContain(expected);
    }

    // Office-hours records a session through the new subcommand.
    const write = logSession({
      date: '2026-05-23T00:00:00Z',
      mode: 'startup',
      project_slug: 'test',
      signal_count: 2,
      signals: ['s1', 's2'],
    });
    expect(write.status).toBe(0);

    // The following --read must now see that session — this is what was broken.
    const secondRead = runDev('--read');
    for (const expected of [
      'SESSION_COUNT: 1',
      'TIER: welcome_back',
      'LAST_PROJECT: test',
      'TOTAL_SIGNAL_COUNT: 2',
    ]) {
      expect(secondRead.stdout).toContain(expected);
    }
  });

  test('aggregates signals across multiple sessions', () => {
    logSession({ date: '2026-05-20T00:00:00Z', mode: 'startup', project_slug: 'p', signals: ['a', 'b'] });
    logSession({ date: '2026-05-21T00:00:00Z', mode: 'startup', project_slug: 'p', signals: ['a', 'c'] });

    const profile = readProfile() as { sessions: unknown[]; signals_accumulated: Record<string, number> };
    expect(profile.sessions.length).toBe(2);
    expect(profile.signals_accumulated).toEqual({ a: 2, b: 1, c: 1 });
  });

  test('aggregates resources_shown and topics as deduped unions', () => {
    logSession({
      date: '2026-05-20T00:00:00Z', mode: 'resources', project_slug: 'p',
      resources_shown: ['url1', 'url2'], topics: ['ai'],
    });
    logSession({
      date: '2026-05-21T00:00:00Z', mode: 'resources', project_slug: 'p',
      resources_shown: ['url2', 'url3'], topics: ['ai', 'eng'],
    });

    const profile = readProfile() as { resources_shown: string[]; topics: string[] };
    expect(profile.resources_shown.sort()).toEqual(['url1', 'url2', 'url3']);
    expect(profile.topics.sort()).toEqual(['ai', 'eng']);
  });

  test('silently skips invalid JSON input (matches gstack-timeline-log pattern)', () => {
    const outcome = runDev('--log-session', 'not-json');
    expect(outcome.status).toBe(0); // silent skip, not error
    expect(fs.existsSync(profileFile())).toBe(false); // no stub created either
  });

  test('silently skips JSON missing required fields', () => {
    const outcome = logSession({ foo: 'bar' });
    expect(outcome.status).toBe(0);
    expect(fs.existsSync(profileFile())).toBe(false);
  });

  test('injects ts field if missing', () => {
    logSession({ date: '2026-05-23T00:00:00Z', mode: 'startup', project_slug: 'p' });

    const profile = readProfile() as { sessions: Array<{ ts: string }> };
    expect(profile.sessions[0].ts).toMatch(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/);
  });

  test('preserves user-set ts field if provided', () => {
    logSession({
      date: '2026-05-23T00:00:00Z', mode: 'startup', project_slug: 'p',
      ts: '2026-05-23T12:34:56Z',
    });

    const profile = readProfile() as { sessions: Array<{ ts: string }> };
    expect(profile.sessions[0].ts).toBe('2026-05-23T12:34:56Z');
  });

  test('do_read picks LAST_* from real sessions, not from a trailing mode:resources entry', () => {
    // The Phase 6 resources auto-append lands AFTER the real session within the
    // same /office-hours invocation. Without the mode filter that resources
    // entry would clobber LAST_PROJECT/LAST_ASSIGNMENT/LAST_DESIGN_TITLE for
    // the next session.
    logSession({
      date: '2026-05-20T00:00:00Z',
      mode: 'startup',
      project_slug: 'realproj',
      assignment: 'real assignment text',
      design_doc: 'plans/real.md',
    });
    logSession({
      date: '2026-05-20T01:00:00Z',
      mode: 'resources',
      project_slug: 'realproj',
      assignment: '',
      design_doc: '',
      resources_shown: ['url1'],
    });

    const read = runDev('--read');
    for (const expected of [
      'LAST_PROJECT: realproj',
      'LAST_ASSIGNMENT: real assignment text',
      'LAST_DESIGN_TITLE: plans/real.md',
      // Resources still aggregate into RESOURCES_SHOWN.
      'RESOURCES_SHOWN: url1',
    ]) {
      expect(read.stdout).toContain(expected);
    }
  });
});
|
||||
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
// when the relevant CLI isn't available).
|
||||
|
||||
import { test, expect, describe } from "bun:test";
|
||||
import { mkdirSync, mkdtempSync, writeFileSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import {
|
||||
parseVersion,
|
||||
fmtVersion,
|
||||
@@ -11,6 +14,7 @@ import {
|
||||
cmpVersion,
|
||||
pickNextSlot,
|
||||
markActiveSiblings,
|
||||
resolveVersionPath,
|
||||
} from "../bin/gstack-next-version";
|
||||
|
||||
describe("parseVersion", () => {
|
||||
@@ -150,6 +154,73 @@ describe("markActiveSiblings", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveVersionPath (monorepo VERSION-path support)", () => {
  /**
   * Create a throwaway repo directory, optionally seed `.gstack/version-path`
   * with `config`, run `check(dir)`, and always delete the directory afterwards.
   * Passing `undefined` for `config` leaves the directory without a `.gstack/` dir.
   */
  const inTempRepo = (config: string | undefined, check: (dir: string) => void): void => {
    const dir = mkdtempSync(join(tmpdir(), "nextver-"));
    try {
      if (config !== undefined) {
        mkdirSync(join(dir, ".gstack"));
        writeFileSync(join(dir, ".gstack", "version-path"), config);
      }
      check(dir);
    } finally {
      rmSync(dir, { recursive: true, force: true });
    }
  };

  test("CLI flag wins over everything", () => {
    inTempRepo("config/VERSION\n", (dir) => {
      expect(resolveVersionPath("flag/path/VERSION", dir)).toBe("flag/path/VERSION");
    });
  });

  test(".gstack/version-path config is picked up", () => {
    inTempRepo("Tinas Second Brain/health-tracker/VERSION\n", (dir) => {
      expect(resolveVersionPath(undefined, dir)).toBe("Tinas Second Brain/health-tracker/VERSION");
    });
  });

  test("trims whitespace and ignores blank lines after the first", () => {
    inTempRepo(" apps/web/VERSION \n\n# comment-ish line\n", (dir) => {
      expect(resolveVersionPath(undefined, dir)).toBe("apps/web/VERSION");
    });
  });

  test("empty config file falls back to default VERSION", () => {
    inTempRepo("\n", (dir) => {
      expect(resolveVersionPath(undefined, dir)).toBe("VERSION");
    });
  });

  test("missing config file falls back to default VERSION", () => {
    inTempRepo(undefined, (dir) => {
      expect(resolveVersionPath(undefined, dir)).toBe("VERSION");
    });
  });

  test("empty override string falls back to config/default", () => {
    // Defensive: "" should NOT win over config — only a non-empty CLI arg should.
    inTempRepo("subproj/VERSION\n", (dir) => {
      expect(resolveVersionPath("", dir)).toBe("subproj/VERSION");
    });
  });
});
|
||||
|
||||
// Integration smoke — only runs if gh is available and authenticated. Confirms
|
||||
// the CLI executes end-to-end against real APIs without crashing.
|
||||
describe("integration (smoke)", () => {
|
||||
@@ -181,5 +252,27 @@ describe("integration (smoke)", () => {
|
||||
expect(Array.isArray(parsed.claimed)).toBe(true);
|
||||
expect(parsed).toHaveProperty("siblings");
|
||||
expect(parsed.siblings).toEqual([]); // --workspace-root null disabled scanning
|
||||
expect(parsed).toHaveProperty("version_path", "VERSION"); // default when no config + no flag
|
||||
}, 30_000); // Headroom over the 4-5s wall time of the spawned process under load
|
||||
|
||||
// Runs the CLI with an explicit --version-path (containing spaces) and checks
// that the same value is echoed back as `version_path` in the JSON on stdout.
test("CLI runs with --version-path and surfaces it in JSON output", async () => {
  const versionPath = "Tinas Second Brain/health-tracker/VERSION";
  const argv = [
    "bun", "run", "./bin/gstack-next-version",
    "--base", "main",
    "--bump", "patch",
    "--current-version", "1.6.3.0",
    "--workspace-root", "null",
    "--version-path", versionPath,
  ];

  const { stdout } = Bun.spawnSync(argv);
  const parsed = JSON.parse(new TextDecoder().decode(stdout));

  expect(parsed).toHaveProperty("version_path", versionPath);
}, 30_000);
|
||||
});
|
||||
|
||||
@@ -0,0 +1,324 @@
|
||||
/**
|
||||
* gstack-upgrade/migrations/v1.40.0.0.sh — migration script unit tests.
|
||||
*
|
||||
* Per #1581: the original script unconditionally `touch`ed its done-marker even
|
||||
* when the jq-gated privacy-map patch was skipped. The fix defers `touch ${DONE}`
|
||||
* until every required repair either succeeded or was provably unnecessary.
|
||||
*
|
||||
* The "regression case" that this file pins is case 2: jq missing + privacy-map
|
||||
* present → no done-marker. Against the buggy script, case 2 fails (marker is
|
||||
* written despite skipped patch); against the fix it passes.
|
||||
*
|
||||
* Strategy: each test sets up an isolated tmpHome with controlled fixture
|
||||
* content, and runs the migration via `spawnSync('bash', [MIGRATION], …)`.
|
||||
* For "jq missing" we point PATH at a curated dir of symlinks to the standard
|
||||
* utilities the script uses, omitting jq. For "jq mutation fails" we point PATH
|
||||
* at a dir containing a jq shim that exits 1.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
|
||||
import * as fs from "fs";
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import { spawnSync } from "child_process";
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, "..");
|
||||
const MIGRATION = path.join(
|
||||
ROOT,
|
||||
"gstack-upgrade",
|
||||
"migrations",
|
||||
"v1.40.0.0.sh",
|
||||
);
|
||||
|
||||
const NEW_PATTERN = "projects/*/*-eng-review-test-plan-*.md";
|
||||
const REAL_PATH = "/usr/bin:/bin:/opt/homebrew/bin";
|
||||
|
||||
let tmpHome: string;
|
||||
let gstackHome: string;
|
||||
let migrationDir: string;
|
||||
let donePath: string;
|
||||
let allowlistPath: string;
|
||||
let privacyPath: string;
|
||||
let gitattrsPath: string;
|
||||
|
||||
// Give every test its own temp HOME with an empty .gstack/ directory, and
// recompute the module-level fixture paths that point inside it.
beforeEach(() => {
  tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), "gstack-mig-v1400-"));
  gstackHome = path.join(tmpHome, ".gstack");
  fs.mkdirSync(gstackHome, { recursive: true });

  const inGstack = (...parts: string[]): string => path.join(gstackHome, ...parts);
  migrationDir = inGstack(".migrations");
  donePath = inGstack(".migrations", "v1.40.0.0.done");
  allowlistPath = inGstack(".brain-allowlist");
  privacyPath = inGstack(".brain-privacy-map.json");
  gitattrsPath = inGstack(".gitattributes");
});
|
||||
|
||||
// Restore write permissions on the fixture dir/files (so deletion cannot be
// blocked by a mode change made during a test), then remove the temp HOME.
afterEach(() => {
  try {
    fs.chmodSync(gstackHome, 0o755);
    for (const fixture of [allowlistPath, privacyPath, gitattrsPath]) {
      if (fs.existsSync(fixture)) fs.chmodSync(fixture, 0o644);
    }
    fs.rmSync(tmpHome, { recursive: true, force: true });
  } catch {
    // Cleanup is best-effort; a failure here must not fail the test itself.
  }
});
|
||||
|
||||
/**
 * Build a directory suitable for use as the sole PATH entry when running the
 * migration script: it contains symlinks to the standard utilities listed
 * below, each resolved from the first REAL_PATH directory that has it.
 *
 * The directory is created inside the current test's `tmpHome`, so the
 * existing afterEach cleanup removes it together with the rest of the
 * fixture (previously it was created under os.tmpdir() and never deleted).
 *
 * @param opts.jq How `jq` should appear on the curated PATH:
 *   - "real" (default): symlink to the first `jq` found on REAL_PATH, if any;
 *   - "shim-fail": an executable stub that prints to stderr and exits 1;
 *   - "missing": no `jq` entry at all.
 * @returns Absolute path of the curated directory.
 */
function makeCuratedPath(opts: { jq?: "missing" | "shim-fail" | "real" } = {}): string {
  const dir = fs.mkdtempSync(path.join(tmpHome, "gstack-mig-path-"));
  const searchDirs = REAL_PATH.split(":");

  // Symlink `name` into `dir` from the first search dir that contains it.
  // Utilities that are not found on REAL_PATH are simply left out.
  const linkFirstMatch = (name: string): void => {
    const target = searchDirs
      .map((d) => path.join(d, name))
      .find((candidate) => fs.existsSync(candidate));
    if (target === undefined) return;
    try {
      fs.symlinkSync(target, path.join(dir, name));
    } catch {
      // Best-effort: a utility that cannot be linked is treated as absent.
    }
  };

  const utils = [
    "bash",
    "sh",
    "mkdir",
    "grep",
    "sed",
    "mv",
    "rm",
    "mktemp",
    "cat",
    "touch",
    "printf",
    "command",
    "echo",
    "test",
    "[",
    "tee",
    "true",
    "false",
    "ls",
    "chmod",
  ];
  utils.forEach(linkFirstMatch);

  const jq = opts.jq ?? "real";
  if (jq === "real") {
    linkFirstMatch("jq");
  } else if (jq === "shim-fail") {
    fs.writeFileSync(
      path.join(dir, "jq"),
      `#!/usr/bin/env bash\necho "fake jq: refusing" >&2\nexit 1\n`,
      { mode: 0o755 },
    );
  }
  // jq === "missing" → nothing is added, so `jq` is not on the curated PATH.

  return dir;
}
|
||||
|
||||
/**
 * Execute the migration script with bash, using the per-test temp HOME as
 * both $HOME and the working directory. The child environment contains only
 * HOME and PATH.
 *
 * @param opts.path PATH value for the child process (defaults to REAL_PATH).
 * @returns The spawnSync result, with stdout/stderr decoded as UTF-8 strings.
 */
function run(opts: { path?: string } = {}) {
  const searchPath = opts.path ?? REAL_PATH;
  return spawnSync("bash", [MIGRATION], {
    cwd: tmpHome,
    encoding: "utf-8",
    env: { HOME: tmpHome, PATH: searchPath },
  });
}
|
||||
|
||||
/**
 * Write a privacy-map fixture holding a single unrelated "artifact" entry,
 * i.e. one that does not yet contain the pattern the migration adds.
 */
function freshPrivacyMap() {
  const entries = [{ pattern: "projects/*/*-some-other-*.md", class: "artifact" }];
  const serialized = JSON.stringify(entries, null, 2);
  fs.writeFileSync(privacyPath, serialized);
}
|
||||
|
||||
/**
 * Write an allowlist fixture: a header comment, one unrelated pattern, and
 * the "USER ADDITIONS BELOW" marker line.
 */
function freshAllowlist() {
  const contents = "# header\nprojects/*/*-some-other-*.md\n# ---- USER ADDITIONS BELOW\n";
  fs.writeFileSync(allowlistPath, contents);
}
|
||||
|
||||
/** Write a .gitattributes fixture containing one unrelated merge=union rule. */
function freshGitattrs() {
  const contents = "projects/*/*-some-other-*.md merge=union\n";
  fs.writeFileSync(gitattrsPath, contents);
}
|
||||
|
||||
describe("migrations/v1.40.0.0.sh", () => {
  // Small local readers so each case reads as "arrange, run, assert".
  const readText = (file: string): string => fs.readFileSync(file, "utf-8");
  const readPrivacyMap = (): any[] => JSON.parse(readText(privacyPath));
  const markerWritten = (): boolean => fs.existsSync(donePath);
  const hasArtifactEntry = (entries: any[]): boolean =>
    entries.some(
      (e: any) => e.pattern === NEW_PATTERN && e.class === "artifact",
    );
  const UNION_RULE = `${NEW_PATTERN} merge=union`;

  // Seed all three fixtures in the same order the cases always used.
  const seedAllFixtures = (): void => {
    freshAllowlist();
    freshPrivacyMap();
    freshGitattrs();
  };

  test("case 1: jq present, fresh privacy-map — all three files patched, marker written", () => {
    seedAllFixtures();

    const result = run();

    expect(result.status).toBe(0);
    expect(markerWritten()).toBe(true);

    expect(readText(allowlistPath)).toContain(NEW_PATTERN);
    expect(hasArtifactEntry(readPrivacyMap())).toBe(true);
    expect(readText(gitattrsPath)).toContain(UNION_RULE);
  });

  test("case 2 (regression for #1581): jq missing, privacy-map exists — marker NOT written, text patches still applied", () => {
    seedAllFixtures();

    const pathWithoutJq = makeCuratedPath({ jq: "missing" });
    const result = run({ path: pathWithoutJq });

    expect(result.status).toBe(0);
    expect(result.stderr).toMatch(/jq not found/);

    // Done-marker must NOT be written — this is the whole point of the fix.
    expect(markerWritten()).toBe(false);

    // Text-only patches still landed (they don't need jq).
    expect(readText(allowlistPath)).toContain(NEW_PATTERN);
    expect(readText(gitattrsPath)).toContain(UNION_RULE);

    // Privacy-map untouched (still missing the new entry).
    const entries = readPrivacyMap();
    expect(entries.some((e: any) => e.pattern === NEW_PATTERN)).toBe(false);
  });

  test("case 3: jq missing, then jq restored — second run completes patch and writes marker", () => {
    seedAllFixtures();

    // First run with jq missing
    const pathWithoutJq = makeCuratedPath({ jq: "missing" });
    const firstRun = run({ path: pathWithoutJq });
    expect(firstRun.status).toBe(0);
    expect(markerWritten()).toBe(false);

    // Second run with jq restored
    const secondRun = run();
    expect(secondRun.status).toBe(0);
    expect(markerWritten()).toBe(true);

    expect(hasArtifactEntry(readPrivacyMap())).toBe(true);
  });

  test("case 4: jq present, privacy-map already has correct entry — idempotent, marker written", () => {
    freshAllowlist();
    const alreadyPatched = [{ pattern: NEW_PATTERN, class: "artifact" }];
    fs.writeFileSync(privacyPath, JSON.stringify(alreadyPatched, null, 2));
    freshGitattrs();

    const result = run();
    expect(result.status).toBe(0);
    expect(markerWritten()).toBe(true);

    // Exactly one entry for the pattern: the run must not duplicate it.
    const matching = readPrivacyMap().filter(
      (e: any) => e.pattern === NEW_PATTERN,
    );
    expect(matching.length).toBe(1);
    expect(matching[0].class).toBe("artifact");
  });

  test("case 5: jq present, privacy-map file missing — allowlist + gitattrs patched, marker written", () => {
    freshAllowlist();
    // No privacy-map file
    freshGitattrs();

    const result = run();
    expect(result.status).toBe(0);
    expect(markerWritten()).toBe(true);
    expect(fs.existsSync(privacyPath)).toBe(false);

    expect(readText(allowlistPath)).toContain(NEW_PATTERN);
    expect(readText(gitattrsPath)).toContain(UNION_RULE);
  });

  test("case 6: jq present, privacy-map JSON malformed — no marker, error logged, no mutation", () => {
    const brokenJson = "{ this is not json [";
    freshAllowlist();
    fs.writeFileSync(privacyPath, brokenJson);
    freshGitattrs();

    const result = run();
    expect(result.status).toBe(0);
    // No marker — broken JSON should NOT be papered over.
    expect(markerWritten()).toBe(false);
    // Privacy-map content untouched.
    expect(readText(privacyPath)).toBe(brokenJson);
  });

  test("case 7: jq present but mutation fails (shim exit 1) — no marker, tempfile cleaned up", () => {
    seedAllFixtures();

    const pathWithFailingJq = makeCuratedPath({ jq: "shim-fail" });
    const result = run({ path: pathWithFailingJq });

    expect(result.status).toBe(0);
    expect(markerWritten()).toBe(false);

    // Tempfile cleanup: no leftover *.tmp.* sidecars.
    const sidecarPrefix = ".brain-privacy-map.json.tmp.";
    const leftovers = fs
      .readdirSync(gstackHome)
      .filter((name) => name.startsWith(sidecarPrefix));
    expect(leftovers.length).toBe(0);
  });

  test("case 8: allowlist append fails (read-only file, no USER ADDITIONS marker) — no marker, warn logged", () => {
    // Allowlist WITHOUT the "# ---- USER ADDITIONS BELOW" marker — the script
    // falls into the plain `printf >>` append path. Make the file read-only
    // so the append fails (sed -i.bak on macOS silently no-ops on read-only
    // files, so we have to take the printf path to exercise this).
    const allowlistWithoutMarker = "# header\nprojects/*/*-some-other-*.md\n";
    fs.writeFileSync(allowlistPath, allowlistWithoutMarker);
    freshPrivacyMap();
    freshGitattrs();
    fs.chmodSync(allowlistPath, 0o444);

    const result = run();
    expect(result.status).toBe(0);
    // Marker must NOT be written when a required repair failed.
    expect(markerWritten()).toBe(false);
  });
});
|
||||
@@ -0,0 +1,116 @@
|
||||
/**
|
||||
* Unit tests for budget-override audit logger.
|
||||
*
|
||||
* The audit trail is the only check on `EVALS_BUDGET_OVERRIDE_REASON` and
|
||||
* `GSTACK_SIZE_BUDGET_OVERRIDE_REASON` — if the logger silently drops events,
|
||||
* overrides become invisible and the budget gates are theater. These tests
|
||||
* pin the contract: every override produces exactly one JSONL line with
|
||||
* timestamp + scope + reason + CI provenance.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { logBudgetOverride } from './budget-override';
|
||||
|
||||
const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'budget-override-test-'));
process.env.GSTACK_HOME = TMP_HOME;
const AUDIT_PATH = path.join(TMP_HOME, 'analytics', 'spend-overrides.jsonl');

/**
 * Run `fn` with temporary environment overrides, then restore the previous
 * values even if `fn` throws (e.g. a failed `expect`).
 *
 * A value of `undefined` means "unset this variable for the duration of fn".
 * Restoring in `finally` keeps a failing test from leaking CI variables into
 * later tests, and keeps the "local" test from permanently deleting the real
 * CI variables of the process running the suite.
 */
function withEnv(overrides: Record<string, string | undefined>, fn: () => void): void {
  const apply = (values: Record<string, string | undefined>): void => {
    for (const [key, value] of Object.entries(values)) {
      // Assigning `undefined` to process.env stores the string "undefined",
      // so unset variables must be deleted instead.
      if (value === undefined) delete process.env[key];
      else process.env[key] = value;
    }
  };
  const saved: Record<string, string | undefined> = {};
  for (const key of Object.keys(overrides)) saved[key] = process.env[key];
  apply(overrides);
  try {
    fn();
  } finally {
    apply(saved);
  }
}

/** Read the audit file and parse its single JSONL entry. */
function readSingleEntry(): any {
  return JSON.parse(fs.readFileSync(AUDIT_PATH, 'utf-8').trim());
}

describe('logBudgetOverride', () => {
  beforeEach(() => {
    // Start each test with a clean audit file
    try { fs.unlinkSync(AUDIT_PATH); } catch { /* doesn't exist */ }
  });

  test('writes one JSONL line per call with required fields', () => {
    logBudgetOverride({
      scope: 'evals-cost-cap-e2e',
      reason: 'model price went up, will rebase the cap next sprint',
      details: { tier: 'e2e', cap: 25, observed_cost_usd: 31.4 },
    });

    expect(fs.existsSync(AUDIT_PATH)).toBe(true);
    const lines = fs.readFileSync(AUDIT_PATH, 'utf-8').split('\n').filter(Boolean);
    expect(lines.length).toBe(1);
    const entry = JSON.parse(lines[0]!);
    expect(entry.scope).toBe('evals-cost-cap-e2e');
    expect(entry.reason).toBe('model price went up, will rebase the cap next sprint');
    expect(entry.details).toEqual({ tier: 'e2e', cap: 25, observed_cost_usd: 31.4 });
    expect(typeof entry.timestamp).toBe('string');
    expect(entry.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
  });

  test('captures CI provenance when CI env is set', () => {
    withEnv(
      {
        CI: 'true',
        GITHUB_ACTIONS: 'true',
        GITHUB_REF_NAME: 'feature/x',
        GITHUB_SHA: 'deadbeefcafe1234',
      },
      () => {
        logBudgetOverride({ scope: 'skill-size-budget', reason: 'big diff bake-in' });

        const entry = readSingleEntry();
        expect(entry.ci).toBe(true);
        expect(entry.runner).toBe('github-actions');
        expect(entry.branch).toBe('feature/x');
        // Only the first 8 characters of GITHUB_SHA are recorded.
        expect(entry.commit).toBe('deadbeef');
      },
    );
  });

  test('defaults provenance to local when CI is unset', () => {
    withEnv(
      {
        CI: undefined,
        GITHUB_ACTIONS: undefined,
        GITHUB_REF_NAME: undefined,
        GITHUB_SHA: undefined,
        CI_RUNNER: undefined,
        CI_COMMIT_REF_NAME: undefined,
        CI_COMMIT_SHORT_SHA: undefined,
      },
      () => {
        logBudgetOverride({ scope: 'skill-size-budget-corpus', reason: 'local dev test' });

        const entry = readSingleEntry();
        expect(entry.ci).toBe(false);
        expect(entry.runner).toBe('local');
        expect(entry.branch).toBe('unknown');
        expect(entry.commit).toBe('unknown');
      },
    );
  });

  test('append-only: multiple calls produce multiple lines', () => {
    logBudgetOverride({ scope: 's1', reason: 'r1' });
    logBudgetOverride({ scope: 's2', reason: 'r2' });
    logBudgetOverride({ scope: 's3', reason: 'r3' });

    const lines = fs.readFileSync(AUDIT_PATH, 'utf-8').split('\n').filter(Boolean);
    expect(lines.length).toBe(3);
    const scopes = lines.map(l => JSON.parse(l).scope);
    expect(scopes).toEqual(['s1', 's2', 's3']);
  });

  // Title corrected: the key is not omitted, it is written as an empty object.
  test('defaults details to an empty object when entry.details is absent', () => {
    logBudgetOverride({ scope: 'plain', reason: 'no details' });
    const entry = readSingleEntry();
    expect(entry.details).toEqual({});
  });

  test('never throws even when audit directory is missing — creates it', () => {
    // Remove the analytics dir to force mkdir
    try { fs.rmSync(path.join(TMP_HOME, 'analytics'), { recursive: true, force: true }); } catch { /* */ }
    expect(() => logBudgetOverride({ scope: 'recreate', reason: 'test' })).not.toThrow();
    expect(fs.existsSync(AUDIT_PATH)).toBe(true);
  });

  test('survives an unwritable audit path (logs warning, does not throw)', () => {
    // Point GSTACK_HOME at a path inside a file (illegal directory location)
    const bogusFile = path.join(TMP_HOME, 'not-a-dir.txt');
    fs.writeFileSync(bogusFile, 'just a file');
    withEnv({ GSTACK_HOME: bogusFile }, () => {
      expect(() => logBudgetOverride({ scope: 'unwritable', reason: 'fs error path' })).not.toThrow();
    });
  });
});
|
||||
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* Budget override audit trail (v1.45.0.0 T5).
|
||||
*
|
||||
* Records uses of GSTACK_SIZE_BUDGET_OVERRIDE_REASON or
|
||||
* EVALS_BUDGET_OVERRIDE_REASON so a reviewer can see what was waived,
|
||||
* by whom, and why. Append-only JSONL at ~/.gstack/analytics/spend-overrides.jsonl.
|
||||
*
|
||||
* Why audit: a hard cap with no escape valve becomes operationally hostile
|
||||
* (legit price changes, longer transcripts, new required evals can all
|
||||
* blow the cap). An escape valve with no audit becomes "everyone overrides
|
||||
* everything and we lose the gate." This module is the audit half.
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
/** One budget-override event, as supplied by the caller of `logBudgetOverride`. */
export interface BudgetOverrideEntry {
  /** Which gate was overridden, e.g. 'skill-size-budget', 'evals-cost-cap'. */
  scope: string;
  /** User-supplied justification (the REASON env var). */
  reason: string;
  /** Optional supporting data such as numbers / regressions. */
  details?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Resolve the audit log location: `<base>/analytics/spend-overrides.jsonl`,
 * where `<base>` is `GSTACK_HOME` when set to a non-empty value and
 * `~/.gstack` otherwise. Read on every call, so env changes take effect.
 */
function getAuditPath(): string {
  const override = process.env.GSTACK_HOME;
  const root = override ? override : path.join(os.homedir(), '.gstack');
  return path.join(root, 'analytics', 'spend-overrides.jsonl');
}
|
||||
|
||||
/**
 * Append one JSON line describing a budget override to the audit log.
 *
 * The record carries a timestamp, the caller's scope/reason/details
 * (details defaults to `{}`), and provenance read from the environment:
 * CI flag, runner, branch, and commit (first 8 chars of GITHUB_SHA).
 * The parent directory is created if needed.
 *
 * Never throws: any failure is reported with `console.warn` and swallowed,
 * so a broken audit path cannot fail the calling test.
 */
export function logBudgetOverride(entry: BudgetOverrideEntry): void {
  try {
    const target = getAuditPath();
    fs.mkdirSync(path.dirname(target), { recursive: true });

    const env = process.env;
    const record = {
      timestamp: new Date().toISOString(),
      scope: entry.scope,
      reason: entry.reason,
      details: entry.details ?? {},
      // Capture provenance: who/where/which CI ran
      ci: env.CI === 'true',
      runner: env.GITHUB_ACTIONS ? 'github-actions' : env.CI_RUNNER || 'local',
      branch: env.GITHUB_REF_NAME || env.CI_COMMIT_REF_NAME || 'unknown',
      commit: env.GITHUB_SHA?.slice(0, 8) || env.CI_COMMIT_SHORT_SHA || 'unknown',
    };

    fs.appendFileSync(target, JSON.stringify(record) + '\n');
  } catch (err) {
    // Best-effort logging; don't fail the test on audit-write errors.
    // eslint-disable-next-line no-console
    console.warn(`[budget-override] could not write audit log: ${(err as Error).message}`);
  }
}
|
||||
@@ -0,0 +1,90 @@
|
||||
/**
|
||||
* Unit tests for parity baseline capture.
|
||||
*
|
||||
* Free. Reads the live repo state via captureBaseline() and asserts
|
||||
* shape + invariants, not specific numbers (which drift release-over-release).
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { captureBaseline, diffBaselines, type ParityBaseline } from './capture-parity-baseline';
|
||||
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..', '..');

describe('capture-parity-baseline', () => {
  test('produces a shaped baseline for the current repo', () => {
    const snapshot = captureBaseline({ repoRoot: REPO_ROOT, tag: 'unit-test' });
    const { topHeaviest } = snapshot;

    expect(snapshot.tag).toBe('unit-test');
    expect(snapshot.totalSkills).toBeGreaterThan(20);
    expect(snapshot.totalCorpusBytes).toBeGreaterThan(100_000);
    expect(topHeaviest.length).toBeGreaterThan(0);
    expect(topHeaviest.length).toBeLessThanOrEqual(10);
    expect(topHeaviest[0]!.skillMdBytes).toBeGreaterThan(0);
    // Top 1 should be ≥ Top 2 (sort invariant)
    if (topHeaviest.length >= 2) {
      expect(topHeaviest[0]!.skillMdBytes).toBeGreaterThanOrEqual(
        topHeaviest[1]!.skillMdBytes,
      );
    }
  });

  test('each skill entry has byte + line + token estimates', () => {
    const snapshot = captureBaseline({ repoRoot: REPO_ROOT });
    Object.values(snapshot.skills).forEach((skill) => {
      expect(skill.skillMdBytes).toBeGreaterThan(0);
      expect(skill.skillMdLines).toBeGreaterThan(0);
      expect(skill.estTokens).toBeGreaterThan(0);
      // ~4 chars/token heuristic
      expect(skill.estTokens).toBeCloseTo(skill.skillMdBytes / 4, -2);
    });
  });

  test('diffBaselines returns expected deltas', () => {
    // Hand-built fixtures: foo shrinks by 200 bytes, bar by 100.
    const before: ParityBaseline = {
      tag: 'before',
      capturedAt: '2026-01-01T00:00:00Z',
      capturedFromCommit: 'abc',
      capturedFromBranch: 'main',
      totalSkills: 2,
      totalCorpusBytes: 1000,
      estTotalCatalogTokens: 100,
      topHeaviest: [],
      skills: {
        foo: {
          skill: 'foo',
          skillMdBytes: 600,
          skillMdLines: 10,
          estTokens: 150,
          tmplBytes: 300,
          descriptionLen: 50,
          hasGateEval: true,
          hasPeriodicEval: false,
        },
        bar: {
          skill: 'bar',
          skillMdBytes: 400,
          skillMdLines: 8,
          estTokens: 100,
          tmplBytes: 200,
          descriptionLen: 30,
          hasGateEval: false,
          hasPeriodicEval: false,
        },
      },
    };
    const after: ParityBaseline = {
      ...before,
      tag: 'after',
      totalCorpusBytes: 700,
      estTotalCatalogTokens: 60,
      skills: {
        foo: { ...before.skills.foo!, skillMdBytes: 400 },
        bar: { ...before.skills.bar!, skillMdBytes: 300 },
      },
    };

    const delta = diffBaselines(before, after);
    const [largest, second] = delta.perSkill;

    expect(delta.totalCorpusDelta).toBe(-300);
    expect(delta.totalCorpusDeltaPct).toBeCloseTo(-30, 1);
    expect(delta.catalogTokensDelta).toBe(-40);
    expect(delta.perSkill.length).toBe(2);
    // Sorted by abs delta descending
    expect(largest!.skill).toBe('foo');
    expect(largest!.deltaBytes).toBe(-200);
    expect(second!.skill).toBe('bar');
  });

  test('v1.44.1 baseline file exists with expected shape', () => {
    const fixturePath = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.44.1.json');
    expect(fs.existsSync(fixturePath)).toBe(true);

    const raw = fs.readFileSync(fixturePath, 'utf-8');
    const pinned = JSON.parse(raw) as ParityBaseline;
    expect(pinned.tag).toBe('v1.44.1');
    expect(pinned.totalSkills).toBeGreaterThan(40);
    // Document the v1.44.1 snapshot as the v1→v2 baseline reference.
    // Compression in v1.45+ should drop totalCorpusBytes; this assertion
    // anchors the "v1 was XX MB" claim in the CHANGELOG to a real file.
    expect(pinned.totalCorpusBytes).toBeGreaterThan(2_000_000);
  });
});
|
||||
@@ -0,0 +1,231 @@
|
||||
/**
|
||||
* Parity baseline capture — cathedral parity-eval suite primitive.
|
||||
*
|
||||
* Snapshots the current state of every top-level SKILL.md: byte count, line
|
||||
* count, estimated token count, frontmatter description length, eval
|
||||
* coverage. The output JSON is the v1.44 baseline that v2 must beat on
|
||||
* compression AND match (or exceed) on parity.
|
||||
*
|
||||
* The numbers quoted in the v2.0.0.0 CHANGELOG numbers table are read
|
||||
* from a baseline JSON captured by this script. Never invent baseline
|
||||
* numbers; ship them only if they came from a real captureBaseline() run.
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/capture-baseline.ts # write default path
|
||||
* bun run scripts/capture-baseline.ts --out PATH # write custom path
|
||||
* bun run scripts/capture-baseline.ts --tag v1.44.1 # tag the snapshot
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
/** Size and coverage snapshot for a single skill's SKILL.md. */
export interface SkillBaselineEntry {
  /** Skill directory name. */
  skill: string;
  /** UTF-8 byte length of SKILL.md. */
  skillMdBytes: number;
  /** Number of '\n'-separated segments in SKILL.md. */
  skillMdLines: number;
  /** Estimated tokens (~4 chars/token heuristic). */
  estTokens: number;
  /** UTF-8 byte length of SKILL.md.tmpl; null when no .tmpl exists (vendored or non-Claude). */
  tmplBytes: number | null;
  /** Bytes in the frontmatter description field. */
  descriptionLen: number;
  /** Whether a gate-tier E2E test file was matched to this skill. */
  hasGateEval: boolean;
  /** Whether a periodic-tier E2E test file was matched to this skill. */
  hasPeriodicEval: boolean;
}
|
||||
|
||||
/** A full snapshot of every discovered skill plus corpus-level totals. */
export interface ParityBaseline {
  /** Caller-supplied label for the snapshot ('untagged' when none is given). */
  tag: string;
  /** ISO-8601 timestamp of the capture. */
  capturedAt: string;
  /** Short commit hash at capture time, or 'unknown'. */
  capturedFromCommit: string;
  /** Branch name at capture time, or 'unknown'. */
  capturedFromBranch: string;
  /** Number of skill directories captured. */
  totalSkills: number;
  /** Sum of skillMdBytes across all skills. */
  totalCorpusBytes: number;
  /** Sum of all description lengths / 4. */
  estTotalCatalogTokens: number;
  /** Up to ten heaviest skills, sorted desc by skillMdBytes. */
  topHeaviest: SkillBaselineEntry[];
  /** Per-skill entries keyed by skill directory name. */
  skills: Record<string, SkillBaselineEntry>;
}
|
||||
|
||||
/** Inputs to `captureBaseline`. */
export interface CaptureOptions {
  /** Directory whose immediate subdirectories are scanned for SKILL.md. */
  repoRoot: string;
  /** Optional label stored on the snapshot. */
  tag?: string;
}
|
||||
|
||||
/**
 * Extract the frontmatter `description` value from a SKILL.md file.
 *
 * Supports both the inline form (`description: text`) and block scalars
 * (`description: |`, `>`, with optional `+`/`-` chomping). Continuation
 * lines are the indented or blank lines that follow; the value ends at the
 * first non-indented line, i.e. the next top-level key. Each collected line
 * is trimmed and the lines are joined with '\n'.
 *
 * @param content Full text of a SKILL.md file (LF or CRLF line endings).
 * @returns The description text, or '' when the file has no frontmatter or
 *          no `description` key.
 */
function extractDescription(content: string): string {
  // Normalise CRLF so Windows checkouts parse the same as LF ones.
  const text = content.replace(/\r\n/g, '\n');
  if (!text.startsWith('---\n')) return '';
  const fmEnd = text.indexOf('\n---', 4);
  if (fmEnd === -1) return '';

  const lines = text.slice(4, fmEnd).split('\n');
  const start = lines.findIndex((line) => /^description:(\s|$)/.test(line));
  if (start === -1) return '';

  const descLines: string[] = [];
  const inline = lines[start]!.replace(/^description:\s*/, '');
  // A bare block-scalar indicator (`|`, `>`, `|-`, `>+`) carries no text of
  // its own; anything else on the key line is the first line of the value.
  if (inline !== '' && !/^[|>][+-]?\s*$/.test(inline)) descLines.push(inline);

  for (const line of lines.slice(start + 1)) {
    // Any non-indented line is the next top-level key. Matching on
    // indentation rather than `\w+:\s` also stops at hyphenated keys and
    // keys with nothing after the colon (e.g. `allowed-tools:`), which
    // would otherwise be absorbed into the description and inflate its size.
    if (/^\S/.test(line)) break;
    descLines.push(line.trim());
  }
  return descLines.join('\n').trim();
}
|
||||
|
||||
/**
 * Estimate a token count from a byte count using the 4-chars-per-token
 * heuristic. Crude but matches existing budget-regression usage.
 */
function estimateTokens(bytes: number): number {
  const charsPerToken = 4;
  return Math.round(bytes / charsPerToken);
}
|
||||
|
||||
/**
 * List the immediate subdirectories of `repoRoot` that contain a SKILL.md
 * (the skills we capture), sorted by name. Dot-directories, `node_modules`
 * and `docs` are never considered.
 */
function discoverSkillDirs(repoRoot: string): string[] {
  const excluded = new Set(['node_modules', 'docs']);
  return fs
    .readdirSync(repoRoot, { withFileTypes: true })
    .filter((ent) => ent.isDirectory())
    .map((ent) => ent.name)
    .filter((name) => !name.startsWith('.') && !excluded.has(name))
    .filter((name) => fs.existsSync(path.join(repoRoot, name, 'SKILL.md')))
    .sort();
}
|
||||
|
||||
/**
 * Check whether each skill has E2E gate / periodic eval coverage by scanning
 * `test/skill-e2e-*.test.ts` for mentions of the skill name.
 *
 * A file mentions a skill when its contents contain either the slash form
 * (`/skill-name`, not followed by another word character or hyphen) or the
 * skill name as a complete quoted string ('…', "…" or `…`). The tier is
 * inferred crudely from the file name: names containing `chain`, `multi`,
 * `idempotency` or `finding-floor` count as periodic, everything else as gate.
 *
 * @param repoRoot Repository root; `<repoRoot>/test` is scanned (non-recursively).
 * @param skills   Skill directory names to look for.
 * @returns Sets of skill names with gate and periodic coverage. Both are
 *          empty when the test directory does not exist.
 */
function discoverEvalCoverage(repoRoot: string, skills: string[]): {
  gate: Set<string>;
  periodic: Set<string>;
} {
  const gate = new Set<string>();
  const periodic = new Set<string>();
  const testDir = path.join(repoRoot, 'test');
  if (!fs.existsSync(testDir)) return { gate, periodic };

  const periodicMarkers = ['chain', 'multi', 'idempotency', 'finding-floor'];

  // Build one matcher per skill up front instead of once per file × skill.
  const matchers = skills.map((skill) => {
    // Directory names are arbitrary text: escape regex metacharacters so a
    // name such as `c++` neither throws nor matches unintended input.
    const name = skill.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The trailing lookahead stops `/qa` from matching inside `/qa-only`,
    // which would credit the shorter skill with the longer skill's tests.
    const re = new RegExp(`/${name}(?![\\w-])|['"\`]${name}['"\`]`);
    return { skill, re };
  });

  const testFiles = fs
    .readdirSync(testDir)
    .filter(f => f.startsWith('skill-e2e-') && f.endsWith('.test.ts'));
  for (const file of testFiles) {
    const content = fs.readFileSync(path.join(testDir, file), 'utf-8');
    // The tier depends only on the file name, so pick the target set once.
    const tier = periodicMarkers.some(marker => file.includes(marker)) ? periodic : gate;
    for (const { skill, re } of matchers) {
      if (re.test(content)) tier.add(skill);
    }
  }
  return { gate, periodic };
}
|
||||
|
||||
/**
 * Read the short HEAD commit hash and the current branch name by running
 * `git rev-parse` in `repoRoot`. If either command fails for any reason,
 * both fields are reported as 'unknown'.
 */
function getGitInfo(repoRoot: string): { commit: string; branch: string } {
  const git = (command: string): string =>
    execSync(command, { cwd: repoRoot, encoding: 'utf-8' }).trim();
  try {
    const commit = git('git rev-parse --short HEAD');
    const branch = git('git rev-parse --abbrev-ref HEAD');
    return { commit, branch };
  } catch {
    return { commit: 'unknown', branch: 'unknown' };
  }
}
|
||||
|
||||
/**
 * Snapshot every skill directory under `opts.repoRoot`.
 *
 * For each discovered skill this reads SKILL.md (and SKILL.md.tmpl when it
 * exists) and records byte/line/token sizes, the frontmatter description
 * length, and whether E2E coverage was detected. Totals, the ten heaviest
 * skills, and git provenance are attached to the returned snapshot.
 *
 * @param opts.repoRoot Directory to scan.
 * @param opts.tag      Optional label; defaults to 'untagged'.
 */
export function captureBaseline(opts: CaptureOptions): ParityBaseline {
  const root = opts.repoRoot;
  const skillDirs = discoverSkillDirs(root);
  const coverage = discoverEvalCoverage(root, skillDirs);
  const utf8Bytes = (text: string): number => Buffer.byteLength(text, 'utf-8');

  const skills: Record<string, SkillBaselineEntry> = {};
  let corpusBytes = 0;
  let descriptionBytes = 0;

  for (const name of skillDirs) {
    const skillMd = fs.readFileSync(path.join(root, name, 'SKILL.md'), 'utf-8');
    const skillMdBytes = utf8Bytes(skillMd);
    const skillMdLines = skillMd.split('\n').length;
    const descriptionLen = utf8Bytes(extractDescription(skillMd));

    const tmplPath = path.join(root, name, 'SKILL.md.tmpl');
    let tmplBytes: number | null = null;
    if (fs.existsSync(tmplPath)) {
      tmplBytes = utf8Bytes(fs.readFileSync(tmplPath, 'utf-8'));
    }

    skills[name] = {
      skill: name,
      skillMdBytes,
      skillMdLines,
      estTokens: estimateTokens(skillMdBytes),
      tmplBytes,
      descriptionLen,
      hasGateEval: coverage.gate.has(name),
      hasPeriodicEval: coverage.periodic.has(name),
    };
    corpusBytes += skillMdBytes;
    descriptionBytes += descriptionLen;
  }

  // Object.values returns a fresh array, so sorting it leaves `skills` intact.
  const byWeightDesc = Object.values(skills).sort(
    (left, right) => right.skillMdBytes - left.skillMdBytes,
  );
  const { commit, branch } = getGitInfo(root);

  return {
    tag: opts.tag ?? 'untagged',
    capturedAt: new Date().toISOString(),
    capturedFromCommit: commit,
    capturedFromBranch: branch,
    totalSkills: skillDirs.length,
    totalCorpusBytes: corpusBytes,
    estTotalCatalogTokens: estimateTokens(descriptionBytes),
    topHeaviest: byWeightDesc.slice(0, 10),
    skills,
  };
}
|
||||
|
||||
/** Result of diffing two baselines; useful for v2 vs v1.44 deltas. */
export interface BaselineDiff {
  /** after.totalCorpusBytes − before.totalCorpusBytes. */
  totalCorpusDelta: number;
  /** Corpus delta as a percentage of the before value (0 when before is 0). */
  totalCorpusDeltaPct: number;
  /** after.estTotalCatalogTokens − before.estTotalCatalogTokens. */
  catalogTokensDelta: number;
  /** Catalog-token delta as a percentage of the before value (0 when before is 0). */
  catalogTokensDeltaPct: number;
  /** One row per skill seen in either baseline, largest absolute byte change first. */
  perSkill: Array<{
    skill: string;
    /** SKILL.md bytes in the before baseline (0 when the skill is absent). */
    beforeBytes: number;
    /** SKILL.md bytes in the after baseline (0 when the skill is absent). */
    afterBytes: number;
    deltaBytes: number;
    /** Byte delta as a percentage of beforeBytes (0 when beforeBytes is 0). */
    deltaPct: number;
  }>;
}
|
||||
|
||||
/**
 * Compare two baselines.
 *
 * Reports absolute and percentage change for total corpus bytes and catalog
 * tokens, plus a per-skill byte delta for every skill present in either
 * baseline (a skill missing from one side counts as 0 bytes there).
 * Percentages are 0 when the "before" value is 0. Per-skill rows are sorted
 * by absolute byte delta, largest first.
 */
export function diffBaselines(before: ParityBaseline, after: ParityBaseline): BaselineDiff {
  // Percentage of `base` that `delta` represents; 0 when there is no base.
  const percentOf = (delta: number, base: number): number =>
    base ? (delta / base) * 100 : 0;

  const totalCorpusDelta = after.totalCorpusBytes - before.totalCorpusBytes;
  const catalogTokensDelta = after.estTotalCatalogTokens - before.estTotalCatalogTokens;

  const skillNames = new Set([
    ...Object.keys(before.skills),
    ...Object.keys(after.skills),
  ]);
  const perSkill: BaselineDiff['perSkill'] = Array.from(skillNames, (skill) => {
    const beforeBytes = before.skills[skill]?.skillMdBytes ?? 0;
    const afterBytes = after.skills[skill]?.skillMdBytes ?? 0;
    const deltaBytes = afterBytes - beforeBytes;
    return {
      skill,
      beforeBytes,
      afterBytes,
      deltaBytes,
      deltaPct: percentOf(deltaBytes, beforeBytes),
    };
  });
  perSkill.sort((left, right) => Math.abs(right.deltaBytes) - Math.abs(left.deltaBytes));

  return {
    totalCorpusDelta,
    totalCorpusDeltaPct: percentOf(totalCorpusDelta, before.totalCorpusBytes),
    catalogTokensDelta,
    catalogTokensDeltaPct: percentOf(catalogTokensDelta, before.estTotalCatalogTokens),
    perSkill,
  };
}
|
||||
@@ -0,0 +1,230 @@
|
||||
/**
|
||||
* Cathedral parity-eval harness (v1.45.0.0 T0b).
|
||||
*
|
||||
* Compares CURRENT SKILL.md output to a v1.44.1 golden baseline along three
|
||||
* axes: STRUCTURE (frontmatter shape), CONTENT (must-preserve phrases per
|
||||
* skill family), and SIZE (per-skill byte budget). The fourth axis —
|
||||
* BEHAVIORAL parity via LLM-as-judge — runs on top of this harness in the
|
||||
* periodic-tier eval suite (paid, ~$0.20 per skill judge call).
|
||||
*
|
||||
* The structural + content checks ship in v1.45.0.0 as the foundation; the
|
||||
* LLM-judge layer lands in v2.0.0.0 alongside the sections/ pattern. Both
|
||||
* use this module's APIs.
|
||||
*
|
||||
* Why a separate harness from skill-size-budget.test.ts: that one enforces
|
||||
* size discipline only. This module supports content invariants per skill
|
||||
* family (e.g., cso must preserve OWASP/STRIDE; plan-ceo must preserve
|
||||
* mode-selection phrasing) so future compression can't silently strip
|
||||
* load-bearing prose even when size stays within ratio.
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { ParityBaseline, SkillBaselineEntry } from './capture-parity-baseline';
|
||||
import { captureBaseline } from './capture-parity-baseline';
|
||||
|
||||
/** What must not break for one skill; every check except `skill` is optional. */
export interface ParityInvariant {
  /** Skill directory name; its SKILL.md is the file under test. */
  skill: string;
  /** Phrases that MUST appear in the generated SKILL.md (case-insensitive substring). */
  mustContain?: string[];
  /** Markdown H2 headings that MUST appear. */
  mustHaveHeadings?: string[];
  /** Maximum byte size growth ratio vs baseline. 1.0 = no growth allowed. */
  maxSizeRatio?: number;
  /** Minimum byte size (catches over-stripping cliffs). */
  minBytes?: number;
}
|
||||
|
||||
/** Outcome of checking one invariant against the current state of a skill. */
export interface ParityCheckResult {
  /** Skill the invariant applies to. */
  skill: string;
  /** True when `failures` is empty. */
  passed: boolean;
  /** Human-readable description of each violated check. */
  failures: string[];
}
|
||||
|
||||
/**
 * Check one skill against its invariant.
 *
 * SIZE: when `maxSizeRatio` is set and a baseline entry exists, the current
 * size divided by the baseline size must not exceed it; when `minBytes` is
 * set, the current size must be at least that large.
 *
 * CONTENT: when phrases or headings are required, `<repoRoot>/<skill>/SKILL.md`
 * is read fresh from disk. Phrases match as case-insensitive substrings.
 * A heading matches only at the start of a line (leading whitespace allowed),
 * so `### Preamble` or an inline mention does not satisfy `## Preamble`,
 * while `## Preamble (run first)` does.
 *
 * @param invariant Checks to apply.
 * @param current   Current snapshot entry for the skill (size source).
 * @param baseline  Baseline entry, or undefined when the skill has none; the
 *                  ratio check is skipped in that case.
 * @param repoRoot  Repository root used to locate SKILL.md.
 * @returns The skill name, pass/fail, and one message per violated check.
 *          Never throws: an unreadable SKILL.md is reported as a failure.
 */
export function checkSkillParity(
  invariant: ParityInvariant,
  current: SkillBaselineEntry,
  baseline: SkillBaselineEntry | undefined,
  repoRoot: string,
): ParityCheckResult {
  const failures: string[] = [];

  // SIZE checks
  if (invariant.maxSizeRatio !== undefined && baseline) {
    const ratio = current.skillMdBytes / baseline.skillMdBytes;
    if (ratio > invariant.maxSizeRatio) {
      failures.push(`size ratio ${ratio.toFixed(3)} > maxSizeRatio ${invariant.maxSizeRatio}`);
    }
  }
  if (invariant.minBytes !== undefined && current.skillMdBytes < invariant.minBytes) {
    failures.push(`size ${current.skillMdBytes} < minBytes ${invariant.minBytes}`);
  }

  // CONTENT checks (read live file for fresh content)
  const phrases = invariant.mustContain ?? [];
  const headings = invariant.mustHaveHeadings ?? [];
  if (phrases.length || headings.length) {
    const skillMdPath = path.join(repoRoot, invariant.skill, 'SKILL.md');
    let content: string | null = null;
    try {
      content = fs.readFileSync(skillMdPath, 'utf-8');
    } catch (err) {
      failures.push(`cannot read ${skillMdPath}: ${(err as Error).message}`);
    }
    // Compare against null, not truthiness: an EMPTY SKILL.md is the most
    // extreme over-stripping case and must fail its content checks rather
    // than skip them.
    if (content !== null) {
      const lower = content.toLowerCase();
      for (const phrase of phrases) {
        if (!lower.includes(phrase.toLowerCase())) {
          failures.push(`missing required phrase: "${phrase}"`);
        }
      }
      // Anchor headings to the start of a line; a plain substring search
      // would accept "### Preamble" or prose that merely quotes the heading.
      const lines = content.split(/\r?\n/).map((line) => line.trimStart());
      for (const heading of headings) {
        if (!lines.some((line) => line.startsWith(heading))) {
          failures.push(`missing required heading: "${heading}"`);
        }
      }
    }
  }

  return {
    skill: invariant.skill,
    passed: failures.length === 0,
    failures,
  };
}
|
||||
|
||||
/** Aggregate result of running a set of invariants against the current repo. */
export interface ParityReport {
  /** `tag` of the baseline the run was compared against. */
  baselineTag: string;
  /** `capturedAt` timestamp of the fresh snapshot taken for the run. */
  currentCapturedAt: string;
  /** Number of invariants evaluated. */
  totalChecks: number;
  /** Count of results with `passed === true`. */
  passed: number;
  /** Count of results with `passed === false`. */
  failed: number;
  /** One result per invariant, in input order. */
  details: ParityCheckResult[];
}
|
||||
|
||||
/**
 * Evaluate every invariant against a fresh snapshot of `repoRoot`.
 *
 * A new baseline is captured for the current state, then each invariant is
 * checked against its current entry and (when present) its baseline entry.
 * An invariant whose skill is missing from the current snapshot fails
 * immediately; the message distinguishes a skill that was removed since the
 * baseline from one that the baseline never contained.
 *
 * @param opts.repoRoot   Repository root to snapshot and read SKILL.md from.
 * @param opts.baseline   Reference baseline to compare against.
 * @param opts.invariants Invariants to evaluate, in order.
 * @returns Totals plus one result per invariant, in input order.
 */
export function runParityChecks(opts: {
  repoRoot: string;
  baseline: ParityBaseline;
  invariants: ParityInvariant[];
}): ParityReport {
  const { repoRoot, baseline, invariants } = opts;
  const current = captureBaseline({ repoRoot });

  const details = invariants.map((invariant): ParityCheckResult => {
    const baselineEntry = baseline.skills[invariant.skill];
    const currentEntry = current.skills[invariant.skill];
    if (currentEntry) {
      return checkSkillParity(invariant, currentEntry, baselineEntry, repoRoot);
    }
    // Only claim "present in baseline" when that is actually true; a skill
    // the baseline never had points at a wrong invariant, not a removal.
    const failure = baselineEntry
      ? `skill removed: ${invariant.skill} present in baseline but not current state`
      : `skill not found: ${invariant.skill} absent from both baseline and current state`;
    return { skill: invariant.skill, passed: false, failures: [failure] };
  });

  const passed = details.filter(d => d.passed).length;
  return {
    baselineTag: baseline.tag,
    currentCapturedAt: current.capturedAt,
    totalChecks: details.length,
    passed,
    failed: details.length - passed,
    details,
  };
}
|
||||
|
||||
/**
 * Standard invariant registry — the v1.45.0.0 set.
 *
 * Each row pins what must-not-break in a skill family: the phrases its
 * SKILL.md must keep and its minimum size. Every row shares the same two
 * required headings and the same 5% growth ceiling, which are filled in
 * below. Extend as future skills land. Phase B (v2.0.0.0) adds LLM-judge
 * invariants on top of these.
 */
export const PARITY_INVARIANTS: ParityInvariant[] = (
  [
    // [skill, must-preserve phrases, minimum bytes]
    ['cso', ['OWASP', 'STRIDE', 'daily', 'comprehensive', 'verif'], 30_000],
    ['ship', ['VERSION', 'CHANGELOG', 'review', 'merge', 'PR'], 80_000],
    [
      'plan-ceo-review',
      ['SCOPE EXPANSION', 'SELECTIVE EXPANSION', 'HOLD SCOPE', 'SCOPE REDUCTION'],
      80_000,
    ],
    ['plan-eng-review', ['Architecture', 'Code Quality', 'Test', 'Performance'], 70_000],
    ['plan-design-review', ['design', 'visual'], 70_000],
    ['review', ['confidence', 'P1', 'P2'], 70_000],
    ['qa', ['bug', 'browse', 'fix'], 50_000],
    ['investigate', ['root cause', 'hypothes'], 30_000],
    ['office-hours', ['design doc', 'problem statement'], 70_000],
    ['autoplan', ['ceo', 'eng', 'design'], 70_000],
  ] as Array<[string, string[], number]>
).map(([skill, mustContain, minBytes]) => ({
  skill,
  mustContain,
  // A fresh array per entry so no two invariants share a mutable reference.
  mustHaveHeadings: ['## Preamble', '## When to invoke'],
  maxSizeRatio: 1.05,
  minBytes,
}));
|
||||
@@ -0,0 +1,25 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const FILES = ['investigate/SKILL.md.tmpl', 'investigate/SKILL.md'];
|
||||
|
||||
describe('investigate freeze path resolution', () => {
  // Shell fragments the hook definition must contain, checked in this order.
  const hookSnippets = [
    '${CLAUDE_SKILL_DIR}/../freeze/bin/check-freeze.sh',
    '${CLAUDE_SKILL_DIR}/../gstack-freeze/bin/check-freeze.sh',
    '[ -x "$S" ] && bash "$S" || exit 0',
    "command: 'bash -c ''",
  ];

  // Shell fragments the scope-lock availability check must contain, in order.
  const availabilitySnippets = [
    '_FREEZE_SCRIPT="${CLAUDE_SKILL_DIR}/../freeze/bin/check-freeze.sh"',
    '[ -x "$_FREEZE_SCRIPT" ] || _FREEZE_SCRIPT="${CLAUDE_SKILL_DIR}/../gstack-freeze/bin/check-freeze.sh"',
    '[ -x "$_FREEZE_SCRIPT" ] && echo "FREEZE_AVAILABLE" || echo "FREEZE_UNAVAILABLE"',
  ];

  FILES.forEach((rel) => {
    // Read once per file at registration time; both tests share the text.
    const text = fs.readFileSync(path.join(ROOT, rel), 'utf-8');

    test(`${rel} hook falls back to standalone gstack-freeze install`, () => {
      for (const snippet of hookSnippets) {
        expect(text).toContain(snippet);
      }
    });

    test(`${rel} scope lock availability check supports standalone install`, () => {
      for (const snippet of availabilitySnippets) {
        expect(text).toContain(snippet);
      }
    });
  });
});
|
||||
@@ -0,0 +1,145 @@
|
||||
/**
|
||||
* Gap C (v1.46.0.0): parity-baseline-v1.44.1.json integrity check.
|
||||
*
|
||||
* The v1.44.1 baseline file is the source of every "v1 was X bytes" claim
|
||||
* in CHANGELOG.md (v1.46.0.0 entry) and the reference for the per-skill
|
||||
* size-budget gate, the parity-suite content invariants, and the published
|
||||
* compression numbers. If a contributor (or a sloppy rebase) edits the
|
||||
* file, every downstream claim silently becomes unverifiable.
|
||||
*
|
||||
* This test pins:
|
||||
* 1. The file exists.
|
||||
* 2. Its top-level `tag` is "v1.44.1" (rejects a rename-by-edit).
|
||||
* 3. Its `capturedFromCommit` is the v1.44.1.0 release commit (or earlier
|
||||
* commit on the slim-skill-tokens branch where the baseline was
|
||||
* captured — both are immutable historic SHAs).
|
||||
* 4. The headline numbers reported in CHANGELOG.md are present in the
|
||||
* baseline JSON. If someone "fixes" the JSON numbers without updating
|
||||
* CHANGELOG (or vice versa), this surfaces the mismatch.
|
||||
* 5. A whitelist of known stable commits — anything else means someone
|
||||
* regenerated the baseline against fresh-current-state, which defeats
|
||||
* the v1→v2 reference contract.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as crypto from 'crypto';
|
||||
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.44.1.json');
|
||||
const CHANGELOG_PATH = path.join(REPO_ROOT, 'CHANGELOG.md');
|
||||
|
||||
/**
|
||||
* The baseline was captured at this commit on the slim-skill-tokens branch
|
||||
* (commit 74bc8054, just after v2_PLAN.md landed and before any compression
|
||||
* work). If the baseline is ever regenerated, this whitelist must change AND
|
||||
* the v1.46.0.0 CHANGELOG numbers table must be updated to reflect the new
|
||||
* v1.x baseline.
|
||||
*/
|
||||
const ALLOWED_BASELINE_COMMITS = new Set([
|
||||
'74bc8054',
|
||||
]);
|
||||
|
||||
/**
|
||||
* Headline numbers from the v1.46.0.0 CHANGELOG entry. If the baseline JSON
|
||||
* is edited, these no longer match and the user's published claims become
|
||||
* unverifiable. We assert the baseline still contains these values.
|
||||
*/
|
||||
const EXPECTED_v144_NUMBERS = {
|
||||
totalSkills: 51,
|
||||
totalCorpusBytesMin: 2_900_000, // CHANGELOG says ~2,847 KB (uses Math.round(/1024)), i.e. ~2,915,000 bytes; this window allows roughly ±15K slack
|
||||
totalCorpusBytesMax: 2_930_000,
|
||||
estTotalCatalogTokensMin: 9_300,
|
||||
estTotalCatalogTokensMax: 9_340, // CHANGELOG cites ~9,319
|
||||
};
|
||||
|
||||
describe('parity-baseline-v1.44.1.json integrity (v1→v2 reference)', () => {
  // Parse the fixture afresh on every call; nothing is cached between tests.
  const loadBaseline = () => JSON.parse(fs.readFileSync(BASELINE_PATH, 'utf-8'));

  test('file exists at the canonical path', () => {
    const present = fs.existsSync(BASELINE_PATH);
    expect(present).toBe(true);
  });

  test('tag is "v1.44.1" — file was not renamed by edit', () => {
    const { tag } = loadBaseline();
    expect(tag).toBe('v1.44.1');
  });

  test('capturedFromCommit is on the allowlist (rejects ad-hoc regeneration)', () => {
    const { capturedFromCommit } = loadBaseline();
    if (ALLOWED_BASELINE_COMMITS.has(capturedFromCommit)) return;

    const allowed = [...ALLOWED_BASELINE_COMMITS].join(', ');
    throw new Error(
      [
        `parity-baseline-v1.44.1.json was captured at commit ${capturedFromCommit}, not on the allowlist (${allowed}).`,
        'If you intentionally regenerated the baseline, add the new commit to ALLOWED_BASELINE_COMMITS in test/parity-baseline-integrity.test.ts AND update the v1.46.0.0 CHANGELOG numbers table to match the new baseline.',
        `If you didn't intend to regenerate it, restore the file from git history.`,
      ].join('\n'),
    );
  });

  test('totalSkills matches expected (51)', () => {
    const { totalSkills } = loadBaseline();
    expect(totalSkills).toBe(EXPECTED_v144_NUMBERS.totalSkills);
  });

  test('totalCorpusBytes is within the CHANGELOG-cited range (~2,847 KB)', () => {
    const { totalCorpusBytes } = loadBaseline();
    const { totalCorpusBytesMin: lower, totalCorpusBytesMax: upper } = EXPECTED_v144_NUMBERS;
    expect(totalCorpusBytes).toBeGreaterThanOrEqual(lower);
    expect(totalCorpusBytes).toBeLessThanOrEqual(upper);
  });

  test('estTotalCatalogTokens matches the CHANGELOG-cited ~9,319', () => {
    const { estTotalCatalogTokens } = loadBaseline();
    const { estTotalCatalogTokensMin: lower, estTotalCatalogTokensMax: upper } = EXPECTED_v144_NUMBERS;
    expect(estTotalCatalogTokens).toBeGreaterThanOrEqual(lower);
    expect(estTotalCatalogTokens).toBeLessThanOrEqual(upper);
  });

  test('CHANGELOG v1.46.0.0 entry references this baseline file by path', () => {
    // Only checks that the fixture's filename appears somewhere in
    // CHANGELOG.md, so reviewers can trace the published numbers to it.
    const changelogText = fs.readFileSync(CHANGELOG_PATH, 'utf-8');
    expect(changelogText).toContain('parity-baseline-v1.44.1.json');
  });

  test('every per-skill entry has the required shape', () => {
    const numericFields = ['skillMdBytes', 'skillMdLines', 'estTokens', 'descriptionLen'];
    const { skills } = loadBaseline();
    Object.entries(skills).forEach(([skill, entry]) => {
      const record = entry as Record<string, unknown>;
      // The entry must name itself with the same key it is stored under.
      expect(typeof record.skill).toBe('string');
      expect(record.skill).toBe(skill);
      for (const field of numericFields) {
        expect(typeof record[field]).toBe('number');
      }
      expect(record.skillMdBytes as number).toBeGreaterThan(0);
    });
  });

  test('content hash is stable (catches any byte-level edit)', () => {
    // The SHA-256 of the raw file bytes is pinned, so any edit to the
    // fixture fails here with both hashes printed. A legitimate change
    // means updating EXPECTED_HASH and explaining why in the commit.
    //
    // To re-capture: `shasum -a 256 test/fixtures/parity-baseline-v1.44.1.json`
    const EXPECTED_HASH = '29da01be6493bb2c7308b072f3066c09bdeb0397cb79ae1c708b5a38850efe46';
    const digest = crypto
      .createHash('sha256')
      .update(fs.readFileSync(BASELINE_PATH))
      .digest('hex');
    if (digest === EXPECTED_HASH) return;

    throw new Error(
      [
        'parity-baseline-v1.44.1.json content hash changed.',
        ` expected: ${EXPECTED_HASH}`,
        ` current: ${digest}`,
        'If you intentionally regenerated the baseline, update EXPECTED_HASH in test/parity-baseline-integrity.test.ts AND justify the change in the commit message AND update the v1.46.0.0 CHANGELOG numbers table.',
        `If you didn't intend to regenerate it, restore the file from git history.`,
      ].join('\n'),
    );
  });
});
|
||||
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Cathedral parity suite — gate-tier (free, structural + content checks).
|
||||
*
|
||||
* Runs every PARITY_INVARIANTS check against the current SKILL.md output
|
||||
* vs the v1.44.1 baseline. Failures get an actionable, per-skill report
|
||||
* showing missing phrases, missing headings, and size ratios.
|
||||
*
|
||||
* Periodic-tier LLM-judge parity (paid) lands in Phase B (v2.0.0.0)
|
||||
* alongside the sections/ extraction. Plumbing is in parity-harness.ts.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { runParityChecks, PARITY_INVARIANTS } from './helpers/parity-harness';
|
||||
import type { ParityBaseline } from './helpers/capture-parity-baseline';
|
||||
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.44.1.json');
|
||||
|
||||
describe('parity suite vs v1.44.1 baseline (gate, free)', () => {
  test('baseline exists', () => {
    const present = fs.existsSync(BASELINE_PATH);
    expect(present).toBe(true);
  });

  test('all PARITY_INVARIANTS pass', () => {
    const rawBaseline = fs.readFileSync(BASELINE_PATH, 'utf-8');
    const baseline: ParityBaseline = JSON.parse(rawBaseline);
    const report = runParityChecks({ repoRoot: REPO_ROOT, baseline, invariants: PARITY_INVARIANTS });

    // Always print the pass count, even when everything is green.
    // eslint-disable-next-line no-console
    console.log(
      `[parity] ${report.passed}/${report.totalChecks} skills passed parity vs ${baseline.tag}`,
    );

    if (report.failed !== 0) {
      // One section per failing skill, one bullet per failure.
      const sections: string[] = [];
      for (const detail of report.details) {
        if (detail.passed) continue;
        sections.push(` ${detail.skill}:\n - ${detail.failures.join('\n - ')}`);
      }
      throw new Error(
        `${report.failed} skill(s) failed parity checks vs v1.44.1:\n${sections.join('\n')}`,
      );
    }
  });
});
|
||||
@@ -0,0 +1,186 @@
|
||||
/**
|
||||
* Unit tests for the ResolverEntry / unwrapResolver mechanism.
|
||||
*
|
||||
* Verifies the conditional-injection plumbing added in T2 (v1.45.0.0).
|
||||
* Plain functions still work; gated entries skip when appliesTo returns false.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import { unwrapResolver, type ResolverFn, type ResolverEntry, type TemplateContext } from '../scripts/resolvers/types';
|
||||
|
||||
/**
 * Build a TemplateContext for tests.
 *
 * Starts from fixed defaults (skill `test-skill`, host `claude`, paths under
 * `~/.claude/skills/gstack`) and lets `overrides` replace any top-level
 * field. The merge is shallow: overriding `paths` replaces the whole object.
 */
function makeCtx(overrides: Partial<TemplateContext> = {}): TemplateContext {
  const gstackRoot = '~/.claude/skills/gstack';
  const defaults: TemplateContext = {
    skillName: 'test-skill',
    tmplPath: '/tmp/test/SKILL.md.tmpl',
    host: 'claude',
    paths: {
      skillRoot: gstackRoot,
      localSkillRoot: '.claude/skills',
      binDir: `${gstackRoot}/bin`,
      browseDir: `${gstackRoot}/browse/dist`,
      designDir: `${gstackRoot}/design/dist`,
      makePdfDir: `${gstackRoot}/make-pdf/dist`,
    },
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('unwrapResolver — plain function pass-through', () => {
  test('returns the function as-is, no gate', () => {
    const greet: ResolverFn = (ctx) => `hello-${ctx.skillName}`;
    const { resolve, appliesTo } = unwrapResolver(greet);
    expect(resolve(makeCtx())).toBe('hello-test-skill');
    expect(appliesTo).toBeUndefined();
  });
});

describe('unwrapResolver — gated entry', () => {
  // Applies the gate the way the tests below expect the generator to:
  // a gate that says "no" yields '', anything else yields resolve()'s output.
  // A fresh default context is built for the gate and for the resolver.
  const renderThroughGate = (entry: ResolverEntry) => {
    const { resolve, appliesTo } = unwrapResolver(entry);
    if (appliesTo && !appliesTo(makeCtx())) {
      return '';
    }
    return resolve(makeCtx());
  };

  test('returns resolve + gate', () => {
    const gatedSkills = ['ship', 'review'];
    const entry: ResolverEntry = {
      resolve: (ctx) => `gated-${ctx.skillName}`,
      appliesTo: (ctx) => gatedSkills.includes(ctx.skillName),
    };
    const { resolve, appliesTo } = unwrapResolver(entry);
    const shipCtx = makeCtx({ skillName: 'ship' });
    expect(resolve(shipCtx)).toBe('gated-ship');
    expect(appliesTo!(makeCtx({ skillName: 'ship' }))).toBe(true);
    expect(appliesTo!(makeCtx({ skillName: 'qa' }))).toBe(false);
  });

  test('gate returning false should signal skip — gen-skill-docs substitutes empty string', () => {
    // This mirrors the gen-skill-docs.ts contract:
    //   if (appliesTo && !appliesTo(ctx)) return '';
    const closed: ResolverEntry = {
      resolve: () => 'CONTENT',
      appliesTo: () => false,
    };
    expect(renderThroughGate(closed)).toBe('');
  });

  test('gate returning true allows resolve to fire', () => {
    const open: ResolverEntry = {
      resolve: () => 'CONTENT',
      appliesTo: () => true,
    };
    expect(renderThroughGate(open)).toBe('CONTENT');
  });

  test('entry without appliesTo behaves like ungated', () => {
    const ungated: ResolverEntry = { resolve: () => 'ALWAYS' };
    const { resolve, appliesTo } = unwrapResolver(ungated);
    expect(appliesTo).toBeUndefined();
    expect(resolve(makeCtx())).toBe('ALWAYS');
  });
});

describe('RESOLVERS registry still loads with mixed shapes', () => {
  test('importing the live registry produces a record with expected resolvers', async () => {
    const { RESOLVERS } = await import('../scripts/resolvers/index');

    // Spot-check that core resolvers are present.
    expect(RESOLVERS.PREAMBLE).toBeDefined();
    expect(RESOLVERS.REVIEW_DASHBOARD).toBeDefined();
    expect(RESOLVERS.SLUG_EVAL).toBeDefined();

    // Every entry, plain or gated, must unwrap to a callable resolver.
    Object.entries(RESOLVERS).forEach(([name, entry]) => {
      const { resolve } = unwrapResolver(entry);
      expect(typeof resolve).toBe('function');
      expect(name.length).toBeGreaterThan(0);
    });
  });
});
|
||||
|
||||
/**
|
||||
* Gap D (v1.46.0.0): live appliesTo gate end-to-end integration.
|
||||
*
|
||||
* The ResolverEntry / unwrapResolver machinery has unit coverage above. The
|
||||
* remaining gap: does the gen-skill-docs.ts:444 substitution loop actually
|
||||
* USE the gate? A refactor that drops the `if (appliesTo && !appliesTo(ctx))`
|
||||
* check would silently break every future gated resolver.
|
||||
*
|
||||
* This test simulates the exact 4-line shape the live pipeline uses against
|
||||
* a synthetic registry. If gen-skill-docs.ts is refactored and someone
|
||||
* forgets to keep the gate check in sync, this assertion fails.
|
||||
*/
|
||||
describe('gen-skill-docs substitution loop respects the appliesTo gate', () => {
|
||||
/**
 * Test-local stand-in for the {{NAME}} substitution loop in
 * scripts/gen-skill-docs.ts. Keep it in sync with the real loop by hand.
 *
 * Each `{{NAME}}` or `{{NAME:arg1:arg2}}` placeholder in `template` is
 * replaced by its resolver's output. A resolver whose `appliesTo` gate
 * rejects `ctx` contributes the empty string. An unregistered name throws.
 */
function simulateGenSubstitution(
  template: string,
  registry: Record<string, import('../scripts/resolvers/types').ResolverValue>,
  ctx: TemplateContext,
): string {
  return template.replace(/\{\{(\w+(?::[^}]+)?)\}\}/g, (_whole, key) => {
    // "NAME:arg1:arg2" → resolver name plus any colon-separated arguments.
    const [resolverName, ...args] = key.split(':');

    const entry = registry[resolverName];
    if (!entry) {
      throw new Error(`Unknown placeholder {{${resolverName}}}`);
    }

    const { resolve, appliesTo } = unwrapResolver(entry);
    // A closed gate wins before the resolver (or its args) is consulted.
    if (appliesTo && !appliesTo(ctx)) {
      return '';
    }

    if (args.length > 0) {
      return resolve(ctx, args);
    }
    return resolve(ctx);
  });
}
|
||||
|
||||
test('plain-function resolver fires unconditionally', () => {
|
||||
const tpl = '{{ALWAYS}}';
|
||||
const out = simulateGenSubstitution(tpl, {
|
||||
ALWAYS: () => 'fired',
|
||||
}, makeCtx({ skillName: 'whatever' }));
|
||||
expect(out).toBe('fired');
|
||||
});
|
||||
|
||||
test('gated resolver fires only when appliesTo returns true', () => {
|
||||
const tpl = 'before-{{GATED}}-after';
|
||||
const out = simulateGenSubstitution(tpl, {
|
||||
GATED: {
|
||||
resolve: () => 'CONTENT',
|
||||
appliesTo: (ctx) => ctx.skillName === 'allowed',
|
||||
},
|
||||
}, makeCtx({ skillName: 'allowed' }));
|
||||
expect(out).toBe('before-CONTENT-after');
|
||||
});
|
||||
|
||||
test('gated resolver is substituted with empty string when appliesTo returns false', () => {
|
||||
const tpl = 'before-{{GATED}}-after';
|
||||
const out = simulateGenSubstitution(tpl, {
|
||||
GATED: {
|
||||
resolve: () => 'CONTENT',
|
||||
appliesTo: (ctx) => ctx.skillName === 'allowed',
|
||||
},
|
||||
}, makeCtx({ skillName: 'something-else' }));
|
||||
expect(out).toBe('before--after');
|
||||
});
|
||||
|
||||
test('mixed registry: gated + plain resolvers in the same template', () => {
|
||||
const tpl = '{{PLAIN}} / {{GATED_ON}} / {{GATED_OFF}}';
|
||||
const ctx = makeCtx({ skillName: 'ship' });
|
||||
const out = simulateGenSubstitution(tpl, {
|
||||
PLAIN: () => 'plain',
|
||||
GATED_ON: { resolve: () => 'on', appliesTo: () => true },
|
||||
GATED_OFF: { resolve: () => 'off', appliesTo: () => false },
|
||||
}, ctx);
|
||||
expect(out).toBe('plain / on / ');
|
||||
});
|
||||
|
||||
test('parameterized resolver still respects gate', () => {
|
||||
const tpl = '{{GATED:arg1:arg2}}';
|
||||
const ctx = makeCtx({ skillName: 'no' });
|
||||
const out = simulateGenSubstitution(tpl, {
|
||||
GATED: {
|
||||
resolve: (_c, args) => `fired-with-${(args ?? []).join('-')}`,
|
||||
appliesTo: (c) => c.skillName === 'yes',
|
||||
},
|
||||
}, ctx);
|
||||
expect(out).toBe(''); // gated off, args ignored
|
||||
});
|
||||
|
||||
test('unknown resolver throws (matches real gen-skill-docs error contract)', () => {
|
||||
expect(() =>
|
||||
simulateGenSubstitution('{{NEVER_DEFINED}}', {}, makeCtx()),
|
||||
).toThrow(/Unknown placeholder/);
|
||||
});
|
||||
});
|
||||
@@ -35,6 +35,27 @@ import {
|
||||
assertNoBudgetRegression,
|
||||
type EvalResult,
|
||||
} from './helpers/eval-store';
|
||||
import { logBudgetOverride } from './helpers/budget-override';
|
||||
|
||||
/**
|
||||
* v1.45.0.0 T5 — hard eval cost cap.
|
||||
*
|
||||
* Per-tier defaults (override via env):
|
||||
* EVALS_BUDGET_HARD_CAP_GATE default: the umbrella cap below ($30/run unless EVALS_BUDGET_HARD_CAP is set)
|
||||
* EVALS_BUDGET_HARD_CAP_PERIODIC default $70/run, or the umbrella cap when that is set higher than $70
|
||||
* EVALS_BUDGET_HARD_CAP umbrella cap if a tier-specific isn't set; default $30
|
||||
* EVALS_BUDGET_OVERRIDE_REASON if set, override fires AND audit-logs to
|
||||
* ~/.gstack/analytics/spend-overrides.jsonl
|
||||
*
|
||||
* Caps are dollars-per-run, not dollars-per-test. A test that legitimately
|
||||
* gets more expensive should bake into the baseline; a runaway eval (infinite
|
||||
* retry, model price change) gets stopped here.
|
||||
*/
|
||||
const DEFAULT_HARD_CAP_USD = Number(process.env.EVALS_BUDGET_HARD_CAP) || 30;
|
||||
const TIER_CAPS: Record<'e2e' | 'llm-judge', number> = {
|
||||
e2e: Number(process.env.EVALS_BUDGET_HARD_CAP_GATE) || DEFAULT_HARD_CAP_USD,
|
||||
'llm-judge': Number(process.env.EVALS_BUDGET_HARD_CAP_PERIODIC) || Math.max(70, DEFAULT_HARD_CAP_USD),
|
||||
};
|
||||
|
||||
function currentGitBranch(): string {
|
||||
try {
|
||||
@@ -137,6 +158,40 @@ function checkTier(tier: 'e2e' | 'llm-judge'): void {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Enforce a hard dollar cap on per-run eval cost.
 *
 * Looks up the latest recorded run for `tier` and compares its
 * `total_cost_usd` against `TIER_CAPS[tier]`:
 *   - no recorded run: nothing to check, returns;
 *   - cost within the cap: logs an OK line and returns;
 *   - cost over the cap with EVALS_BUDGET_OVERRIDE_REASON set (non-blank):
 *     records the override via logBudgetOverride, warns, and returns;
 *   - otherwise: throws an Error naming the tier, both amounts, the
 *     override knobs, and the run file.
 */
function checkHardCap(tier: 'e2e' | 'llm-judge'): void {
  const latest = findLatestRun(getProjectEvalDir(), tier);
  if (!latest) return;

  const cap = TIER_CAPS[tier];
  const cost = latest.result.total_cost_usd;
  // Format lazily so amounts are only rendered on the path that prints them.
  const usd = (amount: number) => `$${amount.toFixed(2)}`;
  const tag = `[budget-hard-cap:${tier}]`;

  if (cost <= cap) {
    // eslint-disable-next-line no-console
    console.log(`${tag} OK — ${usd(cost)} ≤ ${usd(cap)} cap`);
    return;
  }

  const overrideReason = process.env.EVALS_BUDGET_OVERRIDE_REASON?.trim();
  if (overrideReason) {
    // Audit first, then tell the person running the suite.
    logBudgetOverride({
      scope: `evals-cost-cap-${tier}`,
      reason: overrideReason,
      details: { tier, cap, observed_cost_usd: cost, run_file: latest.filepath },
    });
    // eslint-disable-next-line no-console
    console.warn(`${tag} OVERRIDE APPLIED ("${overrideReason}") — ${usd(cost)} > ${usd(cap)} cap`);
    return;
  }

  const tierEnvSuffix = tier === 'e2e' ? 'GATE' : 'PERIODIC';
  throw new Error(
    [
      `Eval cost exceeded hard cap for tier ${tier}: `,
      `${usd(cost)} > ${usd(cap)}. `,
      'Set EVALS_BUDGET_OVERRIDE_REASON="why this is OK" to allow + audit. ',
      `Per-tier override: EVALS_BUDGET_HARD_CAP_${tierEnvSuffix}=<dollars>. `,
      `Run: ${latest.filepath}`,
    ].join(''),
  );
}
|
||||
|
||||
describe('tool budget regression (gate, free)', () => {
|
||||
test('no e2e test exceeds 2× prior tool calls or turns', () => {
|
||||
checkTier('e2e');
|
||||
@@ -145,4 +200,13 @@ describe('tool budget regression (gate, free)', () => {
|
||||
test('no llm-judge test exceeds 2× prior tool calls or turns', () => {
|
||||
checkTier('llm-judge');
|
||||
});
|
||||
|
||||
// T5: hard dollar cap on per-run cost (different from regression ratio above)
|
||||
test('e2e run cost ≤ EVALS_BUDGET_HARD_CAP_GATE', () => {
|
||||
checkHardCap('e2e');
|
||||
});
|
||||
|
||||
test('llm-judge run cost ≤ EVALS_BUDGET_HARD_CAP_PERIODIC', () => {
|
||||
checkHardCap('llm-judge');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Skill coverage floor — gate-tier, free, runs every PR.
|
||||
*
|
||||
* Phase 0 of the cathedral parity-eval suite: structural-compliance smoke
|
||||
* test that covers every gstack skill with file-IO assertions. The intent
|
||||
* is "every skill ships with at least one CI-blocking check" — even when
|
||||
* a skill doesn't (yet) have a behavioral E2E test, this floor catches
|
||||
* frontmatter regressions, missing generated header, empty/trivial bodies,
|
||||
* and dangling SKILL.md.tmpl-without-SKILL.md mismatches.
|
||||
*
|
||||
* Pairs with test/skill-coverage-matrix.ts (the registry) and
|
||||
* test/parity-suite.test.ts (the content-invariant suite). Together,
|
||||
* v1.45.0.0 ships with: floor (this file) + matrix (registry CI gate)
|
||||
* + invariants (content per skill family) + size budget. That's the
|
||||
* eval-first foundation the v2.0.0.0 sections/ work builds on.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { SKILL_COVERAGE } from './skill-coverage-matrix';
|
||||
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
/**
 * Read `<repo>/<skill>/SKILL.md` as UTF-8 text.
 *
 * @param skill top-level skill directory name
 * @returns the file contents, or `null` if the read fails for any reason
 */
function readSkillMd(skill: string): string | null {
  const skillMdPath = path.join(REPO_ROOT, skill, 'SKILL.md');
  let text: string | null = null;
  try {
    text = fs.readFileSync(skillMdPath, 'utf-8');
  } catch {
    // Any read failure is reported to the caller as null.
  }
  return text;
}
|
||||
|
||||
/**
 * List the top-level directories under the repo root that are skills.
 *
 * A directory counts when it is not hidden (no leading '.'), is not one of
 * node_modules / docs / test, and contains a SKILL.md file.
 *
 * @returns the matching directory names, sorted
 */
function listSkillDirs(): string[] {
  const excluded = new Set(['node_modules', 'docs', 'test']);
  const names: string[] = [];
  for (const entry of fs.readdirSync(REPO_ROOT, { withFileTypes: true })) {
    if (!entry.isDirectory() || entry.name.startsWith('.')) continue;
    if (excluded.has(entry.name)) continue;
    if (!fs.existsSync(path.join(REPO_ROOT, entry.name, 'SKILL.md'))) continue;
    names.push(entry.name);
  }
  return names.sort();
}
|
||||
|
||||
describe('skill-coverage-floor: every skill passes structural compliance', () => {
|
||||
const skills = listSkillDirs();
|
||||
|
||||
test('skill registry mentions every skill on disk', () => {
|
||||
const onDisk = new Set(skills);
|
||||
const inRegistry = new Set(Object.keys(SKILL_COVERAGE));
|
||||
const missingFromRegistry: string[] = [];
|
||||
for (const s of onDisk) {
|
||||
if (!inRegistry.has(s)) missingFromRegistry.push(s);
|
||||
}
|
||||
if (missingFromRegistry.length > 0) {
|
||||
throw new Error(
|
||||
`Skills on disk missing from test/skill-coverage-matrix.ts: ${missingFromRegistry.join(', ')}. ` +
|
||||
`Add an entry to SKILL_COVERAGE with at least 'test/skill-coverage-floor.test.ts' in gate[].`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test('every registry entry has at least one gate-tier test', () => {
|
||||
const missingGate: string[] = [];
|
||||
for (const [skill, coverage] of Object.entries(SKILL_COVERAGE)) {
|
||||
if (!coverage.gate || coverage.gate.length === 0) missingGate.push(skill);
|
||||
}
|
||||
if (missingGate.length > 0) {
|
||||
throw new Error(
|
||||
`Skills with no gate-tier eval: ${missingGate.join(', ')}. ` +
|
||||
`Eval-first foundation requires at least one CI-blocking check per skill.`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test('every gate-tier test path referenced in registry exists on disk', () => {
|
||||
const missing: string[] = [];
|
||||
for (const [skill, coverage] of Object.entries(SKILL_COVERAGE)) {
|
||||
for (const testPath of [...coverage.gate, ...coverage.periodic]) {
|
||||
const fullPath = path.join(REPO_ROOT, testPath);
|
||||
if (!fs.existsSync(fullPath)) {
|
||||
missing.push(`${skill} → ${testPath}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (missing.length > 0) {
|
||||
throw new Error(`Registry references missing test files:\n ${missing.join('\n ')}`);
|
||||
}
|
||||
});
|
||||
|
||||
// Per-skill structural compliance (file IO only, no LLM)
|
||||
for (const skill of skills) {
|
||||
describe(`skill: ${skill}`, () => {
|
||||
test('SKILL.md exists', () => {
|
||||
const content = readSkillMd(skill);
|
||||
expect(content).not.toBeNull();
|
||||
});
|
||||
|
||||
test('frontmatter is well-formed and contains name + description', () => {
|
||||
const content = readSkillMd(skill)!;
|
||||
expect(content.startsWith('---\n')).toBe(true);
|
||||
const fmEnd = content.indexOf('\n---', 4);
|
||||
expect(fmEnd).toBeGreaterThan(0);
|
||||
const fm = content.slice(4, fmEnd);
|
||||
// name: ...
|
||||
expect(/^name:\s*\S/m.test(fm)).toBe(true);
|
||||
// description: ... (either inline or block form)
|
||||
expect(/^description:\s*(\S|\|)/m.test(fm)).toBe(true);
|
||||
});
|
||||
|
||||
test('frontmatter description fits the catalog-trim contract', () => {
  const content = readSkillMd(skill)!;
  const fmEnd = content.indexOf('\n---', 4);
  const fm = content.slice(4, fmEnd);

  // Check block form FIRST. The inline pattern below also matches
  // `description: |` (capturing the `|` indicator), so testing inline first
  // would measure the indicator and never reach the block-form branch.
  // Block form (description: |\n  multiline) carries no size cap here;
  // the parity-suite and size-budget tests handle bytes.
  const isBlockForm = /^description:\s*\|/m.test(fm);
  if (isBlockForm) return;

  // Inline form: description: <one line>
  const inlineMatch = fm.match(/^description:\s+(.+)$/m);
  if (!inlineMatch) {
    throw new Error(`${skill}: description field is not in inline or block form`);
  }

  // Catalog-trimmed: should be ≤ 250 chars
  expect(inlineMatch[1].length).toBeLessThanOrEqual(250);
});
|
||||
|
||||
test('generated header present (only edit .tmpl, not .md)', () => {
|
||||
const content = readSkillMd(skill)!;
|
||||
expect(content).toContain('AUTO-GENERATED from SKILL.md.tmpl');
|
||||
});
|
||||
|
||||
test('body is non-trivial (≥ 200 bytes after frontmatter)', () => {
|
||||
const content = readSkillMd(skill)!;
|
||||
const fmEnd = content.indexOf('\n---', 4);
|
||||
const body = content.slice(fmEnd + 5).trim();
|
||||
expect(body.length).toBeGreaterThanOrEqual(200);
|
||||
});
|
||||
|
||||
test('no unresolved {{TEMPLATE}} placeholders leaked into output', () => {
|
||||
const content = readSkillMd(skill)!;
|
||||
const leaks = content.match(/\{\{[A-Z_]+(?::[^}]+)?\}\}/g);
|
||||
if (leaks) {
|
||||
throw new Error(
|
||||
`${skill}: ${leaks.length} unresolved placeholder(s) in generated SKILL.md: ${leaks.slice(0, 3).join(', ')}${leaks.length > 3 ? ', ...' : ''}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* Skill coverage matrix CI gate (v1.45.0.0 T1).
|
||||
*
|
||||
* Asserts every skill on disk has an entry in SKILL_COVERAGE with at
|
||||
* least one gate-tier test. The detailed per-skill structural checks
|
||||
* live in test/skill-coverage-floor.test.ts; this file is the matrix-
|
||||
* level gate that surfaces "skill added but eval not registered" cleanly.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { SKILL_COVERAGE, type SkillCoverage } from './skill-coverage-matrix';
|
||||
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
/**
 * Find every skill directory on disk: a non-hidden top-level directory
 * under the repo root that contains a SKILL.md file.
 *
 * @returns the directory names, sorted
 */
function discoverSkills(): string[] {
  const found: string[] = [];
  for (const entry of fs.readdirSync(REPO_ROOT, { withFileTypes: true })) {
    const isVisibleDir = entry.isDirectory() && !entry.name.startsWith('.');
    if (!isVisibleDir) continue;
    if (fs.existsSync(path.join(REPO_ROOT, entry.name, 'SKILL.md'))) {
      found.push(entry.name);
    }
  }
  return found.sort();
}
|
||||
|
||||
describe('skill coverage matrix', () => {
  test('SKILL_COVERAGE is exported and non-empty', () => {
    expect(typeof SKILL_COVERAGE).toBe('object');
    const registeredCount = Object.keys(SKILL_COVERAGE).length;
    expect(registeredCount).toBeGreaterThan(0);
  });

  test('every entry has the right shape', () => {
    Object.values(SKILL_COVERAGE).forEach((coverage) => {
      expect(Array.isArray(coverage.gate)).toBe(true);
      expect(Array.isArray(coverage.periodic)).toBe(true);
      expect(coverage.gate.length).toBeGreaterThan(0);

      // Every referenced path must look like a test file under test/.
      const referenced = [...coverage.gate, ...coverage.periodic];
      referenced.forEach((testPath) => {
        expect(typeof testPath).toBe('string');
        expect(testPath.startsWith('test/')).toBe(true);
        expect(testPath.endsWith('.test.ts')).toBe(true);
      });
    });
  });

  test('every skill on disk has a registry entry', () => {
    const missing = discoverSkills().filter((name) => !SKILL_COVERAGE[name]);
    if (missing.length === 0) return;

    throw new Error(
      [
        `Skills on disk missing from SKILL_COVERAGE: ${missing.join(', ')}. `,
        'Add an entry to test/skill-coverage-matrix.ts with at least ',
        `'test/skill-coverage-floor.test.ts' in gate[].`,
      ].join(''),
    );
  });

  test('no registry entry references a skill that does not exist on disk', () => {
    const onDisk = new Set(discoverSkills());
    const orphans = Object.keys(SKILL_COVERAGE).filter((name) => !onDisk.has(name));
    if (orphans.length === 0) return;

    throw new Error(
      [
        `Registry references skills not on disk: ${orphans.join(', ')}. `,
        'Remove from SKILL_COVERAGE or restore the skill directory.',
      ].join(''),
    );
  });
});
|
||||
@@ -0,0 +1,193 @@
|
||||
/**
|
||||
* Skill coverage matrix (v1.45.0.0 T1, cathedral Phase 0).
|
||||
*
|
||||
* Single source of truth mapping each gstack skill to its E2E test files.
|
||||
* The CI gate at test/skill-coverage-matrix.test.ts fails if a skill has
|
||||
* no gate-tier entry, ensuring the eval-first foundation holds: every
|
||||
* skill has at least one CI-blocking check that asserts must-have
|
||||
* behavior.
|
||||
*
|
||||
* Two tiers per entry:
|
||||
* gate CI-blocking, runs on every PR, target <$0.50/test or free.
|
||||
* periodic Weekly cron, deeper coverage, can cost ~$1-$3/test.
|
||||
*
|
||||
* The 'floor' entry refers to test/skill-coverage-floor.test.ts —
|
||||
* a structural-compliance smoke test that covers every skill with
|
||||
* file-IO checks (free, no LLM cost). When a skill has only 'floor'
|
||||
* coverage, that's the eval-first minimum; future work can layer
|
||||
* behavioral checks on top.
|
||||
*/
|
||||
|
||||
/**
 * Coverage record for a single skill: the test files that exercise it,
 * split by CI tier, plus an optional explanation of the choice.
 */
export interface SkillCoverage {
|
||||
/** Gate-tier test file paths (relative to repo root). At least one required per skill. */
|
||||
gate: string[];
|
||||
/** Periodic-tier test file paths. Optional but recommended. */
|
||||
periodic: string[];
|
||||
/** Brief note on why this coverage is the right shape for this skill. */
|
||||
rationale?: string;
|
||||
}
|
||||
|
||||
/** The structural-compliance smoke test that every skill lists as a gate-tier check. */
const FLOOR = 'test/skill-coverage-floor.test.ts';

/** Fresh coverage record for a skill whose only check is the floor test. */
const floorOnly = (): SkillCoverage => ({ gate: [FLOOR], periodic: [] });

/**
 * Registry of per-skill test coverage, keyed by the skill's top-level
 * directory name. The matrix test requires an entry for every skill
 * directory and a non-empty gate[] in every entry.
 *
 * To register a new skill, add a key here and list either an existing test
 * that covers it or, at minimum, the floor test as its gate-tier check.
 */
export const SKILL_COVERAGE: Record<string, SkillCoverage> = {
  // ─── Core loop ──────────────────────────────────────────────
  ship: {
    gate: ['test/skill-e2e-ship-idempotency.test.ts', FLOOR],
    periodic: ['test/skill-e2e-workflow.test.ts'],
  },
  review: {
    gate: ['test/skill-e2e-review.test.ts', FLOOR],
    periodic: ['test/skill-e2e-review-army.test.ts', 'test/regression-1539-review-self-verify.test.ts'],
  },
  qa: {
    gate: ['test/skill-e2e-qa-workflow.test.ts', FLOOR],
    periodic: ['test/skill-e2e-qa-bugs.test.ts'],
  },
  'qa-only': {
    ...floorOnly(),
    rationale: 'qa-only is qa with --report-only; behavior tested via /qa coverage.',
  },
  investigate: floorOnly(),
  browse: {
    ...floorOnly(),
    rationale: 'browse binary has its own integration suite under browse/test/.',
  },
  spec: {
    gate: [
      'test/spec-template-invariants.test.ts',
      'test/spec-template-sync.test.ts',
      FLOOR,
    ],
    periodic: [
      'test/skill-e2e-spec-execute.test.ts',
      'test/skill-llm-eval-spec.test.ts',
    ],
    rationale: '37 deterministic invariants pin Phase 1/3 gating, --execute race/security hardening, quality-gate redaction, archive contract, plan-mode-aware Phase 5. Periodic adds full PTY pipeline + LLM-judge.',
  },

  // ─── Plan triad ─────────────────────────────────────────────
  'plan-ceo-review': {
    gate: [
      'test/skill-e2e-plan-ceo-finding-floor.test.ts',
      'test/skill-e2e-plan-ceo-plan-mode.test.ts',
      FLOOR,
    ],
    periodic: [
      'test/skill-e2e-plan-ceo-finding-count.test.ts',
      'test/skill-e2e-plan-ceo-mode-routing.test.ts',
    ],
  },
  'plan-eng-review': {
    gate: [
      'test/skill-e2e-plan-eng-finding-floor.test.ts',
      'test/skill-e2e-plan-eng-plan-mode.test.ts',
      FLOOR,
    ],
    periodic: [
      'test/skill-e2e-plan-eng-finding-count.test.ts',
      'test/skill-e2e-plan-eng-multi-finding-batching.test.ts',
    ],
  },
  'plan-design-review': {
    gate: [
      'test/skill-e2e-plan-design-finding-floor.test.ts',
      'test/skill-e2e-plan-design-plan-mode.test.ts',
      'test/skill-e2e-plan-design-with-ui.test.ts',
      FLOOR,
    ],
    periodic: ['test/skill-e2e-plan-design-finding-count.test.ts'],
  },
  'plan-devex-review': {
    gate: [
      'test/skill-e2e-plan-devex-finding-floor.test.ts',
      'test/skill-e2e-plan-devex-plan-mode.test.ts',
      FLOOR,
    ],
    periodic: ['test/skill-e2e-plan-devex-finding-count.test.ts'],
  },
  autoplan: {
    gate: [FLOOR],
    periodic: ['test/skill-e2e-autoplan-chain.test.ts', 'test/skill-e2e-autoplan-dual-voice.test.ts'],
  },
  'office-hours': {
    gate: ['test/skill-e2e-office-hours.test.ts', FLOOR],
    periodic: ['test/skill-e2e-office-hours-auto-mode.test.ts', 'test/skill-e2e-office-hours-phase4.test.ts'],
  },

  // ─── Polish + design ────────────────────────────────────────
  'design-review': floorOnly(),
  'design-consultation': floorOnly(),
  'design-shotgun': floorOnly(),
  'design-html': floorOnly(),
  cso: {
    gate: ['test/skill-e2e-cso.test.ts', 'test/cso-preserved.test.ts', FLOOR],
    periodic: [],
    rationale: 'cso-preserved.test.ts pins must-not-strip security guidance phrases.',
  },
  'document-release': floorOnly(),
  'document-generate': floorOnly(),

  // ─── Ops + integrations ─────────────────────────────────────
  'land-and-deploy': { gate: ['test/skill-e2e-deploy.test.ts', FLOOR], periodic: [] },
  canary: floorOnly(),
  benchmark: { gate: ['test/skill-e2e-benchmark-providers.test.ts', FLOOR], periodic: [] },
  'benchmark-models': floorOnly(),
  codex: floorOnly(),
  retro: {
    gate: [FLOOR],
    periodic: ['test/regression-1624-retro-stale-base.test.ts'],
  },
  'gstack-upgrade': floorOnly(),
  'context-save': { gate: ['test/skill-e2e-context-skills.test.ts', FLOOR], periodic: [] },
  'context-restore': { gate: ['test/skill-e2e-context-skills.test.ts', FLOOR], periodic: [] },
  'setup-deploy': floorOnly(),
  'setup-browser-cookies': floorOnly(),
  'setup-gbrain': {
    gate: [
      'test/skill-e2e-setup-gbrain-bad-token.test.ts',
      'test/skill-e2e-setup-gbrain-path4-local-pglite.test.ts',
      'test/skill-e2e-setup-gbrain-remote.test.ts',
      FLOOR,
    ],
    periodic: [],
  },
  'sync-gbrain': {
    gate: [FLOOR],
    periodic: ['test/regression-1611-gbrain-sync-resume.test.ts'],
  },
  'open-gstack-browser': floorOnly(),
  'pair-agent': floorOnly(),
  scrape: floorOnly(),
  skillify: { gate: ['test/skill-e2e-skillify.test.ts', FLOOR], periodic: [] },
  learn: { gate: ['test/skill-e2e-learnings.test.ts', FLOOR], periodic: [] },
  'plan-tune': { gate: ['test/skill-e2e-plan-tune.test.ts', FLOOR], periodic: [] },

  // ─── iOS family ─────────────────────────────────────────────
  'ios-qa': {
    gate: ['test/skill-e2e-ios.test.ts', FLOOR],
    periodic: ['test/skill-e2e-ios-device.test.ts', 'test/skill-e2e-ios-swift-build.test.ts'],
  },
  'ios-fix': floorOnly(),
  'ios-clean': floorOnly(),
  'ios-sync': floorOnly(),
  'ios-design-review': floorOnly(),

  // ─── Safety / housekeeping ──────────────────────────────────
  careful: floorOnly(),
  freeze: floorOnly(),
  unfreeze: floorOnly(),
  guard: floorOnly(),
  'landing-report': floorOnly(),
  health: floorOnly(),
  'make-pdf': floorOnly(),
  'devex-review': floorOnly(),
};
|
||||
@@ -0,0 +1,220 @@
|
||||
/**
|
||||
* Per-skill SKILL.md size budget regression (v1.46.0.0 T5).
|
||||
*
|
||||
* Asserts that no skill's generated SKILL.md grew beyond the v1.44.1
|
||||
* baseline. Catches preamble/resolver changes that bloat skills back to
|
||||
* the pre-compression size. Free — pure file IO + JSON diff.
|
||||
*
|
||||
* Why a separate test from skill-budget-regression.test.ts: that one
|
||||
* compares LIVE eval runs (tool calls, turns, cost); this one compares
|
||||
* static SKILL.md sizes. Both gate-tier.
|
||||
*
|
||||
* The baseline lives at test/fixtures/parity-baseline-v1.44.1.json,
|
||||
* captured by scripts/capture-baseline.ts before any Phase A work landed.
|
||||
*
|
||||
* Override:
|
||||
* - GSTACK_SIZE_BUDGET_RATIO=<n> changes the per-skill regression ratio.
|
||||
 * Default 1.05 (5% growth tolerance). Set to 1.10 to permit 10% growth
|
||||
* (e.g., during deliberate feature additions that the catalog trim
|
||||
* doesn't offset).
|
||||
* - GSTACK_SIZE_BUDGET_OVERRIDE_REASON="text" allows a regression to
|
||||
* pass and logs the reason to ~/.gstack/analytics/spend-overrides.jsonl
|
||||
* for audit. Use sparingly; the next baseline should bake in the new
|
||||
* size.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { captureBaseline, type ParityBaseline } from './helpers/capture-parity-baseline';
|
||||
import { logBudgetOverride } from './helpers/budget-override';
|
||||
|
||||
const REPO_ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.44.1.json');
|
||||
|
||||
// Default per-skill ratio is 1.05 (5% growth tolerance). T4 catalog trim
// MOVES text from frontmatter (always-loaded catalog) to a body section
// ("## When to invoke"), so small skills with already-short descriptions
// see a tiny body growth from the section header itself (~20 bytes). The
// 5% per-skill tolerance accommodates that while still catching real bloat;
// the always-loaded catalog cost is enforced separately with a hard ceiling.
const DEFAULT_RATIO = 1.05;

/**
 * Parse a ratio override taken from the environment.
 *
 * @param raw - Raw environment value; may be undefined or empty.
 * @param fallback - Ratio to use when `raw` is not a usable number.
 * @returns `raw` as a number when it is finite and greater than zero,
 *   otherwise `fallback`. Rejecting `Infinity` matters: it would make every
 *   size comparison pass and silently disable the gate.
 */
function resolveSizeBudgetRatio(raw: string | undefined, fallback: number): number {
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

const RATIO = resolveSizeBudgetRatio(process.env.GSTACK_SIZE_BUDGET_RATIO, DEFAULT_RATIO);
|
||||
|
||||
/** One skill whose current SKILL.md size exceeds its baseline size times the allowed ratio. */
interface Regression {
|
||||
/** Skill name, taken from the key in the baseline's `skills` map. */
skill: string;
|
||||
/** `skillMdBytes` recorded for the skill in the baseline file. */
beforeBytes: number;
|
||||
/** `skillMdBytes` for the skill in the freshly captured snapshot. */
afterBytes: number;
|
||||
/** Growth factor: afterBytes divided by beforeBytes. */
growth: number;
|
||||
}
|
||||
|
||||
/**
 * Gate-tier size checks comparing the live repo against the v1.44.1 parity
 * baseline: per-skill growth, total corpus growth, per-skill shrinkage, and
 * the estimated catalog token count. Every failing check can be waived by
 * setting GSTACK_SIZE_BUDGET_OVERRIDE_REASON, which is passed to
 * logBudgetOverride along with the details of what was waived.
 */
describe('SKILL.md size budget regression (gate, free)', () => {
  // Loaders shared by the tests below. Each test invokes them itself, so
  // every test works from its own read of the baseline and the repo.
  const readBaseline = (): ParityBaseline =>
    JSON.parse(fs.readFileSync(BASELINE_PATH, 'utf-8'));
  const captureCurrent = () => captureBaseline({ repoRoot: REPO_ROOT });
  const readOverrideReason = () => process.env.GSTACK_SIZE_BUDGET_OVERRIDE_REASON?.trim();

  test('parity-baseline-v1.44.1.json exists', () => {
    const baselinePresent = fs.existsSync(BASELINE_PATH);
    expect(baselinePresent).toBe(true);
  });

  test('no skill exceeds v1.44.1 baseline size × ratio', () => {
    const baseline = readBaseline();
    const current = captureCurrent();

    const regressions: Regression[] = [];
    Object.entries(baseline.skills).forEach(([skill, before]) => {
      const after = current.skills[skill];
      if (!after) return; // skill removed since v1.44 — not a regression
      const withinBudget = after.skillMdBytes <= before.skillMdBytes * RATIO;
      if (withinBudget) return;
      regressions.push({
        skill,
        beforeBytes: before.skillMdBytes,
        afterBytes: after.skillMdBytes,
        growth: after.skillMdBytes / before.skillMdBytes,
      });
    });

    if (regressions.length === 0) return;

    // Same line format for both the override warning and the failure message.
    const formatRegression = (r: Regression): string =>
      ` ${r.skill}: ${r.beforeBytes} → ${r.afterBytes} bytes (×${r.growth.toFixed(2)})`;

    const overrideReason = readOverrideReason();
    if (overrideReason) {
      logBudgetOverride({
        scope: 'skill-size-budget',
        reason: overrideReason,
        details: { ratio: RATIO, regressions },
      });
      // eslint-disable-next-line no-console
      console.warn(
        `[skill-size-budget] OVERRIDE APPLIED (${overrideReason}) — ${regressions.length} regression(s) allowed:`,
      );
      regressions.forEach((r) => {
        // eslint-disable-next-line no-console
        console.warn(formatRegression(r));
      });
      return;
    }

    const msg = regressions.map(formatRegression).join('\n');
    throw new Error(
      `${regressions.length} skill(s) regressed past v1.44.1 baseline × ${RATIO}:\n${msg}\n` +
      `Override: set GSTACK_SIZE_BUDGET_OVERRIDE_REASON="why this is OK" to allow and audit-log.`,
    );
  });

  test('total corpus byte count does not regress past baseline × ratio', () => {
    const baseline = readBaseline();
    const current = captureCurrent();
    const beforeBytes = baseline.totalCorpusBytes;
    const afterBytes = current.totalCorpusBytes;
    const ratio = afterBytes / beforeBytes;

    const withinBudget = afterBytes <= beforeBytes * RATIO;
    if (withinBudget) {
      // eslint-disable-next-line no-console
      console.log(
        `[skill-size-budget] corpus OK: ${beforeBytes} → ${afterBytes} bytes (×${ratio.toFixed(3)})`,
      );
      return;
    }

    const overrideReason = readOverrideReason();
    if (overrideReason) {
      logBudgetOverride({
        scope: 'skill-size-budget-corpus',
        reason: overrideReason,
        details: { ratio: RATIO, observed: ratio, before: beforeBytes, after: afterBytes },
      });
      return;
    }

    throw new Error(
      `Total corpus regressed past v1.44.1 baseline × ${RATIO}: ` +
      `${beforeBytes} → ${afterBytes} bytes (×${ratio.toFixed(3)}). ` +
      `Override: set GSTACK_SIZE_BUDGET_OVERRIDE_REASON to allow.`,
    );
  });

  /**
   * Gap E (v1.46.0.0): per-skill minimum-size floor.
   *
   * skill-coverage-floor only requires a body of ≥ 200 bytes, so a skill
   * that was 100 KB at v1.44.1 could shrink to 250 bytes and still pass.
   * The parity-suite content invariants guard 10 hand-picked skills (cso,
   * ship, plan-ceo, etc.); the other skills had no shrinkage floor.
   *
   * Floor: 80% of the v1.44.1 baseline. Actual v1.46 shrinkage is <1% per
   * skill, so the floor leaves a comfortable margin while still catching
   * accidental mass deletion (e.g., a refactor that strips a skill body).
   *
   * v2.0.0.0 will introduce the sections/ pattern for 5 heavyweights (ship,
   * plan-ceo-review, office-hours, plan-eng-review, plan-design-review),
   * which will legitimately shrink to ~15 KB skeletons. When that lands,
   * add them to SECTIONS_EXTRACTED so the floor is relaxed for them.
   */
  test('no skill shrinks past 80% of v1.44.1 baseline (catches accidental body strip)', () => {
    const baseline = readBaseline();
    const current = captureCurrent();
    const MIN_RATIO = 0.80; // a skill at <80% of its v1.44 size signals mass-deletion
    const SECTIONS_EXTRACTED = new Set<string>(); // populate in v2.0.0.0 when sections/ lands

    type Undershoot = { skill: string; beforeBytes: number; afterBytes: number; ratio: number };
    const undershoots: Undershoot[] = [];

    Object.entries(baseline.skills).forEach(([skill, before]) => {
      if (SECTIONS_EXTRACTED.has(skill)) return;
      const after = current.skills[skill];
      if (!after) return; // skill removed since baseline — separate concern
      const ratio = after.skillMdBytes / before.skillMdBytes;
      if (ratio < MIN_RATIO) {
        undershoots.push({
          skill,
          beforeBytes: before.skillMdBytes,
          afterBytes: after.skillMdBytes,
          ratio,
        });
      }
    });

    if (undershoots.length === 0) return;

    const overrideReason = readOverrideReason();
    if (overrideReason) {
      logBudgetOverride({
        scope: 'skill-size-budget-floor',
        reason: overrideReason,
        details: { min_ratio: MIN_RATIO, undershoots },
      });
      // eslint-disable-next-line no-console
      console.warn(
        `[skill-size-budget-floor] OVERRIDE APPLIED (${overrideReason}) — ${undershoots.length} undershoot(s) allowed`,
      );
      return;
    }

    const msg = undershoots
      .map((u) => ` ${u.skill}: ${u.beforeBytes} → ${u.afterBytes} bytes (×${u.ratio.toFixed(2)} — below ${MIN_RATIO} floor)`)
      .join('\n');
    throw new Error(
      `${undershoots.length} skill(s) shrunk past v1.44.1 × ${MIN_RATIO} floor:\n${msg}\n` +
      `This usually signals accidental body strip (e.g., a resolver returning empty, a ` +
      `template losing a section). If the shrinkage is intentional (e.g., the skill moved ` +
      `to the sections/ pattern), add it to SECTIONS_EXTRACTED in this test. Override: ` +
      `GSTACK_SIZE_BUDGET_OVERRIDE_REASON="why" allows + audit-logs.`,
    );
  });

  test('catalog token estimate stays compressed (v1.45 target ≤ 7000)', () => {
    const current = captureCurrent();
    const v145Target = 7000;
    const observed = current.estTotalCatalogTokens;

    if (observed <= v145Target) {
      // eslint-disable-next-line no-console
      console.log(`[skill-size-budget] catalog OK: ~${observed} tokens (target ≤${v145Target})`);
      return;
    }

    const overrideReason = readOverrideReason();
    if (overrideReason) {
      logBudgetOverride({
        scope: 'skill-size-budget-catalog',
        reason: overrideReason,
        details: { target: v145Target, observed },
      });
      return;
    }

    throw new Error(
      `Catalog token estimate regressed past v1.45 target: ${observed} tokens > ${v145Target}. ` +
      `T4 catalog trim should keep this under control. Override: set GSTACK_SIZE_BUDGET_OVERRIDE_REASON to allow.`,
    );
  });
});
|
||||
@@ -1480,14 +1480,15 @@ describe('Skill trigger phrases', () => {
|
||||
const skillPath = path.join(ROOT, skill, 'SKILL.md');
|
||||
if (!fs.existsSync(skillPath)) return;
|
||||
const content = fs.readFileSync(skillPath, 'utf-8');
|
||||
// Extract description from frontmatter
|
||||
const frontmatterEnd = content.indexOf('---', 4);
|
||||
const frontmatter = content.slice(0, frontmatterEnd);
|
||||
expect(frontmatter).toMatch(/Use when/i);
|
||||
// v1.45.0.0 catalog trim moved trigger prose out of frontmatter into a
|
||||
// body "## When to invoke" section. Search the full file content, not
|
||||
// just frontmatter. The trigger phrase must still appear somewhere in
|
||||
// the skill so agents can match user requests to the skill.
|
||||
expect(content).toMatch(/Use when/i);
|
||||
});
|
||||
}
|
||||
|
||||
// Skills with proactive triggers should have "Proactively suggest" in description
|
||||
// Skills with proactive triggers should have "Proactively suggest" somewhere in the skill.
|
||||
const SKILLS_REQUIRING_PROACTIVE = [
|
||||
'qa', 'qa-only', 'ship', 'review', 'investigate', 'office-hours',
|
||||
'plan-ceo-review', 'plan-eng-review', 'plan-design-review',
|
||||
@@ -1499,9 +1500,8 @@ describe('Skill trigger phrases', () => {
|
||||
const skillPath = path.join(ROOT, skill, 'SKILL.md');
|
||||
if (!fs.existsSync(skillPath)) return;
|
||||
const content = fs.readFileSync(skillPath, 'utf-8');
|
||||
const frontmatterEnd = content.indexOf('---', 4);
|
||||
const frontmatter = content.slice(0, frontmatterEnd);
|
||||
expect(frontmatter).toMatch(/Proactively (suggest|invoke)/i);
|
||||
// Same catalog-trim consideration — search the full file content.
|
||||
expect(content).toMatch(/Proactively (suggest|invoke)/i);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* Static invariant test for #1671: nothing in production code should
|
||||
* append directly to ~/.gstack/builder-profile.jsonl. All session writes
|
||||
* must go through `gstack-developer-profile --log-session`. The legacy
|
||||
* file is now read-only — populated only by the pre-existing migration
|
||||
* and reconcile paths in bin/gstack-developer-profile.
|
||||
*
|
||||
* Prevents future regressions onto the legacy file that would re-create
|
||||
* the original bug (writer and reader disagreeing on storage location).
|
||||
*
|
||||
* Mirrors `test/setup-windows-fallback.test.ts`'s style — static invariant
|
||||
* via grep, resilient to line-number drift.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
// Paths allowed to mention builder-profile.jsonl. These read the file
|
||||
// or document its existence — they do not write to it.
|
||||
const ALLOWED_FILES = new Set<string>([
|
||||
// The binary that reads + reconciles the legacy file.
|
||||
'bin/gstack-developer-profile',
|
||||
// The legacy-shim binary that delegates reads.
|
||||
'bin/gstack-builder-profile',
|
||||
// Memory-ingest reads the legacy file during reconcile period.
|
||||
'bin/gstack-memory-ingest.ts',
|
||||
// The artifacts-init template registers the legacy file in
|
||||
// .brain-allowlist/.brain-privacy-map for users with pre-existing data.
|
||||
'bin/gstack-artifacts-init',
|
||||
// Documentation files mention the path.
|
||||
'CHANGELOG.md',
|
||||
'TODOS.md',
|
||||
'README.md',
|
||||
'office-hours/SKILL.md.tmpl',
|
||||
'office-hours/SKILL.md',
|
||||
'setup-gbrain/memory.md',
|
||||
'docs/designs/FIX_1671_PROFILE_MIGRATION.md',
|
||||
'docs/designs/PLAN_TUNING_V0.md',
|
||||
'docs/designs/PLAN_TUNING_V1.md',
|
||||
]);
|
||||
|
||||
// Directories to skip when walking the repo. Everything else is in scope —
|
||||
// any skill dir, migration script, resolver, or new top-level dir gets
|
||||
// covered automatically as the repo grows. Catches the "future contributor
|
||||
// adds the legacy write in retro/SKILL.md.tmpl" regression class.
|
||||
const SKIP_DIRS = new Set<string>([
|
||||
'node_modules', '.git', '.github', 'dist', 'test', 'docs',
|
||||
// Vendored binaries / build outputs.
|
||||
'browse/dist', 'design/dist', 'extension/node_modules',
|
||||
// The plan file's directory was already in ALLOWED_FILES; skip docs/ entirely.
|
||||
]);
|
||||
|
||||
/**
 * List the top-level directories of the repo that should be scanned.
 *
 * @returns Names of the directories directly under ROOT, excluding any name
 *   present in SKIP_DIRS and any name beginning with a dot.
 */
function listSearchDirs(): string[] {
  const names: string[] = [];
  for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
    if (!entry.isDirectory()) continue;
    if (SKIP_DIRS.has(entry.name) || entry.name.startsWith('.')) continue;
    names.push(entry.name);
  }
  return names;
}
|
||||
|
||||
const SEARCH_DIRS = listSearchDirs();
|
||||
|
||||
/**
 * Recursively yield the path of every regular file under `dir`.
 *
 * Directories whose path relative to `root` (with forward slashes) appears
 * in `skip` are pruned. Without this, nested skip entries such as
 * 'browse/dist' or 'extension/node_modules' would never take effect,
 * because only top-level directory names are filtered before the walk
 * starts.
 *
 * @param dir - Directory to walk.
 * @param skip - Repo-relative directory paths to prune; defaults to SKIP_DIRS.
 * @param root - Base used to compute relative paths; defaults to ROOT.
 * @returns A generator of file paths, each built by joining onto `dir`.
 */
function* walk(
  dir: string,
  skip: ReadonlySet<string> = SKIP_DIRS,
  root: string = ROOT,
): Generator<string> {
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const p = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      // Compare with forward slashes so the skip list works on Windows too.
      const rel = path.relative(root, p).split(path.sep).join('/');
      if (skip.has(rel)) continue;
      yield* walk(p, skip, root);
    } else if (entry.isFile()) {
      yield p;
    }
  }
}
|
||||
|
||||
// Match any literal-path append/write pattern targeting builder-profile.jsonl.
|
||||
// Captures: `>> .../builder-profile.jsonl`, `writeFileSync(...builder-profile.jsonl...)`,
|
||||
// `> .../builder-profile.jsonl`. NOTE: this only catches LITERAL-PATH writes —
|
||||
// variable-indirected writes (`FILE=...builder-profile.jsonl; echo >> "$FILE"`)
|
||||
// are not detected. The SKILL.md.tmpl assertions below pin the exact #1671
|
||||
// regression class directly; this regex is a backstop against the obvious
|
||||
// pattern, not a comprehensive variable-flow analyzer.
|
||||
const WRITE_PATTERN = /(>>?\s*["']?[^"'\s]*builder-profile\.jsonl|writeFileSync\([^)]*builder-profile\.jsonl|appendFileSync\([^)]*builder-profile\.jsonl)/;
|
||||
|
||||
/**
 * Static guard for #1671: scans the repo for literal-path writes to
 * builder-profile.jsonl outside the allowlist, and pins the office-hours
 * skill (generated file and template) to the --log-session subcommand.
 */
describe('#1671 invariant: no production code writes to builder-profile.jsonl', () => {
  // The legacy `echo '...' >> ".../builder-profile.jsonl"` append that #1671 removed.
  const ECHO_APPEND = /echo\s+['"][^'"]*['"]?\s*>>\s*["'][^"']*builder-profile\.jsonl/;

  test('only allowlisted files mention writes to builder-profile.jsonl', () => {
    const offending: { file: string; line: number; content: string }[] = [];

    for (const searchDir of SEARCH_DIRS) {
      const fullDir = path.join(ROOT, searchDir);
      if (!fs.existsSync(fullDir)) continue;

      for (const filePath of walk(fullDir)) {
        // ALLOWED_FILES and the 'bin/' prefix check below use forward
        // slashes. path.relative returns backslashes on Windows, so
        // normalise first; otherwise neither check would ever match there.
        const rel = path.relative(ROOT, filePath).split(path.sep).join('/');

        // Skip allowlisted files.
        if (ALLOWED_FILES.has(rel)) continue;

        // Only check text-like extensions to avoid binary files.
        if (!/\.(sh|ts|js|md|tmpl)$/.test(rel) && !rel.startsWith('bin/')) continue;

        let content: string;
        try {
          content = fs.readFileSync(filePath, 'utf-8');
        } catch {
          // Best-effort scan: an unreadable file is skipped, not a failure.
          continue;
        }

        content.split('\n').forEach((line, idx) => {
          if (WRITE_PATTERN.test(line)) {
            offending.push({ file: rel, line: idx + 1, content: line.trim() });
          }
        });
      }
    }

    if (offending.length > 0) {
      const msg = offending
        .map((o) => ` ${o.file}:${o.line} ${o.content}`)
        .join('\n');
      throw new Error(
        `Found production writes to builder-profile.jsonl outside the allowlist.\n` +
        `These would re-create #1671 (writer/reader file mismatch).\n` +
        `Use \`gstack-developer-profile --log-session\` instead.\n${msg}`,
      );
    }
    expect(offending).toEqual([]);
  });

  test('office-hours/SKILL.md uses --log-session, not raw echo append', () => {
    const skill = fs.readFileSync(path.join(ROOT, 'office-hours/SKILL.md'), 'utf-8');
    // The two known writer call-sites must use the new subcommand.
    expect(skill).toContain('gstack-developer-profile --log-session');
    // And must NOT contain the old echo-append pattern.
    expect(skill).not.toMatch(ECHO_APPEND);
  });

  test('office-hours/SKILL.md.tmpl uses --log-session, not raw echo append', () => {
    const tmpl = fs.readFileSync(path.join(ROOT, 'office-hours/SKILL.md.tmpl'), 'utf-8');
    expect(tmpl).toContain('gstack-developer-profile --log-session');
    expect(tmpl).not.toMatch(ECHO_APPEND);
  });
});
|
||||
@@ -0,0 +1,151 @@
|
||||
/**
|
||||
* Unit tests for the terse-build flag (v1.46.0.0 T3).
|
||||
*
|
||||
* `--explain-level=terse` makes the gen-skill-docs pipeline drop 4 preamble
|
||||
* sections at gen time. Default builds keep them. Without these tests, a
|
||||
* refactor that breaks the explainLevel threading silently regresses one
|
||||
 * of the opt-in compression paths — the EXPLAIN_LEVEL: terse runtime
|
||||
* gate still works, so users wouldn't notice immediately.
|
||||
*
|
||||
* Pure-function tests against the resolvers — fast, free, no subprocess.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import type { TemplateContext } from '../scripts/resolvers/types';
|
||||
import { generateWritingStyle } from '../scripts/resolvers/preamble/generate-writing-style';
|
||||
import { generateCompletenessSection } from '../scripts/resolvers/preamble/generate-completeness-section';
|
||||
import { generateConfusionProtocol } from '../scripts/resolvers/preamble/generate-confusion-protocol';
|
||||
import { generateContextHealth } from '../scripts/resolvers/preamble/generate-context-health';
|
||||
import { generatePreamble } from '../scripts/resolvers/preamble';
|
||||
|
||||
/**
 * Build a TemplateContext fixture for the resolver tests.
 *
 * @param explainLevel - Explain level to put on the context; left undefined
 *   when omitted.
 * @param tier - Value for `preambleTier`; defaults to 4.
 * @returns A context for the fixed skill 'test-skill' on the 'claude' host.
 */
function makeCtx(explainLevel?: 'default' | 'terse', tier: number = 4): TemplateContext {
  const paths = {
    skillRoot: '~/.claude/skills/gstack',
    localSkillRoot: '.claude/skills',
    binDir: '~/.claude/skills/gstack/bin',
    browseDir: '~/.claude/skills/gstack/browse/dist',
    designDir: '~/.claude/skills/gstack/design/dist',
    makePdfDir: '~/.claude/skills/gstack/make-pdf/dist',
  };

  return {
    skillName: 'test-skill',
    tmplPath: '/tmp/test/SKILL.md.tmpl',
    host: 'claude',
    paths,
    preambleTier: tier,
    explainLevel,
  };
}
|
||||
|
||||
/**
 * Per-resolver checks: each of the four terse-gated preamble resolvers emits
 * its full section in default mode and either collapses (Writing Style) or
 * returns an empty string (the other three) in terse mode.
 */
describe('terse build — per-resolver behavior', () => {
  describe('generateWritingStyle', () => {
    test('default: emits full section with jargon-list pointer', () => {
      const rendered = generateWritingStyle(makeCtx('default'));
      const required = ['## Writing Style', 'jargon-list.json', 'Curated jargon list', 'outcome'];
      for (const needle of required) {
        expect(rendered).toContain(needle);
      }
    });

    test('terse: emits one-line terse directive only', () => {
      const rendered = generateWritingStyle(makeCtx('terse'));
      for (const needle of ['## Writing Style', 'Terse mode (build-time)']) {
        expect(rendered).toContain(needle);
      }
      // Negative: NONE of the default-mode prose
      const forbidden = ['jargon-list.json', 'Curated jargon list', 'Frame questions in outcome terms'];
      for (const needle of forbidden) {
        expect(rendered).not.toContain(needle);
      }
    });

    test('terse is meaningfully shorter than default', () => {
      const defaultText = generateWritingStyle(makeCtx('default'));
      const terseText = generateWritingStyle(makeCtx('terse'));
      expect(terseText.length).toBeLessThan(defaultText.length / 3);
    });
  });

  describe('generateCompletenessSection', () => {
    test('default: emits full section with Boil-the-Lake prose', () => {
      const rendered = generateCompletenessSection(makeCtx('default'));
      expect(rendered).toContain('## Completeness Principle');
      expect(rendered).toContain('Boil the Lake');
    });

    test('terse: returns empty string', () => {
      const rendered = generateCompletenessSection(makeCtx('terse'));
      expect(rendered).toBe('');
    });

    test('no ctx arg: defaults to non-terse (back-compat with old callers)', () => {
      const rendered = generateCompletenessSection();
      expect(rendered).toContain('## Completeness Principle');
    });
  });

  describe('generateConfusionProtocol', () => {
    test('default: emits full section', () => {
      const rendered = generateConfusionProtocol(makeCtx('default'));
      expect(rendered).toContain('## Confusion Protocol');
      expect(rendered).toContain('high-stakes ambiguity');
    });

    test('terse: returns empty string', () => {
      const rendered = generateConfusionProtocol(makeCtx('terse'));
      expect(rendered).toBe('');
    });

    test('no ctx arg: defaults to non-terse', () => {
      const rendered = generateConfusionProtocol();
      expect(rendered).toContain('## Confusion Protocol');
    });
  });

  describe('generateContextHealth', () => {
    test('default: emits full section', () => {
      const rendered = generateContextHealth(makeCtx('default'));
      expect(rendered).toContain('## Context Health');
      expect(rendered).toContain('PROGRESS');
    });

    test('terse: returns empty string', () => {
      const rendered = generateContextHealth(makeCtx('terse'));
      expect(rendered).toBe('');
    });
  });
});
|
||||
|
||||
// Integration suite: the same terse gating, observed through the assembled
// output of generatePreamble() rather than the individual section generators.
describe('terse build — generatePreamble integration', () => {
  test('default tier-2 preamble includes all 4 terse-gated sections', () => {
    const preamble = generatePreamble(makeCtx('default', 2));
    const gatedHeadings = [
      '## Writing Style',
      '## Completeness Principle',
      '## Confusion Protocol',
      '## Context Health',
    ];
    for (const heading of gatedHeadings) {
      expect(preamble).toContain(heading);
    }
  });

  test('terse tier-2 preamble drops 3 of 4 sections + collapses Writing Style', () => {
    const preamble = generatePreamble(makeCtx('terse', 2));

    // The Writing Style heading survives, reduced to the one-line directive.
    for (const kept of ['## Writing Style', 'Terse mode (build-time)']) {
      expect(preamble).toContain(kept);
    }

    // The other three sections are removed entirely.
    const droppedHeadings = ['## Completeness Principle', '## Confusion Protocol', '## Context Health'];
    for (const heading of droppedHeadings) {
      expect(preamble).not.toContain(heading);
    }
  });

  test('terse preamble is measurably smaller', () => {
    const defaultSize = generatePreamble(makeCtx('default', 2)).length;
    const terseSize = generatePreamble(makeCtx('terse', 2)).length;
    // Expected saving is roughly 2-4 KB across the 4 sections; the assertion
    // only requires that more than 1 KB is saved.
    const saved = defaultSize - terseSize;
    expect(saved).toBeGreaterThan(1024);
  });

  test('terse preamble at tier 1 is identical to default (terse only affects tier-2+ sections)', () => {
    // Tier 1 never contains the 4 terse-gated sections, so both explain
    // levels are expected to produce exactly the same text.
    const tierOneDefault = generatePreamble(makeCtx('default', 1));
    const tierOneTerse = generatePreamble(makeCtx('terse', 1));
    expect(tierOneTerse).toBe(tierOneDefault);
  });

  test('explainLevel undefined behaves as default', () => {
    // An unset explain level must render exactly like an explicit 'default'.
    const unsetLevel = generatePreamble(makeCtx(undefined, 2));
    const explicitDefault = generatePreamble(makeCtx('default', 2));
    expect(unsetLevel).toBe(explicitDefault);
  });
});
|
||||
+26
-1
@@ -1,5 +1,5 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
|
||||
import { execFileSync, execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
@@ -42,6 +42,20 @@ function runRead(args: string = ''): string {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the `gstack-timeline-read` script from BIN with its arguments supplied
 * as an array (via execFileSync) rather than as one command string.
 *
 * The child runs with cwd = ROOT and GSTACK_HOME pointed at the per-test
 * tmpDir, with a 15 second timeout.
 *
 * @param args - argument vector forwarded verbatim to the script.
 * @returns the script's stdout with surrounding whitespace trimmed, or an
 *   empty string if anything inside the try block throws (non-zero exit,
 *   timeout, spawn failure, ...).
 */
function runReadArgs(args: string[] = []): string {
  const spawnOptions: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };

  try {
    const scriptPath = path.join(BIN, 'gstack-timeline-read');
    const stdout = execFileSync(scriptPath, args, spawnOptions);
    return stdout.trim();
  } catch {
    // Best-effort: callers assert on the returned text, so any failure is
    // reported to them as empty output instead of an exception.
    return '';
  }
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-timeline-'));
|
||||
slugDir = path.join(tmpDir, 'projects');
|
||||
@@ -136,6 +150,17 @@ describe('gstack-timeline-read', () => {
|
||||
expect(output).not.toContain('feature-b');
|
||||
});
|
||||
|
||||
test('filters branch names containing single quotes', () => {
  // A branch name with an embedded apostrophe, used both when logging and
  // as the --branch filter value.
  const quotedBranch = "feature/o'hare";
  const timelineEntries = [
    { skill: 'review', event: 'completed', branch: quotedBranch, outcome: 'approved', ts: '2026-03-28T10:00:00Z' },
    { skill: 'ship', event: 'completed', branch: 'feature-other', outcome: 'merged', ts: '2026-03-28T11:00:00Z' },
  ];
  for (const entry of timelineEntries) {
    runLog(JSON.stringify(entry));
  }

  // The filter goes through the argv-array runner, so the quote is handed
  // over as part of a single argument.
  const output = runReadArgs(['--branch', quotedBranch]);

  // Only the entry logged on the quoted branch may come back.
  expect(output).toContain('review');
  expect(output).toContain(quotedBranch);
  expect(output).not.toContain('feature-other');
});
|
||||
|
||||
test('limits output with --limit', () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
runLog(JSON.stringify({ skill: 'review', event: 'completed', branch: 'main', outcome: 'approved', ts: `2026-03-2${i}T10:00:00Z` }));
|
||||
|
||||
@@ -49,11 +49,17 @@ describe('Writing Style preamble section', () => {
|
||||
expect(out).toMatch(/terse|no explanations|user-turn override|current message/i);
|
||||
});
|
||||
|
||||
test('tier 2+ preamble inlines jargon list', () => {
|
||||
test('tier 2+ preamble references jargon list by path (v1.45.0.0 T3 — pointer, not inline)', () => {
|
||||
const out = generatePreamble(makeCtx('claude', 2));
|
||||
// Spot-check a few terms from scripts/jargon-list.json
|
||||
expect(out).toContain('idempotent');
|
||||
expect(out).toContain('race condition');
|
||||
// T3 dedup: the 80-term jargon list lives in scripts/jargon-list.json.
|
||||
// The Writing Style section points at the file rather than inlining it,
|
||||
// saving ~70 KB across the corpus. Agents Read the JSON on first
|
||||
// jargon term encountered per session.
|
||||
expect(out).toContain('jargon-list.json');
|
||||
expect(out).toContain('Curated jargon list');
|
||||
// Negative check: the literal term lines should NOT be inlined any more.
|
||||
expect(out).not.toMatch(/^- idempotent$/m);
|
||||
expect(out).not.toMatch(/^- race condition$/m);
|
||||
});
|
||||
|
||||
test('tier 2+ preamble includes terse-mode gate condition', () => {
|
||||
|
||||
Reference in New Issue
Block a user