Merge branch 'main' into garrytan/team-supabase-store

Brings in 48 commits from main (v0.15.7–v0.15.16): deterministic slugs,
TabSession refactor, pair-agent tunnel fix, content security layers,
community security wave, team-friendly install, interactive snapshots.

Conflict resolution:
- .gitignore: merged both sides (kept .factory/; added .kiro/, .opencode/,
  .slate/, .cursor/, and .openclaw/ from main)
- open-gstack-browser/SKILL.md: accepted main (renamed from .factory/)
- setup-team-sync/SKILL.md: regenerated via gen:skill-docs
- test/fixtures/golden/*: updated golden baselines for ship SKILL.md
- codex-ship-SKILL.md: accepted main (renamed from .factory/)
- package.json version: synced to VERSION (0.15.16.0)
- bin/gstack-uninstall: check settings file exists before claiming
  SessionStart hook removal (fixes false positive on clean systems)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-07 20:47:07 -10:00
258 changed files with 55174 additions and 2692 deletions
+34 -7
View File
@@ -45,15 +45,17 @@ describe('Audit compliance', () => {
expect(completionSection).toContain('_TEL" != "off"');
});
// Fix 3: W012 — Bun install is version-pinned
test('bun install commands use version pinning', () => {
// Round 2 Fix 1: W012 — Bun install uses checksum verification
test('bun install uses checksum-verified method', () => {
const browseResolver = readFileSync(join(ROOT, 'scripts/resolvers/browse.ts'), 'utf-8');
expect(browseResolver).toContain('BUN_VERSION');
// Should not have unpinned curl|bash (without BUN_VERSION on same line)
const lines = browseResolver.split('\n');
expect(browseResolver).toContain('shasum -a 256');
expect(browseResolver).toContain('BUN_INSTALL_SHA');
const setup = readFileSync(join(ROOT, 'setup'), 'utf-8');
// Setup error message should not have unverified curl|bash
const lines = setup.split('\n');
for (const line of lines) {
if (line.includes('bun.sh/install') && line.includes('bash') && !line.includes('BUN_VERSION') && !line.includes('command -v')) {
throw new Error(`Unpinned bun install found: ${line.trim()}`);
if (line.includes('bun.sh/install') && line.includes('| bash') && !line.includes('shasum')) {
throw new Error(`Unverified bun install found: ${line.trim()}`);
}
}
});
@@ -69,6 +71,17 @@ describe('Audit compliance', () => {
expect(between.toLowerCase()).toContain('untrusted');
});
// Round 2 Fix 2: Trust boundary markers + helper + wrapping in all paths
test('browse wraps untrusted content with trust boundary markers', () => {
  /** Reads a repo-relative source file as UTF-8 text. */
  const readSource = (relPath: string) => readFileSync(join(ROOT, relPath), 'utf-8');

  // commands.ts must mention both the command set and the wrapping helper
  const commandsSrc = readSource('browse/src/commands.ts');
  for (const marker of ['PAGE_CONTENT_COMMANDS', 'wrapUntrustedContent']) {
    expect(commandsSrc).toContain(marker);
  }

  // Every other output path must reference the same helper
  for (const relPath of ['browse/src/server.ts', 'browse/src/meta-commands.ts']) {
    expect(readSource(relPath)).toContain('wrapUntrustedContent');
  }
});
// Fix 5: Data flow documentation in review.ts
test('review.ts has data flow documentation', () => {
const review = readFileSync(join(ROOT, 'scripts/resolvers/review.ts'), 'utf-8');
@@ -76,6 +89,20 @@ describe('Audit compliance', () => {
expect(review).toContain('Data NOT sent');
});
// Round 2 Fix 3: Extension sender validation + message type allowlist
test('extension background.js validates message sender', () => {
  const backgroundSrc = readFileSync(join(ROOT, 'extension/background.js'), 'utf-8');
  // The sender-identity comparison and the message-type allowlist must both appear
  const requiredSnippets = ['sender.id !== chrome.runtime.id', 'ALLOWED_TYPES'];
  for (const snippet of requiredSnippets) {
    expect(backgroundSrc).toContain(snippet);
  }
});
// Round 2 Fix 4: Chrome CDP binds to localhost only
test('chrome-cdp binds to localhost only', () => {
  const launcherSrc = readFileSync(join(ROOT, 'bin/chrome-cdp'), 'utf-8');
  // Flags the launcher script is required to contain
  const requiredFlags = ['--remote-debugging-address=127.0.0.1', '--remote-allow-origins='];
  for (const flag of requiredFlags) {
    expect(launcherSrc).toContain(flag);
  }
});
// Fix 2+6: All generated SKILL.md files with telemetry are conditional
test('all generated SKILL.md files with telemetry calls use conditional pattern', () => {
const skills = getAllSkillMds();
+165
View File
@@ -0,0 +1,165 @@
/**
* Tests for bin/gstack-diff-scope — verifies scope signal detection.
*
* Creates temp git repos with specific file patterns and verifies
* the correct SCOPE_* variables are output.
*/
import { describe, test, expect, afterAll } from 'bun:test';
import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { spawnSync } from 'child_process';
const SCRIPT = join(import.meta.dir, '..', 'bin', 'gstack-diff-scope');
const dirs: string[] = [];
/**
 * Creates a throwaway git repository for one test case.
 *
 * The repo gets a base commit on `main` containing only README.md, then a
 * `feature/test` branch with a single commit that adds every path in `files`.
 * The directory is recorded in `dirs` so the afterAll hook can delete it.
 *
 * @param files Repo-relative paths to create on the feature branch. Parent
 *   directories are created as needed. An empty list leaves the feature
 *   branch without a commit of its own.
 * @returns Path of the new repository, checked out on `feature/test`.
 * @throws Error when a git command cannot be started, times out, or exits
 *   with a non-zero status.
 */
function createRepo(files: string[]): string {
  const dir = mkdtempSync(join(tmpdir(), 'diff-scope-test-'));
  dirs.push(dir);

  // Fail at the command that broke instead of letting a half-built repo
  // surface later as an unrelated assertion failure.
  const run = (cmd: string, args: string[]) => {
    const result = spawnSync(cmd, args, { cwd: dir, stdio: 'pipe', timeout: 5000 });
    if (result.error || result.status !== 0) {
      const detail = result.error
        ? result.error.message
        : String(result.stderr ?? '').trim();
      throw new Error(`${cmd} ${args.join(' ')} failed in ${dir}: ${detail}`);
    }
    return result;
  };

  run('git', ['init', '-b', 'main']);
  run('git', ['config', 'user.email', 'test@test.com']);
  run('git', ['config', 'user.name', 'Test']);

  // Base commit
  writeFileSync(join(dir, 'README.md'), '# test\n');
  run('git', ['add', '.']);
  run('git', ['commit', '-m', 'initial']);

  // Feature branch with specified files
  run('git', ['checkout', '-b', 'feature/test']);
  for (const f of files) {
    const fullPath = join(dir, f);
    // join(..., '..') resolves to the parent directory with the platform's
    // own separator, so this does not depend on '/' appearing in the path.
    mkdirSync(join(fullPath, '..'), { recursive: true });
    writeFileSync(fullPath, '# test content\n');
  }

  // With nothing to add, `git commit` would exit non-zero; skip it so an
  // empty file list still yields a usable repo.
  if (files.length > 0) {
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'add files']);
  }
  return dir;
}
/**
 * Runs bin/gstack-diff-scope with `main` as its argument inside `dir` and
 * parses the `KEY=value` lines it prints on stdout.
 *
 * @param dir Working directory for the script (a repo made by createRepo).
 * @returns Map of variable name to value. Lines without `=`, with an empty
 *   key, or with an empty value are ignored.
 * @throws Error when bash cannot be started or the 5 second timeout expires.
 */
function runScope(dir: string): Record<string, string> {
  const result = spawnSync('bash', [SCRIPT, 'main'], {
    cwd: dir, stdio: 'pipe', timeout: 5000,
  });
  // A failed spawn leaves stdout null; report the real cause instead of
  // tripping over a TypeError on the next line.
  if (result.error) {
    throw new Error(`gstack-diff-scope did not run in ${dir}: ${result.error.message}`);
  }

  const output = String(result.stdout ?? '').trim();
  const vars: Record<string, string> = {};
  for (const line of output.split('\n')) {
    // Split on the first '=' only so a value may itself contain '='
    const eq = line.indexOf('=');
    if (eq <= 0) continue;
    const val = line.slice(eq + 1);
    if (val) vars[line.slice(0, eq)] = val;
  }
  return vars;
}
// Best-effort removal of every temp repo recorded in `dirs`
afterAll(() => {
  dirs.forEach((repoDir) => {
    try {
      rmSync(repoDir, { recursive: true, force: true });
    } catch {
      // Cleanup failures are deliberately ignored
    }
  });
});
describe('gstack-diff-scope', () => {
  /** Builds a repo whose feature branch adds `files` and returns the parsed script output. */
  const scopeFor = (files: string[]) => runScope(createRepo(files));

  // Each row: [test name, files added on the feature branch, variable expected to be 'true']
  const positiveCases: Array<[string, string[], string]> = [
    // --- Existing scope signals ---
    ['detects frontend files', ['styles.css', 'component.tsx'], 'SCOPE_FRONTEND'],
    ['detects backend files', ['app.rb', 'service.py'], 'SCOPE_BACKEND'],
    ['detects test files', ['test/app.test.ts'], 'SCOPE_TESTS'],
    // --- New scope signals (Review Army) ---
    ['detects migrations via db/migrate/', ['db/migrate/20260330_create_users.rb'], 'SCOPE_MIGRATIONS'],
    ['detects migrations via generic migrations/', ['app/migrations/0001_initial.py'], 'SCOPE_MIGRATIONS'],
    ['detects migrations via prisma', ['prisma/migrations/20260330/migration.sql'], 'SCOPE_MIGRATIONS'],
    ['detects API via controller files', ['app/controllers/users_controller.rb'], 'SCOPE_API'],
    ['detects API via route files', ['src/routes/api.ts'], 'SCOPE_API'],
    ['detects API via GraphQL schemas', ['schema.graphql'], 'SCOPE_API'],
    ['detects auth files', ['app/services/auth_service.rb'], 'SCOPE_AUTH'],
    ['detects session files', ['lib/session_manager.ts'], 'SCOPE_AUTH'],
    ['detects JWT files', ['utils/jwt_helper.py'], 'SCOPE_AUTH'],
  ];

  for (const [title, files, variable] of positiveCases) {
    test(title, () => {
      const scope = scopeFor(files);
      expect(scope[variable]).toBe('true');
    });
  }

  test('returns false for all new signals when no matching files', () => {
    const scope = scopeFor(['docs/readme.md', 'config.yml']);
    for (const variable of ['SCOPE_MIGRATIONS', 'SCOPE_API', 'SCOPE_AUTH']) {
      expect(scope[variable]).toBe('false');
    }
  });

  test('outputs all 9 scope variables', () => {
    const scope = scopeFor(['app.ts']);
    expect(Object.keys(scope)).toHaveLength(9);
    const expectedVariables = [
      'SCOPE_FRONTEND',
      'SCOPE_BACKEND',
      'SCOPE_PROMPTS',
      'SCOPE_TESTS',
      'SCOPE_DOCS',
      'SCOPE_CONFIG',
      'SCOPE_MIGRATIONS',
      'SCOPE_API',
      'SCOPE_AUTH',
    ];
    for (const variable of expectedVariables) {
      expect(scope).toHaveProperty(variable);
    }
  });
});
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+5
View File
@@ -0,0 +1,5 @@
-- Migration: Drop user email column
-- WARNING: This migration is intentionally unsafe for testing
-- Test fixture: the destructive statements below are the point of this file;
-- leave them exactly as written.
-- Removes users.email together with every value stored in it.
ALTER TABLE users DROP COLUMN email;
-- Also removes users.phone_number, which the title above does not mention.
ALTER TABLE users DROP COLUMN phone_number;
-- No backfill, no reversibility check, no data preservation
+12
View File
@@ -0,0 +1,12 @@
# N+1 query example — intentionally bad for testing
# Test fixture: the per-post lookups inside the loop are the behaviour this
# file exists to exhibit, so they must stay as they are.
class PostsController
# Loads every post, then prints each post's author name and comment count.
def index
@posts = Post.all
@posts.each do |post|
# N+1: queries Author table for every post
puts post.author.name
# N+1: queries Comments table for every post
puts post.comments.count
end
end
end
+647 -34
View File
@@ -213,11 +213,20 @@ describe('gen-skill-docs', () => {
expect(browseTmpl).toContain('{{PREAMBLE}}');
});
test('generated SKILL.md contains contributor mode check', () => {
test('generated SKILL.md contains operational self-improvement (replaced contributor mode)', () => {
const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');
expect(content).toContain('Contributor Mode');
expect(content).toContain('gstack_contributor');
expect(content).toContain('contributor-logs');
expect(content).not.toContain('Contributor Mode');
expect(content).not.toContain('gstack_contributor');
expect(content).not.toContain('contributor-logs');
expect(content).toContain('Operational Self-Improvement');
expect(content).toContain('gstack-learnings-log');
expect(content).toContain('gstack-learnings-search --limit 3');
});
test('generated SKILL.md with LEARNINGS_LOG contains operational type', () => {
  // review serves as the representative skill that includes LEARNINGS_LOG
  const reviewSkillPath = path.join(ROOT, 'review', 'SKILL.md');
  const generated = fs.readFileSync(reviewSkillPath, 'utf-8');
  expect(generated).toContain('operational');
});
test('generated SKILL.md contains session awareness', () => {
@@ -586,10 +595,12 @@ describe('REVIEW_DASHBOARD resolver', () => {
expect(content).toContain('/plan-ceo-review');
});
test('plan-design-review chaining mentions eng and ceo reviews', () => {
test('plan-design-review chaining mentions eng, ceo, and design skills', () => {
const content = fs.readFileSync(path.join(ROOT, 'plan-design-review', 'SKILL.md'), 'utf-8');
expect(content).toContain('/plan-eng-review');
expect(content).toContain('/plan-ceo-review');
expect(content).toContain('/design-shotgun');
expect(content).toContain('/design-html');
});
test('ship does NOT contain review chaining', () => {
@@ -605,7 +616,8 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
const reviewSkill = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
test('all three modes share codepath tracing methodology', () => {
test('plan and ship modes share codepath tracing methodology', () => {
// Review mode delegates test coverage to the Testing specialist subagent (Review Army)
const sharedPhrases = [
'Trace data flow',
'Diagram the execution',
@@ -617,33 +629,40 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
for (const phrase of sharedPhrases) {
expect(planSkill).toContain(phrase);
expect(shipSkill).toContain(phrase);
expect(reviewSkill).toContain(phrase);
}
// Plan mode traces the plan, not a git diff
expect(planSkill).toContain('Trace every codepath in the plan');
expect(planSkill).not.toContain('git diff origin');
// Ship and review modes trace the diff
// Ship mode traces the diff
expect(shipSkill).toContain('Trace every codepath changed');
expect(reviewSkill).toContain('Trace every codepath changed');
});
test('all three modes include E2E decision matrix', () => {
for (const skill of [planSkill, shipSkill, reviewSkill]) {
test('review mode uses Review Army for specialist dispatch', () => {
  // Phrases the generated review skill must mention
  for (const phrase of ['Review Army', 'Specialist Dispatch', 'testing.md']) {
    expect(reviewSkill).toContain(phrase);
  }
});
test('plan and ship modes include E2E decision matrix', () => {
  // Review mode delegates to Testing specialist
  const matrixMarkers = ['E2E Test Decision Matrix', '→E2E', '→EVAL'];
  [planSkill, shipSkill].forEach((skill) => {
    matrixMarkers.forEach((marker) => {
      expect(skill).toContain(marker);
    });
  });
});
test('all three modes include regression rule', () => {
for (const skill of [planSkill, shipSkill, reviewSkill]) {
test('plan and ship modes include regression rule', () => {
  // Review mode delegates to Testing specialist
  const ruleMarkers = ['REGRESSION RULE', 'IRON RULE'];
  [planSkill, shipSkill].forEach((skill) => {
    ruleMarkers.forEach((marker) => {
      expect(skill).toContain(marker);
    });
  });
});
test('all three modes include test framework detection', () => {
for (const skill of [planSkill, shipSkill, reviewSkill]) {
test('plan and ship modes include test framework detection', () => {
// Review mode delegates to Testing specialist
for (const skill of [planSkill, shipSkill]) {
expect(skill).toContain('Test Framework Detection');
expect(skill).toContain('CLAUDE.md');
}
@@ -662,11 +681,12 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
expect(shipSkill).toContain('ship-test-plan');
});
test('review mode generates via Fix-First + gaps are INFORMATIONAL', () => {
test('review mode uses Fix-First + Review Army for specialist coverage', () => {
expect(reviewSkill).toContain('Fix-First');
expect(reviewSkill).toContain('INFORMATIONAL');
expect(reviewSkill).toContain('Step 4.75');
expect(reviewSkill).toContain('subsumes the "Test Gaps" category');
// Review Army handles test coverage via Testing specialist subagent
expect(reviewSkill).toContain('Review Army');
expect(reviewSkill).toContain('Testing');
});
test('plan mode does NOT include ship-specific content', () => {
@@ -681,6 +701,35 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
expect(reviewSkill).not.toContain('ship-test-plan');
});
test('review/specialists/ directory has all expected checklist files', () => {
  const specialistsDir = path.join(ROOT, 'review', 'specialists');
  // One checklist per specialist
  const checklistNames = [
    'testing.md',
    'maintainability.md',
    'security.md',
    'performance.md',
    'data-migration.md',
    'api-contract.md',
    'red-team.md',
  ];
  checklistNames.forEach((fileName) => {
    const present = fs.existsSync(path.join(specialistsDir, fileName));
    expect(present).toBe(true);
  });
});
test('each specialist file has standard header with scope and output format', () => {
  const specialistsDir = path.join(ROOT, 'review', 'specialists');
  const markdownFiles = fs.readdirSync(specialistsDir).filter(name => name.endsWith('.md'));
  markdownFiles.forEach((fileName) => {
    const text = fs.readFileSync(path.join(specialistsDir, fileName), 'utf-8');
    // A scope declaration plus some mention of output or JSON is required
    expect(text).toContain('Scope:');
    expect(text.toLowerCase()).toMatch(/output|json/);
    // The empty-result behaviour has to be spelled out as well
    expect(text).toContain('NO FINDINGS');
  });
});
// Regression guard: ship output contains key phrases from before the refactor
test('ship SKILL.md regression guard — key phrases preserved', () => {
const regressionPhrases = [
@@ -700,6 +749,22 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
expect(shipSkill).toContain(phrase);
}
});
test('ship SKILL.md contains review army specialist dispatch', () => {
  // Section title plus the two step numbers checked for it
  for (const phrase of ['Specialist Dispatch', 'Step 3.55', 'Step 3.56']) {
    expect(shipSkill).toContain(phrase);
  }
});
test('ship SKILL.md contains cross-review finding dedup', () => {
  for (const phrase of ['Cross-review finding dedup', 'Step 3.57']) {
    expect(shipSkill).toContain(phrase);
  }
});
test('ship SKILL.md contains re-run idempotency behavior', () => {
  const idempotencyPhrases = ['Re-run behavior (idempotency)', 'Never skip a verification step'];
  for (const phrase of idempotencyPhrases) {
    expect(shipSkill).toContain(phrase);
  }
});
});
// --- {{TEST_FAILURE_TRIAGE}} resolver tests ---
@@ -868,12 +933,9 @@ describe('Coverage gate in ship', () => {
expect(shipSkill).toContain('could not determine percentage — skipping');
});
test('review SKILL.md contains coverage WARNING', () => {
expect(reviewSkill).toContain('COVERAGE WARNING');
expect(reviewSkill).toContain('Consider writing tests before running /ship');
});
test('review coverage warning is INFORMATIONAL', () => {
test('review SKILL.md delegates coverage to Testing specialist', () => {
// Coverage audit moved to Testing specialist subagent in Review Army
expect(reviewSkill).toContain('testing.md');
expect(reviewSkill).toContain('INFORMATIONAL');
});
});
@@ -942,6 +1004,18 @@ describe('Plan status footer in preamble', () => {
});
});
// --- Skill invocation during plan mode in preamble ---
describe('Skill invocation during plan mode in preamble', () => {
  test('preamble contains skill invocation plan mode section', () => {
    const officeHoursSkill = fs.readFileSync(path.join(ROOT, 'office-hours', 'SKILL.md'), 'utf-8');
    // Heading and key sentences of the plan-mode section
    const requiredPhrases = [
      'Skill Invocation During Plan Mode',
      'precedence over generic plan mode behavior',
      'Do not continue the workflow',
      'cancel the skill or leave plan mode',
    ];
    for (const phrase of requiredPhrases) {
      expect(officeHoursSkill).toContain(phrase);
    }
  });
});
// --- {{SPEC_REVIEW_LOOP}} resolver tests ---
describe('SPEC_REVIEW_LOOP resolver', () => {
@@ -1153,6 +1227,138 @@ describe('BENEFITS_FROM resolver', () => {
expect(ceoContent).toContain('office-hours/SKILL.md');
expect(engContent).toContain('office-hours/SKILL.md');
});
test('BENEFITS_FROM delegates to INVOKE_SKILL pattern', () => {
  // The INVOKE_SKILL-style loading prose must be present (not the old manual skip list)
  const loadingProse = 'Follow its instructions from top to bottom';
  expect(engContent).toContain(loadingProse);
  expect(engContent).toContain('skipping these sections');
  expect(ceoContent).toContain(loadingProse);
});
});
// --- {{INVOKE_SKILL}} resolver tests ---
describe('INVOKE_SKILL resolver', () => {
  const ceoSkillDir = path.join(ROOT, 'plan-ceo-review');
  const ceoContent = fs.readFileSync(path.join(ceoSkillDir, 'SKILL.md'), 'utf-8');

  /** Asserts that the generated plan-ceo-review skill contains every phrase. */
  const expectCeoContains = (...phrases: string[]) => {
    for (const phrase of phrases) {
      expect(ceoContent).toContain(phrase);
    }
  };

  test('plan-ceo-review uses INVOKE_SKILL for mid-session office-hours fallback', () => {
    // The mid-session detection path should carry the INVOKE_SKILL-generated prose
    expectCeoContains('office-hours/SKILL.md', 'Follow its instructions from top to bottom');
  });

  test('INVOKE_SKILL output includes default skip list', () => {
    expectCeoContains('Preamble (run first)', 'Telemetry (run last)', 'AskUserQuestion Format');
  });

  test('INVOKE_SKILL output includes error handling', () => {
    expectCeoContains('If unreadable', 'Could not load');
  });

  test('template uses {{INVOKE_SKILL:office-hours}} placeholder', () => {
    const template = fs.readFileSync(path.join(ceoSkillDir, 'SKILL.md.tmpl'), 'utf-8');
    expect(template).toContain('{{INVOKE_SKILL:office-hours}}');
  });
});
// --- {{CHANGELOG_WORKFLOW}} resolver tests ---
describe('CHANGELOG_WORKFLOW resolver', () => {
  const shipDir = path.join(ROOT, 'ship');
  const shipContent = fs.readFileSync(path.join(shipDir, 'SKILL.md'), 'utf-8');

  /** Asserts that the generated ship skill contains every phrase. */
  const expectShipContains = (...phrases: string[]) => {
    for (const phrase of phrases) {
      expect(shipContent).toContain(phrase);
    }
  };

  test('ship SKILL.md contains changelog workflow', () => {
    expectShipContains('CHANGELOG (auto-generate)', 'git log <base>..HEAD --oneline');
  });

  test('changelog workflow includes cross-check step', () => {
    expectShipContains('Cross-check', 'Every commit must map to at least one bullet point');
  });

  test('changelog workflow includes voice guidance', () => {
    expectShipContains('Lead with what the user can now **do**');
  });

  test('template uses {{CHANGELOG_WORKFLOW}} placeholder', () => {
    const template = fs.readFileSync(path.join(shipDir, 'SKILL.md.tmpl'), 'utf-8');
    expect(template).toContain('{{CHANGELOG_WORKFLOW}}');
    // The old inline changelog content must be gone from the template
    expect(template).not.toContain('Group commits by theme');
  });

  test('changelog workflow includes keep-changelog format', () => {
    expectShipContains('### Added', '### Fixed');
  });
});
// --- Parameterized resolver infrastructure tests ---
describe('parameterized resolver support', () => {
  /** Reads the generated SKILL.md stored under the skill directory `dir`. */
  const readSkillMd = (dir: string) => fs.readFileSync(path.join(ROOT, dir, 'SKILL.md'), 'utf-8');

  test('gen-skill-docs regex handles colon-separated args', () => {
    // The template holding {{INVOKE_SKILL:office-hours}} must have been expanded,
    // leaving no placeholder of that form behind
    expect(readSkillMd('plan-ceo-review')).not.toMatch(/\{\{INVOKE_SKILL:[^}]+\}\}/);
  });

  test('templates with parameterized resolvers pass unresolved check', () => {
    // No generated SKILL.md may keep a {{...}} placeholder
    const dirsWithSkill = fs.readdirSync(ROOT).filter(
      entry => fs.existsSync(path.join(ROOT, entry, 'SKILL.md')),
    );
    dirsWithSkill.forEach((dir) => {
      const leftovers = readSkillMd(dir).match(/\{\{[A-Z_]+(?::[^}]*)?\}\}/g);
      if (leftovers) {
        throw new Error(`${dir}/SKILL.md has unresolved placeholders: ${leftovers.join(', ')}`);
      }
    });
  });
});
// --- Preamble routing injection tests ---
describe('preamble routing injection', () => {
  const shipContent = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');

  /** Asserts that the generated ship skill contains every snippet. */
  const expectShipContains = (...snippets: string[]) => {
    for (const snippet of snippets) {
      expect(shipContent).toContain(snippet);
    }
  };

  test('preamble bash checks for routing section in CLAUDE.md', () => {
    expectShipContains('grep -q "## Skill routing" CLAUDE.md', 'HAS_ROUTING');
  });

  test('preamble bash reads routing_declined config', () => {
    expectShipContains('routing_declined', 'ROUTING_DECLINED');
  });

  test('preamble includes routing injection AskUserQuestion', () => {
    expectShipContains('Add routing rules to CLAUDE.md', "I'll invoke skills manually");
  });

  test('routing injection respects prior decline', () => {
    expectShipContains('ROUTING_DECLINED');
    expect(shipContent).toMatch(/routing_declined.*true/);
  });

  test('routing injection only fires when all conditions met', () => {
    // Must be: HAS_ROUTING=no AND ROUTING_DECLINED=false AND PROACTIVE_PROMPTED=yes
    expectShipContains('HAS_ROUTING', 'ROUTING_DECLINED', 'PROACTIVE_PROMPTED');
  });

  test('routing section content includes key routing rules', () => {
    expectShipContains('invoke office-hours', 'invoke investigate', 'invoke ship', 'invoke qa');
  });
});
// --- {{DESIGN_OUTSIDE_VOICES}} resolver tests ---
@@ -1470,10 +1676,9 @@ describe('Codex generation (--host codex)', () => {
const content = fs.readFileSync(path.join(AGENTS_DIR, 'gstack-review', 'SKILL.md'), 'utf-8');
// Correct: references to sidecar files use gstack/review/ path
expect(content).toContain('.agents/skills/gstack/review/checklist.md');
expect(content).toContain('.agents/skills/gstack/review/design-checklist.md');
// design-checklist.md is now referenced via Review Army specialist (Claude only, stripped for Codex)
// Wrong: must NOT reference gstack-review/checklist.md (file doesn't exist there)
expect(content).not.toContain('.agents/skills/gstack-review/checklist.md');
expect(content).not.toContain('.agents/skills/gstack-review/design-checklist.md');
});
test('sidecar paths in ship skill point to gstack/review/ for pre-landing review', () => {
@@ -1550,7 +1755,10 @@ describe('Codex generation (--host codex)', () => {
test('Claude output unchanged: all Claude skills have zero Codex paths', () => {
for (const skill of ALL_SKILLS) {
const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
expect(content).not.toContain('~/.codex/');
// pair-agent legitimately documents how Codex agents store credentials
if (skill.dir !== 'pair-agent') {
expect(content).not.toContain('~/.codex/');
}
// gstack-upgrade legitimately references .agents/skills for cross-platform detection
if (skill.dir !== 'gstack-upgrade') {
expect(content).not.toContain('.agents/skills');
@@ -1709,19 +1917,95 @@ describe('Factory generation (--host factory)', () => {
});
});
// ─── Parameterized host smoke tests (config-driven) ─────────
import { ALL_HOST_CONFIGS, getExternalHosts } from '../hosts/index';
describe('Parameterized host smoke tests', () => {
  // One nested suite is registered per external host configuration
  for (const hostConfig of getExternalHosts()) {
    describe(`${hostConfig.displayName} (--host ${hostConfig.name})`, () => {
      const hostDir = path.join(ROOT, hostConfig.hostSubdir, 'skills');

      /** Path of the generated SKILL.md for `skill` under this host's output dir. */
      const skillMdPath = (skill: string) => path.join(hostDir, skill, 'SKILL.md');
      const hostDirExists = () => fs.existsSync(hostDir);

      test('generates output that exists on disk', () => {
        // Normally produced by an earlier `bun run gen:skill-docs --host all`;
        // generate on demand when the directory is missing
        if (!hostDirExists()) {
          Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', hostConfig.name], {
            cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
          });
        }
        expect(hostDirExists()).toBe(true);
        const generated = fs.readdirSync(hostDir).filter(entry => fs.existsSync(skillMdPath(entry)));
        expect(generated.length).toBeGreaterThan(0);
      });

      test('no .claude/skills path leakage in non-root skills', () => {
        if (!hostDirExists()) return; // skip if not generated
        for (const skill of fs.readdirSync(hostDir)) {
          // The root gstack skill is exempt: its preamble intentionally keeps
          // .claude/skills fallback paths for binary lookup and skill prefix instructions
          if (skill === 'gstack') continue;
          const mdPath = skillMdPath(skill);
          if (!fs.existsSync(mdPath)) continue;
          // Bash blocks are removed first because they hold legitimate fallback paths
          const withoutBash = fs.readFileSync(mdPath, 'utf-8').replace(/```bash\n[\s\S]*?```/g, '');
          const leakedLines = withoutBash.split('\n').filter(line => line.includes('.claude/skills'));
          if (leakedLines.length > 0) {
            throw new Error(`${skill}: .claude/skills leakage:\n${leakedLines.slice(0, 3).join('\n')}`);
          }
        }
      });

      test('frontmatter has name and description', () => {
        if (!hostDirExists()) return;
        for (const skill of fs.readdirSync(hostDir)) {
          const mdPath = skillMdPath(skill);
          if (!fs.existsSync(mdPath)) continue;
          const text = fs.readFileSync(mdPath, 'utf-8');
          expect(text).toMatch(/^---\n/);
          expect(text).toMatch(/^name:\s/m);
          expect(text).toMatch(/^description:\s/m);
        }
      });

      test('--dry-run freshness check passes', () => {
        const dryRun = Bun.spawnSync(
          ['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', hostConfig.name, '--dry-run'],
          { cwd: ROOT, stdout: 'pipe', stderr: 'pipe' }
        );
        expect(dryRun.exitCode).toBe(0);
        expect(dryRun.stdout.toString()).not.toContain('STALE');
      });

      // Registered only for hosts whose config lists 'codex' in skipSkills
      if (hostConfig.generation.skipSkills?.includes('codex')) {
        test('/codex skill excluded', () => {
          expect(fs.existsSync(skillMdPath('gstack-codex'))).toBe(false);
        });
      }
    });
  }
});
// ─── --host all tests ────────────────────────────────────────
describe('--host all', () => {
test('--host all generates for claude, codex, and factory', () => {
test('--host all generates for all registered hosts', () => {
const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', 'all', '--dry-run'], {
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
});
expect(result.exitCode).toBe(0);
const output = result.stdout.toString();
// All three hosts should appear in output
// All hosts should appear in output
expect(output).toContain('FRESH: SKILL.md'); // claude
expect(output).toContain('FRESH: .agents/skills/'); // codex
expect(output).toContain('FRESH: .factory/skills/'); // factory
for (const hostConfig of getExternalHosts()) {
expect(output).toContain(`FRESH: ${hostConfig.hostSubdir}/skills/`);
}
});
});
@@ -1792,12 +2076,43 @@ describe('setup script validation', () => {
expect(fnBody).toContain('gstack*');
});
test('link_claude_skill_dirs creates relative symlinks', () => {
// Claude links should be relative: ln -snf "gstack/skill_name"
test('link_claude_skill_dirs creates real directories with absolute SKILL.md symlinks', () => {
// Claude links should be real directories with absolute SKILL.md symlinks
// to ensure Claude Code discovers them as top-level skills (not nested under gstack/)
const fnStart = setupContent.indexOf('link_claude_skill_dirs()');
const fnEnd = setupContent.indexOf('}', setupContent.indexOf('linked[@]}', fnStart));
const fnBody = setupContent.slice(fnStart, fnEnd);
expect(fnBody).toContain('ln -snf "gstack/$skill_name"');
expect(fnBody).toContain('mkdir -p "$target"');
expect(fnBody).toContain('ln -snf "$gstack_dir/$dir_name/SKILL.md" "$target/SKILL.md"');
});
// REGRESSION: cleanup functions must handle both old symlinks AND new real-directory pattern
test('cleanup functions handle real directories with symlinked SKILL.md', () => {
  /**
   * Returns the setup-script text from `declaration` up to the first '}' that
   * follows `tailMarker` (searching for the marker from the declaration onward).
   */
  const sliceFunction = (declaration: string, tailMarker: string): string => {
    const start = setupContent.indexOf(declaration);
    const end = setupContent.indexOf('}', setupContent.indexOf(tailMarker, start));
    return setupContent.slice(start, end);
  };

  // cleanup_old_claude_symlinks must detect and remove real dirs with SKILL.md symlinks
  const cleanupOldBody = sliceFunction('cleanup_old_claude_symlinks()', 'cleaned up old');
  for (const snippet of ['-d "$old_target"', '-L "$old_target/SKILL.md"', 'rm -rf "$old_target"']) {
    expect(cleanupOldBody).toContain(snippet);
  }

  // cleanup_prefixed_claude_symlinks must also handle the new pattern
  const cleanupPrefixedBody = sliceFunction('cleanup_prefixed_claude_symlinks()', 'cleaned up prefixed');
  for (const snippet of ['-d "$prefixed_target"', '-L "$prefixed_target/SKILL.md"', 'rm -rf "$prefixed_target"']) {
    expect(cleanupPrefixedBody).toContain(snippet);
  }
});
// REGRESSION: link function must upgrade old directory symlinks
test('link_claude_skill_dirs removes old directory symlinks before creating real dirs', () => {
  const declarationAt = setupContent.indexOf('link_claude_skill_dirs()');
  const tailAt = setupContent.indexOf('linked[@]}', declarationAt);
  const linkFnBody = setupContent.slice(declarationAt, setupContent.indexOf('}', tailAt));
  // An old symlink has to be detected and removed before mkdir
  for (const snippet of ['if [ -L "$target" ]', 'rm -f "$target"']) {
    expect(linkFnBody).toContain(snippet);
  }
});
test('setup supports --host auto|claude|codex|kiro', () => {
@@ -2036,6 +2351,100 @@ describe('telemetry', () => {
});
});
describe('community fixes wave', () => {
/**
 * Collects every generated SKILL.md: the one at the repo root (reported as
 * 'root') followed by the one in each top-level directory that has it.
 * Dot-directories and node_modules are not searched.
 */
function getAllSkillMds(): Array<{ name: string; content: string }> {
  const readText = (file: string) => fs.readFileSync(file, 'utf-8');
  const found: Array<{ name: string; content: string }> = [];

  const rootSkill = path.join(ROOT, 'SKILL.md');
  if (fs.existsSync(rootSkill)) {
    found.push({ name: 'root', content: readText(rootSkill) });
  }

  const skillDirs = fs.readdirSync(ROOT, { withFileTypes: true }).filter(
    entry => entry.isDirectory() && !entry.name.startsWith('.') && entry.name !== 'node_modules',
  );
  for (const { name } of skillDirs) {
    const candidate = path.join(ROOT, name, 'SKILL.md');
    if (fs.existsSync(candidate)) {
      found.push({ name, content: readText(candidate) });
    }
  }
  return found;
}
// #594 — Discoverability: every SKILL.md.tmpl description contains "gstack"
test('every SKILL.md.tmpl description contains "gstack"', () => {
for (const skill of ALL_SKILLS) {
const tmplPath = skill.dir === '.' ? path.join(ROOT, 'SKILL.md.tmpl') : path.join(ROOT, skill.dir, 'SKILL.md.tmpl');
const content = fs.readFileSync(tmplPath, 'utf-8');
const desc = extractDescription(content);
expect(desc.toLowerCase()).toContain('gstack');
}
});
// #594 — Discoverability: first line of each description is under 120 chars
test('every SKILL.md.tmpl description first line is under 120 chars', () => {
for (const skill of ALL_SKILLS) {
const tmplPath = skill.dir === '.' ? path.join(ROOT, 'SKILL.md.tmpl') : path.join(ROOT, skill.dir, 'SKILL.md.tmpl');
const content = fs.readFileSync(tmplPath, 'utf-8');
const desc = extractDescription(content);
const firstLine = desc.split('\n')[0];
expect(firstLine.length).toBeLessThanOrEqual(120);
}
});
// #573 — Feature signals: ship/SKILL.md contains feature signal detection
test('ship/SKILL.md contains feature signal detection in Step 4', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
expect(content.toLowerCase()).toContain('feature signal');
});
// #510 — Context warnings: no SKILL.md contains "running low on context"
test('no generated SKILL.md contains "running low on context"', () => {
const skills = getAllSkillMds();
for (const { name, content } of skills) {
expect(content).not.toContain('running low on context');
}
});
// #510 — Context warnings: plan-eng-review has explicit anti-warning
test('plan-eng-review/SKILL.md contains "Do not preemptively warn"', () => {
const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
expect(content).toContain('Do not preemptively warn');
});
// #474 — Safety Net: no SKILL.md uses find with -delete
test('no generated SKILL.md contains find with -delete flag', () => {
const skills = getAllSkillMds();
for (const { name, content } of skills) {
// Match find commands that use -delete (but not prose mentioning the word "delete")
const lines = content.split('\n');
for (const line of lines) {
if (line.includes('find ') && line.includes('-delete')) {
throw new Error(`${name}/SKILL.md contains find with -delete: ${line.trim()}`);
}
}
}
});
// #467 — Telemetry: preamble JSONL writes are gated by telemetry setting
test('preamble JSONL writes are inside telemetry conditional', () => {
const preamble = fs.readFileSync(path.join(ROOT, 'scripts/resolvers/preamble.ts'), 'utf-8');
// Find all skill-usage.jsonl write lines
const lines = preamble.split('\n');
for (let i = 0; i < lines.length; i++) {
if (lines[i].includes('skill-usage.jsonl') && lines[i].includes('>>')) {
// Look backwards for a telemetry conditional within 5 lines
let foundConditional = false;
for (let j = i - 1; j >= Math.max(0, i - 5); j--) {
if (lines[j].includes('_TEL') && lines[j].includes('off')) {
foundConditional = true;
break;
}
}
expect(foundConditional).toBe(true);
}
}
});
});
describe('codex commands must not use inline $(git rev-parse --show-toplevel) for cwd', () => {
// Regression test: inline $(git rev-parse --show-toplevel) in codex exec -C
// or codex review without cd evaluates in whatever cwd the background shell
@@ -2123,3 +2532,207 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
expect(violations).toEqual([]);
});
});
// ─── Learnings + Confidence Resolver Tests ─────────────────────
describe('LEARNINGS_SEARCH resolver', () => {
  /** Reads the generated SKILL.md that lives in the given skill directory. */
  const loadSkillDoc = (skillDir: string): string =>
    fs.readFileSync(path.join(ROOT, skillDir, 'SKILL.md'), 'utf-8');

  /** Asserts that `doc` contains every fragment, checking them in the listed order. */
  const expectFragments = (doc: string, fragments: string[]): void => {
    fragments.forEach((fragment) => expect(doc).toContain(fragment));
  };

  // Skills whose generated docs are expected to carry the learnings-search section
  const skillsWithSearch = ['review', 'ship', 'plan-eng-review', 'investigate', 'office-hours', 'plan-ceo-review'];
  skillsWithSearch.forEach((skillDir) => {
    test(`${skillDir} generated SKILL.md contains learnings search`, () => {
      expectFragments(loadSkillDoc(skillDir), ['Prior Learnings', 'gstack-learnings-search']);
    });
  });

  test('learnings search includes cross-project config check', () => {
    expectFragments(loadSkillDoc('review'), ['cross_project_learnings', '--cross-project']);
  });

  test('learnings search includes AskUserQuestion for first-time cross-project opt-in', () => {
    expectFragments(loadSkillDoc('review'), ['Enable cross-project learnings', 'project-scoped only']);
  });

  test('learnings search mentions prior learning applied display format', () => {
    expectFragments(loadSkillDoc('review'), ['Prior learning applied']);
  });
});
describe('LEARNINGS_LOG resolver', () => {
  /** Reads the generated SKILL.md that lives in the given skill directory. */
  const loadSkillDoc = (skillDir: string): string =>
    fs.readFileSync(path.join(ROOT, skillDir, 'SKILL.md'), 'utf-8');

  /** Asserts that `doc` contains every fragment, checking them in the listed order. */
  const expectFragments = (doc: string, fragments: string[]): void => {
    fragments.forEach((fragment) => expect(doc).toContain(fragment));
  };

  // Skills whose generated docs are expected to carry the learnings-log section
  const skillsWithLog = ['review', 'retro', 'investigate'];
  skillsWithLog.forEach((skillDir) => {
    test(`${skillDir} generated SKILL.md contains learnings log`, () => {
      expectFragments(loadSkillDoc(skillDir), ['Capture Learnings', 'gstack-learnings-log']);
    });
  });

  test('learnings log documents all type values', () => {
    expectFragments(loadSkillDoc('review'), ['pattern', 'pitfall', 'preference', 'architecture', 'tool']);
  });

  test('learnings log documents all source values', () => {
    expectFragments(loadSkillDoc('review'), ['observed', 'user-stated', 'inferred', 'cross-model']);
  });

  test('learnings log includes files field for staleness detection', () => {
    expectFragments(loadSkillDoc('review'), ['"files"', 'staleness detection']);
  });
});
describe('CONFIDENCE_CALIBRATION resolver', () => {
  /** Reads the generated SKILL.md that lives in the given skill directory. */
  const loadSkillDoc = (skillDir: string): string =>
    fs.readFileSync(path.join(ROOT, skillDir, 'SKILL.md'), 'utf-8');

  /** Asserts that `doc` contains every fragment, checking them in the listed order. */
  const expectFragments = (doc: string, fragments: string[]): void => {
    fragments.forEach((fragment) => expect(doc).toContain(fragment));
  };

  // Skills whose generated docs are expected to carry the calibration section
  const calibratedSkills = ['review', 'ship', 'plan-eng-review', 'cso'];
  calibratedSkills.forEach((skillDir) => {
    test(`${skillDir} generated SKILL.md contains confidence calibration`, () => {
      expectFragments(loadSkillDoc(skillDir), ['Confidence Calibration', 'confidence score']);
    });
  });

  test('confidence calibration includes scoring rubric with all tiers', () => {
    expectFragments(loadSkillDoc('review'), ['9-10', '7-8', '5-6', '3-4', '1-2']);
  });

  test('confidence calibration includes display rules', () => {
    expectFragments(loadSkillDoc('review'), ['Show normally', 'Suppress from main report']);
  });

  test('confidence calibration includes finding format example', () => {
    expectFragments(loadSkillDoc('review'), ['[P1] (confidence:', 'SQL injection']);
  });

  test('confidence calibration includes calibration learning feedback loop', () => {
    expectFragments(loadSkillDoc('review'), ['calibration event', 'Log the corrected pattern']);
  });

  test('skills without confidence calibration do NOT contain it', () => {
    // office-hours and retro are expected to omit the calibration heading
    const uncalibratedSkills = ['office-hours', 'retro'];
    uncalibratedSkills.forEach((skillDir) => {
      expect(loadSkillDoc(skillDir)).not.toContain('## Confidence Calibration');
    });
  });
});
describe('gen-skill-docs prefix warning (#620/#578)', () => {
  const { execSync } = require('child_process');

  /**
   * Runs gen-skill-docs with HOME pointed at a throwaway directory whose
   * .gstack/config.yaml holds `configYaml`, and returns the captured stdout.
   * The temporary directory is removed whether or not the run succeeds.
   */
  function generateWithConfig(configYaml: string): string {
    const sandboxHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-prefix-warn-'));
    try {
      // Stand-in for ~/.gstack/config.yaml
      const configDir = path.join(sandboxHome, '.gstack');
      fs.mkdirSync(configDir, { recursive: true });
      fs.writeFileSync(path.join(configDir, 'config.yaml'), configYaml);

      return execSync('bun run scripts/gen-skill-docs.ts', {
        cwd: ROOT,
        env: { ...process.env, HOME: sandboxHome },
        encoding: 'utf-8',
        timeout: 30000,
      });
    } finally {
      fs.rmSync(sandboxHome, { recursive: true, force: true });
    }
  }

  test('warns about skill_prefix when config has prefix=true', () => {
    const stdout = generateWithConfig('skill_prefix: true\n');
    expect(stdout).toContain('skill_prefix is true');
    expect(stdout).toContain('gstack-relink');
  });

  test('no warning when skill_prefix is false or absent', () => {
    const stdout = generateWithConfig('skill_prefix: false\n');
    expect(stdout).not.toContain('skill_prefix is true');
  });
});
describe('voice-triggers processing', () => {
  const { extractVoiceTriggers, processVoiceTriggers } = require('../scripts/gen-skill-docs') as {
    extractVoiceTriggers: (content: string) => string[];
    processVoiceTriggers: (content: string) => string;
  };

  // Template with no voice-triggers field, shared by the "field absent" cases
  const templateWithoutTriggers = `---\nname: qa\ndescription: |\n QA testing.\n---\nBody`;

  /** Reads the generated SKILL.md of the cso skill. */
  const loadCsoDoc = (): string => fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');

  test('extractVoiceTriggers parses valid YAML list', () => {
    const template = `---\nname: cso\ndescription: |\n Security audit.\nvoice-triggers:\n - "see-so"\n - "security review"\n---\nBody`;
    expect(extractVoiceTriggers(template)).toEqual(['see-so', 'security review']);
  });

  test('extractVoiceTriggers returns [] when no field present', () => {
    expect(extractVoiceTriggers(templateWithoutTriggers)).toEqual([]);
  });

  test('processVoiceTriggers appends voice triggers to description', () => {
    const template = `---\nname: cso\ndescription: |\n Security audit. (gstack)\nvoice-triggers:\n - "see-so"\n - "security review"\n---\nBody`;
    const processed = processVoiceTriggers(template);
    expect(processed).toContain('Voice triggers (speech-to-text aliases): "see-so", "security review".');
  });

  test('processVoiceTriggers strips voice-triggers field from output', () => {
    const template = `---\nname: cso\ndescription: |\n Security audit. (gstack)\nvoice-triggers:\n - "see-so"\n---\nBody`;
    const processed = processVoiceTriggers(template);
    expect(processed).not.toContain('voice-triggers:');
  });

  test('processVoiceTriggers returns content unchanged when no voice-triggers', () => {
    expect(processVoiceTriggers(templateWithoutTriggers)).toBe(templateWithoutTriggers);
  });

  test('generated CSO SKILL.md contains voice triggers in description', () => {
    const doc = loadCsoDoc();
    expect(doc).toContain('"see-so"');
    expect(doc).toContain('Voice triggers (speech-to-text aliases):');
  });

  test('generated CSO SKILL.md does NOT contain raw voice-triggers field', () => {
    const doc = loadCsoDoc();
    // Frontmatter is everything before the first '\n---' found from offset 4 onward
    const frontmatterEnd = doc.indexOf('\n---', 4);
    expect(doc.slice(0, frontmatterEnd)).not.toContain('voice-triggers:');
  });
});
+159
View File
@@ -131,6 +131,165 @@ describe("gstack-global-discover", () => {
});
});
describe("codex large session_meta parsing", () => {
  let codexDir: string;
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = mkdtempSync(join(tmpdir(), "gstack-codex-test-"));
    // Build a realistic ~/.codex/sessions/YYYY/MM/DD structure for today's date
    const now = new Date();
    const y = now.getFullYear().toString();
    const m = String(now.getMonth() + 1).padStart(2, "0");
    const d = String(now.getDate()).padStart(2, "0");
    codexDir = join(tmpDir, "codex-home", "sessions", y, m, d);
    mkdirSync(codexDir, { recursive: true });
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  /**
   * Serializes a single session_meta record as one JSON line (no trailing newline).
   * `paddingSize` is the number of filler characters placed in
   * payload.base_instructions.text, which controls how long the line is.
   */
  function buildSessionMeta(fields: {
    id: string;
    cwd: string;
    cliVersion: string;
    paddingSize: number;
  }): string {
    return JSON.stringify({
      timestamp: new Date().toISOString(),
      type: "session_meta",
      payload: {
        id: fields.id,
        timestamp: new Date().toISOString(),
        cwd: fields.cwd,
        originator: "codex_exec",
        cli_version: fields.cliVersion,
        source: "exec",
        model_provider: "openai",
        base_instructions: { text: "x".repeat(fields.paddingSize) },
      },
    });
  }

  /**
   * Writes a rollout-*.jsonl file into `dir` whose only line is a session_meta
   * record for `cwd`, padded with `baseInstructionsSize` filler characters.
   *
   * @returns the path of the file that was written
   */
  function writeCodexSession(
    dir: string,
    cwd: string,
    baseInstructionsSize: number
  ): string {
    const line = buildSessionMeta({
      id: `test-${Date.now()}`,
      cwd,
      cliVersion: "0.118.0",
      paddingSize: baseInstructionsSize,
    });
    // Timestamp plus a random suffix keeps file names unique within a test run
    const name = `rollout-${new Date().toISOString().replace(/[:.]/g, "-")}-${Math.random().toString(36).slice(2)}.jsonl`;
    const filePath = join(dir, name);
    writeFileSync(filePath, line + "\n");
    return filePath;
  }

  /**
   * Reads at most `bufferSize` bytes from the start of `filePath` and returns the
   * text before the first newline. The result is truncated when the first line is
   * longer than the buffer. The descriptor is closed even if the read throws.
   */
  function readFirstLine(filePath: string, bufferSize: number): string {
    const { openSync, readSync, closeSync } = require("fs");
    const fd = openSync(filePath, "r");
    try {
      const buf = Buffer.alloc(bufferSize);
      const bytesRead = readSync(fd, buf, 0, bufferSize, 0);
      return buf.toString("utf-8", 0, bytesRead).split("\n")[0];
    } finally {
      closeSync(fd);
    }
  }

  test("discovers codex sessions with >4KB session_meta via CLI", () => {
    // Create a git repo as the session target
    const repoDir = join(tmpDir, "fake-repo");
    mkdirSync(repoDir);
    spawnSync("git", ["init"], { cwd: repoDir, stdio: "pipe" });
    // Supply an identity on the command line: without one, `git commit` fails on
    // machines that have no global user.name/user.email configured.
    spawnSync(
      "git",
      [
        "-c", "user.name=gstack-test",
        "-c", "user.email=gstack-test@example.com",
        "-c", "commit.gpgsign=false",
        "commit", "--allow-empty", "-m", "init",
      ],
      { cwd: repoDir, stdio: "pipe" }
    );

    // Write a session with a 20KB first line (simulates Codex v0.117+)
    writeCodexSession(codexDir, repoDir, 20000);

    // Run discovery with CODEX_SESSIONS_DIR override
    const result = spawnSync(
      "bun",
      ["run", scriptPath, "--since", "1h", "--format", "json"],
      {
        encoding: "utf-8",
        timeout: 30000,
        env: {
          ...process.env,
          CODEX_SESSIONS_DIR: join(tmpDir, "codex-home", "sessions"),
        },
      }
    );
    expect(result.status).toBe(0);
    const json = JSON.parse(result.stdout);
    expect(json.tools.codex.total_sessions).toBeGreaterThanOrEqual(1);
  });

  test("4KB buffer truncates session_meta, 128KB buffer parses it", () => {
    const sessionMeta = buildSessionMeta({
      id: "test-id",
      cwd: "/tmp/test-repo",
      cliVersion: "0.118.0",
      paddingSize: 20000,
    });
    expect(sessionMeta.length).toBeGreaterThan(4096);
    const filePath = join(codexDir, "test.jsonl");
    writeFileSync(filePath, sessionMeta + "\n");

    // 4KB buffer: the line is cut mid-record, so JSON.parse fails (the old bug)
    expect(() => JSON.parse(readFirstLine(filePath, 4096))).toThrow();

    // 128KB buffer: the whole line fits, so JSON.parse succeeds (the fix)
    const meta = JSON.parse(readFirstLine(filePath, 131072));
    expect(meta.type).toBe("session_meta");
    expect(meta.payload.cwd).toBe("/tmp/test-repo");
  });

  test("regression: session_meta beyond 128KB still needs streaming parse", () => {
    // This test documents a limitation of any fixed-size read: a first line
    // longer than the buffer is truncated and cannot be parsed. It builds its own
    // fixture and reads it with its own 128KB buffer; it does not invoke the
    // discovery script, so it neither fails nor starts passing when production
    // code changes. If a streaming parse is implemented, add a test that
    // exercises that code path directly.
    const sessionMeta = buildSessionMeta({
      id: "test-large",
      cwd: "/tmp/large-test",
      cliVersion: "0.200.0",
      paddingSize: 140000, // ~140KB payload
    });
    expect(sessionMeta.length).toBeGreaterThan(131072);
    const filePath = join(codexDir, "large-test.jsonl");
    writeFileSync(filePath, sessionMeta + "\n");

    // 128KB buffer: JSON.parse fails for lines longer than 128KB
    expect(() => JSON.parse(readFirstLine(filePath, 131072))).toThrow();
  });
});
describe("discovery output structure", () => {
test("repos have required fields", () => {
const result = spawnSync(
+2 -1
View File
@@ -305,12 +305,13 @@ export async function runSkillTest(options: {
// Use resultLine for structured result data
if (resultLine) {
if (resultLine.is_error) {
if (resultLine.subtype === 'success' && resultLine.is_error) {
// claude -p can return subtype=success with is_error=true (e.g. API connection failure)
exitReason = 'error_api';
} else if (resultLine.subtype === 'success') {
exitReason = 'success';
} else if (resultLine.subtype) {
// Preserve known subtypes like error_max_turns even if is_error is set
exitReason = resultLine.subtype;
}
}
+6 -1
View File
@@ -15,6 +15,11 @@ import { parseSnapshotArgs } from '../../browse/src/snapshot';
import * as fs from 'fs';
import * as path from 'path';
/**
 * Commands that are valid `$B` invocations but are serviced by the CLI itself
 * rather than by the server.
 */
const CLI_COMMANDS = new Set<string>(['status', 'pair-agent', 'tunnel']);
export interface BrowseCommand {
command: string;
args: string[];
@@ -112,7 +117,7 @@ export function validateSkill(skillPath: string): ValidationResult {
}
for (const cmd of commands) {
if (!ALL_COMMANDS.has(cmd.command)) {
if (!ALL_COMMANDS.has(cmd.command) && !CLI_COMMANDS.has(cmd.command)) {
result.invalid.push(cmd);
continue;
}
+40 -2
View File
@@ -41,8 +41,8 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
'skillmd-no-local-binary': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
'skillmd-outside-git': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
'contributor-mode': ['SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
'session-awareness': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
'operational-learning': ['scripts/resolvers/preamble.ts', 'bin/gstack-learnings-log'],
// QA (+ test-server dependency)
'qa-quick': ['qa/**', 'browse/src/**', 'browse/test/test-server.ts'],
@@ -59,6 +59,15 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
'review-base-branch': ['review/**'],
'review-design-lite': ['review/**', 'test/fixtures/review-eval-design-slop.*'],
// Review Army (specialist dispatch)
'review-army-migration-safety': ['review/**', 'scripts/resolvers/review-army.ts', 'bin/gstack-diff-scope'],
'review-army-perf-n-plus-one': ['review/**', 'scripts/resolvers/review-army.ts', 'bin/gstack-diff-scope'],
'review-army-delivery-audit': ['review/**', 'scripts/resolvers/review.ts', 'scripts/resolvers/review-army.ts'],
'review-army-quality-score': ['review/**', 'scripts/resolvers/review-army.ts'],
'review-army-json-findings': ['review/**', 'scripts/resolvers/review-army.ts'],
'review-army-red-team': ['review/**', 'scripts/resolvers/review-army.ts'],
'review-army-consensus': ['review/**', 'scripts/resolvers/review-army.ts'],
// Office Hours
'office-hours-spec-review': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
@@ -95,6 +104,14 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
'cso-diff-mode': ['cso/**'],
'cso-infra-scope': ['cso/**'],
// Learnings
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
// Session Intelligence (timeline, context recovery, checkpoint)
'timeline-event-flow': ['bin/gstack-timeline-log', 'bin/gstack-timeline-read'],
'context-recovery-artifacts': ['scripts/resolvers/preamble.ts', 'bin/gstack-timeline-log', 'bin/gstack-slug', 'learn/**'],
'checkpoint-save-resume': ['checkpoint/**', 'bin/gstack-slug'],
// Document-release
'document-release': ['document-release/**'],
@@ -119,6 +136,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
// Plan completion audit + verification
'ship-plan-completion': ['ship/**', 'scripts/gen-skill-docs.ts'],
'ship-plan-verification': ['ship/**', 'qa-only/**', 'scripts/gen-skill-docs.ts'],
'ship-idempotency': ['ship/**', 'scripts/resolvers/utility.ts'],
'review-plan-completion': ['review/**', 'scripts/gen-skill-docs.ts'],
// Design
@@ -149,6 +167,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
// Sidebar agent
'sidebar-navigate': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/**'],
'sidebar-url-accuracy': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/background.js'],
'sidebar-css-interaction': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/write-commands.ts', 'browse/src/read-commands.ts', 'browse/src/cdp-inspector.ts', 'extension/**'],
// Autoplan
'autoplan-core': ['autoplan/**', 'plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**'],
@@ -179,8 +198,8 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
'skillmd-setup-discovery': 'gate',
'skillmd-no-local-binary': 'gate',
'skillmd-outside-git': 'gate',
'contributor-mode': 'gate',
'session-awareness': 'gate',
'operational-learning': 'gate',
// QA — gate for functional, periodic for quality/benchmarks
'qa-quick': 'gate',
@@ -200,6 +219,15 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
'review-plan-completion': 'gate',
'review-dashboard-via': 'gate',
// Review Army — gate for core functionality, periodic for multi-specialist
'review-army-migration-safety': 'gate', // Specialist activation guardrail
'review-army-perf-n-plus-one': 'gate', // Specialist activation guardrail
'review-army-delivery-audit': 'gate', // Delivery integrity guardrail
'review-army-quality-score': 'gate', // Score computation
'review-army-json-findings': 'gate', // JSON schema compliance
'review-army-red-team': 'periodic', // Multi-agent coordination
'review-army-consensus': 'periodic', // Multi-specialist agreement
// Office Hours
'office-hours-spec-review': 'gate',
@@ -218,6 +246,11 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
'codex-offered-design-review': 'gate',
'codex-offered-eng-review': 'gate',
// Session Intelligence — gate for data flow, periodic for agent integration
'timeline-event-flow': 'gate', // Binary data flow (no LLM needed)
'context-recovery-artifacts': 'gate', // Preamble reads seeded artifacts
'checkpoint-save-resume': 'gate', // Checkpoint round-trip
// Ship — gate (end-to-end ship path)
'ship-base-branch': 'gate',
'ship-local-workflow': 'gate',
@@ -225,6 +258,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
'ship-triage': 'gate',
'ship-plan-completion': 'gate',
'ship-plan-verification': 'gate',
'ship-idempotency': 'periodic',
// Retro — gate for cheap branch detection, periodic for full Opus retro
'retro': 'periodic',
@@ -238,6 +272,9 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
'cso-diff-mode': 'gate',
'cso-infra-scope': 'periodic',
// Learnings — gate (functional guardrail: seeded learnings must appear)
'learnings-show': 'gate',
// Document-release — gate (CHANGELOG guardrail)
'document-release': 'gate',
@@ -276,6 +313,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
// Sidebar agent
'sidebar-navigate': 'periodic',
'sidebar-url-accuracy': 'periodic',
'sidebar-css-interaction': 'periodic',
// Autoplan — periodic (not yet implemented)
'autoplan-core': 'periodic',
+524
View File
@@ -0,0 +1,524 @@
/**
* Host config system tests — 100% coverage of host-config.ts, hosts/index.ts,
* host-config-export.ts, and golden-file regression checks.
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import { validateHostConfig, validateAllConfigs, type HostConfig } from '../scripts/host-config';
import {
ALL_HOST_CONFIGS,
ALL_HOST_NAMES,
HOST_CONFIG_MAP,
getHostConfig,
resolveHostArg,
getExternalHosts,
claude,
codex,
factory,
kiro,
opencode,
slate,
cursor,
openclaw,
} from '../hosts/index';
import { HOST_PATHS } from '../scripts/resolvers/types';
const ROOT = path.resolve(import.meta.dir, '..');
// ─── hosts/index.ts ─────────────────────────────────────────
describe('hosts/index.ts', () => {
  test('ALL_HOST_CONFIGS has 8 hosts', () => {
    expect(ALL_HOST_CONFIGS.length).toBe(8);
  });

  test('ALL_HOST_NAMES matches config names', () => {
    const namesFromConfigs = ALL_HOST_CONFIGS.map((config) => config.name);
    expect(ALL_HOST_NAMES).toEqual(namesFromConfigs);
  });

  test('HOST_CONFIG_MAP keys match names', () => {
    // Each map entry must be the very same object as the registry entry
    ALL_HOST_CONFIGS.forEach((config) => {
      expect(HOST_CONFIG_MAP[config.name]).toBe(config);
    });
  });

  test('individual config re-exports match registry', () => {
    // The shorthand keys double as the expected `name` of each re-exported config
    const reExports = { claude, codex, factory, kiro, opencode, slate, cursor, openclaw };
    for (const [expectedName, config] of Object.entries(reExports)) {
      expect(config.name).toBe(expectedName);
    }
  });

  test('getHostConfig returns correct config', () => {
    const codexConfig = getHostConfig('codex');
    expect(codexConfig.name).toBe('codex');
    expect(codexConfig.displayName).toBe('OpenAI Codex CLI');
  });

  test('getHostConfig throws on unknown host', () => {
    const lookup = () => getHostConfig('nonexistent');
    expect(lookup).toThrow('Unknown host');
  });

  test('resolveHostArg resolves direct names', () => {
    ALL_HOST_NAMES.forEach((hostName) => {
      expect(resolveHostArg(hostName)).toBe(hostName);
    });
  });

  test('resolveHostArg resolves aliases', () => {
    expect(resolveHostArg('agents')).toBe('codex');
    expect(resolveHostArg('droid')).toBe('factory');
  });

  test('resolveHostArg throws on unknown alias', () => {
    const resolve = () => resolveHostArg('nonexistent');
    expect(resolve).toThrow('Unknown host');
  });

  test('getExternalHosts excludes claude', () => {
    const externalHosts = getExternalHosts();
    const claudeEntry = externalHosts.find((config) => config.name === 'claude');
    expect(claudeEntry).toBeUndefined();
    expect(externalHosts.length).toBe(ALL_HOST_CONFIGS.length - 1);
  });

  test('every host has a unique name', () => {
    // A Set drops duplicates, so equal sizes mean every name is distinct
    const distinctNames = new Set(ALL_HOST_NAMES);
    expect(distinctNames.size).toBe(ALL_HOST_NAMES.length);
  });

  test('every host has a unique hostSubdir', () => {
    const distinctSubdirs = new Set(ALL_HOST_CONFIGS.map((config) => config.hostSubdir));
    expect(distinctSubdirs.size).toBe(ALL_HOST_CONFIGS.length);
  });

  test('every host has a unique globalRoot', () => {
    const distinctRoots = new Set(ALL_HOST_CONFIGS.map((config) => config.globalRoot));
    expect(distinctRoots.size).toBe(ALL_HOST_CONFIGS.length);
  });
});
// ─── validateHostConfig ─────────────────────────────────────
describe('validateHostConfig', () => {
  /** Builds a fresh config that validates cleanly; each test mutates its own copy. */
  function makeValid(): HostConfig {
    return {
      name: 'test-host',
      displayName: 'Test Host',
      cliCommand: 'testcli',
      globalRoot: '.test/skills/gstack',
      localSkillRoot: '.test/skills/gstack',
      hostSubdir: '.test',
      usesEnvVars: true,
      frontmatter: { mode: 'allowlist', keepFields: ['name', 'description'] },
      generation: { generateMetadata: false },
      pathRewrites: [],
      runtimeRoot: { globalSymlinks: ['bin'] },
      install: { prefixable: false, linkingStrategy: 'symlink-generated' },
    };
  }

  /** Applies `tweak` to a fresh valid config and returns the resulting validation errors. */
  const errorsAfter = (tweak: (config: HostConfig) => void) => {
    const config = makeValid();
    tweak(config);
    return validateHostConfig(config);
  };

  /** True when at least one error message contains `fragment`. */
  const mentions = (errors: ReturnType<typeof validateHostConfig>, fragment: string): boolean =>
    errors.some((message) => message.includes(fragment));

  test('valid config passes', () => {
    expect(validateHostConfig(makeValid())).toEqual([]);
  });

  test('invalid name is caught', () => {
    const errors = errorsAfter((config) => {
      config.name = 'UPPER_CASE';
    });
    expect(mentions(errors, 'name')).toBe(true);
  });

  test('name with special chars is caught', () => {
    const errors = errorsAfter((config) => {
      config.name = 'has spaces';
    });
    expect(errors.length).toBeGreaterThan(0);
  });

  test('empty displayName is caught', () => {
    const errors = errorsAfter((config) => {
      config.displayName = '';
    });
    expect(mentions(errors, 'displayName')).toBe(true);
  });

  test('invalid cliCommand is caught', () => {
    const errors = errorsAfter((config) => {
      config.cliCommand = 'has spaces';
    });
    expect(mentions(errors, 'cliCommand')).toBe(true);
  });

  test('invalid cliAlias is caught', () => {
    const errors = errorsAfter((config) => {
      config.cliAliases = ['good', 'BAD!'];
    });
    expect(mentions(errors, 'cliAlias')).toBe(true);
  });

  test('valid cliAliases pass', () => {
    const errors = errorsAfter((config) => {
      config.cliAliases = ['alias-one', 'alias-two'];
    });
    expect(errors).toEqual([]);
  });

  test('invalid globalRoot is caught', () => {
    const errors = errorsAfter((config) => {
      config.globalRoot = 'path with spaces';
    });
    expect(mentions(errors, 'globalRoot')).toBe(true);
  });

  test('invalid localSkillRoot is caught', () => {
    const errors = errorsAfter((config) => {
      config.localSkillRoot = 'invalid<path>';
    });
    expect(mentions(errors, 'localSkillRoot')).toBe(true);
  });

  test('invalid hostSubdir is caught', () => {
    const errors = errorsAfter((config) => {
      config.hostSubdir = 'no spaces allowed';
    });
    expect(mentions(errors, 'hostSubdir')).toBe(true);
  });

  test('invalid frontmatter.mode is caught', () => {
    const errors = errorsAfter((config) => {
      // Cast needed: the value is deliberately outside the declared type
      (config.frontmatter as any).mode = 'invalid';
    });
    expect(mentions(errors, 'frontmatter.mode')).toBe(true);
  });

  test('invalid linkingStrategy is caught', () => {
    const errors = errorsAfter((config) => {
      // Cast needed: the value is deliberately outside the declared type
      (config.install as any).linkingStrategy = 'invalid';
    });
    expect(mentions(errors, 'linkingStrategy')).toBe(true);
  });

  test('paths with $ and ~ are valid', () => {
    const errors = errorsAfter((config) => {
      config.globalRoot = '$HOME/.test/skills/gstack';
      config.localSkillRoot = '~/.test/skills/gstack';
    });
    expect(errors).toEqual([]);
  });

  test('shell injection attempt in cliCommand is caught', () => {
    const errors = errorsAfter((config) => {
      config.cliCommand = 'opencode;rm -rf /';
    });
    expect(mentions(errors, 'cliCommand')).toBe(true);
  });
});
// ─── validateAllConfigs ─────────────────────────────────────
describe('validateAllConfigs', () => {
  test('real configs all pass validation', () => {
    const errors = validateAllConfigs(ALL_HOST_CONFIGS);
    expect(errors).toEqual([]);
  });

  test('duplicate name detected', () => {
    // A codex clone that reuses claude's name
    const dup = { ...codex, name: 'claude' } as HostConfig;
    const errors = validateAllConfigs([claude, dup]);
    expect(errors.some(e => e.includes('Duplicate name'))).toBe(true);
  });

  test('duplicate hostSubdir detected', () => {
    // Name and globalRoot are overridden so that only hostSubdir is set to claude's value
    const dup = { ...codex, name: 'dup-host', hostSubdir: '.claude', globalRoot: '.dup/skills/gstack' } as HostConfig;
    const errors = validateAllConfigs([claude, dup]);
    expect(errors.some(e => e.includes('Duplicate hostSubdir'))).toBe(true);
  });

  test('duplicate globalRoot detected', () => {
    // Name and hostSubdir are overridden so that only globalRoot is set to claude's value
    const dup = { ...codex, name: 'dup-host', hostSubdir: '.dup', globalRoot: '.claude/skills/gstack' } as HostConfig;
    const errors = validateAllConfigs([claude, dup]);
    expect(errors.some(e => e.includes('Duplicate globalRoot'))).toBe(true);
  });

  test('per-config validation errors are prefixed with host name', () => {
    const bad = { ...codex, name: 'BAD', cliCommand: 'also bad' } as HostConfig;
    const errors = validateAllConfigs([bad]);
    // Array.prototype.every is vacuously true for an empty array, so first make
    // sure the invalid config actually produced errors to check.
    expect(errors.length).toBeGreaterThan(0);
    expect(errors.every(e => e.startsWith('[BAD]'))).toBe(true);
  });
});
// ─── HOST_PATHS derivation ──────────────────────────────────
describe('HOST_PATHS derivation from configs', () => {
  test('Claude uses literal home paths (no env vars)', () => {
    const claudePaths = HOST_PATHS.claude;
    expect(claudePaths.skillRoot).toBe('~/.claude/skills/gstack');
    expect(claudePaths.binDir).toBe('~/.claude/skills/gstack/bin');
    expect(claudePaths.browseDir).toBe('~/.claude/skills/gstack/browse/dist');
    expect(claudePaths.designDir).toBe('~/.claude/skills/gstack/design/dist');
  });

  test('Codex uses $GSTACK_ROOT env vars', () => {
    const codexPaths = HOST_PATHS.codex;
    expect(codexPaths.skillRoot).toBe('$GSTACK_ROOT');
    expect(codexPaths.binDir).toBe('$GSTACK_BIN');
    expect(codexPaths.browseDir).toBe('$GSTACK_BROWSE');
    expect(codexPaths.designDir).toBe('$GSTACK_DESIGN');
  });

  test('every host with usesEnvVars=true gets env var paths', () => {
    ALL_HOST_CONFIGS.filter((config) => config.usesEnvVars).forEach((config) => {
      const hostPaths = HOST_PATHS[config.name];
      expect(hostPaths.skillRoot).toBe('$GSTACK_ROOT');
      expect(hostPaths.binDir).toBe('$GSTACK_BIN');
    });
  });

  test('every host with usesEnvVars=false gets literal paths', () => {
    ALL_HOST_CONFIGS.filter((config) => !config.usesEnvVars).forEach((config) => {
      const hostPaths = HOST_PATHS[config.name];
      expect(hostPaths.skillRoot).toContain('~/');
      expect(hostPaths.binDir).toContain('/bin');
    });
  });

  test('localSkillRoot matches config for every host', () => {
    ALL_HOST_CONFIGS.forEach((config) => {
      expect(HOST_PATHS[config.name].localSkillRoot).toBe(config.localSkillRoot);
    });
  });

  test('HOST_PATHS has entry for every registered host', () => {
    ALL_HOST_NAMES.forEach((hostName) => {
      expect(HOST_PATHS[hostName]).toBeDefined();
    });
  });
});
// ─── host-config-export.ts CLI ──────────────────────────────
/**
 * End-to-end checks of scripts/host-config-export.ts: each test spawns the
 * script with `bun run` and inspects its trimmed output and exit code.
 */
describe('host-config-export.ts CLI', () => {
  const EXPORT_SCRIPT = path.join(ROOT, 'scripts', 'host-config-export.ts');

  /**
   * Spawn the export script synchronously from the repo root.
   *
   * @param args CLI arguments appended after the script path.
   * @returns Trimmed stdout/stderr and the process exit code.
   */
  function run(...args: string[]): { stdout: string; stderr: string; exitCode: number } {
    const proc = Bun.spawnSync(['bun', 'run', EXPORT_SCRIPT, ...args], {
      cwd: ROOT,
      stdout: 'pipe',
      stderr: 'pipe',
    });
    const stdout = proc.stdout.toString().trim();
    const stderr = proc.stderr.toString().trim();
    return { stdout, stderr, exitCode: proc.exitCode };
  }

  test('list prints all host names', () => {
    const listing = run('list');
    expect(listing.exitCode).toBe(0);
    // One host name per output line, in registry order.
    expect(listing.stdout.split('\n')).toEqual(ALL_HOST_NAMES);
  });

  test('get returns string field', () => {
    const field = run('get', 'codex', 'globalRoot');
    expect(field.exitCode).toBe(0);
    expect(field.stdout).toBe('.codex/skills/gstack');
  });

  test('get returns boolean as 1/0', () => {
    // claude's usesEnvVars is expected to print as 0, codex's as 1.
    const { stdout: claudeFlag } = run('get', 'claude', 'usesEnvVars');
    expect(claudeFlag).toBe('0');
    const { stdout: codexFlag } = run('get', 'codex', 'usesEnvVars');
    expect(codexFlag).toBe('1');
  });

  test('get with missing args exits 1', () => {
    expect(run('get', 'codex').exitCode).toBe(1);
  });

  test('get with unknown field exits 1', () => {
    expect(run('get', 'codex', 'nonexistent').exitCode).toBe(1);
  });

  test('get with unknown host exits 1', () => {
    // Only "non-zero" is asserted here, unlike the sibling tests that pin exit code 1.
    expect(run('get', 'nonexistent', 'name').exitCode).not.toBe(0);
  });

  test('validate passes for real configs', () => {
    const validation = run('validate');
    expect(validation.exitCode).toBe(0);
    expect(validation.stdout).toContain('configs valid');
  });

  test('symlinks returns asset list', () => {
    const listing = run('symlinks', 'codex');
    expect(listing.exitCode).toBe(0);
    const assets = listing.stdout.split('\n');
    expect(assets).toContain('bin');
    expect(assets).toContain('ETHOS.md');
    expect(assets).toContain('review/checklist.md');
  });

  test('symlinks with missing host exits 1', () => {
    expect(run('symlinks').exitCode).toBe(1);
  });

  test('detect finds claude (since we are running in claude)', () => {
    const detection = run('detect');
    expect(detection.exitCode).toBe(0);
    // Environment-dependent: relies on a claude binary being on PATH.
    expect(detection.stdout).toContain('claude');
  });

  test('unknown command exits 1', () => {
    expect(run('badcommand').exitCode).toBe(1);
  });
});
// ─── Golden-file regression ─────────────────────────────────
/**
 * Compares the generated ship SKILL.md for three hosts byte-for-byte
 * against the checked-in baselines under test/fixtures/golden.
 */
describe('golden-file regression', () => {
  const GOLDEN_DIR = path.join(ROOT, 'test', 'fixtures', 'golden');

  // Read a UTF-8 text file located at the joined path segments.
  const readText = (...segments: string[]): string =>
    fs.readFileSync(path.join(...segments), 'utf-8');

  test('Claude ship skill matches golden baseline', () => {
    const expected = readText(GOLDEN_DIR, 'claude-ship-SKILL.md');
    const actual = readText(ROOT, 'ship', 'SKILL.md');
    expect(actual).toBe(expected);
  });

  test('Codex ship skill matches golden baseline', () => {
    const expected = readText(GOLDEN_DIR, 'codex-ship-SKILL.md');
    const actual = readText(ROOT, '.agents', 'skills', 'gstack-ship', 'SKILL.md');
    expect(actual).toBe(expected);
  });

  test('Factory ship skill matches golden baseline', () => {
    const expected = readText(GOLDEN_DIR, 'factory-ship-SKILL.md');
    const actual = readText(ROOT, '.factory', 'skills', 'gstack-ship', 'SKILL.md');
    expect(actual).toBe(expected);
  });
});
// ─── Individual host config correctness ─────────────────────
/**
 * Pins individual fields of the concrete host configs (claude, codex,
 * factory, openclaw) plus a few rules that must hold across all hosts.
 */
describe('host config correctness', () => {
  test('claude is the only prefixable host', () => {
    for (const config of ALL_HOST_CONFIGS) {
      const expected = config.name === 'claude';
      expect(config.install.prefixable).toBe(expected);
    }
  });

  test('claude is the only host with real-dir-symlink strategy', () => {
    for (const config of ALL_HOST_CONFIGS) {
      const expected = config.name === 'claude' ? 'real-dir-symlink' : 'symlink-generated';
      expect(config.install.linkingStrategy).toBe(expected);
    }
  });

  test('claude does not use env vars', () => {
    expect(claude.usesEnvVars).toBe(false);
  });

  test('all external hosts use env vars', () => {
    for (const external of getExternalHosts()) {
      expect(external.usesEnvVars).toBe(true);
    }
  });

  test('codex has 1024-char description limit with error behavior', () => {
    const { descriptionLimit, descriptionLimitBehavior } = codex.frontmatter;
    expect(descriptionLimit).toBe(1024);
    expect(descriptionLimitBehavior).toBe('error');
  });

  test('codex generates openai.yaml metadata', () => {
    const { generateMetadata, metadataFormat } = codex.generation;
    expect(generateMetadata).toBe(true);
    expect(metadataFormat).toBe('openai.yaml');
  });

  test('codex has sidecar config', () => {
    expect(codex.sidecar).toBeDefined();
    expect(codex.sidecar!.path).toBe('.agents/skills/gstack');
  });

  test('factory has tool rewrites', () => {
    expect(factory.toolRewrites).toBeDefined();
    const rewrites = factory.toolRewrites!;
    expect(Object.keys(rewrites).length).toBeGreaterThan(0);
    expect(rewrites['use the Bash tool']).toBe('run this command');
  });

  test('factory has conditional disable-model-invocation field', () => {
    expect(factory.frontmatter.conditionalFields).toBeDefined();
    const conditional = factory.frontmatter.conditionalFields!;
    expect(conditional.length).toBe(1);
    expect(conditional[0].if).toEqual({ sensitive: true });
    expect(conditional[0].add).toEqual({ 'disable-model-invocation': true });
  });

  test('codex has suppressedResolvers for self-invocation prevention', () => {
    expect(codex.suppressedResolvers).toBeDefined();
    for (const resolver of ['CODEX_SECOND_OPINION', 'ADVERSARIAL_STEP', 'REVIEW_ARMY']) {
      expect(codex.suppressedResolvers).toContain(resolver);
    }
  });

  test('codex has boundary instruction', () => {
    expect(codex.boundaryInstruction).toBeDefined();
    expect(codex.boundaryInstruction).toContain('Do NOT read');
  });

  test('openclaw has tool rewrites for exec/read/write', () => {
    expect(openclaw.toolRewrites).toBeDefined();
    const rewrites = openclaw.toolRewrites!;
    expect(rewrites['use the Bash tool']).toBe('use the exec tool');
    expect(rewrites['use the Read tool']).toBe('use the read tool');
  });

  test('openclaw has CLAUDE.md→AGENTS.md path rewrite', () => {
    const hasRewrite = openclaw.pathRewrites.some(
      (rule) => rule.from === 'CLAUDE.md' && rule.to === 'AGENTS.md',
    );
    expect(hasRewrite).toBe(true);
  });

  test('openclaw has adapter path', () => {
    expect(openclaw.adapter).toBeDefined();
    expect(openclaw.adapter).toContain('openclaw-adapter');
  });

  test('openclaw has no staticFiles (SOUL.md removed)', () => {
    expect(openclaw.staticFiles).toBeUndefined();
  });

  test('openclaw includeSkills is empty (native skills replaced generated ones)', () => {
    expect(openclaw.generation.includeSkills).toBeDefined();
    expect(openclaw.generation.includeSkills!.length).toBe(0);
  });

  test('every host has coAuthorTrailer or undefined', () => {
    // Only these four hosts are checked; each trailer must mention its own product name.
    expect(claude.coAuthorTrailer).toContain('Claude');
    expect(codex.coAuthorTrailer).toContain('Codex');
    expect(factory.coAuthorTrailer).toContain('Factory');
    expect(openclaw.coAuthorTrailer).toContain('OpenClaw');
  });

  test('every external host skips the codex skill', () => {
    for (const external of getExternalHosts()) {
      expect(external.generation.skipSkills).toContain('codex');
    }
  });

  test('every host has at least one pathRewrite (except claude)', () => {
    for (const external of getExternalHosts()) {
      expect(external.pathRewrites.length).toBeGreaterThan(0);
    }
    expect(claude.pathRewrites.length).toBe(0);
  });

  test('every host has runtimeRoot.globalSymlinks', () => {
    for (const config of ALL_HOST_CONFIGS) {
      const { globalSymlinks } = config.runtimeRoot;
      expect(globalSymlinks.length).toBeGreaterThan(0);
      expect(globalSymlinks).toContain('bin');
      expect(globalSymlinks).toContain('ETHOS.md');
    }
  });
});
+48
View File
@@ -0,0 +1,48 @@
import { describe, test, expect } from "bun:test";
import { readFileSync } from "fs";
import path from "path";
// Path to the gstack-learnings-search script under test: ../bin relative to this test file's directory.
const SCRIPT = path.join(import.meta.dir, "..", "bin", "gstack-learnings-search");
/**
 * Static (text-only) checks on bin/gstack-learnings-search: user-controlled
 * values must reach the embedded `bun -e` program through process.env, never
 * through shell `${VAR}` interpolation into the program text.
 */
describe("gstack-learnings-search injection prevention", () => {
  const script = readFileSync(SCRIPT, "utf-8");
  const BUN_MARKER = 'bun -e "';

  /**
   * Return the script text from the `bun -e "` marker through the end of the
   * file (the closing quote is not located, so trailing shell is included).
   *
   * Asserts that the marker exists: without this, indexOf() returns -1 and
   * slice(-1) yields only the final character, which would make the
   * "no interpolation" check pass vacuously.
   */
  function extractBunBlock(): string {
    const start = script.indexOf(BUN_MARKER);
    expect(start).toBeGreaterThanOrEqual(0);
    return script.slice(start);
  }

  test("no shell interpolation inside bun -e string", () => {
    const bunBlock = extractBunBlock();
    // ${VAR} patterns are shell interpolation into the program text — RCE
    // vectors: a value such as '; rm -rf / ;' would be executed as code.
    // A single pattern suffices: the quoted form '${VAR}' always contains the
    // bare form, and a match consists only of `$`, braces and [A-Z_], so it can
    // never be a process.env reference that would need filtering out.
    const unsafeInterpolations = bunBlock.match(/\$\{[A-Z_]+\}/g) ?? [];
    expect(unsafeInterpolations).toEqual([]);
  });

  test("uses process.env for all user-controlled values", () => {
    const bunBlock = extractBunBlock();
    // Must use process.env for TYPE, QUERY, LIMIT, SLUG, CROSS_PROJECT
    expect(bunBlock).toContain("process.env.GSTACK_SEARCH_TYPE");
    expect(bunBlock).toContain("process.env.GSTACK_SEARCH_QUERY");
    expect(bunBlock).toContain("process.env.GSTACK_SEARCH_LIMIT");
    expect(bunBlock).toContain("process.env.GSTACK_SEARCH_SLUG");
    expect(bunBlock).toContain("process.env.GSTACK_SEARCH_CROSS");
  });

  test("env vars are set on the bun command line", () => {
    // The env vars must be passed to bun, not just set in the shell
    expect(script).toContain("GSTACK_SEARCH_TYPE=");
    expect(script).toContain("GSTACK_SEARCH_QUERY=");
    expect(script).toContain("GSTACK_SEARCH_LIMIT=");
    expect(script).toContain("GSTACK_SEARCH_SLUG=");
    expect(script).toContain("GSTACK_SEARCH_CROSS=");
  });
});
+283
View File
@@ -0,0 +1,283 @@
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// Repository root: the parent of the directory containing this test file.
const ROOT = path.resolve(import.meta.dir, '..');
// Directory holding the gstack-learnings-* scripts under test.
const BIN = path.join(ROOT, 'bin');
// Per-test scratch directory, (re)assigned in beforeEach and passed to the scripts as GSTACK_HOME.
let tmpDir: string;
// `<tmpDir>/projects`; findLearningsFile() looks for learnings.jsonl inside its first entry.
let slugDir: string;
// NOTE(review): not referenced anywhere in the visible portion of this file — candidate for removal; confirm.
let learningsFile: string;
/**
 * Run bin/gstack-learnings-log with `input` as its single argument.
 *
 * The command goes through a shell, so `input` is wrapped in single quotes
 * and embedded single quotes are escaped as '\''.
 *
 * @param input Raw argument text (normally one JSON object).
 * @param opts.expectFail When true, a failing command is reported instead of thrown.
 * @returns Trimmed stdout with exitCode 0 on success; on an expected failure,
 *          `stdout` carries the captured stderr text and `exitCode` the
 *          process status (1 when no status is available).
 * @throws The execSync error when the command fails and `expectFail` is not set.
 */
function runLog(input: string, opts: { expectFail?: boolean } = {}): { stdout: string; exitCode: number } {
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    // Redirect the script's state into this test's scratch directory.
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };
  const quotedInput = `'${input.replace(/'/g, "'\\''")}'`;
  try {
    const output = execSync(`${BIN}/gstack-learnings-log ${quotedInput}`, execOpts);
    return { stdout: output.trim(), exitCode: 0 };
  } catch (e: any) {
    if (!opts.expectFail) {
      throw e;
    }
    return { stdout: e.stderr?.toString() || '', exitCode: e.status || 1 };
  }
}
/**
 * Run bin/gstack-learnings-search with the given raw argument string.
 *
 * @param args Text appended verbatim to the command line (not quoted or escaped).
 * @returns Trimmed stdout, or '' when the command fails for any reason
 *          (non-zero exit, timeout, ...) — callers cannot tell the two apart.
 */
function runSearch(args: string = ''): string {
  const command = `${BIN}/gstack-learnings-search ${args}`;
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    // Read learnings from this test's scratch directory.
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };
  let output = '';
  try {
    output = execSync(command, execOpts).trim();
  } catch {
    // Deliberate best-effort: any failure is reported as empty output.
    output = '';
  }
  return output;
}
// Every test starts with a fresh scratch directory containing an empty `projects/` folder.
beforeEach(() => {
  const scratch = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-learn-'));
  tmpDir = scratch;
  slugDir = path.join(scratch, 'projects');
  fs.mkdirSync(slugDir, { recursive: true });
});

// Remove the scratch directory and everything the scripts wrote into it.
afterEach(() => {
  fs.rmSync(tmpDir, { force: true, recursive: true });
});
/**
 * Locate learnings.jsonl inside the first entry of `slugDir`.
 *
 * @returns The file path, or null when `slugDir` is empty or the first
 *          entry contains no learnings.jsonl.
 */
function findLearningsFile(): string | null {
  const [firstProject] = fs.readdirSync(slugDir);
  if (firstProject === undefined) {
    return null;
  }
  const candidate = path.join(slugDir, firstProject, 'learnings.jsonl');
  if (!fs.existsSync(candidate)) {
    return null;
  }
  return candidate;
}
/** Core behaviour of gstack-learnings-log: append, timestamping, input rejection. */
describe('gstack-learnings-log', () => {
  // Assert the learnings file exists and return its trimmed content.
  const readLearnings = (): string => {
    const file = findLearningsFile();
    expect(file).not.toBeNull();
    return fs.readFileSync(file!, 'utf-8').trim();
  };

  test('appends valid JSON to learnings.jsonl', () => {
    const { exitCode } = runLog(
      '{"skill":"review","type":"pattern","key":"test-key","insight":"test insight","confidence":8,"source":"observed"}',
    );
    expect(exitCode).toBe(0);
    const entry = JSON.parse(readLearnings());
    expect(entry.skill).toBe('review');
    expect(entry.key).toBe('test-key');
    expect(entry.confidence).toBe(8);
  });

  test('auto-injects timestamp when ts is missing', () => {
    runLog('{"skill":"review","type":"pattern","key":"ts-test","insight":"test","confidence":5,"source":"observed"}');
    const entry = JSON.parse(readLearnings());
    expect(entry.ts).toBeDefined();
    // The injected value must parse to a date after the epoch.
    expect(new Date(entry.ts).getTime()).toBeGreaterThan(0);
  });

  test('rejects non-JSON input with non-zero exit code', () => {
    const { exitCode } = runLog('not json at all', { expectFail: true });
    expect(exitCode).not.toBe(0);
  });

  test('append-only: duplicate keys create multiple entries', () => {
    const versions = [
      '{"skill":"review","type":"pattern","key":"dup-key","insight":"first version","confidence":6,"source":"observed"}',
      '{"skill":"review","type":"pattern","key":"dup-key","insight":"second version","confidence":8,"source":"observed"}',
    ];
    for (const version of versions) {
      runLog(version);
    }
    // Both writes must survive as separate lines.
    expect(readLearnings().split('\n').length).toBe(2);
  });
});
/** Core behaviour of gstack-learnings-search: formatting, dedup, filters, decay. */
describe('gstack-learnings-search', () => {
  test('returns empty and exits 0 when no learnings file exists', () => {
    // NOTE(review): runSearch() maps every failure to '', so this only pins
    // the empty output — the exit status itself is not observed here.
    expect(runSearch()).toBe('');
  });

  test('returns formatted output when learnings exist', () => {
    runLog('{"skill":"review","type":"pattern","key":"test-search","insight":"search test insight","confidence":7,"source":"observed"}');
    const report = runSearch();
    for (const fragment of ['LEARNINGS:', 'test-search', 'search test insight']) {
      expect(report).toContain(fragment);
    }
  });

  test('deduplicates entries by key+type (latest wins)', () => {
    const shared = { skill: 'review', type: 'pattern', key: 'dedup-test' };
    runLog(JSON.stringify({ ...shared, insight: 'old version', confidence: 5, source: 'observed', ts: '2026-01-01T00:00:00Z' }));
    runLog(JSON.stringify({ ...shared, insight: 'new version', confidence: 8, source: 'observed', ts: '2026-03-28T00:00:00Z' }));
    const report = runSearch();
    expect(report).toContain('new version');
    expect(report).not.toContain('old version');
    expect(report).toContain('1 loaded');
  });

  test('filters by --type', () => {
    runLog('{"skill":"review","type":"pattern","key":"p1","insight":"a pattern","confidence":7,"source":"observed"}');
    runLog('{"skill":"review","type":"pitfall","key":"p2","insight":"a pitfall","confidence":7,"source":"observed"}');
    const report = runSearch('--type pattern');
    expect(report).toContain('p1');
    expect(report).not.toContain('p2');
  });

  test('filters by --query', () => {
    runLog('{"skill":"review","type":"pattern","key":"auth-bypass","insight":"check session tokens","confidence":7,"source":"observed"}');
    runLog('{"skill":"review","type":"pattern","key":"n-plus-one","insight":"use includes for associations","confidence":7,"source":"observed"}');
    const report = runSearch('--query auth');
    expect(report).toContain('auth-bypass');
    expect(report).not.toContain('n-plus-one');
  });

  test('respects --limit', () => {
    let written = 0;
    while (written < 5) {
      runLog(`{"skill":"review","type":"pattern","key":"limit-${written}","insight":"insight ${written}","confidence":7,"source":"observed"}`);
      written += 1;
    }
    // Five entries were written; only two may be reported.
    expect(runSearch('--limit 2')).toContain('2 loaded');
  });

  test('applies confidence decay for observed/inferred sources', () => {
    // A 90-day-old observed entry at confidence 8 is expected to be shown
    // as 8 - floor(90/30) = 5.
    const ninetyDaysAgo = new Date(Date.now() - 90 * 86400000).toISOString();
    runLog(`{"skill":"review","type":"pattern","key":"decay-test","insight":"old observation","confidence":8,"source":"observed","ts":"${ninetyDaysAgo}"}`);
    expect(runSearch()).toContain('confidence: 5/10');
  });

  test('does NOT decay user-stated learnings', () => {
    const ninetyDaysAgo = new Date(Date.now() - 90 * 86400000).toISOString();
    runLog(`{"skill":"review","type":"preference","key":"no-decay-test","insight":"user preference","confidence":9,"source":"user-stated","ts":"${ninetyDaysAgo}"}`);
    // Same age as the decay test, but the stated confidence must be reported unchanged.
    expect(runSearch()).toContain('confidence: 9/10');
  });

  test('skips malformed JSONL lines gracefully', () => {
    // One entry through the logger, then a garbage line and a second valid
    // entry appended directly to the file.
    runLog('{"skill":"review","type":"pattern","key":"valid-entry","insight":"valid","confidence":7,"source":"observed"}');
    const file = findLearningsFile();
    expect(file).not.toBeNull();
    fs.appendFileSync(file!, '\nthis is not json\n');
    fs.appendFileSync(file!, '{"skill":"review","type":"pattern","key":"also-valid","insight":"also valid","confidence":6,"source":"observed","ts":"2026-03-28T00:00:00Z"}\n');
    const report = runSearch();
    expect(report).toContain('valid-entry');
    expect(report).toContain('also-valid');
  });
});
/** Round-trip checks: what gstack-learnings-log writes must parse back unchanged. */
describe('gstack-learnings-log edge cases', () => {
  // Log one entry, then parse the whole (single-line) learnings file.
  const logAndReadBack = (input: string): any => {
    runLog(input);
    const file = findLearningsFile();
    expect(file).not.toBeNull();
    return JSON.parse(fs.readFileSync(file!, 'utf-8').trim());
  };

  test('preserves existing timestamp when ts is present', () => {
    const stored = logAndReadBack(
      '{"skill":"review","type":"pattern","key":"ts-preserve","insight":"test","confidence":5,"source":"observed","ts":"2025-06-15T10:00:00Z"}',
    );
    expect(stored.ts).toBe('2025-06-15T10:00:00Z');
  });

  test('handles JSON with special characters in insight', () => {
    const payload = {
      skill: 'review',
      type: 'pattern',
      key: 'special-chars',
      insight: 'Use "quotes" and \\backslashes',
      confidence: 7,
      source: 'observed',
    };
    const stored = logAndReadBack(JSON.stringify(payload));
    expect(stored.insight).toContain('quotes');
    expect(stored.insight).toContain('backslashes');
  });

  test('handles JSON with files array field', () => {
    const payload = {
      skill: 'review',
      type: 'architecture',
      key: 'with-files',
      insight: 'test',
      confidence: 8,
      source: 'observed',
      files: ['src/auth.ts', 'src/db.ts'],
    };
    const stored = logAndReadBack(JSON.stringify(payload));
    expect(stored.files).toEqual(['src/auth.ts', 'src/db.ts']);
  });
});
/** Ordering, grouping, combined filters and clamping in gstack-learnings-search output. */
describe('gstack-learnings-search edge cases', () => {
  test('sorts by confidence then recency', () => {
    // Two entries: one high confidence old, one lower confidence recent
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'high-conf', insight: 'high confidence entry', confidence: 9, source: 'user-stated', ts: '2026-01-01T00:00:00Z' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'recent', insight: 'recent entry', confidence: 5, source: 'observed', ts: '2026-03-28T00:00:00Z' }));
    const output = runSearch();
    const highIdx = output.indexOf('high-conf');
    const recentIdx = output.indexOf('recent');
    // Both entries must actually be present: indexOf() yields -1 for a missing
    // entry, and -1 would trivially satisfy the "appears first" comparison.
    expect(highIdx).toBeGreaterThanOrEqual(0);
    expect(recentIdx).toBeGreaterThanOrEqual(0);
    // High confidence should appear first
    expect(highIdx).toBeLessThan(recentIdx);
  });

  test('groups output by type', () => {
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'p1', insight: 'a pattern', confidence: 7, source: 'observed' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'pit1', insight: 'a pitfall', confidence: 7, source: 'observed' }));
    const output = runSearch();
    // One heading per entry type.
    expect(output).toContain('## Patterns');
    expect(output).toContain('## Pitfalls');
  });

  test('combined --type and --query filtering', () => {
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'auth-token', insight: 'check token expiry', confidence: 7, source: 'observed' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'auth-leak', insight: 'auth token in logs', confidence: 7, source: 'observed' }));
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'cache-key', insight: 'cache invalidation', confidence: 7, source: 'observed' }));
    const output = runSearch('--type pattern --query auth');
    expect(output).toContain('auth-token');
    expect(output).not.toContain('auth-leak'); // wrong type
    expect(output).not.toContain('cache-key'); // wrong query
  });

  test('entries with missing key or type are skipped', () => {
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'valid', insight: 'valid entry', confidence: 7, source: 'observed' }));
    const f = findLearningsFile();
    expect(f).not.toBeNull();
    // Append entries missing key and type directly to the file, bypassing the logger.
    fs.appendFileSync(f!, JSON.stringify({ skill: 'review', type: 'pattern', insight: 'no key', confidence: 7, source: 'observed' }) + '\n');
    fs.appendFileSync(f!, JSON.stringify({ skill: 'review', key: 'no-type', insight: 'no type', confidence: 7, source: 'observed' }) + '\n');
    const output = runSearch();
    expect(output).toContain('valid');
    expect(output).not.toContain('no key');
    expect(output).not.toContain('no-type');
  });

  test('confidence decay floors at 0 (never negative)', () => {
    // Entry from 1 year ago with confidence 3 — decay would be 12, clamped to 0
    const ts = new Date(Date.now() - 365 * 86400000).toISOString();
    runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'ancient', insight: 'very old', confidence: 3, source: 'observed', ts }));
    const output = runSearch();
    expect(output).toContain('confidence: 0/10');
  });
});
+515
View File
@@ -0,0 +1,515 @@
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { execSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// Repository root: the parent of the directory containing this test file.
const ROOT = path.resolve(import.meta.dir, '..');
// Directory holding the real gstack-* scripts that get copied into the mock install.
const BIN = path.join(ROOT, 'bin');
// Per-test scratch directory, (re)assigned in beforeEach; passed to commands as GSTACK_STATE_DIR.
let tmpDir: string;
// `<tmpDir>/skills`, assigned by setupMockInstall(); tests pass it as GSTACK_SKILLS_DIR.
let skillsDir: string;
// `<tmpDir>/gstack-install`, assigned by setupMockInstall(); tests pass it as GSTACK_INSTALL_DIR.
let installDir: string;
/**
 * Execute a shell command from the repo root with GSTACK_STATE_DIR pointing
 * at the per-test scratch directory.
 *
 * @param cmd Full command line to execute.
 * @param env Extra environment variables; they override the defaults.
 * @param expectFail When true, a failing command returns its captured output instead of throwing.
 * @returns Trimmed stdout on success; on an expected failure, the trimmed
 *          stderr (or stdout if stderr is empty).
 * @throws The execSync error when the command fails and `expectFail` is false.
 */
function run(cmd: string, env: Record<string, string> = {}, expectFail = false): string {
  try {
    const output = execSync(cmd, {
      cwd: ROOT,
      env: { ...process.env, GSTACK_STATE_DIR: tmpDir, ...env },
      encoding: 'utf-8',
      timeout: 10000,
      // Capture all three streams rather than inheriting them.
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return output.trim();
  } catch (e: any) {
    if (!expectFail) {
      throw e;
    }
    const captured = e.stderr || e.stdout || '';
    return captured.toString().trim();
  }
}
/**
 * Build a mock gstack install under the scratch directory.
 *
 * Assigns `installDir` (`<tmpDir>/gstack-install`) and `skillsDir`
 * (`<tmpDir>/skills`), copies the real gstack-config script — plus
 * gstack-relink and gstack-patch-names when they exist — into
 * `<installDir>/bin`, and writes one `<skill>/SKILL.md` with minimal
 * frontmatter per requested skill.
 *
 * @param skills Names of the skill directories to create in the mock install.
 */
function setupMockInstall(skills: string[]): void {
  installDir = path.join(tmpDir, 'gstack-install');
  skillsDir = path.join(tmpDir, 'skills');
  const mockBin = path.join(installDir, 'bin');
  for (const dir of [installDir, skillsDir, mockBin]) {
    fs.mkdirSync(dir, { recursive: true });
  }

  // Copy one script from the real bin/ into the mock bin/ and make it executable.
  const installTool = (tool: string): void => {
    const destination = path.join(mockBin, tool);
    fs.copyFileSync(path.join(BIN, tool), destination);
    fs.chmodSync(destination, 0o755);
  };

  // gstack-config is copied unconditionally; the other two only if present.
  installTool('gstack-config');
  for (const optionalTool of ['gstack-relink', 'gstack-patch-names']) {
    if (fs.existsSync(path.join(BIN, optionalTool))) {
      installTool(optionalTool);
    }
  }

  // Mock skill directories with proper frontmatter.
  for (const skill of skills) {
    const skillDir = path.join(installDir, skill);
    fs.mkdirSync(skillDir, { recursive: true });
    fs.writeFileSync(
      path.join(skillDir, 'SKILL.md'),
      `---\nname: ${skill}\ndescription: test\n---\n# ${skill}`
    );
  }
}
// Fresh scratch directory per test; setupMockInstall() populates it on demand.
beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'gstack-relink-test-');
  tmpDir = fs.mkdtempSync(prefix);
});

// Delete the scratch directory together with the mock install and skills dirs.
afterEach(() => {
  fs.rmSync(tmpDir, { force: true, recursive: true });
});
describe('gstack-relink (#578)', () => {
// Test 11: skill_prefix=true must produce gstack-* entries in the skills dir.
test('creates gstack-* symlinks when skill_prefix=true', () => {
  setupMockInstall(['qa', 'ship', 'review']);
  // Point both commands at the mock install so auto-relink never touches a real one.
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);

  run(`${tool('gstack-config')} set skill_prefix true`, mockEnv);
  const output = run(`${tool('gstack-relink')}`, mockEnv);

  for (const entry of ['gstack-qa', 'gstack-ship', 'gstack-review']) {
    expect(fs.existsSync(path.join(skillsDir, entry))).toBe(true);
  }
  expect(output).toContain('gstack-');
});
// Test 12: skill_prefix=false must produce unprefixed entries in the skills dir.
test('creates flat symlinks when skill_prefix=false', () => {
  setupMockInstall(['qa', 'ship', 'review']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);

  run(`${tool('gstack-config')} set skill_prefix false`, mockEnv);
  const output = run(`${tool('gstack-relink')}`, mockEnv);

  for (const entry of ['qa', 'ship', 'review']) {
    expect(fs.existsSync(path.join(skillsDir, entry))).toBe(true);
  }
  expect(output).toContain('flat');
});
// REGRESSION (#761): unprefixed skills must be real directories, not symlinks.
// Claude Code auto-prefixes skills nested under a parent dir symlink, so
// `qa -> gstack/qa` would be discovered as "gstack-qa" instead of "qa".
// The fix under test: a real directory per skill with a SKILL.md symlink inside.
test('unprefixed skills are real directories with SKILL.md symlinks, not dir symlinks', () => {
  const skills = ['qa', 'ship', 'review', 'plan-ceo-review'];
  setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);

  run(`${tool('gstack-config')} set skill_prefix false`, mockEnv);
  run(`${tool('gstack-relink')}`, mockEnv);

  for (const skill of skills) {
    const skillPath = path.join(skillsDir, skill);
    const skillMdPath = path.join(skillPath, 'SKILL.md');

    // The skill entry itself: a directory, and not a symlink to one.
    expect(fs.lstatSync(skillPath).isDirectory()).toBe(true);
    expect(fs.lstatSync(skillPath).isSymbolicLink()).toBe(false);

    // Its SKILL.md: present, and a symlink.
    expect(fs.existsSync(skillMdPath)).toBe(true);
    expect(fs.lstatSync(skillMdPath).isSymbolicLink()).toBe(true);

    // The link target must name the skill and end in /SKILL.md.
    const linkTarget = fs.readlinkSync(skillMdPath);
    expect(linkTarget).toContain(skill);
    expect(linkTarget).toEndWith('/SKILL.md');
  }
});
// The real-directory + SKILL.md-symlink invariant must also hold in prefixed mode.
test('prefixed skills are real directories with SKILL.md symlinks, not dir symlinks', () => {
  setupMockInstall(['qa', 'ship']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);

  run(`${tool('gstack-config')} set skill_prefix true`, mockEnv);
  run(`${tool('gstack-relink')}`, mockEnv);

  for (const entry of ['gstack-qa', 'gstack-ship']) {
    const entryPath = path.join(skillsDir, entry);
    const entryStat = () => fs.lstatSync(entryPath);
    expect(entryStat().isDirectory()).toBe(true);
    expect(entryStat().isSymbolicLink()).toBe(false);
    expect(fs.lstatSync(path.join(entryPath, 'SKILL.md')).isSymbolicLink()).toBe(true);
  }
});
// Upgrade path: directory symlinks left by the old layout must be replaced
// with real directories on relink.
test('upgrades old directory symlinks to real directories', () => {
  setupMockInstall(['qa', 'ship']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);
  const qaPath = path.join(skillsDir, 'qa');

  // Recreate the old layout: each skill entry is a symlink to the install's skill dir.
  for (const skill of ['qa', 'ship']) {
    fs.symlinkSync(path.join(installDir, skill), path.join(skillsDir, skill));
  }
  // Sanity check of the starting state.
  expect(fs.lstatSync(qaPath).isSymbolicLink()).toBe(true);

  run(`${tool('gstack-config')} set skill_prefix false`, mockEnv);
  run(`${tool('gstack-relink')}`, mockEnv);

  // After relink the entry is a real directory holding a SKILL.md symlink.
  expect(fs.lstatSync(qaPath).isSymbolicLink()).toBe(false);
  expect(fs.lstatSync(qaPath).isDirectory()).toBe(true);
  expect(fs.lstatSync(path.join(qaPath, 'SKILL.md')).isSymbolicLink()).toBe(true);
});
// FIRST INSTALL, --no-prefix: the skills dir must hold flat names only.
// gstack-upgrade keeps its name because "gstack-" is part of the skill's real name.
test('first install --no-prefix: only flat names exist, zero gstack-* entries', () => {
  setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review', 'gstack-upgrade']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);

  // Equivalent of passing --no-prefix on a machine with no saved config.
  run(`${tool('gstack-config')} set skill_prefix false`, mockEnv);
  run(`${tool('gstack-relink')}`, mockEnv);

  const entries = fs.readdirSync(skillsDir);
  expect([...entries].sort()).toEqual(['gstack-upgrade', 'plan-ceo-review', 'qa', 'review', 'ship']);

  // No gstack-qa, gstack-ship, gstack-review or gstack-plan-ceo-review may appear.
  const leaked = entries.filter((name) => name !== 'gstack-upgrade' && name.startsWith('gstack-'));
  expect(leaked).toEqual([]);
});
// FIRST INSTALL, --prefix: the skills dir must hold gstack-* names only.
test('first install --prefix: only gstack-* entries exist, zero flat names', () => {
  setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review', 'gstack-upgrade']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);

  run(`${tool('gstack-config')} set skill_prefix true`, mockEnv);
  run(`${tool('gstack-relink')}`, mockEnv);

  const entries = fs.readdirSync(skillsDir);
  // gstack-upgrade is listed once: it is not prefixed a second time.
  const expected = [
    'gstack-plan-ceo-review', 'gstack-qa', 'gstack-review', 'gstack-ship', 'gstack-upgrade',
  ];
  expect([...entries].sort()).toEqual(expected);

  // No unprefixed qa, ship, review or plan-ceo-review may appear.
  const leaked = entries.filter((name) => !name.startsWith('gstack-'));
  expect(leaked).toEqual([]);
});
// FIRST INSTALL, non-TTY: with no saved config, relink is expected to fall
// back to flat names.
test('non-TTY first install defaults to flat names via relink', () => {
  setupMockInstall(['qa', 'ship']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };

  // Deliberately no `gstack-config set` call: this simulates a fresh install
  // where skill_prefix has never been written.
  run(`${path.join(installDir, 'bin', 'gstack-relink')}`, mockEnv);

  const created = fs.readdirSync(skillsDir);
  expect(created.sort()).toEqual(['qa', 'ship']);
});
// SWITCH prefix → no-prefix: every gstack-* entry created by the first relink
// must be gone after the second one.
test('switching prefix to no-prefix removes all gstack-* entries completely', () => {
  setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review', 'gstack-upgrade']);
  const mockEnv = { GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir };
  const tool = (name: string): string => path.join(installDir, 'bin', name);

  // Phase 1: prefix mode.
  run(`${tool('gstack-config')} set skill_prefix true`, mockEnv);
  run(`${tool('gstack-relink')}`, mockEnv);
  const prefixedEntries = fs.readdirSync(skillsDir);
  expect(prefixedEntries.filter((name) => !name.startsWith('gstack-'))).toEqual([]);

  // Phase 2: switch to no-prefix.
  run(`${tool('gstack-config')} set skill_prefix false`, mockEnv);
  run(`${tool('gstack-relink')}`, mockEnv);
  const flatEntries = fs.readdirSync(skillsDir);

  // Only flat names remain, plus gstack-upgrade under its real name.
  expect([...flatEntries].sort()).toEqual(['gstack-upgrade', 'plan-ceo-review', 'qa', 'review', 'ship']);
  const leaked = flatEntries.filter((name) => name !== 'gstack-upgrade' && name.startsWith('gstack-'));
  expect(leaked).toEqual([]);
});
// SWITCH: no-prefix → prefix must clean up ALL flat entries
test('switching no-prefix to prefix removes all flat entries completely', () => {
  // Test-local helpers: a fresh env object per invocation, and a combined
  // "set skill_prefix, then relink explicitly" step.
  const env = () => ({ GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir });
  const binPath = (tool: string) => path.join(installDir, 'bin', tool);
  const switchPrefix = (value: 'true' | 'false') => {
    run(`${binPath('gstack-config')} set skill_prefix ${value}`, env());
    run(binPath('gstack-relink'), env());
  };

  setupMockInstall(['qa', 'ship', 'review', 'gstack-upgrade']);

  // Phase 1 — flat mode: the only gstack-* entry allowed is gstack-upgrade.
  switchPrefix('false');
  const flat = fs.readdirSync(skillsDir);
  expect(flat.filter(name => name.startsWith('gstack-') && name !== 'gstack-upgrade')).toEqual([]);

  // Phase 2 — prefix mode: every entry must be a gstack-* name.
  switchPrefix('true');
  const prefixed = fs.readdirSync(skillsDir);
  prefixed.sort();
  expect(prefixed).toEqual([
    'gstack-qa', 'gstack-review', 'gstack-ship', 'gstack-upgrade',
  ]);

  const strayFlat = prefixed.filter(name => !name.startsWith('gstack-'));
  expect(strayFlat).toEqual([]);
});
// Test 13: cleans stale symlinks from opposite mode
test('cleans up stale symlinks from opposite mode', () => {
  const env = () => ({ GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir });
  const binPath = (tool: string) => path.join(installDir, 'bin', tool);
  const switchPrefix = (value: 'true' | 'false') => {
    run(`${binPath('gstack-config')} set skill_prefix ${value}`, env());
    run(binPath('gstack-relink'), env());
  };
  const linkExists = (name: string) => fs.existsSync(path.join(skillsDir, name));

  setupMockInstall(['qa', 'ship']);

  // Lay down prefixed entries first so there is something stale to clean.
  switchPrefix('true');
  expect(linkExists('gstack-qa')).toBe(true);

  // After flipping to flat mode the flat entry must exist and the prefixed
  // one from the previous mode must be gone.
  switchPrefix('false');
  expect(linkExists('qa')).toBe(true);
  expect(linkExists('gstack-qa')).toBe(false);
});
// Test 14: error when install dir missing
test('prints error when install dir missing', () => {
  // Both directories point at paths that do not exist.
  const missingDirsEnv = {
    GSTACK_INSTALL_DIR: '/nonexistent/path/gstack',
    GSTACK_SKILLS_DIR: '/nonexistent/path/skills',
  };
  // NOTE(review): the third argument is forwarded exactly as before; it
  // presumably tells `run` to tolerate a failing exit — confirm against the helper.
  const printed = run(`${BIN}/gstack-relink`, missingDirsEnv, true);
  expect(printed).toContain('setup');
});
// Test: gstack-upgrade does NOT get double-prefixed
test('does not double-prefix gstack-upgrade directory', () => {
  const env = () => ({ GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir });
  const binPath = (tool: string) => path.join(installDir, 'bin', tool);
  const linkExists = (name: string) => fs.existsSync(path.join(skillsDir, name));

  setupMockInstall(['qa', 'ship', 'gstack-upgrade']);
  run(`${binPath('gstack-config')} set skill_prefix true`, env());
  run(binPath('gstack-relink'), env());

  // A skill whose directory already starts with gstack- keeps its name.
  expect(linkExists('gstack-upgrade')).toBe(true);
  expect(linkExists('gstack-gstack-upgrade')).toBe(false);
  // Ordinary skills still receive the prefix.
  expect(linkExists('gstack-qa')).toBe(true);
});
// Test 15: gstack-config set skill_prefix triggers relink
test('gstack-config set skill_prefix triggers relink', () => {
  setupMockInstall(['qa', 'ship']);

  // Only gstack-config is invoked here; gstack-relink is deliberately NOT
  // called, so the prefixed entries can only appear if `set` relinked by itself.
  const configBin = path.join(installDir, 'bin', 'gstack-config');
  run(`${configBin} set skill_prefix true`, {
    GSTACK_INSTALL_DIR: installDir,
    GSTACK_SKILLS_DIR: skillsDir,
  });

  for (const expected of ['gstack-qa', 'gstack-ship']) {
    expect(fs.existsSync(path.join(skillsDir, expected))).toBe(true);
  }
});
});
describe('upgrade migrations', () => {
  // Directory holding the versioned upgrade scripts (v{VERSION}.sh).
  const MIGRATIONS_DIR = path.join(ROOT, 'gstack-upgrade', 'migrations');

  test('migrations directory exists', () => {
    expect(fs.existsSync(MIGRATIONS_DIR)).toBe(true);
  });

  test('all migration scripts are executable and parse without syntax errors', () => {
    const scripts = fs.readdirSync(MIGRATIONS_DIR).filter(f => f.endsWith('.sh'));
    expect(scripts.length).toBeGreaterThan(0);
    for (const script of scripts) {
      const fullPath = path.join(MIGRATIONS_DIR, script);
      // Must be executable (any of the user/group/other execute bits set)
      const stat = fs.statSync(fullPath);
      expect(stat.mode & 0o111).toBeGreaterThan(0);
      // Must parse without syntax errors (bash -n is a syntax check, doesn't execute).
      // execSync throws on a non-zero exit, which is what fails the test; the
      // captured output is not needed, so it is not bound to a local.
      execSync(`bash -n "${fullPath}" 2>&1`, { encoding: 'utf-8', timeout: 5000 });
    }
  });

  test('migration filenames follow v{VERSION}.sh pattern', () => {
    const scripts = fs.readdirSync(MIGRATIONS_DIR).filter(f => f.endsWith('.sh'));
    for (const script of scripts) {
      expect(script).toMatch(/^v\d+\.\d+\.\d+\.\d+\.sh$/);
    }
  });

  test('v0.15.2.0 migration runs gstack-relink', () => {
    const content = fs.readFileSync(path.join(MIGRATIONS_DIR, 'v0.15.2.0.sh'), 'utf-8');
    expect(content).toContain('gstack-relink');
  });

  test('v0.15.2.0 migration fixes stale directory symlinks', () => {
    setupMockInstall(['qa', 'ship', 'review']);
    // Simulate old state: directory symlinks (pre-v0.15.2.0 pattern)
    fs.symlinkSync(path.join(installDir, 'qa'), path.join(skillsDir, 'qa'));
    fs.symlinkSync(path.join(installDir, 'ship'), path.join(skillsDir, 'ship'));
    fs.symlinkSync(path.join(installDir, 'review'), path.join(skillsDir, 'review'));
    // Set no-prefix mode (suppress auto-relink so symlinks stay intact for the test)
    run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
      GSTACK_SETUP_RUNNING: '1',
    });
    // Verify old state: symlinks
    expect(fs.lstatSync(path.join(skillsDir, 'qa')).isSymbolicLink()).toBe(true);
    // Run the migration (it calls gstack-relink internally). The script path is
    // quoted, matching the bash -n check above, so a checkout path containing
    // spaces is not split by the shell.
    run(`bash "${path.join(MIGRATIONS_DIR, 'v0.15.2.0.sh')}"`, {
      GSTACK_INSTALL_DIR: installDir,
      GSTACK_SKILLS_DIR: skillsDir,
    });
    // After migration: real directories with SKILL.md symlinks
    for (const skill of ['qa', 'ship', 'review']) {
      const skillPath = path.join(skillsDir, skill);
      expect(fs.lstatSync(skillPath).isSymbolicLink()).toBe(false);
      expect(fs.lstatSync(skillPath).isDirectory()).toBe(true);
      expect(fs.lstatSync(path.join(skillPath, 'SKILL.md')).isSymbolicLink()).toBe(true);
    }
  });
});
describe('gstack-patch-names (#620/#578)', () => {
  // Extracts the value of the first line-anchored `name:` field in
  // <skillDir>/SKILL.md, or null when no such line exists.
  const readSkillName = (skillDir: string): string | null => {
    const body = fs.readFileSync(path.join(skillDir, 'SKILL.md'), 'utf-8');
    const found = body.match(/^name:\s*(.+)$/m);
    if (!found) {
      return null;
    }
    return found[1].trim();
  };

  // Name recorded in the SKILL.md of an installed skill.
  const installedName = (skill: string) => readSkillName(path.join(installDir, skill));

  // Sets skill_prefix to the given value and then relinks explicitly.
  const applyPrefixMode = (value: 'true' | 'false') => {
    const env = () => ({ GSTACK_INSTALL_DIR: installDir, GSTACK_SKILLS_DIR: skillsDir });
    run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix ${value}`, env());
    run(path.join(installDir, 'bin', 'gstack-relink'), env());
  };

  test('prefix=true patches name: field in SKILL.md', () => {
    setupMockInstall(['qa', 'ship', 'review']);
    applyPrefixMode('true');

    // Each skill's name: field now carries the gstack- prefix.
    expect(installedName('qa')).toBe('gstack-qa');
    expect(installedName('ship')).toBe('gstack-ship');
    expect(installedName('review')).toBe('gstack-review');
  });

  test('prefix=false restores name: field in SKILL.md', () => {
    setupMockInstall(['qa', 'ship']);

    // Prefix first so there is something to restore.
    applyPrefixMode('true');
    expect(installedName('qa')).toBe('gstack-qa');

    // Flat mode must bring the original names back.
    applyPrefixMode('false');
    expect(installedName('qa')).toBe('qa');
    expect(installedName('ship')).toBe('ship');
  });

  test('gstack-upgrade name: not double-prefixed', () => {
    setupMockInstall(['qa', 'gstack-upgrade']);
    applyPrefixMode('true');

    // Already-prefixed skill keeps its name rather than becoming gstack-gstack-upgrade.
    expect(installedName('gstack-upgrade')).toBe('gstack-upgrade');
    // A regular skill is prefixed as usual.
    expect(installedName('qa')).toBe('gstack-qa');
  });

  test('SKILL.md without frontmatter is a no-op', () => {
    setupMockInstall(['qa']);

    // Replace qa's SKILL.md with a body that has no name: line at all.
    const skillFile = path.join(installDir, 'qa', 'SKILL.md');
    fs.writeFileSync(skillFile, '# qa\nSome content.');

    // Must complete without throwing...
    applyPrefixMode('true');

    // ...and leave the file byte-for-byte untouched.
    expect(fs.readFileSync(skillFile, 'utf-8')).toBe('# qa\nSome content.');
  });
});
+78 -30
View File
@@ -20,6 +20,7 @@ let tmpDir: string;
describeIfSelected('Skill E2E tests', [
'browse-basic', 'browse-snapshot', 'skillmd-setup-discovery',
'skillmd-no-local-binary', 'skillmd-outside-git', 'session-awareness',
'operational-learning',
], () => {
beforeAll(() => {
testServer = startTestServer();
@@ -177,49 +178,96 @@ Report the exact output — either "READY: <path>" or "NEEDS_SETUP".`,
try { fs.rmSync(nonGitDir, { recursive: true, force: true }); } catch {}
}, 60_000);
testConcurrentIfSelected('contributor-mode', async () => {
const contribDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-contrib-'));
const logsDir = path.join(contribDir, 'contributor-logs');
fs.mkdirSync(logsDir, { recursive: true });
testConcurrentIfSelected('operational-learning', async () => {
const opDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-oplearn-'));
const gstackHome = path.join(opDir, '.gstack-home');
// Init git repo
const run = (cmd: string, args: string[]) =>
spawnSync(cmd, args, { cwd: opDir, stdio: 'pipe', timeout: 5000 });
run('git', ['init', '-b', 'main']);
run('git', ['config', 'user.email', 'test@test.com']);
run('git', ['config', 'user.name', 'Test']);
fs.writeFileSync(path.join(opDir, 'app.ts'), 'console.log("hello");\n');
run('git', ['add', '.']);
run('git', ['commit', '-m', 'initial']);
// Copy bin scripts
const binDir = path.join(opDir, 'bin');
fs.mkdirSync(binDir, { recursive: true });
for (const script of ['gstack-learnings-log', 'gstack-slug']) {
fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script));
fs.chmodSync(path.join(binDir, script), 0o755);
}
// gstack-learnings-log will create the project dir automatically via gstack-slug
const result = await runSkillTest({
prompt: `You are in contributor mode (gstack_contributor=true). You just ran this browse command and it failed:
prompt: `You just ran \`npm test\` in this project and it failed with this error:
$ /nonexistent/browse goto https://example.com
/nonexistent/browse: No such file or directory
Error: --experimental-vm-modules flag is required for ESM support in this project.
Run: npm test --experimental-vm-modules
Per the contributor mode instructions, file a field report to ${logsDir}/browse-missing-binary.md using the Write tool. Include all required sections: title, what you tried, what happened, rating, repro steps, raw output, what would make it a 10, and the date/version footer.`,
workingDirectory: contribDir,
Per the Operational Self-Improvement instructions below, log an operational learning about this failure.
## Operational Self-Improvement
Before completing, reflect on this session:
- Did any commands fail unexpectedly?
If yes, log an operational learning for future sessions:
\`\`\`bash
GSTACK_HOME="${gstackHome}" ${binDir}/gstack-learnings-log '{"skill":"qa","type":"operational","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"observed"}'
\`\`\`
Replace SHORT_KEY with a kebab-case key like "esm-vm-modules-flag".
Replace DESCRIPTION with a one-sentence description of what you learned.
Replace N with a confidence score 1-10.
Log the operational learning now. Then say what you logged.`,
workingDirectory: opDir,
maxTurns: 5,
timeout: 30_000,
testName: 'contributor-mode',
testName: 'operational-learning',
runId,
});
logCost('contributor mode', result);
// Override passed: this test intentionally triggers a browse error (nonexistent binary)
// so browseErrors will be non-empty — that's expected, not a failure
recordE2E(evalCollector, 'contributor mode report', 'Skill E2E tests', result, {
passed: result.exitReason === 'success',
logCost('operational learning', result);
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
// Check if learnings file was created with an operational entry
// The slug is derived from the git repo (dirname), so search all project dirs
let hasOperational = false;
const projectsDir = path.join(gstackHome, 'projects');
if (fs.existsSync(projectsDir)) {
for (const slug of fs.readdirSync(projectsDir)) {
const lPath = path.join(projectsDir, slug, 'learnings.jsonl');
if (fs.existsSync(lPath)) {
const jsonl = fs.readFileSync(lPath, 'utf-8').trim();
if (jsonl) {
const entries = jsonl.split('\n').map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
const opEntry = entries.find(e => e.type === 'operational');
if (opEntry) {
hasOperational = true;
console.log(`Operational learning logged: key="${opEntry.key}" insight="${opEntry.insight}" (slug: ${slug})`);
break;
}
}
}
}
}
recordE2E(evalCollector, 'operational learning', 'Skill E2E tests', result, {
passed: exitOk && hasOperational,
});
// Verify a contributor log was created with expected format
const logFiles = fs.readdirSync(logsDir).filter(f => f.endsWith('.md'));
expect(logFiles.length).toBeGreaterThan(0);
// Verify report has key structural sections (agent may phrase differently)
const logContent = fs.readFileSync(path.join(logsDir, logFiles[0]), 'utf-8');
// Must have a title (# heading)
expect(logContent).toMatch(/^#\s/m);
// Must mention the failed command or browse
expect(logContent).toMatch(/browse|nonexistent|not found|no such file/i);
// Must have some kind of rating
expect(logContent).toMatch(/rating|\/10/i);
// Must have steps or reproduction info
expect(logContent).toMatch(/step|repro|reproduce/i);
expect(exitOk).toBe(true);
expect(hasOperational).toBe(true);
// Clean up
try { fs.rmSync(contribDir, { recursive: true, force: true }); } catch {}
try { fs.rmSync(opDir, { recursive: true, force: true }); } catch {}
}, 90_000);
testConcurrentIfSelected('session-awareness', async () => {
+138
View File
@@ -0,0 +1,138 @@
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { runSkillTest } from './helpers/session-runner';
import {
ROOT, runId, evalsEnabled,
describeIfSelected, testConcurrentIfSelected,
copyDirSync, logCost, recordE2E,
createEvalCollector, finalizeEvalCollector,
} from './helpers/e2e-helpers';
import { spawnSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// Collector for this file's E2E results: tests record into it via recordE2E,
// and the suite's afterAll hands it to finalizeEvalCollector.
const evalCollector = createEvalCollector('e2e-learnings');
// --- Learnings E2E: seed learnings, run /learn, verify output ---
describeIfSelected('Learnings E2E', ['learnings-show'], () => {
  let workDir: string;
  let gstackHome: string;

  // Builds a throwaway git repo containing the /learn skill, the bin scripts
  // it needs, and a seeded learnings.jsonl under an isolated GSTACK_HOME.
  beforeAll(() => {
    workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-learnings-'));
    gstackHome = path.join(workDir, '.gstack-home');

    // Init git repo
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    // Copy the /learn skill
    copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn'));

    // Copy bin scripts needed by /learn
    const binDir = path.join(workDir, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    for (const script of ['gstack-learnings-search', 'gstack-learnings-log', 'gstack-slug']) {
      fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script));
      fs.chmodSync(path.join(binDir, script), 0o755);
    }

    // Seed learnings JSONL — slug must match what gstack-slug computes.
    // With no git remote, gstack-slug falls back to basename(workDir).
    const slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
    const projectDir = path.join(gstackHome, 'projects', slug);
    fs.mkdirSync(projectDir, { recursive: true });
    const learnings = [
      {
        skill: 'review', type: 'pattern', key: 'n-plus-one-queries',
        insight: 'ActiveRecord associations in loops cause N+1 queries. Always use includes/preload.',
        confidence: 9, source: 'observed', ts: new Date().toISOString(),
        files: ['app/models/user.rb'],
      },
      {
        skill: 'investigate', type: 'pitfall', key: 'stale-cache-after-deploy',
        insight: 'Redis cache not invalidated on deploy causes stale data for 5 minutes.',
        confidence: 7, source: 'observed', ts: new Date().toISOString(),
        files: ['config/redis.yml'],
      },
      {
        skill: 'ship', type: 'preference', key: 'always-run-rubocop',
        insight: 'User wants rubocop to run before every commit, no exceptions.',
        confidence: 10, source: 'user-stated', ts: new Date().toISOString(),
      },
      {
        skill: 'qa', type: 'operational', key: 'test-timeout-flag',
        insight: 'bun test requires --timeout 30000 for E2E tests in this project.',
        confidence: 9, source: 'observed', ts: new Date().toISOString(),
      },
    ];
    // One JSON object per line, with a trailing newline.
    fs.writeFileSync(
      path.join(projectDir, 'learnings.jsonl'),
      learnings.map(l => JSON.stringify(l)).join('\n') + '\n',
    );
  });

  afterAll(() => {
    // Best-effort cleanup: a failed removal must not mask the test outcome.
    try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
    finalizeEvalCollector(evalCollector);
  });

  testConcurrentIfSelected('learnings-show', async () => {
    const result = await runSkillTest({
      prompt: `Read the file learn/SKILL.md for the /learn skill instructions.
Run the /learn command (no arguments — show recent learnings).
IMPORTANT:
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
- Replace any references to ~/.claude/skills/gstack/bin/gstack-slug with ./bin/gstack-slug.
- Do NOT use AskUserQuestion.
- Do NOT implement code changes.
- Just show the learnings and summarize what you found.`,
      workingDirectory: workDir,
      maxTurns: 15,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 120_000,
      testName: 'learnings-show',
      runId,
    });
    logCost('/learn show', result);

    const output = result.output.toLowerCase();
    // The agent should have found and displayed the seeded learnings
    const mentionsNPlusOne = output.includes('n-plus-one') || output.includes('n+1');
    const mentionsCache = output.includes('stale') || output.includes('cache');
    const mentionsRubocop = output.includes('rubocop');
    // At least 2 of 3 learnings should appear in the output
    const foundCount = [mentionsNPlusOne, mentionsCache, mentionsRubocop].filter(Boolean).length;
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

    recordE2E(evalCollector, '/learn', 'Learnings show E2E', result, {
      passed: exitOk && foundCount >= 2,
    });

    // Print the per-learning breakdown BEFORE asserting: a failing expect()
    // throws, so logging afterwards would hide the breakdown in exactly the
    // runs where it is needed to diagnose the failure.
    if (foundCount === 3) {
      console.log('All 3 seeded learnings found in output');
    } else {
      console.warn(`Only ${foundCount}/3 learnings found (N+1: ${mentionsNPlusOne}, cache: ${mentionsCache}, rubocop: ${mentionsRubocop})`);
    }

    expect(exitOk).toBe(true);
    expect(foundCount).toBeGreaterThanOrEqual(2);
  }, 180_000);
});
+562
View File
@@ -0,0 +1,562 @@
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { runSkillTest } from './helpers/session-runner';
import {
ROOT, runId, describeIfSelected, testConcurrentIfSelected,
logCost, recordE2E, createEvalCollector, finalizeEvalCollector,
} from './helpers/e2e-helpers';
import { spawnSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// Collector that every Review Army test in this file records its result into via recordE2E.
const evalCollector = createEvalCollector('e2e-review-army');
// Helper: create a git repo with a feature branch
/**
 * Creates a temporary directory, initialises a git repo in it on branch
 * `main` with a test identity, and returns the directory plus a command runner.
 *
 * @param prefix - Inserted into the temp directory name (`skill-e2e-<prefix>-XXXXXX`).
 * @returns `dir`, the repo root, and `run`, which executes a command with
 *   `dir` as its working directory (5s timeout).
 * @throws Error when any command fails to spawn, times out, or exits non-zero.
 *   Failures used to be ignored, so a broken fixture only surfaced later as a
 *   confusing failure inside the E2E run itself.
 */
function setupRepo(prefix: string): { dir: string; run: (cmd: string, args: string[]) => void } {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), `skill-e2e-${prefix}-`));
  const run = (cmd: string, args: string[]): void => {
    const proc = spawnSync(cmd, args, { cwd: dir, stdio: 'pipe', timeout: 5000 });
    // status is null when the process was killed (e.g. timeout) or never started.
    if (proc.error || proc.status !== 0) {
      const detail = proc.error?.message ?? proc.stderr?.toString().trim() ?? '';
      throw new Error(`${cmd} ${args.join(' ')} failed in ${dir} (exit ${proc.status}): ${detail}`);
    }
  };
  try {
    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
  } catch (err) {
    // The caller never receives `dir` on failure, so remove it here to avoid a leak.
    fs.rmSync(dir, { recursive: true, force: true });
    throw err;
  }
  return { dir, run };
}
// Helper: copy review skill files to test dir
/**
 * Copies the review skill's documents into `dir` under flattened names
 * (`review-<file>`), and every entry of `review/specialists` into
 * `dir/review-specialists/`.
 */
function copyReviewFiles(dir: string) {
  const reviewRoot = path.join(ROOT, 'review');

  // Top-level review documents, each copied as review-<name>.
  for (const doc of ['SKILL.md', 'checklist.md', 'greptile-triage.md']) {
    fs.copyFileSync(path.join(reviewRoot, doc), path.join(dir, `review-${doc}`));
  }

  // Specialist checklists keep their own file names inside review-specialists/.
  const targetDir = path.join(dir, 'review-specialists');
  fs.mkdirSync(targetDir, { recursive: true });
  const sourceDir = path.join(reviewRoot, 'specialists');
  fs.readdirSync(sourceDir).forEach(entry => {
    fs.copyFileSync(path.join(sourceDir, entry), path.join(targetDir, entry));
  });
}
// --- Review Army: Migration Safety ---
describeIfSelected('Review Army: Migration Safety', ['review-army-migration-safety'], () => {
  let dir: string;

  // Fixture: a repo whose feature branch adds a column-dropping migration.
  beforeAll(() => {
    const repo = setupRepo('army-migration');
    dir = repo.dir;

    // Base commit
    fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'initial']);

    // Feature branch with unsafe migration
    repo.run('git', ['checkout', '-b', 'feature/drop-columns']);
    fs.mkdirSync(path.join(dir, 'db', 'migrate'), { recursive: true });
    const migrationContent = fs.readFileSync(
      path.join(ROOT, 'test', 'fixtures', 'review-army-migration.sql'), 'utf-8'
    );
    fs.writeFileSync(path.join(dir, 'db', 'migrate', '20260330_drop_columns.sql'), migrationContent);
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'drop email and phone columns']);

    copyReviewFiles(dir);
  });

  // Best-effort cleanup of the temp repo.
  afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });

  testConcurrentIfSelected('review-army-migration-safety', async () => {
    const result = await runSkillTest({
      prompt: `You are in a git repo on a feature branch with a database migration that drops columns.
Read review-SKILL.md for instructions. Also read review-checklist.md.
The specialist checklists are in review-specialists/ (testing.md, security.md, performance.md, data-migration.md, etc.).
Skip the preamble, lake intro, telemetry sections.
Run Step 4 (Critical pass) then Step 4.5 (Review Army — Specialist Dispatch).
The base branch is main. Run gstack-diff-scope style analysis on the changed files.
Since db/migrate/ files changed, the Data Migration specialist should activate.
For the specialist dispatch, instead of launching subagents, just read review-specialists/data-migration.md
and apply it yourself against the diff (git diff main...HEAD).
Write your findings to ${dir}/review-output.md`,
      workingDirectory: dir,
      maxTurns: 20,
      timeout: 180_000,
      testName: 'review-army-migration-safety',
      runId,
    });
    logCost('/review army migration', result);
    recordE2E(evalCollector, '/review army migration safety', 'Review Army', result);
    expect(result.exitReason).toBe('success');

    // Verify migration issues were caught. The prompt requires the findings
    // file, so its absence is a failure: previously the content check was
    // skipped when the file was missing and the test passed vacuously.
    const outputPath = path.join(dir, 'review-output.md');
    expect(fs.existsSync(outputPath)).toBe(true);
    const content = fs.readFileSync(outputPath, 'utf-8').toLowerCase();
    const hasMigrationFinding =
      content.includes('drop') ||
      content.includes('data loss') ||
      content.includes('reversib') ||
      content.includes('migration') ||
      content.includes('column');
    expect(hasMigrationFinding).toBe(true);
  }, 210_000);
});
// --- Review Army: N+1 Performance ---
describeIfSelected('Review Army: N+1 Performance', ['review-army-perf-n-plus-one'], () => {
  let dir: string;

  // Fixture: a repo whose feature branch adds a controller taken from the N+1 fixture file.
  beforeAll(() => {
    const repo = setupRepo('army-n-plus-one');
    dir = repo.dir;

    fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'initial']);

    repo.run('git', ['checkout', '-b', 'feature/add-posts-index']);
    const n1Content = fs.readFileSync(
      path.join(ROOT, 'test', 'fixtures', 'review-army-n-plus-one.rb'), 'utf-8'
    );
    fs.writeFileSync(path.join(dir, 'posts_controller.rb'), n1Content);
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'add posts controller']);

    copyReviewFiles(dir);
  });

  // Best-effort cleanup of the temp repo.
  afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });

  testConcurrentIfSelected('review-army-perf-n-plus-one', async () => {
    const result = await runSkillTest({
      prompt: `You are in a git repo on a feature branch with a Ruby controller that has N+1 queries.
Read review-SKILL.md for instructions. Also read review-checklist.md.
The specialist checklists are in review-specialists/ (testing.md, performance.md, etc.).
Skip the preamble, lake intro, telemetry sections.
Run Step 4 (Critical pass) then Step 4.5 (Review Army).
The base branch is main. This is a Ruby backend file, so Performance specialist should activate.
For the specialist dispatch, read review-specialists/performance.md and apply it against the diff.
Write your findings to ${dir}/review-output.md`,
      workingDirectory: dir,
      maxTurns: 20,
      timeout: 180_000,
      testName: 'review-army-perf-n-plus-one',
      runId,
    });
    logCost('/review army n+1', result);
    recordE2E(evalCollector, '/review army N+1 detection', 'Review Army', result);
    expect(result.exitReason).toBe('success');

    // The prompt requires the findings file, so its absence is a failure:
    // previously the content check was skipped when the file was missing and
    // the test passed vacuously.
    const outputPath = path.join(dir, 'review-output.md');
    expect(fs.existsSync(outputPath)).toBe(true);
    const content = fs.readFileSync(outputPath, 'utf-8').toLowerCase();
    const hasN1Finding =
      content.includes('n+1') ||
      content.includes('n + 1') ||
      content.includes('eager') ||
      content.includes('includes') ||
      content.includes('preload') ||
      content.includes('query') ||
      content.includes('loop');
    expect(hasN1Finding).toBe(true);
  }, 210_000);
});
// --- Review Army: Delivery Audit ---
describeIfSelected('Review Army: Delivery Audit', ['review-army-delivery-audit'], () => {
  let dir: string;

  // Fixture: a plan promising three features and a branch implementing only two of them.
  beforeAll(() => {
    const repo = setupRepo('army-delivery');
    dir = repo.dir;

    fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'initial']);

    repo.run('git', ['checkout', '-b', 'feature/three-features']);

    // Write a plan file promising 3 features
    fs.writeFileSync(path.join(dir, 'PLAN.md'), `# Feature Plan
## Implementation Items
1. Add user authentication with login/logout
2. Add user profile page with avatar upload
3. Add email notification system for new signups
## Test Items
- Test login flow
- Test profile page rendering
- Test email sending
`);
    repo.run('git', ['add', 'PLAN.md']);
    repo.run('git', ['commit', '-m', 'add plan']);

    // Implement only 2 of 3 features
    fs.writeFileSync(path.join(dir, 'auth.rb'), `class AuthController
def login
# authenticate user
session[:user_id] = user.id
end
def logout
session.delete(:user_id)
end
end
`);
    fs.writeFileSync(path.join(dir, 'profile.rb'), `class ProfileController
def show
@user = User.find(params[:id])
end
def update_avatar
@user.avatar.attach(params[:avatar])
end
end
`);
    // NOTE: email notification system is NOT implemented (intentionally missing)
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'implement auth and profile features']);

    copyReviewFiles(dir);
  });

  // Best-effort cleanup of the temp repo.
  afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });

  testConcurrentIfSelected('review-army-delivery-audit', async () => {
    const result = await runSkillTest({
      prompt: `You are in a git repo on branch feature/three-features.
There is a PLAN.md file that promises 3 features: auth, profile, and email notifications.
The diff (git diff main...HEAD) only implements 2 of them (auth and profile).
Read review-SKILL.md for the review workflow. Focus on the Plan Completion Audit section.
The plan file is at ./PLAN.md. Cross-reference it against the diff.
For each plan item, classify as DONE, PARTIAL, NOT DONE, or CHANGED.
The email notification system should be classified as NOT DONE.
Write your completion audit to ${dir}/review-output.md`,
      workingDirectory: dir,
      maxTurns: 15,
      timeout: 120_000,
      testName: 'review-army-delivery-audit',
      runId,
    });
    logCost('/review army delivery', result);
    recordE2E(evalCollector, '/review army delivery audit', 'Review Army', result);
    expect(result.exitReason).toBe('success');

    // The prompt requires the audit file, so its absence is a failure:
    // previously both checks below were skipped when the file was missing and
    // the test passed vacuously.
    const outputPath = path.join(dir, 'review-output.md');
    expect(fs.existsSync(outputPath)).toBe(true);
    const content = fs.readFileSync(outputPath, 'utf-8').toLowerCase();
    // Should identify email notifications as NOT DONE
    const hasNotDone =
      content.includes('not done') ||
      content.includes('not_done') ||
      content.includes('missing') ||
      content.includes('not implemented');
    const mentionsEmail =
      content.includes('email') ||
      content.includes('notification');
    expect(hasNotDone).toBe(true);
    expect(mentionsEmail).toBe(true);
  }, 150_000);
});
// --- Review Army: Quality Score ---
describeIfSelected('Review Army: Quality Score', ['review-army-quality-score'], () => {
  let dir: string;

  // Fixture: a repo whose feature branch adds a controller with deliberate issues.
  beforeAll(() => {
    const repo = setupRepo('army-quality');
    dir = repo.dir;

    fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'initial']);

    repo.run('git', ['checkout', '-b', 'feature/add-controller']);
    // Code with obvious issues for quality score computation
    fs.writeFileSync(path.join(dir, 'user_controller.rb'), `class UserController
def create
# SQL injection
User.where("name = '#{params[:name]}'")
# Magic number
if users.count > 42
raise "too many"
end
end
end
`);
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'add user controller']);

    copyReviewFiles(dir);
  });

  // Best-effort cleanup of the temp repo.
  afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });

  testConcurrentIfSelected('review-army-quality-score', async () => {
    const result = await runSkillTest({
      prompt: `You are in a git repo with a vulnerable user controller.
Read review-SKILL.md and review-checklist.md.
Skip preamble, lake intro, telemetry.
Run the Critical pass (Step 4) against the diff (git diff main...HEAD).
Then compute the PR Quality Score as described in the Review Army merge step:
quality_score = max(0, 10 - (critical_count * 2 + informational_count * 0.5))
Write your findings AND the computed quality score to ${dir}/review-output.md
Include the line: "PR Quality Score: X/10" where X is the computed score.`,
      workingDirectory: dir,
      maxTurns: 15,
      timeout: 120_000,
      testName: 'review-army-quality-score',
      runId,
    });
    logCost('/review army quality', result);
    recordE2E(evalCollector, '/review army quality score', 'Review Army', result);
    expect(result.exitReason).toBe('success');

    // The prompt requires the findings file, so its absence is a failure:
    // previously the score check was skipped when the file was missing and the
    // test passed vacuously.
    const outputPath = path.join(dir, 'review-output.md');
    expect(fs.existsSync(outputPath)).toBe(true);
    const content = fs.readFileSync(outputPath, 'utf-8');
    // Should contain a quality score: either the label or an "N/10" figure.
    const hasScore =
      content.toLowerCase().includes('quality score') ||
      /\d+\/10/.test(content);
    expect(hasScore).toBe(true);
  }, 150_000);
});
// --- Review Army: JSON Findings ---
describeIfSelected('Review Army: JSON Findings', ['review-army-json-findings'], () => {
  // Scratch repo for this suite; created in beforeAll, removed in afterAll.
  let dir: string;

  beforeAll(() => {
    const repo = setupRepo('army-json');
    dir = repo.dir;
    fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'initial']);
    repo.run('git', ['checkout', '-b', 'feature/vuln']);
    // Fixture: raw SQL built by interpolating a request parameter.
    fs.writeFileSync(path.join(dir, 'search.rb'), `class SearchController
def index
# SQL injection via string interpolation
results = ActiveRecord::Base.connection.execute(
"SELECT * FROM products WHERE name LIKE '%#{params[:q]}%'"
)
render json: results
end
end
`);
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'add search']);
    copyReviewFiles(dir);
  });

  afterAll(() => {
    // Best-effort cleanup; a failed delete must not fail the suite.
    try { fs.rmSync(dir, { recursive: true, force: true }); } catch {}
  });

  testConcurrentIfSelected('review-army-json-findings', async () => {
    const result = await runSkillTest({
      prompt: `You are reviewing a git diff with a SQL injection vulnerability.
Read review-specialists/security.md for the security checklist.
Apply the checklist against this diff (git diff main...HEAD).
Output your findings as JSON objects, one per line, following the schema:
{"severity":"CRITICAL","confidence":9,"path":"search.rb","line":4,"category":"injection","summary":"SQL injection via string interpolation","fix":"Use parameterized query","fingerprint":"search.rb:4:injection","specialist":"security"}
Write ONLY JSON findings (no preamble) to ${dir}/findings.json`,
      workingDirectory: dir,
      maxTurns: 12,
      timeout: 90_000,
      testName: 'review-army-json-findings',
      runId,
    });

    logCost('/review army json', result);
    recordE2E(evalCollector, '/review army JSON findings', 'Review Army', result);
    expect(result.exitReason).toBe('success');

    // The findings file is only inspected when the agent actually wrote it.
    const findingsPath = path.join(dir, 'findings.json');
    if (fs.existsSync(findingsPath)) {
      const content = fs.readFileSync(findingsPath, 'utf-8').trim();
      const lines = content.split('\n').filter(l => l.trim());

      // At least one finding
      expect(lines.length).toBeGreaterThanOrEqual(1);

      // Lines that are not JSON (e.g. markdown fences) are skipped, but at
      // least one line must parse. Without this flag a file containing no
      // valid JSON at all would pass without any schema check being made.
      let validatedOne = false;
      for (const line of lines) {
        let parsed: unknown;
        try { parsed = JSON.parse(line); } catch { continue; }

        // Required fields per schema
        for (const field of ['severity', 'confidence', 'path', 'category', 'summary', 'specialist']) {
          expect(parsed).toHaveProperty(field);
        }
        validatedOne = true;
        break; // One valid line is enough for the gate test
      }
      expect(validatedOne).toBe(true);
    }
  }, 120_000);
});
// --- Review Army: Red Team (periodic) ---
describeIfSelected('Review Army: Red Team', ['review-army-red-team'], () => {
  // Scratch repo for this suite; created in beforeAll, removed in afterAll.
  let dir: string;

  // Ruby source for a controller with 100 near-identical methods (502 lines),
  // which is above the 300-line size the prompt refers to.
  const buildLargeController = (): string => {
    const methodBlocks = Array.from({ length: 100 }, (_, i) => [
      ` def method_${i}`,
      ` data = params[:input_${i}]`,
      ` process(data)`,
      ' end',
      '',
    ]);
    return ['class LargeController', ...methodBlocks.flat(), 'end'].join('\n');
  };

  beforeAll(() => {
    const repo = setupRepo('army-redteam');
    dir = repo.dir;

    // Baseline commit, then a feature branch carrying the large diff.
    fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'initial']);

    repo.run('git', ['checkout', '-b', 'feature/large-change']);
    fs.writeFileSync(path.join(dir, 'large_controller.rb'), buildLargeController());
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'add large controller']);

    copyReviewFiles(dir);
  });

  afterAll(() => {
    // Best-effort cleanup; a failed delete must not fail the suite.
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {}
  });

  testConcurrentIfSelected('review-army-red-team', async () => {
    const result = await runSkillTest({
      prompt: `You are reviewing a large diff (300+ lines). Read review-SKILL.md.
Skip preamble, lake intro, telemetry.
The diff is large enough to activate the Red Team specialist.
Read review-specialists/red-team.md and apply it against the diff (git diff main...HEAD).
Focus on finding issues that other specialists might miss.
Write your red team findings to ${dir}/review-output.md
Start the file with "RED TEAM REVIEW" on the first line.`,
      workingDirectory: dir,
      maxTurns: 20,
      timeout: 180_000,
      testName: 'review-army-red-team',
      runId,
    });

    logCost('/review army red-team', result);
    recordE2E(evalCollector, '/review army red team', 'Review Army', result);
    expect(result.exitReason).toBe('success');

    // The report is only inspected when the agent actually wrote it.
    const reportPath = path.join(dir, 'review-output.md');
    if (!fs.existsSync(reportPath)) return;

    const report = fs.readFileSync(reportPath, 'utf-8').toLowerCase();
    expect(report).toMatch(/red team|adversarial/);
  }, 210_000);
});
// --- Review Army: Consensus (periodic) ---
describeIfSelected('Review Army: Consensus', ['review-army-consensus'], () => {
  // Scratch repo for this suite; created in beforeAll, removed in afterAll.
  let dir: string;

  // Login action that builds its lookup by interpolating request parameters.
  // The prompt expects this to be flagged from more than one review angle.
  const authFixture = `class AuthController
def login
user = User.find_by("email = '#{params[:email]}' AND password = '#{params[:password]}'")
if user
session[:user_id] = user.id
redirect_to root_path
else
flash[:error] = "Invalid credentials"
render :login
end
end
end
`;

  beforeAll(() => {
    const repo = setupRepo('army-consensus');
    dir = repo.dir;

    // Baseline commit, then a feature branch that adds the vulnerable controller.
    fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'initial']);

    repo.run('git', ['checkout', '-b', 'feature/vuln-auth']);
    fs.writeFileSync(path.join(dir, 'auth_controller.rb'), authFixture);
    repo.run('git', ['add', '.']);
    repo.run('git', ['commit', '-m', 'add auth controller']);

    copyReviewFiles(dir);
  });

  afterAll(() => {
    // Best-effort cleanup; a failed delete must not fail the suite.
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {}
  });

  testConcurrentIfSelected('review-army-consensus', async () => {
    const result = await runSkillTest({
      prompt: `You are reviewing a git diff with a SQL injection in an auth controller.
Read review-SKILL.md, review-checklist.md, and the specialist checklists in review-specialists/.
This vulnerability should be caught by BOTH the security specialist (injection vector)
AND the testing specialist (no test for auth bypass).
Run the review. In your output, if a finding is flagged by multiple perspectives,
mark it as "MULTI-SPECIALIST CONFIRMED" with the confirming categories.
Write findings to ${dir}/review-output.md`,
      workingDirectory: dir,
      maxTurns: 20,
      timeout: 180_000,
      testName: 'review-army-consensus',
      runId,
    });

    logCost('/review army consensus', result);
    recordE2E(evalCollector, '/review army consensus', 'Review Army', result);
    expect(result.exitReason).toBe('success');

    // The report is only inspected when the agent actually wrote it.
    const reportPath = path.join(dir, 'review-output.md');
    if (!fs.existsSync(reportPath)) return;

    // Any one of these keywords counts as having caught the SQL injection.
    const report = fs.readFileSync(reportPath, 'utf-8').toLowerCase();
    const hasSqlFinding = ['sql', 'injection', 'interpolat'].some(keyword => report.includes(keyword));
    expect(hasSqlFinding).toBe(true);
  }, 210_000);
});
// Finalize the eval collector; the hook completes when finalization settles.
afterAll(() => finalizeEvalCollector(evalCollector));
+268
View File
@@ -0,0 +1,268 @@
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { runSkillTest } from './helpers/session-runner';
import {
ROOT, runId, evalsEnabled,
describeIfSelected, testConcurrentIfSelected,
copyDirSync, logCost, recordE2E,
createEvalCollector, finalizeEvalCollector,
} from './helpers/e2e-helpers';
import { spawnSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
const evalCollector = createEvalCollector('e2e-session-intelligence');
// --- Session Intelligence E2E ---
// Tests the core contract: timeline events flow in, context recovery flows out,
// checkpoints round-trip.
describeIfSelected('Session Intelligence E2E', [
'timeline-event-flow', 'context-recovery-artifacts', 'checkpoint-save-resume',
], () => {
let workDir: string;
let gstackHome: string;
let slug: string;
beforeAll(() => {
  // Isolated workspace; the gstack home directory lives inside it, so removing
  // workDir removes everything the tests create.
  workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-session-intel-'));
  gstackHome = path.join(workDir, '.gstack-home');

  // Minimal git repo on `main` with a single commit.
  const git = (...args: string[]) =>
    spawnSync('git', args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
  git('init', '-b', 'main');
  git('config', 'user.email', 'test@test.com');
  git('config', 'user.name', 'Test');
  fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
  git('add', '.');
  git('commit', '-m', 'initial');

  // Copy the bin scripts into ./bin and mark them executable. Scripts that do
  // not exist under ROOT/bin are skipped without error.
  const binDir = path.join(workDir, 'bin');
  fs.mkdirSync(binDir, { recursive: true });
  const scriptNames = [
    'gstack-timeline-log', 'gstack-timeline-read', 'gstack-slug',
    'gstack-learnings-log', 'gstack-learnings-search',
  ];
  scriptNames
    .map(name => ({ from: path.join(ROOT, 'bin', name), to: path.join(binDir, name) }))
    .filter(({ from }) => fs.existsSync(from))
    .forEach(({ from, to }) => {
      fs.copyFileSync(from, to);
      fs.chmodSync(to, 0o755);
    });

  // Project slug: the workspace directory name with every character outside
  // [a-zA-Z0-9._-] removed (intended to mirror gstack-slug when no git remote exists).
  slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
});
afterAll(async () => {
  // Best-effort cleanup; a failed delete must not fail the suite.
  try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
  // Await finalization so the hook does not return while the collector is
  // still finishing.
  await finalizeEvalCollector(evalCollector);
});
// --- Test 1: Timeline event flow ---
// Round-trips two events through the copied scripts: gstack-timeline-log writes
// them and gstack-timeline-read reports them. No LLM session is started here.
testConcurrentIfSelected('timeline-event-flow', async () => {
  const projectDir = path.join(gstackHome, 'projects', slug);
  fs.mkdirSync(projectDir, { recursive: true });

  // Both scripts run from the workspace with GSTACK_HOME pointed at the sandbox.
  const binDir = path.join(workDir, 'bin');
  const spawnOpts = {
    cwd: workDir,
    env: { ...process.env, GSTACK_HOME: gstackHome },
    stdio: 'pipe' as const,
    timeout: 10000,
  };
  const logEvent = (event: Record<string, unknown>) =>
    spawnSync(path.join(binDir, 'gstack-timeline-log'), [JSON.stringify(event)], spawnOpts);

  logEvent({ skill: 'review', event: 'started', branch: 'main', session: 'test-1' });
  logEvent({
    skill: 'review', event: 'completed', branch: 'main',
    outcome: 'success', duration_s: 120, session: 'test-1',
  });

  const readProc = spawnSync(path.join(binDir, 'gstack-timeline-read'), ['--branch', 'main'], spawnOpts);
  const readOutput = readProc.stdout?.toString() || '';

  // The log file must exist and hold exactly the two events written above.
  const timelinePath = path.join(projectDir, 'timeline.jsonl');
  expect(fs.existsSync(timelinePath)).toBe(true);
  const lines = fs.readFileSync(timelinePath, 'utf-8').trim().split('\n');
  expect(lines.length).toBe(2);

  // Each line is a JSON object carrying the submitted fields plus a `ts` value.
  const startedEvent = JSON.parse(lines[0]);
  expect(startedEvent.skill).toBe('review');
  expect(startedEvent.event).toBe('started');
  expect(startedEvent.ts).toBeDefined();
  const completedEvent = JSON.parse(lines[1]);
  expect(completedEvent.event).toBe('completed');
  expect(completedEvent.outcome).toBe('success');

  // The reader's output must mention the skill that was logged.
  expect(readOutput).toContain('review');

  // No agent ran, so a zeroed-out result is recorded for the collector.
  const syntheticResult = {
    output: readOutput,
    exitReason: 'success',
    duration: 0,
    toolCalls: [],
    browseErrors: [],
    costEstimate: { inputChars: 0, outputChars: 0, estimatedTokens: 0, estimatedCost: 0, turnsUsed: 0 },
    transcript: [],
    model: 'direct',
    firstResponseMs: 0,
    maxInterTurnMs: 0,
  };
  recordE2E(evalCollector, 'timeline event flow', 'Session Intelligence E2E', syntheticResult, { passed: true });

  console.log(`Timeline flow: ${lines.length} events written, read output ${readOutput.length} chars`);
}, 30_000);
// --- Test 2: Context recovery with seeded artifacts ---
// Seeds a CEO plan and one completed timeline event, then has an agent run the
// /learn preamble and checks the transcript for signs the seeded data was seen.
testConcurrentIfSelected('context-recovery-artifacts', async () => {
  const projectDir = path.join(gstackHome, 'projects', slug);
  const plansDir = path.join(projectDir, 'ceo-plans');
  fs.mkdirSync(plansDir, { recursive: true });

  // Seeded CEO plan.
  fs.writeFileSync(
    path.join(plansDir, '2026-03-31-test-feature.md'),
    '---\nstatus: ACTIVE\n---\n# CEO Plan: Test Feature\nThis is a test plan.\n',
  );

  // Seeded timeline: a single completed /ship run on main.
  const priorEvent = {
    ts: new Date().toISOString(),
    skill: 'ship',
    event: 'completed',
    branch: 'main',
    outcome: 'success',
    duration_s: 60,
    session: 'prior-session',
  };
  fs.writeFileSync(path.join(projectDir, 'timeline.jsonl'), `${JSON.stringify(priorEvent)}\n`);

  // The /learn skill supplies the preamble the agent is asked to run.
  copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn'));

  const result = await runSkillTest({
    prompt: `Read the file learn/SKILL.md for instructions.
Run the context recovery check — the preamble should show recent artifacts.
IMPORTANT:
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
- Do NOT use AskUserQuestion.
- Just run the preamble bash block and report what you see.
- Look for "RECENT ARTIFACTS" and "LAST_SESSION" in the output.`,
    workingDirectory: workDir,
    maxTurns: 10,
    allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
    timeout: 120_000,
    testName: 'context-recovery-artifacts',
    runId,
  });
  logCost('context recovery', result);

  // Case-insensitive keyword probes over the agent's final output.
  const transcript = result.output.toLowerCase();
  const mentionsAny = (...needles: string[]) => needles.some(needle => transcript.includes(needle));
  const foundArtifacts = mentionsAny('recent artifacts', 'ceo-plans');
  const foundLastSession = mentionsAny('last_session', 'ship');
  const foundTimeline = mentionsAny('timeline', 'completed');

  // One hit out of three is enough; running out of turns is tolerated.
  const foundCount = [foundArtifacts, foundLastSession, foundTimeline].filter(Boolean).length;
  const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);

  recordE2E(evalCollector, 'context recovery', 'Session Intelligence E2E', result, {
    passed: exitOk && foundCount >= 1,
  });
  expect(exitOk).toBe(true);
  expect(foundCount).toBeGreaterThanOrEqual(1);

  console.log(`Context recovery: artifacts=${foundArtifacts}, lastSession=${foundLastSession}, timeline=${foundTimeline}`);
}, 180_000);
// --- Test 3: Checkpoint save ---
// Has an agent follow the "Save" section of checkpoint/SKILL.md and verifies a
// checkpoint markdown file with YAML frontmatter lands in the project's
// checkpoints directory. Only the save half is exercised; no resume step is run.
testConcurrentIfSelected('checkpoint-save-resume', async () => {
  const projectDir = path.join(gstackHome, 'projects', slug);
  fs.mkdirSync(path.join(projectDir, 'checkpoints'), { recursive: true });

  // Copy the /checkpoint skill
  copyDirSync(path.join(ROOT, 'checkpoint'), path.join(workDir, 'checkpoint'));

  // Add a staged change so /checkpoint has something to capture
  fs.writeFileSync(path.join(workDir, 'feature.ts'), 'export function newFeature() { return true; }\n');
  spawnSync('git', ['add', 'feature.ts'], { cwd: workDir, stdio: 'pipe', timeout: 5000 });

  // Extract the checkpoint save section from the skill template.
  // indexOf returns -1 when the heading is missing, and a negative start makes
  // slice() count from the END of the string. Clamp to 0 so a renamed heading
  // degrades to "start of file" rather than an empty or one-character prompt.
  const full = fs.readFileSync(path.join(ROOT, 'checkpoint', 'SKILL.md'), 'utf-8');
  const saveStart = Math.max(full.indexOf('## Save'), 0);
  const resumeStart = full.indexOf('## Resume');
  const saveSection = full.slice(saveStart, resumeStart > saveStart ? resumeStart : undefined);

  const result = await runSkillTest({
    prompt: `You are testing the /checkpoint skill. Follow these instructions to save a checkpoint.
${saveSection.slice(0, 2000)}
IMPORTANT:
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
- Save the checkpoint to ${projectDir}/checkpoints/ with a filename like "20260401-test-checkpoint.md".
- Include YAML frontmatter with status, branch, and timestamp.
- Include a summary of what's being worked on (you can see from git status).
- Do NOT use AskUserQuestion.`,
    workingDirectory: workDir,
    maxTurns: 10,
    allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
    timeout: 120_000,
    testName: 'checkpoint-save-resume',
    runId,
  });
  logCost('checkpoint save', result);

  // Check that a checkpoint file was created
  const checkpointDir = path.join(projectDir, 'checkpoints');
  const checkpointFiles = fs.existsSync(checkpointDir)
    ? fs.readdirSync(checkpointDir).filter(f => f.endsWith('.md'))
    : [];

  // Running out of turns is tolerated as long as the file was written.
  const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
  const checkpointCreated = checkpointFiles.length > 0;

  let checkpointContent = '';
  if (checkpointCreated) {
    checkpointContent = fs.readFileSync(path.join(checkpointDir, checkpointFiles[0]), 'utf-8');
  }

  // Verify checkpoint has expected structure. hasBranch is informational only:
  // it is logged below but not asserted.
  const hasYamlFrontmatter = checkpointContent.includes('---') && checkpointContent.includes('status:');
  const hasBranch = checkpointContent.includes('branch:') || checkpointContent.includes('main');

  recordE2E(evalCollector, 'checkpoint save-resume', 'Session Intelligence E2E', result, {
    passed: exitOk && checkpointCreated && hasYamlFrontmatter,
  });
  expect(exitOk).toBe(true);
  expect(checkpointCreated).toBe(true);
  expect(hasYamlFrontmatter).toBe(true);

  console.log(`Checkpoint: ${checkpointFiles.length} files created, YAML frontmatter: ${hasYamlFrontmatter}, branch: ${hasBranch}`);
}, 180_000);
});
+194 -2
View File
@@ -116,9 +116,10 @@ describeIfSelected('Sidebar URL accuracy E2E', ['sidebar-url-accuracy'], () => {
}
expect(lastEntry).not.toBeNull();
// Extension URL should be used, not the Playwright fallback
// Extension URL should be used, not the Playwright fallback.
// The pageUrl field carries the extension URL; the prompt itself
// contains only the system prompt + user message (URL is metadata).
expect(lastEntry.pageUrl).toBe(extensionUrl);
expect(lastEntry.prompt).toContain(extensionUrl);
expect(lastEntry.pageUrl).not.toBe('about:blank');
// Also test: chrome:// URL should be rejected, falling back to about:blank
@@ -149,6 +150,197 @@ describeIfSelected('Sidebar URL accuracy E2E', ['sidebar-url-accuracy'], () => {
}, 30_000);
});
// --- Sidebar CSS Interaction E2E (real Claude + real browser) ---
// Sends the sidebar agent to example.com, has it read the page title, and asks it to outline the <h1>.
// Exercises: navigation, text reading, CSS style injection.
describeIfSelected('Sidebar CSS interaction E2E', ['sidebar-css-interaction'], () => {
let serverProc: Subprocess | null = null;
let agentProc: Subprocess | null = null;
let serverPort: number = 0;
let authToken: string = '';
let tmpDir: string = '';
let stateFile: string = '';
let queueFile: string = '';
let serverLogFile: string = '';
let serverErrFile: string = '';
let agentLogFile: string = '';
let agentErrFile: string = '';
/**
 * Issues a request against the local browse server started for this suite.
 *
 * Sends `Content-Type: application/json` unless the caller overrides it, and
 * adds a bearer `Authorization` header when the caller supplied none and an
 * auth token is known.
 *
 * @param pathname - Path (including any query string) appended to the server origin.
 * @param opts - Extra fetch options; `headers` is read as a plain string record.
 * @returns The fetch response.
 */
async function api(pathname: string, opts: RequestInit = {}): Promise<Response> {
  const callerHeaders = (opts.headers as Record<string, string>) || {};
  const needsAuth = !callerHeaders['Authorization'] && authToken;
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    ...callerHeaders,
    ...(needsAuth ? { Authorization: `Bearer ${authToken}` } : {}),
  };
  const url = `http://127.0.0.1:${serverPort}${pathname}`;
  return fetch(url, { ...opts, headers });
}
// Boots the browse server and the sidebar agent as child processes and waits
// until the server has published its port/token and answers /health.
beforeAll(async () => {
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-e2e-css-'));
  stateFile = path.join(tmpDir, 'browse.json');
  queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
  fs.mkdirSync(path.dirname(queueFile), { recursive: true });

  // Start server WITH a real browser for CSS interaction
  const serverScript = path.resolve(ROOT, 'browse', 'src', 'server.ts');
  serverLogFile = path.join(tmpDir, 'server.log');
  serverErrFile = path.join(tmpDir, 'server.err');
  // Use 'pipe' stdio — closing file descriptors kills the child on macOS/bun
  serverProc = spawn(['bun', 'run', serverScript], {
    env: {
      ...process.env,
      BROWSE_STATE_FILE: stateFile,
      BROWSE_PORT: '0',
      SIDEBAR_QUEUE_PATH: queueFile,
      BROWSE_IDLE_TIMEOUT: '600000', // 10 min in ms — test takes ~3 min
    },
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  // Wait (up to 30s) for the state file to contain both port and token.
  // A partially written file fails JSON.parse and is simply retried.
  const deadline = Date.now() + 30000;
  while (Date.now() < deadline) {
    if (fs.existsSync(stateFile)) {
      try {
        const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
        if (state.port && state.token) {
          serverPort = state.port;
          authToken = state.token;
          break;
        }
      } catch {}
    }
    await new Promise(r => setTimeout(r, 200));
  }
  if (!serverPort) throw new Error('Server did not start in time');

  // Verify server is healthy before proceeding (up to 10s).
  const healthDeadline = Date.now() + 10000;
  let healthy = false;
  while (Date.now() < healthDeadline) {
    try {
      const resp = await fetch(`http://127.0.0.1:${serverPort}/health`);
      if (resp.ok) { healthy = true; break; }
    } catch {}
    await new Promise(r => setTimeout(r, 500));
  }
  if (!healthy) throw new Error('Server started but health check failed');

  // Start sidebar-agent with the real browse binary
  const agentScript = path.resolve(ROOT, 'browse', 'src', 'sidebar-agent.ts');
  const browseBin = path.resolve(ROOT, 'browse', 'dist', 'browse');
  agentLogFile = path.join(tmpDir, 'agent.log');
  agentErrFile = path.join(tmpDir, 'agent.err');
  // Use 'pipe' stdio — closing file descriptors kills the child on macOS/bun
  agentProc = spawn(['bun', 'run', agentScript], {
    env: {
      ...process.env,
      BROWSE_SERVER_PORT: String(serverPort),
      BROWSE_STATE_FILE: stateFile,
      SIDEBAR_QUEUE_PATH: queueFile,
      SIDEBAR_AGENT_TIMEOUT: '180000', // 3 min
      // Falls back to `echo` when the compiled browse binary has not been built.
      BROWSE_BIN: fs.existsSync(browseBin) ? browseBin : 'echo',
    },
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  // Fixed 2s pause after spawning the agent before any test starts.
  await new Promise(r => setTimeout(r, 2000));
  // Hook timeout must exceed the waits above (30s state file + 10s health + 2s
  // pause = 42s). With the previous 35s budget a slow but valid startup was
  // killed by the runner before the descriptive errors above could be thrown.
}, 45_000);
// Stops both child processes, flushes the eval collector, then removes the
// temp directory. Every step except finalization is best-effort.
afterAll(async () => {
  if (agentProc) { try { agentProc.kill(); } catch {} }
  if (serverProc) { try { serverProc.kill(); } catch {} }
  // Await finalization so the hook does not return while the collector is
  // still finishing.
  await finalizeEvalCollector(evalCollector);
  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
});
// Sends one sidebar command (visit example.com, read the title, outline the h1)
// and checks that the agent finished, used at least two tools, and touched the
// target page. Whether a style was applied is recorded but not asserted.
testIfSelected('sidebar-css-interaction', async () => {
  // Fresh session + clean queue
  try { await api('/sidebar-session/new', { method: 'POST' }); } catch {}
  fs.writeFileSync(queueFile, '');

  const startTime = Date.now();

  const resp = await api('/sidebar-command', {
    method: 'POST',
    body: JSON.stringify({
      message: 'Go to https://example.com. Read the page title. Add a 4px solid orange outline to the h1 element.',
      activeTabUrl: 'about:blank',
    }),
  });
  expect(resp.status).toBe(200);

  // Poll the chat log every 3s, for up to 4 minutes, until agent_done appears.
  const deadline = Date.now() + 240000;
  let entries: any[] = [];
  while (Date.now() < deadline) {
    try {
      const chatResp = await api('/sidebar-chat?after=0');
      const data = await chatResp.json();
      entries = data.entries || [];
      if (entries.some((e: any) => e.type === 'agent_done')) break;
    } catch (err: any) {
      // Server may be temporarily busy or restarting — retry on connection errors
      const isConnErr = err.code === 'ConnectionRefused' || err.message?.includes('ConnectionRefused') || err.message?.includes('Unable to connect');
      if (!isConnErr) throw err;
    }
    await new Promise(r => setTimeout(r, 3000));
  }

  const duration = Date.now() - startTime;
  const doneEntry = entries.find((e: any) => e.type === 'agent_done');

  // Dump debug info on failure
  if (!doneEntry) {
    console.log('ENTRIES:', JSON.stringify(entries.slice(-5), null, 2));
    console.log('SERVER exitCode:', serverProc?.exitCode, 'signalCode:', serverProc?.signalCode, 'killed:', serverProc?.killed);
    console.log('AGENT exitCode:', agentProc?.exitCode, 'signalCode:', agentProc?.signalCode, 'killed:', agentProc?.killed);
    // Distinguish a missing queue file from an empty one. Previously the
    // 'NO QUEUE' placeholder was itself non-empty and was reported as "has entries".
    const queueState = !fs.existsSync(queueFile)
      ? 'NO QUEUE'
      : fs.readFileSync(queueFile, 'utf-8').length > 0 ? 'has entries' : 'empty';
    console.log('QUEUE:', queueState);
  }

  // Agent should have completed
  expect(doneEntry).toBeDefined();

  // Agent should have run browse commands (look for tool_use entries)
  const toolUses = entries.filter((e: any) => e.type === 'tool_use');
  expect(toolUses.length).toBeGreaterThanOrEqual(2); // At minimum: goto + one more

  // Should have navigated to example.com (look for example.com in any entry text)
  const allEntryText = entries
    .map((e: any) => `${e.text || ''} ${e.input || ''} ${e.message || ''}`)
    .join(' ');
  const navigatedToTarget = allEntryText.includes('example.com') || allEntryText.includes('Example Domain');
  if (!navigatedToTarget) {
    console.log('ALL ENTRY TEXT (first 2000):', allEntryText.slice(0, 2000));
  }
  expect(navigatedToTarget).toBe(true);

  // Style application is detected by keyword in entry text. It feeds the
  // recorded `passed` flag below but is deliberately not an expect().
  const allText = entries.map((e: any) => e.text || '').join(' ');
  const appliedStyle = allText.includes('outline') || allText.includes('orange') || allText.includes('style');

  evalCollector?.addTest({
    name: 'sidebar-css-interaction', suite: 'Sidebar CSS interaction E2E', tier: 'e2e',
    passed: !!doneEntry && navigatedToTarget && appliedStyle,
    duration_ms: duration,
    cost_usd: 0,
    exit_reason: doneEntry ? 'success' : 'timeout',
  });
}, 300_000);
});
// --- Sidebar Navigate (real Claude, requires ANTHROPIC_API_KEY) ---
describeIfSelected('Sidebar navigate E2E', ['sidebar-navigate'], () => {
+15 -5
View File
@@ -467,8 +467,18 @@ describeIfSelected('Codex skill E2E', ['codex-review'], () => {
run('git', ['add', 'user_controller.rb']);
run('git', ['commit', '-m', 'add vulnerable controller']);
// Copy the codex skill file
fs.copyFileSync(path.join(ROOT, 'codex', 'SKILL.md'), path.join(codexDir, 'codex-SKILL.md'));
// Extract only the review-relevant section from codex SKILL.md (~120 lines vs 1075).
// Full SKILL.md is 55KB / ~14K tokens — takes 8 Read calls to consume, exhausting turns.
const full = fs.readFileSync(path.join(ROOT, 'codex', 'SKILL.md'), 'utf-8');
const startMarker = '# /codex — Multi-AI Second Opinion';
const endMarker = '## Plan File Review Report';
const start = full.indexOf(startMarker);
const end = full.indexOf(endMarker, start);
const reviewSection = full.slice(
start >= 0 ? start : 0,
end > start ? end : undefined,
);
fs.writeFileSync(path.join(codexDir, 'codex-SKILL.md'), reviewSection);
});
afterAll(() => {
@@ -485,11 +495,11 @@ describeIfSelected('Codex skill E2E', ['codex-review'], () => {
const result = await runSkillTest({
prompt: `You are in a git repo on branch feature/add-vuln with changes against main.
Read codex-SKILL.md for the /codex skill instructions.
Run /codex review to review the current diff against main.
Read codex-SKILL.md for the /codex review instructions (it's short — ~120 lines).
Follow those instructions to run codex review against the diff on this branch.
Write the full output (including the GATE verdict) to ${codexDir}/codex-output.md`,
workingDirectory: codexDir,
maxTurns: 15,
maxTurns: 25,
timeout: 300_000,
testName: 'codex-review',
runId,
+96 -56
View File
@@ -325,62 +325,6 @@ Report the exact output — either "READY: <path>" or "NEEDS_SETUP".`,
try { fs.rmSync(nonGitDir, { recursive: true, force: true }); } catch {}
}, 60_000);
// Contributor-mode E2E: feeds the agent the "## Contributor Mode" section of the
// generated SKILL.md, points it at a browse binary path that does not exist, and
// checks that it files a contributor report in the expected format.
testIfSelected('contributor-mode', async () => {
const contribDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-contrib-'));
const logsDir = path.join(contribDir, 'contributor-logs');
fs.mkdirSync(logsDir, { recursive: true });
// Extract contributor mode instructions from generated SKILL.md
// (from the "## Contributor Mode" heading up to the next "## " heading, or to
// the end of the file when no later heading is found).
const skillMd = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');
const contribStart = skillMd.indexOf('## Contributor Mode');
const contribEnd = skillMd.indexOf('\n## ', contribStart + 1);
const contribBlock = skillMd.slice(contribStart, contribEnd > 0 ? contribEnd : undefined);
const result = await runSkillTest({
prompt: `You are in contributor mode (_CONTRIB=true).
${contribBlock}
OVERRIDE: Write contributor logs to ${logsDir}/ instead of ~/.gstack/contributor-logs/
Now try this browse command (it will fail — there is no binary at this path):
/nonexistent/path/browse goto https://example.com
This is a gstack issue (the browse binary is missing/misconfigured).
File a contributor report about this issue. Then tell me what you filed.`,
workingDirectory: contribDir,
maxTurns: 8,
timeout: 60_000,
testName: 'contributor-mode',
runId,
});
logCost('contributor mode', result);
// Override passed: this test intentionally triggers a browse error (nonexistent binary)
// so browseErrors will be non-empty — that's expected, not a failure
recordE2E('contributor mode report', 'Skill E2E tests', result, {
passed: result.exitReason === 'success',
});
// Verify a contributor log was created with expected format
const logFiles = fs.readdirSync(logsDir).filter(f => f.endsWith('.md'));
expect(logFiles.length).toBeGreaterThan(0);
// Verify new reflection-based format
// (only the first .md file returned by readdirSync is inspected)
const logContent = fs.readFileSync(path.join(logsDir, logFiles[0]), 'utf-8');
expect(logContent).toContain('Hey gstack team');
expect(logContent).toContain('What I was trying to do');
expect(logContent).toContain('What happened instead');
expect(logContent).toMatch(/rating/i);
// Verify report has repro steps (agent may use "Steps to reproduce", "Repro Steps", etc.)
expect(logContent).toMatch(/repro|steps to reproduce|how to reproduce/i);
// Verify report has date/version footer (agent may format differently)
// NOTE(review): the pattern hard-codes the year 2026 and will stop matching afterwards.
expect(logContent).toMatch(/date.*2026|2026.*date/i);
// Clean up
// (not reached when an assertion above throws, so the temp dir is then left behind)
try { fs.rmSync(contribDir, { recursive: true, force: true }); } catch {}
}, 90_000);
testIfSelected('session-awareness', async () => {
const sessionDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-session-'));
@@ -3313,6 +3257,102 @@ Write your summary to ${benefitsDir}/benefits-summary.md`,
}, 180_000);
});
// --- Ship idempotency (#649) ---
describeIfSelected('Ship idempotency', ['ship-idempotency'], () => {
  // Scratch repo for this suite; created in beforeAll, removed in afterAll.
  let idempDir: string;

  // Runs one git command in `cwd`, capturing output, with a 5s timeout.
  const gitRun = (args: string[], cwd: string) =>
    spawnSync('git', args, { cwd, stdio: 'pipe', timeout: 5000 });

  beforeAll(() => {
    idempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-ship-idemp-'));

    // Create git repo with initial commit on main
    gitRun(['init', '-b', 'main'], idempDir);
    gitRun(['config', 'user.email', 'test@test.com'], idempDir);
    gitRun(['config', 'user.name', 'Test'], idempDir);
    fs.writeFileSync(path.join(idempDir, 'app.ts'), 'console.log("v1");\n');
    fs.writeFileSync(path.join(idempDir, 'VERSION'), '0.1.0.0\n');
    fs.writeFileSync(path.join(idempDir, 'CHANGELOG.md'), '# Changelog\n');
    gitRun(['add', '.'], idempDir);
    gitRun(['commit', '-m', 'initial'], idempDir);

    // Create feature branch with changes
    gitRun(['checkout', '-b', 'feat/my-feature'], idempDir);
    fs.writeFileSync(path.join(idempDir, 'app.ts'), 'console.log("v2");\n');
    gitRun(['add', 'app.ts'], idempDir);
    gitRun(['commit', '-m', 'feat: update to v2'], idempDir);

    // Simulate prior /ship run: bump VERSION and write CHANGELOG entry
    fs.writeFileSync(path.join(idempDir, 'VERSION'), '0.2.0.0\n');
    fs.writeFileSync(path.join(idempDir, 'CHANGELOG.md'),
      '# Changelog\n\n## [0.2.0.0] — 2026-03-30\n\n- Updated app to v2\n');
    gitRun(['add', 'VERSION', 'CHANGELOG.md'], idempDir);
    gitRun(['commit', '-m', 'chore: bump version to 0.2.0.0'], idempDir);

    // Extract just the idempotency-relevant sections from ship/SKILL.md
    const full = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
    const step4Start = full.indexOf('## Step 4: Version bump');
    const step7Start = full.indexOf('## Step 7: Push');
    // A missing start heading yields -1, which slice() would read as "from the
    // end of the string" and silently produce an empty instructions file.
    // Fail setup loudly instead so the markers get updated.
    if (step4Start < 0 || step7Start < 0) {
      throw new Error(
        'ship/SKILL.md is missing "## Step 4: Version bump" and/or "## Step 7: Push"; ' +
        'update the section markers used by the ship-idempotency test',
      );
    }
    // End markers keep their best-effort fallback: when no end is found after
    // the start, take the 500 characters following the start heading.
    const step4End = full.indexOf('\n---\n', step4Start);
    const step8End = full.indexOf('## Step 8.5');
    const extracted = [
      full.slice(step4Start, step4End > step4Start ? step4End : step4Start + 500),
      full.slice(step7Start, step8End > step7Start ? step8End : step7Start + 500),
    ].join('\n\n---\n\n');
    fs.writeFileSync(path.join(idempDir, 'ship-steps.md'), extracted);
  });

  afterAll(() => {
    // Best-effort cleanup; a failed delete must not fail the suite.
    try { fs.rmSync(idempDir, { recursive: true, force: true }); } catch {}
  });

  testIfSelected('ship-idempotency', async () => {
    const result = await runSkillTest({
      prompt: `You are in a git repo on branch feat/my-feature. A prior /ship run already:
- Bumped VERSION from 0.1.0.0 to 0.2.0.0
- Wrote a CHANGELOG entry for 0.2.0.0
- But the push/PR step failed
Read ship-steps.md for the idempotency check instructions from the ship workflow.
Run ONLY the idempotency checks described in Steps 4 and 7. Do NOT actually push or create PRs (there is no remote).
After running the checks, write a report to ${idempDir}/idemp-result.md containing:
- Whether VERSION was detected as ALREADY_BUMPED or not
- Whether the push was detected as ALREADY_PUSHED or PUSH_NEEDED
- The current VERSION value (should still be 0.2.0.0)
Do NOT modify VERSION or CHANGELOG. Only run the detection checks and report.`,
      workingDirectory: idempDir,
      maxTurns: 10,
      timeout: 60_000,
      testName: 'ship-idempotency',
      runId,
    });

    logCost('/ship idempotency', result);
    recordE2E('/ship idempotency guard', 'Ship idempotency', result);
    expect(result.exitReason).toBe('success');

    // Verify VERSION was NOT modified
    const version = fs.readFileSync(path.join(idempDir, 'VERSION'), 'utf-8').trim();
    expect(version).toBe('0.2.0.0');

    // Verify CHANGELOG was NOT duplicated
    const changelog = fs.readFileSync(path.join(idempDir, 'CHANGELOG.md'), 'utf-8');
    const versionEntries = (changelog.match(/## \[0\.2\.0\.0\]/g) || []).length;
    expect(versionEntries).toBe(1);

    // Check the result report if it was written
    const reportPath = path.join(idempDir, 'idemp-result.md');
    if (fs.existsSync(reportPath)) {
      const report = fs.readFileSync(reportPath, 'utf-8');
      expect(report.toLowerCase()).toContain('already_bumped');
    }
  }, 120_000);
});
// Module-level afterAll — finalize eval collector after all tests complete
afterAll(async () => {
if (evalCollector) {
+24 -5
View File
@@ -93,11 +93,30 @@ function installSkills(tmpDir: string) {
}
}
// Copy CLAUDE.md so Claude has project context for skill routing.
const claudeMdSrc = path.join(ROOT, 'CLAUDE.md');
if (fs.existsSync(claudeMdSrc)) {
fs.copyFileSync(claudeMdSrc, path.join(tmpDir, 'CLAUDE.md'));
}
// Write a CLAUDE.md with explicit routing instructions.
// The skill descriptions in system-reminder aren't strong enough to override
// Claude's default behavior of answering directly. A CLAUDE.md instruction
// puts routing rules in project context which Claude weighs more heavily.
fs.writeFileSync(path.join(tmpDir, 'CLAUDE.md'), `# Project Instructions
## Skill routing
When the user's request matches an available skill, ALWAYS invoke it using the Skill
tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.
The skill has specialized workflows that produce better results than ad-hoc answers.
Key routing rules:
- Product ideas, "is this worth building", brainstorming → invoke office-hours
- Bugs, errors, "why is this broken", 500 errors → invoke investigate
- Ship, deploy, push, create PR → invoke ship
- QA, test the site, find bugs → invoke qa
- Code review, check my diff → invoke review
- Update docs after shipping → invoke document-release
- Weekly retro → invoke retro
- Design system, brand → invoke design-consultation
- Visual audit, design polish → invoke design-review
- Architecture review → invoke plan-eng-review
`);
}
/** Init a git repo with config */
+80 -58
View File
@@ -721,45 +721,8 @@ describe('investigate skill structure', () => {
}
});
// --- Contributor mode preamble structure validation ---
describe('Contributor mode preamble structure', () => {
const skillsWithPreamble = [
'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
'qa-only/SKILL.md',
'setup-browser-cookies/SKILL.md',
'ship/SKILL.md', 'review/SKILL.md',
'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
'retro/SKILL.md',
'plan-design-review/SKILL.md',
'design-review/SKILL.md',
'design-consultation/SKILL.md',
'document-release/SKILL.md',
'canary/SKILL.md',
'benchmark/SKILL.md',
'land-and-deploy/SKILL.md',
'setup-deploy/SKILL.md',
];
for (const skill of skillsWithPreamble) {
test(`${skill} has 0-10 rating in contributor mode`, () => {
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
expect(content).toContain('0-10');
expect(content).toContain('Rating');
});
test(`${skill} has "what would make this a 10" field`, () => {
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
expect(content).toContain('What would make this a 10');
});
test(`${skill} uses periodic reflection (not per-command)`, () => {
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
expect(content).toContain('workflow step');
expect(content).not.toContain('After you use gstack-provided CLIs');
});
}
});
// Contributor mode was removed in v0.13.10.0 — replaced by operational self-improvement.
// Tests for contributor mode preamble structure are no longer applicable.
describe('Enum & Value Completeness in review checklist', () => {
const checklist = fs.readFileSync(path.join(ROOT, 'review', 'checklist.md'), 'utf-8');
@@ -1291,38 +1254,49 @@ describe('Codex skill', () => {
expect(content).toContain('mktemp');
});
test('adversarial review in /review auto-scales by diff size', () => {
test('adversarial review in /review always runs both passes', () => {
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
expect(content).toContain('Adversarial review (auto-scaled)');
// Diff size thresholds
expect(content).toContain('< 50');
expect(content).toContain('50199');
expect(content).toContain('200+');
// All three tiers present
expect(content).toContain('Small');
expect(content).toContain('Medium tier');
expect(content).toContain('Large tier');
expect(content).toContain('Adversarial review (always-on)');
// Always-on: both Claude and Codex adversarial
expect(content).toContain('Claude adversarial subagent (always runs)');
expect(content).toContain('Codex adversarial challenge (always runs when available)');
// Claude adversarial subagent dispatch
expect(content).toContain('Agent tool');
expect(content).toContain('FIXABLE');
expect(content).toContain('INVESTIGATE');
// Codex fallback logic
// Codex availability check
expect(content).toContain('CODEX_NOT_AVAILABLE');
expect(content).toContain('fall back to the Claude adversarial subagent');
// Review log uses new skill name
// OLD_CFG only gates Codex, not Claude
expect(content).toContain('skip Codex passes only');
// Review log
expect(content).toContain('adversarial-review');
expect(content).toContain('reasoning_effort="high"');
expect(content).toContain('ADVERSARIAL REVIEW SYNTHESIS');
// Large diff structured review still gated
expect(content).toContain('Codex structured review (large diffs only');
expect(content).toContain('200');
});
test('adversarial review in /ship auto-scales by diff size', () => {
test('adversarial review in /ship always runs both passes', () => {
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
expect(content).toContain('Adversarial review (auto-scaled)');
expect(content).toContain('< 50');
expect(content).toContain('200+');
expect(content).toContain('Adversarial review (always-on)');
expect(content).toContain('adversarial-review');
expect(content).toContain('reasoning_effort="high"');
expect(content).toContain('Investigate and fix');
expect(content).toContain('Claude adversarial subagent (always runs)');
});
test('scope drift detection in /review and /ship', () => {
  // Phrases produced by the shared scope-drift resolver; every one of them
  // must be present in both generated skill documents.
  const requiredPhrases = [
    'Scope Check:',
    'DRIFT DETECTED',
    'SCOPE CREEP',
    'MISSING REQUIREMENTS',
    'stated intent',
  ];
  const skillDocs = ['review', 'ship'].map((skill) =>
    fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8'),
  );
  for (const doc of skillDocs) {
    for (const phrase of requiredPhrases) {
      expect(doc).toContain(phrase);
    }
  }
});
test('codex-host ship/review do NOT contain adversarial review step', () => {
@@ -1395,13 +1369,13 @@ describe('Skill trigger phrases', () => {
];
for (const skill of SKILLS_REQUIRING_PROACTIVE) {
test(`${skill}/SKILL.md has "Proactively suggest" phrase`, () => {
test(`${skill}/SKILL.md has proactive routing phrase`, () => {
const skillPath = path.join(ROOT, skill, 'SKILL.md');
if (!fs.existsSync(skillPath)) return;
const content = fs.readFileSync(skillPath, 'utf-8');
const frontmatterEnd = content.indexOf('---', 4);
const frontmatter = content.slice(0, frontmatterEnd);
expect(frontmatter).toMatch(/Proactively suggest/i);
expect(frontmatter).toMatch(/Proactively (suggest|invoke)/i);
});
}
});
@@ -1533,3 +1507,51 @@ describe('Test failure triage in ship skill', () => {
expect(content).toContain('In-branch test failures');
});
});
// Repository hygiene checks: both tests shell out to git and expect the
// pipeline to print nothing, i.e. no offending tracked files.
describe('no compiled binaries in git', () => {
test('git tracks no Mach-O or ELF binaries', () => {
// Pipeline: list tracked files NUL-delimited, ask `file` for each MIME type,
// keep only lines whose type matches a native-binary MIME. The trailing
// `|| true` keeps the exit status at 0 when grep matches nothing, so
// execSync does not throw on the clean case.
const result = require('child_process').execSync(
'git ls-files -z | xargs -0 file --mime-type 2>/dev/null | grep -E "application/(x-mach-binary|x-executable|x-pie-executable|x-sharedlib)" || true',
{ cwd: ROOT, encoding: 'utf-8' }
).trim();
// Each remaining line is split on ':' and the first field is kept as the path.
const files = result ? result.split('\n').map((l: string) => l.split(':')[0].trim()) : [];
expect(files).toEqual([]);
});
test('git tracks no files larger than 2MB', () => {
// For every tracked file, measure its byte size with `wc -c` and echo
// "path:size" when it exceeds 2097152 bytes (2 * 1024 * 1024).
// NOTE(review): the file name is substituted into the `sh -c` script text by
// xargs -I{}, so names containing quotes or `$` may not be measured correctly.
const result = require('child_process').execSync(
'git ls-files -z | xargs -0 -I{} sh -c \'size=$(wc -c < "{}" 2>/dev/null | tr -d " "); [ "$size" -gt 2097152 ] 2>/dev/null && echo "{}:${size}"\' || true',
{ cwd: ROOT, encoding: 'utf-8' }
).trim();
const files = result ? result.split('\n').filter(Boolean) : [];
expect(files).toEqual([]);
});
});
// Static source checks for the sidebar agent (#584). Each test reads a source
// file as text and inspects it; nothing is executed.
describe('sidebar agent (#584)', () => {
// #584 — Sidebar Write: sidebar-agent.ts allowedTools includes Write
test('sidebar-agent.ts allowedTools includes Write', () => {
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
// Find the allowedTools line in the askClaude function
// (the regex has no `g` flag, so only the first `--allowedTools` occurrence is inspected)
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
expect(match).not.toBeNull();
expect(match![1]).toContain('Write');
});
// #584 — Server Write: server.ts allowedTools excludes Write (agent is read-only + Bash)
test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
// Find the sidebar allowedTools in the headed-mode path
// (first `--allowedTools` occurrence only, as above)
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
expect(match).not.toBeNull();
expect(match![1]).toContain('Bash');
expect(match![1]).not.toContain('Write');
});
// #584 — Sidebar stderr: stderr handler is not empty
test('sidebar-agent.ts stderr handler is not empty', () => {
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
// The stderr handler should NOT be an empty arrow function
expect(content).not.toContain("proc.stderr.on('data', () => {})");
});
});
+339
View File
@@ -0,0 +1,339 @@
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { execSync } from 'child_process';
const ROOT = path.resolve(import.meta.dir, '..');
const SETTINGS_HOOK = path.join(ROOT, 'bin', 'gstack-settings-hook');
const SESSION_UPDATE = path.join(ROOT, 'bin', 'gstack-session-update');
const TEAM_INIT = path.join(ROOT, 'bin', 'gstack-team-init');
/**
 * Creates a new, uniquely named scratch directory under the OS temp dir.
 *
 * @returns Path of the created directory (prefix `gstack-team-test-`).
 */
function mkTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'gstack-team-test-');
  const created = fs.mkdtempSync(prefix);
  return created;
}
/**
 * Runs a shell command synchronously and reports the outcome without throwing.
 *
 * @param cmd  Command line handed to `execSync`.
 * @param opts Optional working directory and extra environment variables;
 *             `opts.env` entries are layered on top of `process.env`.
 * @returns On success: captured stdout, an empty stderr and exit code 0.
 *          On failure: whatever stdout/stderr the thrown error carries (empty
 *          string when absent) and its `status`, defaulting to 1 when the
 *          error has no status.
 */
function run(cmd: string, opts: { cwd?: string; env?: Record<string, string> } = {}): { stdout: string; stderr: string; exitCode: number } {
  const mergedEnv = { ...process.env, ...opts.env };
  try {
    const captured = execSync(cmd, {
      cwd: opts.cwd,
      env: mergedEnv,
      encoding: 'utf-8',
      timeout: 10000,
    });
    return { stdout: captured, stderr: '', exitCode: 0 };
  } catch (e: any) {
    const failedStdout = e.stdout || '';
    const failedStderr = e.stderr || '';
    const code = e.status ?? 1;
    return { stdout: failedStdout, stderr: failedStderr, exitCode: code };
  }
}
// Exercises bin/gstack-settings-hook (add / remove) against a settings.json
// that lives in a per-test temp directory, selected via GSTACK_SETTINGS_FILE.
describe('gstack-settings-hook', () => {
  const HOOK_CMD = '/path/to/gstack-session-update';
  let tmpDir: string;
  let settingsFile: string;

  /** Runs `gstack-settings-hook <action> HOOK_CMD` against this test's settings file. */
  const hook = (action: 'add' | 'remove') =>
    run(`${SETTINGS_HOOK} ${action} ${HOOK_CMD}`, {
      env: { GSTACK_SETTINGS_FILE: settingsFile },
    });

  /** Parses the current settings file from disk. */
  const loadSettings = () => JSON.parse(fs.readFileSync(settingsFile, 'utf-8'));

  /** Seeds the settings file with pretty-printed JSON. */
  const seedSettings = (value: unknown) =>
    fs.writeFileSync(settingsFile, JSON.stringify(value, null, 2));

  beforeEach(() => {
    tmpDir = mkTmpDir();
    settingsFile = path.join(tmpDir, 'settings.json');
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  test('add creates settings.json if missing', () => {
    const outcome = hook('add');
    expect(outcome.exitCode).toBe(0);

    const { hooks } = loadSettings();
    expect(hooks.SessionStart).toHaveLength(1);
    expect(hooks.SessionStart[0].hooks[0].command).toBe('/path/to/gstack-session-update');
  });

  test('add preserves existing settings', () => {
    seedSettings({ effortLevel: 'high', permissions: { defaultMode: 'auto' } });

    const outcome = hook('add');
    expect(outcome.exitCode).toBe(0);

    const saved = loadSettings();
    expect(saved.effortLevel).toBe('high');
    expect(saved.permissions.defaultMode).toBe('auto');
    expect(saved.hooks.SessionStart).toHaveLength(1);
  });

  test('add deduplicates (running twice does not double-add)', () => {
    hook('add');
    hook('add');

    expect(loadSettings().hooks.SessionStart).toHaveLength(1);
  });

  test('remove removes the hook', () => {
    hook('add');

    const outcome = hook('remove');
    expect(outcome.exitCode).toBe(0);

    expect(loadSettings().hooks).toBeUndefined();
  });

  test('remove is safe when settings.json does not exist', () => {
    const outcome = hook('remove');
    expect(outcome.exitCode).toBe(0);
  });

  test('remove preserves other hooks', () => {
    seedSettings({
      hooks: {
        SessionStart: [
          { hooks: [{ type: 'command', command: '/path/to/gstack-session-update' }] },
          { hooks: [{ type: 'command', command: '/other/hook' }] },
        ],
      },
    });

    hook('remove');

    const remaining = loadSettings().hooks.SessionStart;
    expect(remaining).toHaveLength(1);
    expect(remaining[0].hooks[0].command).toBe('/other/hook');
  });

  test('atomic write (no partial file on success)', () => {
    hook('add');

    // .tmp file should not exist after successful write
    expect(fs.existsSync(settingsFile + '.tmp')).toBe(false);
    // File should be valid JSON
    expect(() => loadSettings()).not.toThrow();
  });
});
// Exercises bin/gstack-session-update against a fake gstack install
// (GSTACK_DIR) and state directory (GSTACK_STATE_DIR) in a temp dir. Every
// scenario asserts that the script exits 0.
describe('gstack-session-update', () => {
  let tmpDir: string;
  let gstackDir: string;
  let stateDir: string;

  beforeEach(() => {
    tmpDir = mkTmpDir();
    gstackDir = path.join(tmpDir, 'gstack');
    stateDir = path.join(tmpDir, 'state');
    fs.mkdirSync(gstackDir, { recursive: true });
    fs.mkdirSync(stateDir, { recursive: true });

    // Init a git repo to pass the .git guard
    execSync('git init', { cwd: gstackDir });
    // Supply identity (and disable signing) on the command line so the commit
    // does not depend on the machine's global git config. Without it a clean
    // CI runner aborts here with "Author identity unknown".
    execSync(
      'git -c user.email=test@test.com -c user.name=Test -c commit.gpgsign=false commit --allow-empty -m "init"',
      { cwd: gstackDir },
    );
    fs.writeFileSync(path.join(gstackDir, 'VERSION'), '0.1.0');

    // Create a minimal gstack-config that returns auto_upgrade=true
    const binDir = path.join(gstackDir, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    fs.writeFileSync(path.join(binDir, 'gstack-config'), '#!/bin/bash\necho "true"');
    fs.chmodSync(path.join(binDir, 'gstack-config'), 0o755);
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  test('exits 0 when .git is missing', () => {
    fs.rmSync(path.join(gstackDir, '.git'), { recursive: true });
    const result = run(SESSION_UPDATE, {
      env: { GSTACK_DIR: gstackDir, GSTACK_STATE_DIR: stateDir },
    });
    expect(result.exitCode).toBe(0);
  });

  test('exits 0 when auto_upgrade is not true', () => {
    // Override gstack-config to return false (overwriting keeps the 0o755 mode
    // set in beforeEach, so the stub stays executable)
    fs.writeFileSync(path.join(gstackDir, 'bin', 'gstack-config'), '#!/bin/bash\necho "false"');
    const result = run(SESSION_UPDATE, {
      env: { GSTACK_DIR: gstackDir, GSTACK_STATE_DIR: stateDir },
    });
    expect(result.exitCode).toBe(0);
  });

  test('throttle: skips when checked recently', () => {
    // Write a recent throttle timestamp (epoch seconds, "now")
    const throttleFile = path.join(stateDir, '.last-session-update');
    fs.writeFileSync(throttleFile, String(Math.floor(Date.now() / 1000)));
    const result = run(SESSION_UPDATE, {
      env: { GSTACK_DIR: gstackDir, GSTACK_STATE_DIR: stateDir },
    });
    expect(result.exitCode).toBe(0);
    // No log file should be created (throttled before forking)
    // NOTE(review): only the exit code is asserted; the absence of a log file
    // is not actually checked here.
  });

  test('always exits 0 (non-fatal)', () => {
    // Even with a broken setup, should exit 0
    const result = run(SESSION_UPDATE, {
      env: { GSTACK_DIR: '/nonexistent/path', GSTACK_STATE_DIR: stateDir },
    });
    expect(result.exitCode).toBe(0);
  });
});
// Exercises bin/gstack-team-init inside a scratch git repository: argument
// validation, CLAUDE.md generation for both modes, the "required" enforcement
// hook and project settings, idempotency, and vendored-copy removal.
describe('gstack-team-init', () => {
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = mkTmpDir();
    execSync('git init', { cwd: tmpDir });
    // Repo-local identity (and no signing) so this commit AND the commits made
    // inside individual tests work on machines without global git config.
    // Without it a clean CI runner aborts with "Author identity unknown".
    execSync('git config user.email test@test.com', { cwd: tmpDir });
    execSync('git config user.name Test', { cwd: tmpDir });
    execSync('git config commit.gpgsign false', { cwd: tmpDir });
    execSync('git commit --allow-empty -m "init"', { cwd: tmpDir });
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  test('errors without a mode argument', () => {
    const result = run(TEAM_INIT, { cwd: tmpDir });
    expect(result.exitCode).not.toBe(0);
    expect(result.stderr).toContain('Usage');
  });

  test('errors outside a git repo', () => {
    const nonGitDir = mkTmpDir();
    try {
      const result = run(`${TEAM_INIT} optional`, { cwd: nonGitDir });
      expect(result.exitCode).not.toBe(0);
      expect(result.stderr).toContain('not in a git repository');
    } finally {
      // Clean up even when an assertion above fails
      fs.rmSync(nonGitDir, { recursive: true, force: true });
    }
  });

  test('optional: creates CLAUDE.md with recommended section', () => {
    const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
    expect(result.exitCode).toBe(0);
    const claude = fs.readFileSync(path.join(tmpDir, 'CLAUDE.md'), 'utf-8');
    expect(claude).toContain('## gstack (recommended)');
    expect(claude).toContain('./setup --team');
  });

  test('required: creates CLAUDE.md with required section', () => {
    const result = run(`${TEAM_INIT} required`, { cwd: tmpDir });
    expect(result.exitCode).toBe(0);
    const claude = fs.readFileSync(path.join(tmpDir, 'CLAUDE.md'), 'utf-8');
    expect(claude).toContain('## gstack (REQUIRED');
    expect(claude).toContain('GSTACK_MISSING');
  });

  test('required: creates enforcement hook', () => {
    run(`${TEAM_INIT} required`, { cwd: tmpDir });
    const hookPath = path.join(tmpDir, '.claude', 'hooks', 'check-gstack.sh');
    expect(fs.existsSync(hookPath)).toBe(true);
    const hook = fs.readFileSync(hookPath, 'utf-8');
    expect(hook).toContain('BLOCKED: gstack is not installed');
    // Should be executable (any of the user/group/other execute bits set)
    const stat = fs.statSync(hookPath);
    expect(stat.mode & 0o111).toBeGreaterThan(0);
  });

  test('required: creates project settings.json with PreToolUse hook', () => {
    run(`${TEAM_INIT} required`, { cwd: tmpDir });
    const settingsPath = path.join(tmpDir, '.claude', 'settings.json');
    expect(fs.existsSync(settingsPath)).toBe(true);
    const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf-8'));
    expect(settings.hooks.PreToolUse).toHaveLength(1);
    expect(settings.hooks.PreToolUse[0].matcher).toBe('Skill');
    expect(settings.hooks.PreToolUse[0].hooks[0].command).toContain('check-gstack');
  });

  test('idempotent: running twice does not duplicate CLAUDE.md section', () => {
    run(`${TEAM_INIT} optional`, { cwd: tmpDir });
    run(`${TEAM_INIT} optional`, { cwd: tmpDir });
    const claude = fs.readFileSync(path.join(tmpDir, 'CLAUDE.md'), 'utf-8');
    const matches = claude.match(/## gstack/g);
    expect(matches).toHaveLength(1);
  });

  test('removes vendored copy when present', () => {
    // Create a fake vendored gstack with VERSION file
    const vendoredDir = path.join(tmpDir, '.claude', 'skills', 'gstack');
    fs.mkdirSync(vendoredDir, { recursive: true });
    fs.writeFileSync(path.join(vendoredDir, 'VERSION'), '0.14.0.0');
    fs.writeFileSync(path.join(vendoredDir, 'README.md'), 'vendored');
    // Track it in git
    execSync('git add .claude/skills/gstack/', { cwd: tmpDir });
    execSync('git commit -m "add vendored gstack"', { cwd: tmpDir });

    const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain('Found vendored gstack copy');
    expect(result.stdout).toContain('Removed vendored copy');
    // Vendored dir should be gone
    expect(fs.existsSync(vendoredDir)).toBe(false);
    // .gitignore should have the entry
    const gitignore = fs.readFileSync(path.join(tmpDir, '.gitignore'), 'utf-8');
    expect(gitignore).toContain('.claude/skills/gstack/');
  });

  test('skips when no vendored copy present', () => {
    const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
    expect(result.exitCode).toBe(0);
    expect(result.stdout).not.toContain('Found vendored gstack copy');
  });

  test('skips when .claude/skills/gstack is a symlink', () => {
    // Create a symlink (not a real vendored copy)
    const skillsDir = path.join(tmpDir, '.claude', 'skills');
    fs.mkdirSync(skillsDir, { recursive: true });
    const targetDir = mkTmpDir();
    try {
      fs.writeFileSync(path.join(targetDir, 'VERSION'), '0.14.0.0');
      fs.symlinkSync(targetDir, path.join(skillsDir, 'gstack'));

      const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
      expect(result.exitCode).toBe(0);
      expect(result.stdout).not.toContain('Found vendored gstack copy');
      // Symlink should still exist
      expect(fs.lstatSync(path.join(skillsDir, 'gstack')).isSymbolicLink()).toBe(true);
    } finally {
      // The symlink target lives outside tmpDir, so afterEach will not remove it
      fs.rmSync(targetDir, { recursive: true, force: true });
    }
  });

  test('does not duplicate .gitignore entry on re-run', () => {
    // Create vendored copy
    const vendoredDir = path.join(tmpDir, '.claude', 'skills', 'gstack');
    fs.mkdirSync(vendoredDir, { recursive: true });
    fs.writeFileSync(path.join(vendoredDir, 'VERSION'), '0.14.0.0');
    execSync('git add .claude/skills/gstack/', { cwd: tmpDir });
    execSync('git commit -m "add vendored"', { cwd: tmpDir });

    run(`${TEAM_INIT} optional`, { cwd: tmpDir });
    // Re-create vendored dir to simulate re-run scenario
    fs.mkdirSync(vendoredDir, { recursive: true });
    fs.writeFileSync(path.join(vendoredDir, 'VERSION'), '0.14.0.0');
    run(`${TEAM_INIT} optional`, { cwd: tmpDir });

    const gitignore = fs.readFileSync(path.join(tmpDir, '.gitignore'), 'utf-8');
    const matches = gitignore.match(/\.claude\/skills\/gstack\//g);
    expect(matches).toHaveLength(1);
  });
});
// Smoke checks for the repository's `setup` script flags, run from ROOT.
describe('setup --team / --no-team / -q', () => {
  const setupScript = path.join(ROOT, 'setup');

  test('setup -q produces no stdout', () => {
    const { stdout } = run(`${setupScript} -q`, { cwd: ROOT });
    // -q should suppress informational output (may still have some output from build)
    // The key test is that the "Skill naming:" prompt and "gstack ready" messages are suppressed
    for (const suppressed of ['Skill naming:', 'gstack ready']) {
      expect(stdout).not.toContain(suppressed);
    }
  });

  test('setup --local prints deprecation warning', () => {
    // stderr capture: run via bash redirect so we can capture stderr
    const { stdout } = run(`bash -c '${setupScript} --local -q 2>&1'`, { cwd: ROOT });
    expect(stdout).toContain('deprecated');
  });
});
+22
View File
@@ -396,3 +396,25 @@ describe('gstack-community-dashboard', () => {
expect(output).not.toContain('Supabase not configured');
});
});
// Static check (#467): every line of scripts/resolvers/preamble.ts that appends
// to skill-usage.jsonl must have a `_TEL` ... `off` line within the five
// lines directly above it.
describe('preamble telemetry gating (#467)', () => {
  test('preamble source does not write JSONL unconditionally', () => {
    const sourceLines = fs
      .readFileSync(path.join(ROOT, 'scripts', 'resolvers', 'preamble.ts'), 'utf-8')
      .split('\n');

    const isJsonlAppend = (text: string) => text.includes('skill-usage.jsonl') && text.includes('>>');
    const isTelemetryGate = (text: string) => text.includes('_TEL') && text.includes('off');

    sourceLines.forEach((text, idx) => {
      if (!isJsonlAppend(text)) return;
      // Each JSONL write must be inside a _TEL conditional (within 5 lines above)
      const preceding = sourceLines.slice(Math.max(0, idx - 5), idx);
      if (!preceding.some(isTelemetryGate)) {
        throw new Error(`Unconditional JSONL write at preamble.ts line ${idx + 1}: ${text.trim()}`);
      }
    });
  });
});
+154
View File
@@ -0,0 +1,154 @@
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
const ROOT = path.resolve(import.meta.dir, '..');
const BIN = path.join(ROOT, 'bin');
let tmpDir: string;
let slugDir: string;
/**
 * Invokes `gstack-timeline-log` with `input` as its single argument, using the
 * per-test GSTACK_HOME.
 *
 * @param input Payload passed to the script; embedded single quotes are
 *              escaped so the shell receives it as one argument.
 * @param opts  With `expectFail`, a failing invocation is reported instead of
 *              rethrown.
 * @returns Trimmed stdout and exit code 0 on success. On an expected failure,
 *          `stdout` carries the error's stderr text and `exitCode` its status
 *          (1 when the status is missing or 0).
 * @throws The original execSync error when the command fails and `expectFail`
 *         is not set.
 */
function runLog(input: string, opts: { expectFail?: boolean } = {}): { stdout: string; exitCode: number } {
  const quotedInput = `'${input.replace(/'/g, "'\\''")}'`;
  const command = `${BIN}/gstack-timeline-log ${quotedInput}`;
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };

  try {
    return { stdout: execSync(command, execOpts).trim(), exitCode: 0 };
  } catch (e: any) {
    if (!opts.expectFail) {
      throw e;
    }
    return { stdout: e.stderr?.toString() || '', exitCode: e.status || 1 };
  }
}
/**
 * Invokes `gstack-timeline-read` with the given raw argument string, using the
 * per-test GSTACK_HOME.
 *
 * @param args Arguments appended verbatim to the command line.
 * @returns Trimmed stdout, or an empty string when the command fails.
 */
function runRead(args: string = ''): string {
  const command = `${BIN}/gstack-timeline-read ${args}`;
  const execOpts: ExecSyncOptionsWithStringEncoding = {
    cwd: ROOT,
    env: { ...process.env, GSTACK_HOME: tmpDir },
    encoding: 'utf-8',
    timeout: 15000,
  };

  let output = '';
  try {
    output = execSync(command, execOpts).trim();
  } catch {
    // Any failure is reported as empty output
    output = '';
  }
  return output;
}
// File-level hooks: every test gets a fresh temp directory (passed to the
// scripts as GSTACK_HOME by runLog/runRead) with an empty `projects/`
// directory inside it.
beforeEach(() => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-timeline-'));
slugDir = path.join(tmpDir, 'projects');
fs.mkdirSync(slugDir, { recursive: true });
});
// Remove the whole temp tree after each test.
afterEach(() => {
fs.rmSync(tmpDir, { recursive: true, force: true });
});
/**
 * Looks for `timeline.jsonl` inside the first entry of the `projects/`
 * directory.
 *
 * @returns The file's path, or `null` when `projects/` is empty or the first
 *          entry has no timeline file.
 */
function findTimelineFile(): string | null {
  const [firstProject] = fs.readdirSync(slugDir);
  if (firstProject === undefined) {
    return null;
  }
  const candidate = path.join(slugDir, firstProject, 'timeline.jsonl');
  if (!fs.existsSync(candidate)) {
    return null;
  }
  return candidate;
}
// Exercises bin/gstack-timeline-log: accepted events are appended to
// timeline.jsonl; rejected input still exits 0 and writes nothing.
describe('gstack-timeline-log', () => {
  /** Asserts a timeline file exists and returns its (single) parsed JSON entry. */
  const readLoggedEntry = () => {
    const file = findTimelineFile();
    expect(file).not.toBeNull();
    return JSON.parse(fs.readFileSync(file!, 'utf-8').trim());
  };

  /** Asserts that no timeline file was created. */
  const expectNothingLogged = () => {
    expect(findTimelineFile()).toBeNull();
  };

  test('accepts valid JSON and appends to timeline.jsonl', () => {
    const outcome = runLog('{"skill":"review","event":"started","branch":"main"}');
    expect(outcome.exitCode).toBe(0);

    const entry = readLoggedEntry();
    expect(entry.skill).toBe('review');
    expect(entry.event).toBe('started');
    expect(entry.branch).toBe('main');
  });

  test('rejects invalid JSON with exit 0 (non-blocking)', () => {
    const outcome = runLog('not json at all');
    expect(outcome.exitCode).toBe(0);
    // No file should be created
    expectNothingLogged();
  });

  test('injects timestamp when ts field is missing', () => {
    runLog('{"skill":"review","event":"started","branch":"main"}');

    const entry = readLoggedEntry();
    expect(entry.ts).toBeDefined();
    expect(new Date(entry.ts).getTime()).toBeGreaterThan(0);
  });

  test('preserves timestamp when ts field is present', () => {
    runLog('{"skill":"review","event":"completed","branch":"main","ts":"2025-06-15T10:00:00Z"}');

    const entry = readLoggedEntry();
    expect(entry.ts).toBe('2025-06-15T10:00:00Z');
  });

  test('validates required fields (skill, event) - exits 0 if missing skill', () => {
    const outcome = runLog('{"event":"started","branch":"main"}');
    expect(outcome.exitCode).toBe(0);
    expectNothingLogged();
  });

  test('validates required fields (skill, event) - exits 0 if missing event', () => {
    const outcome = runLog('{"skill":"review","branch":"main"}');
    expect(outcome.exitCode).toBe(0);
    expectNothingLogged();
  });
});
// Exercises bin/gstack-timeline-read: empty state, --branch filtering and
// --limit truncation, using events written through gstack-timeline-log.
describe('gstack-timeline-read', () => {
  /** Counts event lines (lines starting with "- ") in the reader's output. */
  const countEventLines = (text: string) =>
    text.split('\n').filter((line) => line.startsWith('- ')).length;

  test('returns empty output for missing file (exit 0)', () => {
    expect(runRead()).toBe('');
  });

  test('filters by --branch', () => {
    const events = [
      { skill: 'review', event: 'completed', branch: 'feature-a', outcome: 'approved', ts: '2026-03-28T10:00:00Z' },
      { skill: 'ship', event: 'completed', branch: 'feature-b', outcome: 'merged', ts: '2026-03-28T11:00:00Z' },
    ];
    for (const entry of events) {
      runLog(JSON.stringify(entry));
    }

    const filtered = runRead('--branch feature-a');
    expect(filtered).toContain('review');
    expect(filtered).not.toContain('feature-b');
  });

  test('limits output with --limit', () => {
    // Five events dated 2026-03-20 through 2026-03-24
    [0, 1, 2, 3, 4].forEach((day) => {
      runLog(JSON.stringify({ skill: 'review', event: 'completed', branch: 'main', outcome: 'approved', ts: `2026-03-2${day}T10:00:00Z` }));
    });

    const unlimited = runRead('--limit 20');
    const limited = runRead('--limit 2');

    expect(countEventLines(unlimited)).toBe(5);
    expect(countEventLines(limited)).toBe(2);
  });
});
+2 -2
View File
@@ -101,7 +101,7 @@ describe('selectTests', () => {
expect(result.reason).toBe('diff');
// Should include tests that depend on gen-skill-docs.ts
expect(result.selected).toContain('skillmd-setup-discovery');
expect(result.selected).toContain('contributor-mode');
expect(result.selected).toContain('session-awareness');
expect(result.selected).toContain('journey-ideation');
// Should NOT include tests that don't depend on it
expect(result.selected).not.toContain('retro');
@@ -144,7 +144,7 @@ describe('selectTests', () => {
const result = selectTests(['SKILL.md.tmpl'], E2E_TOUCHFILES);
// Should select the 7 tests that depend on root SKILL.md
expect(result.selected).toContain('skillmd-setup-discovery');
expect(result.selected).toContain('contributor-mode');
expect(result.selected).toContain('session-awareness');
expect(result.selected).toContain('session-awareness');
// Also selects journey routing tests (SKILL.md.tmpl in their touchfiles)
expect(result.selected).toContain('journey-ideation');
+3
View File
@@ -231,6 +231,9 @@ describe('WorktreeManager', () => {
spawnSync('git', ['worktree', 'remove', '--force', oldPath], { cwd: repo, stdio: 'pipe' });
// Recreate the directory to simulate orphaned state
fs.mkdirSync(oldPath, { recursive: true });
// Backdate mtime to simulate a stale worktree (> 1 hour old)
const staleTime = new Date(Date.now() - 7200_000);
fs.utimesSync(oldRunDir, staleTime, staleTime);
// New manager should prune the old run's directory
const newMgr = new WorktreeManager(repo);