mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-06 13:45:35 +02:00
Merge branch 'main' into garrytan/team-supabase-store
Brings in 48 commits from main (v0.15.7–v0.15.16): deterministic slugs, TabSession refactor, pair-agent tunnel fix, content security layers, community security wave, team-friendly install, interactive snapshots. Conflict resolution: - .gitignore: merged both sides (kept .factory/ + added .kiro/.opencode/ .slate/.cursor/.openclaw/ from main) - open-gstack-browser/SKILL.md: accepted main (renamed from .factory/) - setup-team-sync/SKILL.md: regenerated via gen:skill-docs - test/fixtures/golden/*: updated golden baselines for ship SKILL.md - codex-ship-SKILL.md: accepted main (renamed from .factory/) - package.json version: synced to VERSION (0.15.16.0) - bin/gstack-uninstall: check settings file exists before claiming SessionStart hook removal (fixes false positive on clean systems) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -45,15 +45,17 @@ describe('Audit compliance', () => {
|
||||
expect(completionSection).toContain('_TEL" != "off"');
|
||||
});
|
||||
|
||||
// Fix 3: W012 — Bun install is version-pinned
|
||||
test('bun install commands use version pinning', () => {
|
||||
// Round 2 Fix 1: W012 — Bun install uses checksum verification
|
||||
test('bun install uses checksum-verified method', () => {
|
||||
const browseResolver = readFileSync(join(ROOT, 'scripts/resolvers/browse.ts'), 'utf-8');
|
||||
expect(browseResolver).toContain('BUN_VERSION');
|
||||
// Should not have unpinned curl|bash (without BUN_VERSION on same line)
|
||||
const lines = browseResolver.split('\n');
|
||||
expect(browseResolver).toContain('shasum -a 256');
|
||||
expect(browseResolver).toContain('BUN_INSTALL_SHA');
|
||||
const setup = readFileSync(join(ROOT, 'setup'), 'utf-8');
|
||||
// Setup error message should not have unverified curl|bash
|
||||
const lines = setup.split('\n');
|
||||
for (const line of lines) {
|
||||
if (line.includes('bun.sh/install') && line.includes('bash') && !line.includes('BUN_VERSION') && !line.includes('command -v')) {
|
||||
throw new Error(`Unpinned bun install found: ${line.trim()}`);
|
||||
if (line.includes('bun.sh/install') && line.includes('| bash') && !line.includes('shasum')) {
|
||||
throw new Error(`Unverified bun install found: ${line.trim()}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -69,6 +71,17 @@ describe('Audit compliance', () => {
|
||||
expect(between.toLowerCase()).toContain('untrusted');
|
||||
});
|
||||
|
||||
// Round 2 Fix 2: Trust boundary markers + helper + wrapping in all paths
|
||||
test('browse wraps untrusted content with trust boundary markers', () => {
|
||||
const commands = readFileSync(join(ROOT, 'browse/src/commands.ts'), 'utf-8');
|
||||
expect(commands).toContain('PAGE_CONTENT_COMMANDS');
|
||||
expect(commands).toContain('wrapUntrustedContent');
|
||||
const server = readFileSync(join(ROOT, 'browse/src/server.ts'), 'utf-8');
|
||||
expect(server).toContain('wrapUntrustedContent');
|
||||
const meta = readFileSync(join(ROOT, 'browse/src/meta-commands.ts'), 'utf-8');
|
||||
expect(meta).toContain('wrapUntrustedContent');
|
||||
});
|
||||
|
||||
// Fix 5: Data flow documentation in review.ts
|
||||
test('review.ts has data flow documentation', () => {
|
||||
const review = readFileSync(join(ROOT, 'scripts/resolvers/review.ts'), 'utf-8');
|
||||
@@ -76,6 +89,20 @@ describe('Audit compliance', () => {
|
||||
expect(review).toContain('Data NOT sent');
|
||||
});
|
||||
|
||||
// Round 2 Fix 3: Extension sender validation + message type allowlist
|
||||
test('extension background.js validates message sender', () => {
|
||||
const bg = readFileSync(join(ROOT, 'extension/background.js'), 'utf-8');
|
||||
expect(bg).toContain('sender.id !== chrome.runtime.id');
|
||||
expect(bg).toContain('ALLOWED_TYPES');
|
||||
});
|
||||
|
||||
// Round 2 Fix 4: Chrome CDP binds to localhost only
|
||||
test('chrome-cdp binds to localhost only', () => {
|
||||
const cdp = readFileSync(join(ROOT, 'bin/chrome-cdp'), 'utf-8');
|
||||
expect(cdp).toContain('--remote-debugging-address=127.0.0.1');
|
||||
expect(cdp).toContain('--remote-allow-origins=');
|
||||
});
|
||||
|
||||
// Fix 2+6: All generated SKILL.md files with telemetry are conditional
|
||||
test('all generated SKILL.md files with telemetry calls use conditional pattern', () => {
|
||||
const skills = getAllSkillMds();
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
/**
|
||||
* Tests for bin/gstack-diff-scope — verifies scope signal detection.
|
||||
*
|
||||
* Creates temp git repos with specific file patterns and verifies
|
||||
* the correct SCOPE_* variables are output.
|
||||
*/
|
||||
import { describe, test, expect, afterAll } from 'bun:test';
|
||||
import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
const SCRIPT = join(import.meta.dir, '..', 'bin', 'gstack-diff-scope');
|
||||
|
||||
const dirs: string[] = [];
|
||||
|
||||
function createRepo(files: string[]): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'diff-scope-test-'));
|
||||
dirs.push(dir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: dir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
run('git', ['init', '-b', 'main']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
|
||||
// Base commit
|
||||
writeFileSync(join(dir, 'README.md'), '# test\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Feature branch with specified files
|
||||
run('git', ['checkout', '-b', 'feature/test']);
|
||||
for (const f of files) {
|
||||
const fullPath = join(dir, f);
|
||||
const dirPath = fullPath.substring(0, fullPath.lastIndexOf('/'));
|
||||
if (dirPath !== dir) mkdirSync(dirPath, { recursive: true });
|
||||
writeFileSync(fullPath, '# test content\n');
|
||||
}
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'add files']);
|
||||
|
||||
return dir;
|
||||
}
|
||||
|
||||
function runScope(dir: string): Record<string, string> {
|
||||
const result = spawnSync('bash', [SCRIPT, 'main'], {
|
||||
cwd: dir, stdio: 'pipe', timeout: 5000,
|
||||
});
|
||||
const output = result.stdout.toString().trim();
|
||||
const vars: Record<string, string> = {};
|
||||
for (const line of output.split('\n')) {
|
||||
const [key, val] = line.split('=');
|
||||
if (key && val) vars[key] = val;
|
||||
}
|
||||
return vars;
|
||||
}
|
||||
|
||||
afterAll(() => {
|
||||
for (const d of dirs) {
|
||||
try { rmSync(d, { recursive: true, force: true }); } catch {}
|
||||
}
|
||||
});
|
||||
|
||||
describe('gstack-diff-scope', () => {
|
||||
// --- Existing scope signals ---
|
||||
|
||||
test('detects frontend files', () => {
|
||||
const dir = createRepo(['styles.css', 'component.tsx']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_FRONTEND).toBe('true');
|
||||
});
|
||||
|
||||
test('detects backend files', () => {
|
||||
const dir = createRepo(['app.rb', 'service.py']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_BACKEND).toBe('true');
|
||||
});
|
||||
|
||||
test('detects test files', () => {
|
||||
const dir = createRepo(['test/app.test.ts']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_TESTS).toBe('true');
|
||||
});
|
||||
|
||||
// --- New scope signals (Review Army) ---
|
||||
|
||||
test('detects migrations via db/migrate/', () => {
|
||||
const dir = createRepo(['db/migrate/20260330_create_users.rb']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_MIGRATIONS).toBe('true');
|
||||
});
|
||||
|
||||
test('detects migrations via generic migrations/', () => {
|
||||
const dir = createRepo(['app/migrations/0001_initial.py']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_MIGRATIONS).toBe('true');
|
||||
});
|
||||
|
||||
test('detects migrations via prisma', () => {
|
||||
const dir = createRepo(['prisma/migrations/20260330/migration.sql']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_MIGRATIONS).toBe('true');
|
||||
});
|
||||
|
||||
test('detects API via controller files', () => {
|
||||
const dir = createRepo(['app/controllers/users_controller.rb']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_API).toBe('true');
|
||||
});
|
||||
|
||||
test('detects API via route files', () => {
|
||||
const dir = createRepo(['src/routes/api.ts']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_API).toBe('true');
|
||||
});
|
||||
|
||||
test('detects API via GraphQL schemas', () => {
|
||||
const dir = createRepo(['schema.graphql']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_API).toBe('true');
|
||||
});
|
||||
|
||||
test('detects auth files', () => {
|
||||
const dir = createRepo(['app/services/auth_service.rb']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_AUTH).toBe('true');
|
||||
});
|
||||
|
||||
test('detects session files', () => {
|
||||
const dir = createRepo(['lib/session_manager.ts']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_AUTH).toBe('true');
|
||||
});
|
||||
|
||||
test('detects JWT files', () => {
|
||||
const dir = createRepo(['utils/jwt_helper.py']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_AUTH).toBe('true');
|
||||
});
|
||||
|
||||
test('returns false for all new signals when no matching files', () => {
|
||||
const dir = createRepo(['docs/readme.md', 'config.yml']);
|
||||
const scope = runScope(dir);
|
||||
expect(scope.SCOPE_MIGRATIONS).toBe('false');
|
||||
expect(scope.SCOPE_API).toBe('false');
|
||||
expect(scope.SCOPE_AUTH).toBe('false');
|
||||
});
|
||||
|
||||
test('outputs all 9 scope variables', () => {
|
||||
const dir = createRepo(['app.ts']);
|
||||
const scope = runScope(dir);
|
||||
expect(Object.keys(scope)).toHaveLength(9);
|
||||
expect(scope).toHaveProperty('SCOPE_FRONTEND');
|
||||
expect(scope).toHaveProperty('SCOPE_BACKEND');
|
||||
expect(scope).toHaveProperty('SCOPE_PROMPTS');
|
||||
expect(scope).toHaveProperty('SCOPE_TESTS');
|
||||
expect(scope).toHaveProperty('SCOPE_DOCS');
|
||||
expect(scope).toHaveProperty('SCOPE_CONFIG');
|
||||
expect(scope).toHaveProperty('SCOPE_MIGRATIONS');
|
||||
expect(scope).toHaveProperty('SCOPE_API');
|
||||
expect(scope).toHaveProperty('SCOPE_AUTH');
|
||||
});
|
||||
});
|
||||
Vendored
+2503
File diff suppressed because it is too large
Load Diff
+2543
File diff suppressed because it is too large
Load Diff
+2163
File diff suppressed because it is too large
Load Diff
+2539
File diff suppressed because it is too large
Load Diff
+5
@@ -0,0 +1,5 @@
|
||||
-- Migration: Drop user email column
|
||||
-- WARNING: This migration is intentionally unsafe for testing
|
||||
ALTER TABLE users DROP COLUMN email;
|
||||
ALTER TABLE users DROP COLUMN phone_number;
|
||||
-- No backfill, no reversibility check, no data preservation
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
# N+1 query example — intentionally bad for testing
|
||||
class PostsController
|
||||
def index
|
||||
@posts = Post.all
|
||||
@posts.each do |post|
|
||||
# N+1: queries Author table for every post
|
||||
puts post.author.name
|
||||
# N+1: queries Comments table for every post
|
||||
puts post.comments.count
|
||||
end
|
||||
end
|
||||
end
|
||||
+647
-34
@@ -213,11 +213,20 @@ describe('gen-skill-docs', () => {
|
||||
expect(browseTmpl).toContain('{{PREAMBLE}}');
|
||||
});
|
||||
|
||||
test('generated SKILL.md contains contributor mode check', () => {
|
||||
test('generated SKILL.md contains operational self-improvement (replaced contributor mode)', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Contributor Mode');
|
||||
expect(content).toContain('gstack_contributor');
|
||||
expect(content).toContain('contributor-logs');
|
||||
expect(content).not.toContain('Contributor Mode');
|
||||
expect(content).not.toContain('gstack_contributor');
|
||||
expect(content).not.toContain('contributor-logs');
|
||||
expect(content).toContain('Operational Self-Improvement');
|
||||
expect(content).toContain('gstack-learnings-log');
|
||||
expect(content).toContain('gstack-learnings-search --limit 3');
|
||||
});
|
||||
|
||||
test('generated SKILL.md with LEARNINGS_LOG contains operational type', () => {
|
||||
// Check a skill that has LEARNINGS_LOG (e.g., review)
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('operational');
|
||||
});
|
||||
|
||||
test('generated SKILL.md contains session awareness', () => {
|
||||
@@ -586,10 +595,12 @@ describe('REVIEW_DASHBOARD resolver', () => {
|
||||
expect(content).toContain('/plan-ceo-review');
|
||||
});
|
||||
|
||||
test('plan-design-review chaining mentions eng and ceo reviews', () => {
|
||||
test('plan-design-review chaining mentions eng, ceo, and design skills', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'plan-design-review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('/plan-eng-review');
|
||||
expect(content).toContain('/plan-ceo-review');
|
||||
expect(content).toContain('/design-shotgun');
|
||||
expect(content).toContain('/design-html');
|
||||
});
|
||||
|
||||
test('ship does NOT contain review chaining', () => {
|
||||
@@ -605,7 +616,8 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
|
||||
const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
const reviewSkill = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
|
||||
test('all three modes share codepath tracing methodology', () => {
|
||||
test('plan and ship modes share codepath tracing methodology', () => {
|
||||
// Review mode delegates test coverage to the Testing specialist subagent (Review Army)
|
||||
const sharedPhrases = [
|
||||
'Trace data flow',
|
||||
'Diagram the execution',
|
||||
@@ -617,33 +629,40 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
|
||||
for (const phrase of sharedPhrases) {
|
||||
expect(planSkill).toContain(phrase);
|
||||
expect(shipSkill).toContain(phrase);
|
||||
expect(reviewSkill).toContain(phrase);
|
||||
}
|
||||
// Plan mode traces the plan, not a git diff
|
||||
expect(planSkill).toContain('Trace every codepath in the plan');
|
||||
expect(planSkill).not.toContain('git diff origin');
|
||||
// Ship and review modes trace the diff
|
||||
// Ship mode traces the diff
|
||||
expect(shipSkill).toContain('Trace every codepath changed');
|
||||
expect(reviewSkill).toContain('Trace every codepath changed');
|
||||
});
|
||||
|
||||
test('all three modes include E2E decision matrix', () => {
|
||||
for (const skill of [planSkill, shipSkill, reviewSkill]) {
|
||||
test('review mode uses Review Army for specialist dispatch', () => {
|
||||
expect(reviewSkill).toContain('Review Army');
|
||||
expect(reviewSkill).toContain('Specialist Dispatch');
|
||||
expect(reviewSkill).toContain('testing.md');
|
||||
});
|
||||
|
||||
test('plan and ship modes include E2E decision matrix', () => {
|
||||
// Review mode delegates to Testing specialist
|
||||
for (const skill of [planSkill, shipSkill]) {
|
||||
expect(skill).toContain('E2E Test Decision Matrix');
|
||||
expect(skill).toContain('→E2E');
|
||||
expect(skill).toContain('→EVAL');
|
||||
}
|
||||
});
|
||||
|
||||
test('all three modes include regression rule', () => {
|
||||
for (const skill of [planSkill, shipSkill, reviewSkill]) {
|
||||
test('plan and ship modes include regression rule', () => {
|
||||
// Review mode delegates to Testing specialist
|
||||
for (const skill of [planSkill, shipSkill]) {
|
||||
expect(skill).toContain('REGRESSION RULE');
|
||||
expect(skill).toContain('IRON RULE');
|
||||
}
|
||||
});
|
||||
|
||||
test('all three modes include test framework detection', () => {
|
||||
for (const skill of [planSkill, shipSkill, reviewSkill]) {
|
||||
test('plan and ship modes include test framework detection', () => {
|
||||
// Review mode delegates to Testing specialist
|
||||
for (const skill of [planSkill, shipSkill]) {
|
||||
expect(skill).toContain('Test Framework Detection');
|
||||
expect(skill).toContain('CLAUDE.md');
|
||||
}
|
||||
@@ -662,11 +681,12 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
|
||||
expect(shipSkill).toContain('ship-test-plan');
|
||||
});
|
||||
|
||||
test('review mode generates via Fix-First + gaps are INFORMATIONAL', () => {
|
||||
test('review mode uses Fix-First + Review Army for specialist coverage', () => {
|
||||
expect(reviewSkill).toContain('Fix-First');
|
||||
expect(reviewSkill).toContain('INFORMATIONAL');
|
||||
expect(reviewSkill).toContain('Step 4.75');
|
||||
expect(reviewSkill).toContain('subsumes the "Test Gaps" category');
|
||||
// Review Army handles test coverage via Testing specialist subagent
|
||||
expect(reviewSkill).toContain('Review Army');
|
||||
expect(reviewSkill).toContain('Testing');
|
||||
});
|
||||
|
||||
test('plan mode does NOT include ship-specific content', () => {
|
||||
@@ -681,6 +701,35 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
|
||||
expect(reviewSkill).not.toContain('ship-test-plan');
|
||||
});
|
||||
|
||||
test('review/specialists/ directory has all expected checklist files', () => {
|
||||
const specDir = path.join(ROOT, 'review', 'specialists');
|
||||
const expected = [
|
||||
'testing.md',
|
||||
'maintainability.md',
|
||||
'security.md',
|
||||
'performance.md',
|
||||
'data-migration.md',
|
||||
'api-contract.md',
|
||||
'red-team.md',
|
||||
];
|
||||
for (const f of expected) {
|
||||
expect(fs.existsSync(path.join(specDir, f))).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
test('each specialist file has standard header with scope and output format', () => {
|
||||
const specDir = path.join(ROOT, 'review', 'specialists');
|
||||
const files = fs.readdirSync(specDir).filter(f => f.endsWith('.md'));
|
||||
for (const f of files) {
|
||||
const content = fs.readFileSync(path.join(specDir, f), 'utf-8');
|
||||
// All specialist files must have Scope and Output/JSON in header
|
||||
expect(content).toContain('Scope:');
|
||||
expect(content.toLowerCase()).toMatch(/output|json/);
|
||||
// Must define NO FINDINGS behavior
|
||||
expect(content).toContain('NO FINDINGS');
|
||||
}
|
||||
});
|
||||
|
||||
// Regression guard: ship output contains key phrases from before the refactor
|
||||
test('ship SKILL.md regression guard — key phrases preserved', () => {
|
||||
const regressionPhrases = [
|
||||
@@ -700,6 +749,22 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => {
|
||||
expect(shipSkill).toContain(phrase);
|
||||
}
|
||||
});
|
||||
|
||||
test('ship SKILL.md contains review army specialist dispatch', () => {
|
||||
expect(shipSkill).toContain('Specialist Dispatch');
|
||||
expect(shipSkill).toContain('Step 3.55');
|
||||
expect(shipSkill).toContain('Step 3.56');
|
||||
});
|
||||
|
||||
test('ship SKILL.md contains cross-review finding dedup', () => {
|
||||
expect(shipSkill).toContain('Cross-review finding dedup');
|
||||
expect(shipSkill).toContain('Step 3.57');
|
||||
});
|
||||
|
||||
test('ship SKILL.md contains re-run idempotency behavior', () => {
|
||||
expect(shipSkill).toContain('Re-run behavior (idempotency)');
|
||||
expect(shipSkill).toContain('Never skip a verification step');
|
||||
});
|
||||
});
|
||||
|
||||
// --- {{TEST_FAILURE_TRIAGE}} resolver tests ---
|
||||
@@ -868,12 +933,9 @@ describe('Coverage gate in ship', () => {
|
||||
expect(shipSkill).toContain('could not determine percentage — skipping');
|
||||
});
|
||||
|
||||
test('review SKILL.md contains coverage WARNING', () => {
|
||||
expect(reviewSkill).toContain('COVERAGE WARNING');
|
||||
expect(reviewSkill).toContain('Consider writing tests before running /ship');
|
||||
});
|
||||
|
||||
test('review coverage warning is INFORMATIONAL', () => {
|
||||
test('review SKILL.md delegates coverage to Testing specialist', () => {
|
||||
// Coverage audit moved to Testing specialist subagent in Review Army
|
||||
expect(reviewSkill).toContain('testing.md');
|
||||
expect(reviewSkill).toContain('INFORMATIONAL');
|
||||
});
|
||||
});
|
||||
@@ -942,6 +1004,18 @@ describe('Plan status footer in preamble', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// --- Skill invocation during plan mode in preamble ---
|
||||
|
||||
describe('Skill invocation during plan mode in preamble', () => {
|
||||
test('preamble contains skill invocation plan mode section', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'office-hours', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Skill Invocation During Plan Mode');
|
||||
expect(content).toContain('precedence over generic plan mode behavior');
|
||||
expect(content).toContain('Do not continue the workflow');
|
||||
expect(content).toContain('cancel the skill or leave plan mode');
|
||||
});
|
||||
});
|
||||
|
||||
// --- {{SPEC_REVIEW_LOOP}} resolver tests ---
|
||||
|
||||
describe('SPEC_REVIEW_LOOP resolver', () => {
|
||||
@@ -1153,6 +1227,138 @@ describe('BENEFITS_FROM resolver', () => {
|
||||
expect(ceoContent).toContain('office-hours/SKILL.md');
|
||||
expect(engContent).toContain('office-hours/SKILL.md');
|
||||
});
|
||||
|
||||
test('BENEFITS_FROM delegates to INVOKE_SKILL pattern', () => {
|
||||
// Should contain the INVOKE_SKILL-style loading prose (not the old manual skip list)
|
||||
expect(engContent).toContain('Follow its instructions from top to bottom');
|
||||
expect(engContent).toContain('skipping these sections');
|
||||
expect(ceoContent).toContain('Follow its instructions from top to bottom');
|
||||
});
|
||||
});
|
||||
|
||||
// --- {{INVOKE_SKILL}} resolver tests ---
|
||||
|
||||
describe('INVOKE_SKILL resolver', () => {
|
||||
const ceoContent = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
|
||||
|
||||
test('plan-ceo-review uses INVOKE_SKILL for mid-session office-hours fallback', () => {
|
||||
// The mid-session detection path should use INVOKE_SKILL-generated prose
|
||||
expect(ceoContent).toContain('office-hours/SKILL.md');
|
||||
expect(ceoContent).toContain('Follow its instructions from top to bottom');
|
||||
});
|
||||
|
||||
test('INVOKE_SKILL output includes default skip list', () => {
|
||||
expect(ceoContent).toContain('Preamble (run first)');
|
||||
expect(ceoContent).toContain('Telemetry (run last)');
|
||||
expect(ceoContent).toContain('AskUserQuestion Format');
|
||||
});
|
||||
|
||||
test('INVOKE_SKILL output includes error handling', () => {
|
||||
expect(ceoContent).toContain('If unreadable');
|
||||
expect(ceoContent).toContain('Could not load');
|
||||
});
|
||||
|
||||
test('template uses {{INVOKE_SKILL:office-hours}} placeholder', () => {
|
||||
const tmpl = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md.tmpl'), 'utf-8');
|
||||
expect(tmpl).toContain('{{INVOKE_SKILL:office-hours}}');
|
||||
});
|
||||
});
|
||||
|
||||
// --- {{CHANGELOG_WORKFLOW}} resolver tests ---
|
||||
|
||||
describe('CHANGELOG_WORKFLOW resolver', () => {
|
||||
const shipContent = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
|
||||
test('ship SKILL.md contains changelog workflow', () => {
|
||||
expect(shipContent).toContain('CHANGELOG (auto-generate)');
|
||||
expect(shipContent).toContain('git log <base>..HEAD --oneline');
|
||||
});
|
||||
|
||||
test('changelog workflow includes cross-check step', () => {
|
||||
expect(shipContent).toContain('Cross-check');
|
||||
expect(shipContent).toContain('Every commit must map to at least one bullet point');
|
||||
});
|
||||
|
||||
test('changelog workflow includes voice guidance', () => {
|
||||
expect(shipContent).toContain('Lead with what the user can now **do**');
|
||||
});
|
||||
|
||||
test('template uses {{CHANGELOG_WORKFLOW}} placeholder', () => {
|
||||
const tmpl = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md.tmpl'), 'utf-8');
|
||||
expect(tmpl).toContain('{{CHANGELOG_WORKFLOW}}');
|
||||
// Should NOT contain the old inline changelog content
|
||||
expect(tmpl).not.toContain('Group commits by theme');
|
||||
});
|
||||
|
||||
test('changelog workflow includes keep-changelog format', () => {
|
||||
expect(shipContent).toContain('### Added');
|
||||
expect(shipContent).toContain('### Fixed');
|
||||
});
|
||||
});
|
||||
|
||||
// --- Parameterized resolver infrastructure tests ---
|
||||
|
||||
describe('parameterized resolver support', () => {
|
||||
test('gen-skill-docs regex handles colon-separated args', () => {
|
||||
// Verify the template containing {{INVOKE_SKILL:office-hours}} was processed
|
||||
// without leaving unresolved placeholders
|
||||
const ceoContent = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
|
||||
expect(ceoContent).not.toMatch(/\{\{INVOKE_SKILL:[^}]+\}\}/);
|
||||
});
|
||||
|
||||
test('templates with parameterized resolvers pass unresolved check', () => {
|
||||
// All generated SKILL.md files should have no unresolved {{...}} placeholders
|
||||
const skillDirs = fs.readdirSync(ROOT).filter(d =>
|
||||
fs.existsSync(path.join(ROOT, d, 'SKILL.md'))
|
||||
);
|
||||
for (const dir of skillDirs) {
|
||||
const content = fs.readFileSync(path.join(ROOT, dir, 'SKILL.md'), 'utf-8');
|
||||
const unresolved = content.match(/\{\{[A-Z_]+(?::[^}]*)?\}\}/g);
|
||||
if (unresolved) {
|
||||
throw new Error(`${dir}/SKILL.md has unresolved placeholders: ${unresolved.join(', ')}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// --- Preamble routing injection tests ---
|
||||
|
||||
describe('preamble routing injection', () => {
|
||||
const shipContent = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
|
||||
test('preamble bash checks for routing section in CLAUDE.md', () => {
|
||||
expect(shipContent).toContain('grep -q "## Skill routing" CLAUDE.md');
|
||||
expect(shipContent).toContain('HAS_ROUTING');
|
||||
});
|
||||
|
||||
test('preamble bash reads routing_declined config', () => {
|
||||
expect(shipContent).toContain('routing_declined');
|
||||
expect(shipContent).toContain('ROUTING_DECLINED');
|
||||
});
|
||||
|
||||
test('preamble includes routing injection AskUserQuestion', () => {
|
||||
expect(shipContent).toContain('Add routing rules to CLAUDE.md');
|
||||
expect(shipContent).toContain("I'll invoke skills manually");
|
||||
});
|
||||
|
||||
test('routing injection respects prior decline', () => {
|
||||
expect(shipContent).toContain('ROUTING_DECLINED');
|
||||
expect(shipContent).toMatch(/routing_declined.*true/);
|
||||
});
|
||||
|
||||
test('routing injection only fires when all conditions met', () => {
|
||||
// Must be: HAS_ROUTING=no AND ROUTING_DECLINED=false AND PROACTIVE_PROMPTED=yes
|
||||
expect(shipContent).toContain('HAS_ROUTING');
|
||||
expect(shipContent).toContain('ROUTING_DECLINED');
|
||||
expect(shipContent).toContain('PROACTIVE_PROMPTED');
|
||||
});
|
||||
|
||||
test('routing section content includes key routing rules', () => {
|
||||
expect(shipContent).toContain('invoke office-hours');
|
||||
expect(shipContent).toContain('invoke investigate');
|
||||
expect(shipContent).toContain('invoke ship');
|
||||
expect(shipContent).toContain('invoke qa');
|
||||
});
|
||||
});
|
||||
|
||||
// --- {{DESIGN_OUTSIDE_VOICES}} resolver tests ---
|
||||
@@ -1470,10 +1676,9 @@ describe('Codex generation (--host codex)', () => {
|
||||
const content = fs.readFileSync(path.join(AGENTS_DIR, 'gstack-review', 'SKILL.md'), 'utf-8');
|
||||
// Correct: references to sidecar files use gstack/review/ path
|
||||
expect(content).toContain('.agents/skills/gstack/review/checklist.md');
|
||||
expect(content).toContain('.agents/skills/gstack/review/design-checklist.md');
|
||||
// design-checklist.md is now referenced via Review Army specialist (Claude only, stripped for Codex)
|
||||
// Wrong: must NOT reference gstack-review/checklist.md (file doesn't exist there)
|
||||
expect(content).not.toContain('.agents/skills/gstack-review/checklist.md');
|
||||
expect(content).not.toContain('.agents/skills/gstack-review/design-checklist.md');
|
||||
});
|
||||
|
||||
test('sidecar paths in ship skill point to gstack/review/ for pre-landing review', () => {
|
||||
@@ -1550,7 +1755,10 @@ describe('Codex generation (--host codex)', () => {
|
||||
test('Claude output unchanged: all Claude skills have zero Codex paths', () => {
|
||||
for (const skill of ALL_SKILLS) {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
|
||||
expect(content).not.toContain('~/.codex/');
|
||||
// pair-agent legitimately documents how Codex agents store credentials
|
||||
if (skill.dir !== 'pair-agent') {
|
||||
expect(content).not.toContain('~/.codex/');
|
||||
}
|
||||
// gstack-upgrade legitimately references .agents/skills for cross-platform detection
|
||||
if (skill.dir !== 'gstack-upgrade') {
|
||||
expect(content).not.toContain('.agents/skills');
|
||||
@@ -1709,19 +1917,95 @@ describe('Factory generation (--host factory)', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Parameterized host smoke tests (config-driven) ─────────
|
||||
|
||||
import { ALL_HOST_CONFIGS, getExternalHosts } from '../hosts/index';
|
||||
|
||||
describe('Parameterized host smoke tests', () => {
|
||||
for (const hostConfig of getExternalHosts()) {
|
||||
describe(`${hostConfig.displayName} (--host ${hostConfig.name})`, () => {
|
||||
const hostDir = path.join(ROOT, hostConfig.hostSubdir, 'skills');
|
||||
|
||||
test('generates output that exists on disk', () => {
|
||||
// Generated dir should exist (created by earlier bun run gen:skill-docs --host all)
|
||||
if (!fs.existsSync(hostDir)) {
|
||||
// Generate if not already done
|
||||
Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', hostConfig.name], {
|
||||
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
|
||||
});
|
||||
}
|
||||
expect(fs.existsSync(hostDir)).toBe(true);
|
||||
const skills = fs.readdirSync(hostDir).filter(d =>
|
||||
fs.existsSync(path.join(hostDir, d, 'SKILL.md'))
|
||||
);
|
||||
expect(skills.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('no .claude/skills path leakage in non-root skills', () => {
|
||||
if (!fs.existsSync(hostDir)) return; // skip if not generated
|
||||
const skills = fs.readdirSync(hostDir);
|
||||
for (const skill of skills) {
|
||||
// Skip root gstack skill — it contains preamble with intentional .claude/skills
|
||||
// fallback paths for binary lookup and skill prefix instructions
|
||||
if (skill === 'gstack') continue;
|
||||
const skillMd = path.join(hostDir, skill, 'SKILL.md');
|
||||
if (!fs.existsSync(skillMd)) continue;
|
||||
const content = fs.readFileSync(skillMd, 'utf-8');
|
||||
// Strip bash blocks (which have legitimate fallback paths)
|
||||
const noBash = content.replace(/```bash\n[\s\S]*?```/g, '');
|
||||
const leaks = noBash.split('\n').filter(l => l.includes('.claude/skills'));
|
||||
if (leaks.length > 0) {
|
||||
throw new Error(`${skill}: .claude/skills leakage:\n${leaks.slice(0, 3).join('\n')}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test('frontmatter has name and description', () => {
|
||||
if (!fs.existsSync(hostDir)) return;
|
||||
const skills = fs.readdirSync(hostDir);
|
||||
for (const skill of skills) {
|
||||
const skillMd = path.join(hostDir, skill, 'SKILL.md');
|
||||
if (!fs.existsSync(skillMd)) continue;
|
||||
const content = fs.readFileSync(skillMd, 'utf-8');
|
||||
expect(content).toMatch(/^---\n/);
|
||||
expect(content).toMatch(/^name:\s/m);
|
||||
expect(content).toMatch(/^description:\s/m);
|
||||
}
|
||||
});
|
||||
|
||||
test('--dry-run freshness check passes', () => {
|
||||
const result = Bun.spawnSync(
|
||||
['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', hostConfig.name, '--dry-run'],
|
||||
{ cwd: ROOT, stdout: 'pipe', stderr: 'pipe' }
|
||||
);
|
||||
expect(result.exitCode).toBe(0);
|
||||
const output = result.stdout.toString();
|
||||
expect(output).not.toContain('STALE');
|
||||
});
|
||||
|
||||
if (hostConfig.generation.skipSkills?.includes('codex')) {
|
||||
test('/codex skill excluded', () => {
|
||||
expect(fs.existsSync(path.join(hostDir, 'gstack-codex', 'SKILL.md'))).toBe(false);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// ─── --host all tests ────────────────────────────────────────
|
||||
|
||||
describe('--host all', () => {
|
||||
test('--host all generates for claude, codex, and factory', () => {
|
||||
test('--host all generates for all registered hosts', () => {
|
||||
const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', 'all', '--dry-run'], {
|
||||
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
const output = result.stdout.toString();
|
||||
// All three hosts should appear in output
|
||||
// All hosts should appear in output
|
||||
expect(output).toContain('FRESH: SKILL.md'); // claude
|
||||
expect(output).toContain('FRESH: .agents/skills/'); // codex
|
||||
expect(output).toContain('FRESH: .factory/skills/'); // factory
|
||||
for (const hostConfig of getExternalHosts()) {
|
||||
expect(output).toContain(`FRESH: ${hostConfig.hostSubdir}/skills/`);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1792,12 +2076,43 @@ describe('setup script validation', () => {
|
||||
expect(fnBody).toContain('gstack*');
|
||||
});
|
||||
|
||||
test('link_claude_skill_dirs creates relative symlinks', () => {
|
||||
// Claude links should be relative: ln -snf "gstack/skill_name"
|
||||
test('link_claude_skill_dirs creates real directories with absolute SKILL.md symlinks', () => {
|
||||
// Claude links should be real directories with absolute SKILL.md symlinks
|
||||
// to ensure Claude Code discovers them as top-level skills (not nested under gstack/)
|
||||
const fnStart = setupContent.indexOf('link_claude_skill_dirs()');
|
||||
const fnEnd = setupContent.indexOf('}', setupContent.indexOf('linked[@]}', fnStart));
|
||||
const fnBody = setupContent.slice(fnStart, fnEnd);
|
||||
expect(fnBody).toContain('ln -snf "gstack/$skill_name"');
|
||||
expect(fnBody).toContain('mkdir -p "$target"');
|
||||
expect(fnBody).toContain('ln -snf "$gstack_dir/$dir_name/SKILL.md" "$target/SKILL.md"');
|
||||
});
|
||||
|
||||
// REGRESSION: cleanup functions must handle both old symlinks AND new real-directory pattern
|
||||
test('cleanup functions handle real directories with symlinked SKILL.md', () => {
|
||||
// cleanup_old_claude_symlinks must detect and remove real dirs with SKILL.md symlinks
|
||||
const cleanupOldStart = setupContent.indexOf('cleanup_old_claude_symlinks()');
|
||||
const cleanupOldEnd = setupContent.indexOf('}', setupContent.indexOf('cleaned up old', cleanupOldStart));
|
||||
const cleanupOldBody = setupContent.slice(cleanupOldStart, cleanupOldEnd);
|
||||
expect(cleanupOldBody).toContain('-d "$old_target"');
|
||||
expect(cleanupOldBody).toContain('-L "$old_target/SKILL.md"');
|
||||
expect(cleanupOldBody).toContain('rm -rf "$old_target"');
|
||||
|
||||
// cleanup_prefixed_claude_symlinks must also handle the new pattern
|
||||
const cleanupPrefixedStart = setupContent.indexOf('cleanup_prefixed_claude_symlinks()');
|
||||
const cleanupPrefixedEnd = setupContent.indexOf('}', setupContent.indexOf('cleaned up prefixed', cleanupPrefixedStart));
|
||||
const cleanupPrefixedBody = setupContent.slice(cleanupPrefixedStart, cleanupPrefixedEnd);
|
||||
expect(cleanupPrefixedBody).toContain('-d "$prefixed_target"');
|
||||
expect(cleanupPrefixedBody).toContain('-L "$prefixed_target/SKILL.md"');
|
||||
expect(cleanupPrefixedBody).toContain('rm -rf "$prefixed_target"');
|
||||
});
|
||||
|
||||
// REGRESSION: link function must upgrade old directory symlinks
|
||||
test('link_claude_skill_dirs removes old directory symlinks before creating real dirs', () => {
|
||||
const fnStart = setupContent.indexOf('link_claude_skill_dirs()');
|
||||
const fnEnd = setupContent.indexOf('}', setupContent.indexOf('linked[@]}', fnStart));
|
||||
const fnBody = setupContent.slice(fnStart, fnEnd);
|
||||
// Must check for and remove old symlinks before mkdir
|
||||
expect(fnBody).toContain('if [ -L "$target" ]');
|
||||
expect(fnBody).toContain('rm -f "$target"');
|
||||
});
|
||||
|
||||
test('setup supports --host auto|claude|codex|kiro', () => {
|
||||
@@ -2036,6 +2351,100 @@ describe('telemetry', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('community fixes wave', () => {
|
||||
// Helper to get all generated SKILL.md files
|
||||
function getAllSkillMds(): Array<{ name: string; content: string }> {
|
||||
const results: Array<{ name: string; content: string }> = [];
|
||||
const rootPath = path.join(ROOT, 'SKILL.md');
|
||||
if (fs.existsSync(rootPath)) {
|
||||
results.push({ name: 'root', content: fs.readFileSync(rootPath, 'utf-8') });
|
||||
}
|
||||
for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
|
||||
if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue;
|
||||
const skillPath = path.join(ROOT, entry.name, 'SKILL.md');
|
||||
if (fs.existsSync(skillPath)) {
|
||||
results.push({ name: entry.name, content: fs.readFileSync(skillPath, 'utf-8') });
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// #594 — Discoverability: every SKILL.md.tmpl description contains "gstack"
|
||||
test('every SKILL.md.tmpl description contains "gstack"', () => {
|
||||
for (const skill of ALL_SKILLS) {
|
||||
const tmplPath = skill.dir === '.' ? path.join(ROOT, 'SKILL.md.tmpl') : path.join(ROOT, skill.dir, 'SKILL.md.tmpl');
|
||||
const content = fs.readFileSync(tmplPath, 'utf-8');
|
||||
const desc = extractDescription(content);
|
||||
expect(desc.toLowerCase()).toContain('gstack');
|
||||
}
|
||||
});
|
||||
|
||||
// #594 — Discoverability: first line of each description is under 120 chars
|
||||
test('every SKILL.md.tmpl description first line is under 120 chars', () => {
|
||||
for (const skill of ALL_SKILLS) {
|
||||
const tmplPath = skill.dir === '.' ? path.join(ROOT, 'SKILL.md.tmpl') : path.join(ROOT, skill.dir, 'SKILL.md.tmpl');
|
||||
const content = fs.readFileSync(tmplPath, 'utf-8');
|
||||
const desc = extractDescription(content);
|
||||
const firstLine = desc.split('\n')[0];
|
||||
expect(firstLine.length).toBeLessThanOrEqual(120);
|
||||
}
|
||||
});
|
||||
|
||||
// #573 — Feature signals: ship/SKILL.md contains feature signal detection
|
||||
test('ship/SKILL.md contains feature signal detection in Step 4', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
expect(content.toLowerCase()).toContain('feature signal');
|
||||
});
|
||||
|
||||
// #510 — Context warnings: no SKILL.md contains "running low on context"
|
||||
test('no generated SKILL.md contains "running low on context"', () => {
|
||||
const skills = getAllSkillMds();
|
||||
for (const { name, content } of skills) {
|
||||
expect(content).not.toContain('running low on context');
|
||||
}
|
||||
});
|
||||
|
||||
// #510 — Context warnings: plan-eng-review has explicit anti-warning
|
||||
test('plan-eng-review/SKILL.md contains "Do not preemptively warn"', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Do not preemptively warn');
|
||||
});
|
||||
|
||||
// #474 — Safety Net: no SKILL.md uses find with -delete
|
||||
test('no generated SKILL.md contains find with -delete flag', () => {
|
||||
const skills = getAllSkillMds();
|
||||
for (const { name, content } of skills) {
|
||||
// Match find commands that use -delete (but not prose mentioning the word "delete")
|
||||
const lines = content.split('\n');
|
||||
for (const line of lines) {
|
||||
if (line.includes('find ') && line.includes('-delete')) {
|
||||
throw new Error(`${name}/SKILL.md contains find with -delete: ${line.trim()}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// #467 — Telemetry: preamble JSONL writes are gated by telemetry setting
|
||||
test('preamble JSONL writes are inside telemetry conditional', () => {
|
||||
const preamble = fs.readFileSync(path.join(ROOT, 'scripts/resolvers/preamble.ts'), 'utf-8');
|
||||
// Find all skill-usage.jsonl write lines
|
||||
const lines = preamble.split('\n');
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
if (lines[i].includes('skill-usage.jsonl') && lines[i].includes('>>')) {
|
||||
// Look backwards for a telemetry conditional within 5 lines
|
||||
let foundConditional = false;
|
||||
for (let j = i - 1; j >= Math.max(0, i - 5); j--) {
|
||||
if (lines[j].includes('_TEL') && lines[j].includes('off')) {
|
||||
foundConditional = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
expect(foundConditional).toBe(true);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('codex commands must not use inline $(git rev-parse --show-toplevel) for cwd', () => {
|
||||
// Regression test: inline $(git rev-parse --show-toplevel) in codex exec -C
|
||||
// or codex review without cd evaluates in whatever cwd the background shell
|
||||
@@ -2123,3 +2532,207 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo
|
||||
expect(violations).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Learnings + Confidence Resolver Tests ─────────────────────
|
||||
|
||||
describe('LEARNINGS_SEARCH resolver', () => {
|
||||
const SEARCH_SKILLS = ['review', 'ship', 'plan-eng-review', 'investigate', 'office-hours', 'plan-ceo-review'];
|
||||
|
||||
for (const skill of SEARCH_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains learnings search`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Prior Learnings');
|
||||
expect(content).toContain('gstack-learnings-search');
|
||||
});
|
||||
}
|
||||
|
||||
test('learnings search includes cross-project config check', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('cross_project_learnings');
|
||||
expect(content).toContain('--cross-project');
|
||||
});
|
||||
|
||||
test('learnings search includes AskUserQuestion for first-time cross-project opt-in', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Enable cross-project learnings');
|
||||
expect(content).toContain('project-scoped only');
|
||||
});
|
||||
|
||||
test('learnings search mentions prior learning applied display format', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Prior learning applied');
|
||||
});
|
||||
});
|
||||
|
||||
describe('LEARNINGS_LOG resolver', () => {
|
||||
const LOG_SKILLS = ['review', 'retro', 'investigate'];
|
||||
|
||||
for (const skill of LOG_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains learnings log`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Capture Learnings');
|
||||
expect(content).toContain('gstack-learnings-log');
|
||||
});
|
||||
}
|
||||
|
||||
test('learnings log documents all type values', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
for (const type of ['pattern', 'pitfall', 'preference', 'architecture', 'tool']) {
|
||||
expect(content).toContain(type);
|
||||
}
|
||||
});
|
||||
|
||||
test('learnings log documents all source values', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
for (const source of ['observed', 'user-stated', 'inferred', 'cross-model']) {
|
||||
expect(content).toContain(source);
|
||||
}
|
||||
});
|
||||
|
||||
test('learnings log includes files field for staleness detection', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('"files"');
|
||||
expect(content).toContain('staleness detection');
|
||||
});
|
||||
});
|
||||
|
||||
describe('CONFIDENCE_CALIBRATION resolver', () => {
|
||||
const CONFIDENCE_SKILLS = ['review', 'ship', 'plan-eng-review', 'cso'];
|
||||
|
||||
for (const skill of CONFIDENCE_SKILLS) {
|
||||
test(`${skill} generated SKILL.md contains confidence calibration`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Confidence Calibration');
|
||||
expect(content).toContain('confidence score');
|
||||
});
|
||||
}
|
||||
|
||||
test('confidence calibration includes scoring rubric with all tiers', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('9-10');
|
||||
expect(content).toContain('7-8');
|
||||
expect(content).toContain('5-6');
|
||||
expect(content).toContain('3-4');
|
||||
expect(content).toContain('1-2');
|
||||
});
|
||||
|
||||
test('confidence calibration includes display rules', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Show normally');
|
||||
expect(content).toContain('Suppress from main report');
|
||||
});
|
||||
|
||||
test('confidence calibration includes finding format example', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('[P1] (confidence:');
|
||||
expect(content).toContain('SQL injection');
|
||||
});
|
||||
|
||||
test('confidence calibration includes calibration learning feedback loop', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('calibration event');
|
||||
expect(content).toContain('Log the corrected pattern');
|
||||
});
|
||||
|
||||
test('skills without confidence calibration do NOT contain it', () => {
|
||||
// office-hours and retro do NOT use confidence calibration
|
||||
for (const skill of ['office-hours', 'retro']) {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');
|
||||
expect(content).not.toContain('## Confidence Calibration');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('gen-skill-docs prefix warning (#620/#578)', () => {
|
||||
const { execSync } = require('child_process');
|
||||
|
||||
test('warns about skill_prefix when config has prefix=true', () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-prefix-warn-'));
|
||||
try {
|
||||
// Create a fake ~/.gstack/config.yaml with skill_prefix: true
|
||||
const fakeHome = tmpDir;
|
||||
const fakeGstack = path.join(fakeHome, '.gstack');
|
||||
fs.mkdirSync(fakeGstack, { recursive: true });
|
||||
fs.writeFileSync(path.join(fakeGstack, 'config.yaml'), 'skill_prefix: true\n');
|
||||
|
||||
const output = execSync('bun run scripts/gen-skill-docs.ts', {
|
||||
cwd: ROOT,
|
||||
env: { ...process.env, HOME: fakeHome },
|
||||
encoding: 'utf-8',
|
||||
timeout: 30000,
|
||||
});
|
||||
expect(output).toContain('skill_prefix is true');
|
||||
expect(output).toContain('gstack-relink');
|
||||
} finally {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test('no warning when skill_prefix is false or absent', () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-prefix-warn-'));
|
||||
try {
|
||||
const fakeHome = tmpDir;
|
||||
const fakeGstack = path.join(fakeHome, '.gstack');
|
||||
fs.mkdirSync(fakeGstack, { recursive: true });
|
||||
fs.writeFileSync(path.join(fakeGstack, 'config.yaml'), 'skill_prefix: false\n');
|
||||
|
||||
const output = execSync('bun run scripts/gen-skill-docs.ts', {
|
||||
cwd: ROOT,
|
||||
env: { ...process.env, HOME: fakeHome },
|
||||
encoding: 'utf-8',
|
||||
timeout: 30000,
|
||||
});
|
||||
expect(output).not.toContain('skill_prefix is true');
|
||||
} finally {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('voice-triggers processing', () => {
|
||||
const { extractVoiceTriggers, processVoiceTriggers } = require('../scripts/gen-skill-docs') as {
|
||||
extractVoiceTriggers: (content: string) => string[];
|
||||
processVoiceTriggers: (content: string) => string;
|
||||
};
|
||||
|
||||
test('extractVoiceTriggers parses valid YAML list', () => {
|
||||
const content = `---\nname: cso\ndescription: |\n Security audit.\nvoice-triggers:\n - "see-so"\n - "security review"\n---\nBody`;
|
||||
const triggers = extractVoiceTriggers(content);
|
||||
expect(triggers).toEqual(['see-so', 'security review']);
|
||||
});
|
||||
|
||||
test('extractVoiceTriggers returns [] when no field present', () => {
|
||||
const content = `---\nname: qa\ndescription: |\n QA testing.\n---\nBody`;
|
||||
expect(extractVoiceTriggers(content)).toEqual([]);
|
||||
});
|
||||
|
||||
test('processVoiceTriggers appends voice triggers to description', () => {
|
||||
const content = `---\nname: cso\ndescription: |\n Security audit. (gstack)\nvoice-triggers:\n - "see-so"\n - "security review"\n---\nBody`;
|
||||
const result = processVoiceTriggers(content);
|
||||
expect(result).toContain('Voice triggers (speech-to-text aliases): "see-so", "security review".');
|
||||
});
|
||||
|
||||
test('processVoiceTriggers strips voice-triggers field from output', () => {
|
||||
const content = `---\nname: cso\ndescription: |\n Security audit. (gstack)\nvoice-triggers:\n - "see-so"\n---\nBody`;
|
||||
const result = processVoiceTriggers(content);
|
||||
expect(result).not.toContain('voice-triggers:');
|
||||
});
|
||||
|
||||
test('processVoiceTriggers returns content unchanged when no voice-triggers', () => {
|
||||
const content = `---\nname: qa\ndescription: |\n QA testing.\n---\nBody`;
|
||||
expect(processVoiceTriggers(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('generated CSO SKILL.md contains voice triggers in description', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('"see-so"');
|
||||
expect(content).toContain('Voice triggers (speech-to-text aliases):');
|
||||
});
|
||||
|
||||
test('generated CSO SKILL.md does NOT contain raw voice-triggers field', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
|
||||
const fmEnd = content.indexOf('\n---', 4);
|
||||
const frontmatter = content.slice(0, fmEnd);
|
||||
expect(frontmatter).not.toContain('voice-triggers:');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -131,6 +131,165 @@ describe("gstack-global-discover", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("codex large session_meta parsing", () => {
|
||||
let codexDir: string;
|
||||
let tmpDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = mkdtempSync(join(tmpdir(), "gstack-codex-test-"));
|
||||
// Build a realistic ~/.codex/sessions/YYYY/MM/DD structure
|
||||
const now = new Date();
|
||||
const y = now.getFullYear().toString();
|
||||
const m = String(now.getMonth() + 1).padStart(2, "0");
|
||||
const d = String(now.getDate()).padStart(2, "0");
|
||||
codexDir = join(tmpDir, "codex-home", "sessions", y, m, d);
|
||||
mkdirSync(codexDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
function writeCodexSession(
|
||||
dir: string,
|
||||
cwd: string,
|
||||
baseInstructionsSize: number
|
||||
): string {
|
||||
const padding = "x".repeat(baseInstructionsSize);
|
||||
const line = JSON.stringify({
|
||||
timestamp: new Date().toISOString(),
|
||||
type: "session_meta",
|
||||
payload: {
|
||||
id: `test-${Date.now()}`,
|
||||
timestamp: new Date().toISOString(),
|
||||
cwd,
|
||||
originator: "codex_exec",
|
||||
cli_version: "0.118.0",
|
||||
source: "exec",
|
||||
model_provider: "openai",
|
||||
base_instructions: { text: padding },
|
||||
},
|
||||
});
|
||||
const name = `rollout-${new Date().toISOString().replace(/[:.]/g, "-")}-${Math.random().toString(36).slice(2)}.jsonl`;
|
||||
const filePath = join(dir, name);
|
||||
writeFileSync(filePath, line + "\n");
|
||||
return filePath;
|
||||
}
|
||||
|
||||
test("discovers codex sessions with >4KB session_meta via CLI", () => {
|
||||
// Create a git repo as the session target
|
||||
const repoDir = join(tmpDir, "fake-repo");
|
||||
mkdirSync(repoDir);
|
||||
spawnSync("git", ["init"], { cwd: repoDir, stdio: "pipe" });
|
||||
spawnSync("git", ["commit", "--allow-empty", "-m", "init"], {
|
||||
cwd: repoDir,
|
||||
stdio: "pipe",
|
||||
});
|
||||
|
||||
// Write a session with a 20KB first line (simulates Codex v0.117+)
|
||||
writeCodexSession(codexDir, repoDir, 20000);
|
||||
|
||||
// Run discovery with CODEX_SESSIONS_DIR override
|
||||
const result = spawnSync(
|
||||
"bun",
|
||||
["run", scriptPath, "--since", "1h", "--format", "json"],
|
||||
{
|
||||
encoding: "utf-8",
|
||||
timeout: 30000,
|
||||
env: {
|
||||
...process.env,
|
||||
CODEX_SESSIONS_DIR: join(tmpDir, "codex-home", "sessions"),
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
expect(result.status).toBe(0);
|
||||
const json = JSON.parse(result.stdout);
|
||||
expect(json.tools.codex.total_sessions).toBeGreaterThanOrEqual(1);
|
||||
});
|
||||
|
||||
test("4KB buffer truncates session_meta, 128KB buffer parses it", () => {
|
||||
const padding = "x".repeat(20000);
|
||||
const sessionMeta = JSON.stringify({
|
||||
timestamp: new Date().toISOString(),
|
||||
type: "session_meta",
|
||||
payload: {
|
||||
id: "test-id",
|
||||
timestamp: new Date().toISOString(),
|
||||
cwd: "/tmp/test-repo",
|
||||
originator: "codex_exec",
|
||||
cli_version: "0.118.0",
|
||||
source: "exec",
|
||||
model_provider: "openai",
|
||||
base_instructions: { text: padding },
|
||||
},
|
||||
});
|
||||
|
||||
expect(sessionMeta.length).toBeGreaterThan(4096);
|
||||
|
||||
const filePath = join(codexDir, "test.jsonl");
|
||||
writeFileSync(filePath, sessionMeta + "\n");
|
||||
|
||||
// 4KB buffer: JSON.parse fails (the old bug)
|
||||
const { openSync, readSync, closeSync } = require("fs");
|
||||
const fd4k = openSync(filePath, "r");
|
||||
const buf4k = Buffer.alloc(4096);
|
||||
readSync(fd4k, buf4k, 0, 4096, 0);
|
||||
closeSync(fd4k);
|
||||
expect(() =>
|
||||
JSON.parse(buf4k.toString("utf-8").split("\n")[0])
|
||||
).toThrow();
|
||||
|
||||
// 128KB buffer: JSON.parse succeeds (the fix)
|
||||
const fd128k = openSync(filePath, "r");
|
||||
const buf128k = Buffer.alloc(131072);
|
||||
const bytesRead = readSync(fd128k, buf128k, 0, 131072, 0);
|
||||
closeSync(fd128k);
|
||||
const firstLine = buf128k.toString("utf-8", 0, bytesRead).split("\n")[0];
|
||||
const meta = JSON.parse(firstLine);
|
||||
expect(meta.type).toBe("session_meta");
|
||||
expect(meta.payload.cwd).toBe("/tmp/test-repo");
|
||||
});
|
||||
|
||||
test("regression: session_meta beyond 128KB still needs streaming parse", () => {
|
||||
// This test documents the current limitation: 128KB buffer is a heuristic.
|
||||
// If Codex ever embeds >128KB in session_meta, this test will fail,
|
||||
// signaling that the buffer needs to increase or be replaced with streaming.
|
||||
const padding = "x".repeat(140000); // ~140KB payload
|
||||
const sessionMeta = JSON.stringify({
|
||||
timestamp: new Date().toISOString(),
|
||||
type: "session_meta",
|
||||
payload: {
|
||||
id: "test-large",
|
||||
timestamp: new Date().toISOString(),
|
||||
cwd: "/tmp/large-test",
|
||||
originator: "codex_exec",
|
||||
cli_version: "0.200.0",
|
||||
source: "exec",
|
||||
model_provider: "openai",
|
||||
base_instructions: { text: padding },
|
||||
},
|
||||
});
|
||||
|
||||
expect(sessionMeta.length).toBeGreaterThan(131072);
|
||||
|
||||
const filePath = join(codexDir, "large-test.jsonl");
|
||||
writeFileSync(filePath, sessionMeta + "\n");
|
||||
|
||||
// 128KB buffer: JSON.parse FAILS for >128KB lines (current limitation)
|
||||
const { openSync, readSync, closeSync } = require("fs");
|
||||
const fd = openSync(filePath, "r");
|
||||
const buf = Buffer.alloc(131072);
|
||||
readSync(fd, buf, 0, 131072, 0);
|
||||
closeSync(fd);
|
||||
expect(() =>
|
||||
JSON.parse(buf.toString("utf-8").split("\n")[0])
|
||||
).toThrow();
|
||||
// When this test starts passing (e.g., after implementing streaming parse),
|
||||
// update it to verify correct parsing instead of documenting the limitation.
|
||||
});
|
||||
});
|
||||
|
||||
describe("discovery output structure", () => {
|
||||
test("repos have required fields", () => {
|
||||
const result = spawnSync(
|
||||
|
||||
@@ -305,12 +305,13 @@ export async function runSkillTest(options: {
|
||||
|
||||
// Use resultLine for structured result data
|
||||
if (resultLine) {
|
||||
if (resultLine.is_error) {
|
||||
if (resultLine.subtype === 'success' && resultLine.is_error) {
|
||||
// claude -p can return subtype=success with is_error=true (e.g. API connection failure)
|
||||
exitReason = 'error_api';
|
||||
} else if (resultLine.subtype === 'success') {
|
||||
exitReason = 'success';
|
||||
} else if (resultLine.subtype) {
|
||||
// Preserve known subtypes like error_max_turns even if is_error is set
|
||||
exitReason = resultLine.subtype;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,11 @@ import { parseSnapshotArgs } from '../../browse/src/snapshot';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/** CLI-only commands: valid $B invocations that are handled by the CLI, not the server */
|
||||
const CLI_COMMANDS = new Set([
|
||||
'status', 'pair-agent', 'tunnel',
|
||||
]);
|
||||
|
||||
export interface BrowseCommand {
|
||||
command: string;
|
||||
args: string[];
|
||||
@@ -112,7 +117,7 @@ export function validateSkill(skillPath: string): ValidationResult {
|
||||
}
|
||||
|
||||
for (const cmd of commands) {
|
||||
if (!ALL_COMMANDS.has(cmd.command)) {
|
||||
if (!ALL_COMMANDS.has(cmd.command) && !CLI_COMMANDS.has(cmd.command)) {
|
||||
result.invalid.push(cmd);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -41,8 +41,8 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'skillmd-no-local-binary': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
'skillmd-outside-git': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
|
||||
'contributor-mode': ['SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
'session-awareness': ['SKILL.md', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
'operational-learning': ['scripts/resolvers/preamble.ts', 'bin/gstack-learnings-log'],
|
||||
|
||||
// QA (+ test-server dependency)
|
||||
'qa-quick': ['qa/**', 'browse/src/**', 'browse/test/test-server.ts'],
|
||||
@@ -59,6 +59,15 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'review-base-branch': ['review/**'],
|
||||
'review-design-lite': ['review/**', 'test/fixtures/review-eval-design-slop.*'],
|
||||
|
||||
// Review Army (specialist dispatch)
|
||||
'review-army-migration-safety': ['review/**', 'scripts/resolvers/review-army.ts', 'bin/gstack-diff-scope'],
|
||||
'review-army-perf-n-plus-one': ['review/**', 'scripts/resolvers/review-army.ts', 'bin/gstack-diff-scope'],
|
||||
'review-army-delivery-audit': ['review/**', 'scripts/resolvers/review.ts', 'scripts/resolvers/review-army.ts'],
|
||||
'review-army-quality-score': ['review/**', 'scripts/resolvers/review-army.ts'],
|
||||
'review-army-json-findings': ['review/**', 'scripts/resolvers/review-army.ts'],
|
||||
'review-army-red-team': ['review/**', 'scripts/resolvers/review-army.ts'],
|
||||
'review-army-consensus': ['review/**', 'scripts/resolvers/review-army.ts'],
|
||||
|
||||
// Office Hours
|
||||
'office-hours-spec-review': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
|
||||
|
||||
@@ -95,6 +104,14 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
'cso-diff-mode': ['cso/**'],
|
||||
'cso-infra-scope': ['cso/**'],
|
||||
|
||||
// Learnings
|
||||
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
|
||||
|
||||
// Session Intelligence (timeline, context recovery, checkpoint)
|
||||
'timeline-event-flow': ['bin/gstack-timeline-log', 'bin/gstack-timeline-read'],
|
||||
'context-recovery-artifacts': ['scripts/resolvers/preamble.ts', 'bin/gstack-timeline-log', 'bin/gstack-slug', 'learn/**'],
|
||||
'checkpoint-save-resume': ['checkpoint/**', 'bin/gstack-slug'],
|
||||
|
||||
// Document-release
|
||||
'document-release': ['document-release/**'],
|
||||
|
||||
@@ -119,6 +136,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
// Plan completion audit + verification
|
||||
'ship-plan-completion': ['ship/**', 'scripts/gen-skill-docs.ts'],
|
||||
'ship-plan-verification': ['ship/**', 'qa-only/**', 'scripts/gen-skill-docs.ts'],
|
||||
'ship-idempotency': ['ship/**', 'scripts/resolvers/utility.ts'],
|
||||
'review-plan-completion': ['review/**', 'scripts/gen-skill-docs.ts'],
|
||||
|
||||
// Design
|
||||
@@ -149,6 +167,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
// Sidebar agent
|
||||
'sidebar-navigate': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/**'],
|
||||
'sidebar-url-accuracy': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/background.js'],
|
||||
'sidebar-css-interaction': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/write-commands.ts', 'browse/src/read-commands.ts', 'browse/src/cdp-inspector.ts', 'extension/**'],
|
||||
|
||||
// Autoplan
|
||||
'autoplan-core': ['autoplan/**', 'plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**'],
|
||||
@@ -179,8 +198,8 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'skillmd-setup-discovery': 'gate',
|
||||
'skillmd-no-local-binary': 'gate',
|
||||
'skillmd-outside-git': 'gate',
|
||||
'contributor-mode': 'gate',
|
||||
'session-awareness': 'gate',
|
||||
'operational-learning': 'gate',
|
||||
|
||||
// QA — gate for functional, periodic for quality/benchmarks
|
||||
'qa-quick': 'gate',
|
||||
@@ -200,6 +219,15 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'review-plan-completion': 'gate',
|
||||
'review-dashboard-via': 'gate',
|
||||
|
||||
// Review Army — gate for core functionality, periodic for multi-specialist
|
||||
'review-army-migration-safety': 'gate', // Specialist activation guardrail
|
||||
'review-army-perf-n-plus-one': 'gate', // Specialist activation guardrail
|
||||
'review-army-delivery-audit': 'gate', // Delivery integrity guardrail
|
||||
'review-army-quality-score': 'gate', // Score computation
|
||||
'review-army-json-findings': 'gate', // JSON schema compliance
|
||||
'review-army-red-team': 'periodic', // Multi-agent coordination
|
||||
'review-army-consensus': 'periodic', // Multi-specialist agreement
|
||||
|
||||
// Office Hours
|
||||
'office-hours-spec-review': 'gate',
|
||||
|
||||
@@ -218,6 +246,11 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'codex-offered-design-review': 'gate',
|
||||
'codex-offered-eng-review': 'gate',
|
||||
|
||||
// Session Intelligence — gate for data flow, periodic for agent integration
|
||||
'timeline-event-flow': 'gate', // Binary data flow (no LLM needed)
|
||||
'context-recovery-artifacts': 'gate', // Preamble reads seeded artifacts
|
||||
'checkpoint-save-resume': 'gate', // Checkpoint round-trip
|
||||
|
||||
// Ship — gate (end-to-end ship path)
|
||||
'ship-base-branch': 'gate',
|
||||
'ship-local-workflow': 'gate',
|
||||
@@ -225,6 +258,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'ship-triage': 'gate',
|
||||
'ship-plan-completion': 'gate',
|
||||
'ship-plan-verification': 'gate',
|
||||
'ship-idempotency': 'periodic',
|
||||
|
||||
// Retro — gate for cheap branch detection, periodic for full Opus retro
|
||||
'retro': 'periodic',
|
||||
@@ -238,6 +272,9 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
'cso-diff-mode': 'gate',
|
||||
'cso-infra-scope': 'periodic',
|
||||
|
||||
// Learnings — gate (functional guardrail: seeded learnings must appear)
|
||||
'learnings-show': 'gate',
|
||||
|
||||
// Document-release — gate (CHANGELOG guardrail)
|
||||
'document-release': 'gate',
|
||||
|
||||
@@ -276,6 +313,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
// Sidebar agent
|
||||
'sidebar-navigate': 'periodic',
|
||||
'sidebar-url-accuracy': 'periodic',
|
||||
'sidebar-css-interaction': 'periodic',
|
||||
|
||||
// Autoplan — periodic (not yet implemented)
|
||||
'autoplan-core': 'periodic',
|
||||
|
||||
@@ -0,0 +1,524 @@
|
||||
/**
|
||||
* Host config system tests — 100% coverage of host-config.ts, hosts/index.ts,
|
||||
* host-config-export.ts, and golden-file regression checks.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { validateHostConfig, validateAllConfigs, type HostConfig } from '../scripts/host-config';
|
||||
import {
|
||||
ALL_HOST_CONFIGS,
|
||||
ALL_HOST_NAMES,
|
||||
HOST_CONFIG_MAP,
|
||||
getHostConfig,
|
||||
resolveHostArg,
|
||||
getExternalHosts,
|
||||
claude,
|
||||
codex,
|
||||
factory,
|
||||
kiro,
|
||||
opencode,
|
||||
slate,
|
||||
cursor,
|
||||
openclaw,
|
||||
} from '../hosts/index';
|
||||
import { HOST_PATHS } from '../scripts/resolvers/types';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
|
||||
// ─── hosts/index.ts ─────────────────────────────────────────
|
||||
|
||||
describe('hosts/index.ts', () => {
|
||||
test('ALL_HOST_CONFIGS has 8 hosts', () => {
|
||||
expect(ALL_HOST_CONFIGS.length).toBe(8);
|
||||
});
|
||||
|
||||
test('ALL_HOST_NAMES matches config names', () => {
|
||||
expect(ALL_HOST_NAMES).toEqual(ALL_HOST_CONFIGS.map(c => c.name));
|
||||
});
|
||||
|
||||
test('HOST_CONFIG_MAP keys match names', () => {
|
||||
for (const config of ALL_HOST_CONFIGS) {
|
||||
expect(HOST_CONFIG_MAP[config.name]).toBe(config);
|
||||
}
|
||||
});
|
||||
|
||||
test('individual config re-exports match registry', () => {
|
||||
expect(claude.name).toBe('claude');
|
||||
expect(codex.name).toBe('codex');
|
||||
expect(factory.name).toBe('factory');
|
||||
expect(kiro.name).toBe('kiro');
|
||||
expect(opencode.name).toBe('opencode');
|
||||
expect(slate.name).toBe('slate');
|
||||
expect(cursor.name).toBe('cursor');
|
||||
expect(openclaw.name).toBe('openclaw');
|
||||
});
|
||||
|
||||
test('getHostConfig returns correct config', () => {
|
||||
const c = getHostConfig('codex');
|
||||
expect(c.name).toBe('codex');
|
||||
expect(c.displayName).toBe('OpenAI Codex CLI');
|
||||
});
|
||||
|
||||
test('getHostConfig throws on unknown host', () => {
|
||||
expect(() => getHostConfig('nonexistent')).toThrow('Unknown host');
|
||||
});
|
||||
|
||||
test('resolveHostArg resolves direct names', () => {
|
||||
for (const name of ALL_HOST_NAMES) {
|
||||
expect(resolveHostArg(name)).toBe(name);
|
||||
}
|
||||
});
|
||||
|
||||
test('resolveHostArg resolves aliases', () => {
|
||||
expect(resolveHostArg('agents')).toBe('codex');
|
||||
expect(resolveHostArg('droid')).toBe('factory');
|
||||
});
|
||||
|
||||
test('resolveHostArg throws on unknown alias', () => {
|
||||
expect(() => resolveHostArg('nonexistent')).toThrow('Unknown host');
|
||||
});
|
||||
|
||||
test('getExternalHosts excludes claude', () => {
|
||||
const external = getExternalHosts();
|
||||
expect(external.find(c => c.name === 'claude')).toBeUndefined();
|
||||
expect(external.length).toBe(ALL_HOST_CONFIGS.length - 1);
|
||||
});
|
||||
|
||||
test('every host has a unique name', () => {
|
||||
const names = new Set(ALL_HOST_NAMES);
|
||||
expect(names.size).toBe(ALL_HOST_NAMES.length);
|
||||
});
|
||||
|
||||
test('every host has a unique hostSubdir', () => {
|
||||
const subdirs = new Set(ALL_HOST_CONFIGS.map(c => c.hostSubdir));
|
||||
expect(subdirs.size).toBe(ALL_HOST_CONFIGS.length);
|
||||
});
|
||||
|
||||
test('every host has a unique globalRoot', () => {
|
||||
const roots = new Set(ALL_HOST_CONFIGS.map(c => c.globalRoot));
|
||||
expect(roots.size).toBe(ALL_HOST_CONFIGS.length);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── validateHostConfig ─────────────────────────────────────
|
||||
|
||||
describe('validateHostConfig', () => {
|
||||
function makeValid(): HostConfig {
|
||||
return {
|
||||
name: 'test-host',
|
||||
displayName: 'Test Host',
|
||||
cliCommand: 'testcli',
|
||||
globalRoot: '.test/skills/gstack',
|
||||
localSkillRoot: '.test/skills/gstack',
|
||||
hostSubdir: '.test',
|
||||
usesEnvVars: true,
|
||||
frontmatter: { mode: 'allowlist', keepFields: ['name', 'description'] },
|
||||
generation: { generateMetadata: false },
|
||||
pathRewrites: [],
|
||||
runtimeRoot: { globalSymlinks: ['bin'] },
|
||||
install: { prefixable: false, linkingStrategy: 'symlink-generated' },
|
||||
};
|
||||
}
|
||||
|
||||
test('valid config passes', () => {
|
||||
expect(validateHostConfig(makeValid())).toEqual([]);
|
||||
});
|
||||
|
||||
test('invalid name is caught', () => {
|
||||
const c = makeValid();
|
||||
c.name = 'UPPER_CASE';
|
||||
const errors = validateHostConfig(c);
|
||||
expect(errors.some(e => e.includes('name'))).toBe(true);
|
||||
});
|
||||
|
||||
test('name with special chars is caught', () => {
|
||||
const c = makeValid();
|
||||
c.name = 'has spaces';
|
||||
expect(validateHostConfig(c).length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('empty displayName is caught', () => {
|
||||
const c = makeValid();
|
||||
c.displayName = '';
|
||||
expect(validateHostConfig(c).some(e => e.includes('displayName'))).toBe(true);
|
||||
});
|
||||
|
||||
test('invalid cliCommand is caught', () => {
|
||||
const c = makeValid();
|
||||
c.cliCommand = 'has spaces';
|
||||
expect(validateHostConfig(c).some(e => e.includes('cliCommand'))).toBe(true);
|
||||
});
|
||||
|
||||
test('invalid cliAlias is caught', () => {
|
||||
const c = makeValid();
|
||||
c.cliAliases = ['good', 'BAD!'];
|
||||
expect(validateHostConfig(c).some(e => e.includes('cliAlias'))).toBe(true);
|
||||
});
|
||||
|
||||
test('valid cliAliases pass', () => {
|
||||
const c = makeValid();
|
||||
c.cliAliases = ['alias-one', 'alias-two'];
|
||||
expect(validateHostConfig(c)).toEqual([]);
|
||||
});
|
||||
|
||||
test('invalid globalRoot is caught', () => {
|
||||
const c = makeValid();
|
||||
c.globalRoot = 'path with spaces';
|
||||
expect(validateHostConfig(c).some(e => e.includes('globalRoot'))).toBe(true);
|
||||
});
|
||||
|
||||
test('invalid localSkillRoot is caught', () => {
|
||||
const c = makeValid();
|
||||
c.localSkillRoot = 'invalid<path>';
|
||||
expect(validateHostConfig(c).some(e => e.includes('localSkillRoot'))).toBe(true);
|
||||
});
|
||||
|
||||
test('invalid hostSubdir is caught', () => {
|
||||
const c = makeValid();
|
||||
c.hostSubdir = 'no spaces allowed';
|
||||
expect(validateHostConfig(c).some(e => e.includes('hostSubdir'))).toBe(true);
|
||||
});
|
||||
|
||||
test('invalid frontmatter.mode is caught', () => {
|
||||
const c = makeValid();
|
||||
(c.frontmatter as any).mode = 'invalid';
|
||||
expect(validateHostConfig(c).some(e => e.includes('frontmatter.mode'))).toBe(true);
|
||||
});
|
||||
|
||||
test('invalid linkingStrategy is caught', () => {
|
||||
const c = makeValid();
|
||||
(c.install as any).linkingStrategy = 'invalid';
|
||||
expect(validateHostConfig(c).some(e => e.includes('linkingStrategy'))).toBe(true);
|
||||
});
|
||||
|
||||
test('paths with $ and ~ are valid', () => {
|
||||
const c = makeValid();
|
||||
c.globalRoot = '$HOME/.test/skills/gstack';
|
||||
c.localSkillRoot = '~/.test/skills/gstack';
|
||||
expect(validateHostConfig(c)).toEqual([]);
|
||||
});
|
||||
|
||||
test('shell injection attempt in cliCommand is caught', () => {
|
||||
const c = makeValid();
|
||||
c.cliCommand = 'opencode;rm -rf /';
|
||||
expect(validateHostConfig(c).some(e => e.includes('cliCommand'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── validateAllConfigs ─────────────────────────────────────
|
||||
|
||||
describe('validateAllConfigs', () => {
|
||||
test('real configs all pass validation', () => {
|
||||
const errors = validateAllConfigs(ALL_HOST_CONFIGS);
|
||||
expect(errors).toEqual([]);
|
||||
});
|
||||
|
||||
test('duplicate name detected', () => {
|
||||
const dup = { ...codex, name: 'claude' } as HostConfig;
|
||||
const errors = validateAllConfigs([claude, dup]);
|
||||
expect(errors.some(e => e.includes('Duplicate name'))).toBe(true);
|
||||
});
|
||||
|
||||
test('duplicate hostSubdir detected', () => {
|
||||
const dup = { ...codex, name: 'dup-host', hostSubdir: '.claude', globalRoot: '.dup/skills/gstack' } as HostConfig;
|
||||
const errors = validateAllConfigs([claude, dup]);
|
||||
expect(errors.some(e => e.includes('Duplicate hostSubdir'))).toBe(true);
|
||||
});
|
||||
|
||||
test('duplicate globalRoot detected', () => {
|
||||
const dup = { ...codex, name: 'dup-host', hostSubdir: '.dup', globalRoot: '.claude/skills/gstack' } as HostConfig;
|
||||
const errors = validateAllConfigs([claude, dup]);
|
||||
expect(errors.some(e => e.includes('Duplicate globalRoot'))).toBe(true);
|
||||
});
|
||||
|
||||
test('per-config validation errors are prefixed with host name', () => {
|
||||
const bad = { ...codex, name: 'BAD', cliCommand: 'also bad' } as HostConfig;
|
||||
const errors = validateAllConfigs([bad]);
|
||||
expect(errors.every(e => e.startsWith('[BAD]'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── HOST_PATHS derivation ──────────────────────────────────
|
||||
|
||||
describe('HOST_PATHS derivation from configs', () => {
|
||||
test('Claude uses literal home paths (no env vars)', () => {
|
||||
expect(HOST_PATHS.claude.skillRoot).toBe('~/.claude/skills/gstack');
|
||||
expect(HOST_PATHS.claude.binDir).toBe('~/.claude/skills/gstack/bin');
|
||||
expect(HOST_PATHS.claude.browseDir).toBe('~/.claude/skills/gstack/browse/dist');
|
||||
expect(HOST_PATHS.claude.designDir).toBe('~/.claude/skills/gstack/design/dist');
|
||||
});
|
||||
|
||||
test('Codex uses $GSTACK_ROOT env vars', () => {
|
||||
expect(HOST_PATHS.codex.skillRoot).toBe('$GSTACK_ROOT');
|
||||
expect(HOST_PATHS.codex.binDir).toBe('$GSTACK_BIN');
|
||||
expect(HOST_PATHS.codex.browseDir).toBe('$GSTACK_BROWSE');
|
||||
expect(HOST_PATHS.codex.designDir).toBe('$GSTACK_DESIGN');
|
||||
});
|
||||
|
||||
test('every host with usesEnvVars=true gets env var paths', () => {
|
||||
for (const config of ALL_HOST_CONFIGS) {
|
||||
if (config.usesEnvVars) {
|
||||
expect(HOST_PATHS[config.name].skillRoot).toBe('$GSTACK_ROOT');
|
||||
expect(HOST_PATHS[config.name].binDir).toBe('$GSTACK_BIN');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test('every host with usesEnvVars=false gets literal paths', () => {
|
||||
for (const config of ALL_HOST_CONFIGS) {
|
||||
if (!config.usesEnvVars) {
|
||||
expect(HOST_PATHS[config.name].skillRoot).toContain('~/');
|
||||
expect(HOST_PATHS[config.name].binDir).toContain('/bin');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test('localSkillRoot matches config for every host', () => {
|
||||
for (const config of ALL_HOST_CONFIGS) {
|
||||
expect(HOST_PATHS[config.name].localSkillRoot).toBe(config.localSkillRoot);
|
||||
}
|
||||
});
|
||||
|
||||
test('HOST_PATHS has entry for every registered host', () => {
|
||||
for (const name of ALL_HOST_NAMES) {
|
||||
expect(HOST_PATHS[name]).toBeDefined();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ─── host-config-export.ts CLI ──────────────────────────────
|
||||
|
||||
describe('host-config-export.ts CLI', () => {
|
||||
const EXPORT_SCRIPT = path.join(ROOT, 'scripts', 'host-config-export.ts');
|
||||
|
||||
function run(...args: string[]): { stdout: string; stderr: string; exitCode: number } {
|
||||
const result = Bun.spawnSync(['bun', 'run', EXPORT_SCRIPT, ...args], {
|
||||
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
|
||||
});
|
||||
return {
|
||||
stdout: result.stdout.toString().trim(),
|
||||
stderr: result.stderr.toString().trim(),
|
||||
exitCode: result.exitCode,
|
||||
};
|
||||
}
|
||||
|
||||
test('list prints all host names', () => {
|
||||
const { stdout, exitCode } = run('list');
|
||||
expect(exitCode).toBe(0);
|
||||
const names = stdout.split('\n');
|
||||
expect(names).toEqual(ALL_HOST_NAMES);
|
||||
});
|
||||
|
||||
test('get returns string field', () => {
|
||||
const { stdout, exitCode } = run('get', 'codex', 'globalRoot');
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toBe('.codex/skills/gstack');
|
||||
});
|
||||
|
||||
test('get returns boolean as 1/0', () => {
|
||||
const { stdout: t } = run('get', 'claude', 'usesEnvVars');
|
||||
expect(t).toBe('0');
|
||||
const { stdout: f } = run('get', 'codex', 'usesEnvVars');
|
||||
expect(f).toBe('1');
|
||||
});
|
||||
|
||||
test('get with missing args exits 1', () => {
|
||||
const { exitCode } = run('get', 'codex');
|
||||
expect(exitCode).toBe(1);
|
||||
});
|
||||
|
||||
test('get with unknown field exits 1', () => {
|
||||
const { exitCode } = run('get', 'codex', 'nonexistent');
|
||||
expect(exitCode).toBe(1);
|
||||
});
|
||||
|
||||
test('get with unknown host exits 1', () => {
|
||||
const { exitCode } = run('get', 'nonexistent', 'name');
|
||||
expect(exitCode).not.toBe(0);
|
||||
});
|
||||
|
||||
test('validate passes for real configs', () => {
|
||||
const { stdout, exitCode } = run('validate');
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain('configs valid');
|
||||
});
|
||||
|
||||
test('symlinks returns asset list', () => {
|
||||
const { stdout, exitCode } = run('symlinks', 'codex');
|
||||
expect(exitCode).toBe(0);
|
||||
const lines = stdout.split('\n');
|
||||
expect(lines).toContain('bin');
|
||||
expect(lines).toContain('ETHOS.md');
|
||||
expect(lines).toContain('review/checklist.md');
|
||||
});
|
||||
|
||||
test('symlinks with missing host exits 1', () => {
|
||||
const { exitCode } = run('symlinks');
|
||||
expect(exitCode).toBe(1);
|
||||
});
|
||||
|
||||
test('detect finds claude (since we are running in claude)', () => {
|
||||
const { stdout, exitCode } = run('detect');
|
||||
expect(exitCode).toBe(0);
|
||||
// claude binary should be on PATH in this environment
|
||||
expect(stdout).toContain('claude');
|
||||
});
|
||||
|
||||
test('unknown command exits 1', () => {
|
||||
const { exitCode } = run('badcommand');
|
||||
expect(exitCode).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Golden-file regression ─────────────────────────────────
|
||||
|
||||
describe('golden-file regression', () => {
|
||||
const GOLDEN_DIR = path.join(ROOT, 'test', 'fixtures', 'golden');
|
||||
|
||||
test('Claude ship skill matches golden baseline', () => {
|
||||
const golden = fs.readFileSync(path.join(GOLDEN_DIR, 'claude-ship-SKILL.md'), 'utf-8');
|
||||
const current = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
expect(current).toBe(golden);
|
||||
});
|
||||
|
||||
test('Codex ship skill matches golden baseline', () => {
|
||||
const golden = fs.readFileSync(path.join(GOLDEN_DIR, 'codex-ship-SKILL.md'), 'utf-8');
|
||||
const current = fs.readFileSync(path.join(ROOT, '.agents', 'skills', 'gstack-ship', 'SKILL.md'), 'utf-8');
|
||||
expect(current).toBe(golden);
|
||||
});
|
||||
|
||||
test('Factory ship skill matches golden baseline', () => {
|
||||
const golden = fs.readFileSync(path.join(GOLDEN_DIR, 'factory-ship-SKILL.md'), 'utf-8');
|
||||
const current = fs.readFileSync(path.join(ROOT, '.factory', 'skills', 'gstack-ship', 'SKILL.md'), 'utf-8');
|
||||
expect(current).toBe(golden);
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Individual host config correctness ─────────────────────
|
||||
|
||||
describe('host config correctness', () => {
|
||||
test('claude is the only prefixable host', () => {
|
||||
for (const config of ALL_HOST_CONFIGS) {
|
||||
if (config.name === 'claude') {
|
||||
expect(config.install.prefixable).toBe(true);
|
||||
} else {
|
||||
expect(config.install.prefixable).toBe(false);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test('claude is the only host with real-dir-symlink strategy', () => {
|
||||
for (const config of ALL_HOST_CONFIGS) {
|
||||
if (config.name === 'claude') {
|
||||
expect(config.install.linkingStrategy).toBe('real-dir-symlink');
|
||||
} else {
|
||||
expect(config.install.linkingStrategy).toBe('symlink-generated');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test('claude does not use env vars', () => {
|
||||
expect(claude.usesEnvVars).toBe(false);
|
||||
});
|
||||
|
||||
test('all external hosts use env vars', () => {
|
||||
for (const config of getExternalHosts()) {
|
||||
expect(config.usesEnvVars).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
test('codex has 1024-char description limit with error behavior', () => {
|
||||
expect(codex.frontmatter.descriptionLimit).toBe(1024);
|
||||
expect(codex.frontmatter.descriptionLimitBehavior).toBe('error');
|
||||
});
|
||||
|
||||
test('codex generates openai.yaml metadata', () => {
|
||||
expect(codex.generation.generateMetadata).toBe(true);
|
||||
expect(codex.generation.metadataFormat).toBe('openai.yaml');
|
||||
});
|
||||
|
||||
test('codex has sidecar config', () => {
|
||||
expect(codex.sidecar).toBeDefined();
|
||||
expect(codex.sidecar!.path).toBe('.agents/skills/gstack');
|
||||
});
|
||||
|
||||
test('factory has tool rewrites', () => {
|
||||
expect(factory.toolRewrites).toBeDefined();
|
||||
expect(Object.keys(factory.toolRewrites!).length).toBeGreaterThan(0);
|
||||
expect(factory.toolRewrites!['use the Bash tool']).toBe('run this command');
|
||||
});
|
||||
|
||||
test('factory has conditional disable-model-invocation field', () => {
|
||||
expect(factory.frontmatter.conditionalFields).toBeDefined();
|
||||
expect(factory.frontmatter.conditionalFields!.length).toBe(1);
|
||||
expect(factory.frontmatter.conditionalFields![0].if).toEqual({ sensitive: true });
|
||||
expect(factory.frontmatter.conditionalFields![0].add).toEqual({ 'disable-model-invocation': true });
|
||||
});
|
||||
|
||||
test('codex has suppressedResolvers for self-invocation prevention', () => {
|
||||
expect(codex.suppressedResolvers).toBeDefined();
|
||||
expect(codex.suppressedResolvers).toContain('CODEX_SECOND_OPINION');
|
||||
expect(codex.suppressedResolvers).toContain('ADVERSARIAL_STEP');
|
||||
expect(codex.suppressedResolvers).toContain('REVIEW_ARMY');
|
||||
});
|
||||
|
||||
test('codex has boundary instruction', () => {
|
||||
expect(codex.boundaryInstruction).toBeDefined();
|
||||
expect(codex.boundaryInstruction).toContain('Do NOT read');
|
||||
});
|
||||
|
||||
test('openclaw has tool rewrites for exec/read/write', () => {
|
||||
expect(openclaw.toolRewrites).toBeDefined();
|
||||
expect(openclaw.toolRewrites!['use the Bash tool']).toBe('use the exec tool');
|
||||
expect(openclaw.toolRewrites!['use the Read tool']).toBe('use the read tool');
|
||||
});
|
||||
|
||||
test('openclaw has CLAUDE.md→AGENTS.md path rewrite', () => {
|
||||
expect(openclaw.pathRewrites.some(r => r.from === 'CLAUDE.md' && r.to === 'AGENTS.md')).toBe(true);
|
||||
});
|
||||
|
||||
test('openclaw has adapter path', () => {
|
||||
expect(openclaw.adapter).toBeDefined();
|
||||
expect(openclaw.adapter).toContain('openclaw-adapter');
|
||||
});
|
||||
|
||||
test('openclaw has no staticFiles (SOUL.md removed)', () => {
|
||||
expect(openclaw.staticFiles).toBeUndefined();
|
||||
});
|
||||
|
||||
test('openclaw includeSkills is empty (native skills replaced generated ones)', () => {
|
||||
expect(openclaw.generation.includeSkills).toBeDefined();
|
||||
expect(openclaw.generation.includeSkills!.length).toBe(0);
|
||||
});
|
||||
|
||||
test('every host has coAuthorTrailer or undefined', () => {
|
||||
// Claude, Codex, Factory, OpenClaw have explicit trailers
|
||||
expect(claude.coAuthorTrailer).toContain('Claude');
|
||||
expect(codex.coAuthorTrailer).toContain('Codex');
|
||||
expect(factory.coAuthorTrailer).toContain('Factory');
|
||||
expect(openclaw.coAuthorTrailer).toContain('OpenClaw');
|
||||
});
|
||||
|
||||
test('every external host skips the codex skill', () => {
|
||||
for (const config of getExternalHosts()) {
|
||||
expect(config.generation.skipSkills).toContain('codex');
|
||||
}
|
||||
});
|
||||
|
||||
test('every host has at least one pathRewrite (except claude)', () => {
|
||||
for (const config of getExternalHosts()) {
|
||||
expect(config.pathRewrites.length).toBeGreaterThan(0);
|
||||
}
|
||||
expect(claude.pathRewrites.length).toBe(0);
|
||||
});
|
||||
|
||||
test('every host has runtimeRoot.globalSymlinks', () => {
|
||||
for (const config of ALL_HOST_CONFIGS) {
|
||||
expect(config.runtimeRoot.globalSymlinks.length).toBeGreaterThan(0);
|
||||
expect(config.runtimeRoot.globalSymlinks).toContain('bin');
|
||||
expect(config.runtimeRoot.globalSymlinks).toContain('ETHOS.md');
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,48 @@
|
||||
import { describe, test, expect } from "bun:test";
|
||||
import { readFileSync } from "fs";
|
||||
import path from "path";
|
||||
|
||||
const SCRIPT = path.join(import.meta.dir, "..", "bin", "gstack-learnings-search");
|
||||
|
||||
describe("gstack-learnings-search injection prevention", () => {
|
||||
const script = readFileSync(SCRIPT, "utf-8");
|
||||
|
||||
test("no shell interpolation inside bun -e string", () => {
|
||||
// Extract the bun -e block (everything between `bun -e "` and the closing `"`)
|
||||
const bunBlock = script.slice(script.indexOf('bun -e "'));
|
||||
|
||||
// Should NOT contain ${VAR} patterns (shell interpolation)
|
||||
// These are RCE vectors: a malicious learnings entry with '; rm -rf / ;' in the
|
||||
// query field would execute arbitrary commands via shell interpolation.
|
||||
const shellInterpolations = bunBlock.match(/'\$\{[A-Z_]+\}'/g) || [];
|
||||
const bareInterpolations = bunBlock.match(/\$\{[A-Z_]+\}/g) || [];
|
||||
|
||||
// Filter out any that are inside process.env references (those are safe)
|
||||
const unsafeInterpolations = [
|
||||
...shellInterpolations,
|
||||
...bareInterpolations,
|
||||
].filter((m) => !m.includes("process.env"));
|
||||
|
||||
expect(unsafeInterpolations).toEqual([]);
|
||||
});
|
||||
|
||||
test("uses process.env for all user-controlled values", () => {
|
||||
const bunBlock = script.slice(script.indexOf('bun -e "'));
|
||||
|
||||
// Must use process.env for TYPE, QUERY, LIMIT, SLUG, CROSS_PROJECT
|
||||
expect(bunBlock).toContain("process.env.GSTACK_SEARCH_TYPE");
|
||||
expect(bunBlock).toContain("process.env.GSTACK_SEARCH_QUERY");
|
||||
expect(bunBlock).toContain("process.env.GSTACK_SEARCH_LIMIT");
|
||||
expect(bunBlock).toContain("process.env.GSTACK_SEARCH_SLUG");
|
||||
expect(bunBlock).toContain("process.env.GSTACK_SEARCH_CROSS");
|
||||
});
|
||||
|
||||
test("env vars are set on the bun command line", () => {
|
||||
// The env vars must be passed to bun, not just set in the shell
|
||||
expect(script).toContain("GSTACK_SEARCH_TYPE=");
|
||||
expect(script).toContain("GSTACK_SEARCH_QUERY=");
|
||||
expect(script).toContain("GSTACK_SEARCH_LIMIT=");
|
||||
expect(script).toContain("GSTACK_SEARCH_SLUG=");
|
||||
expect(script).toContain("GSTACK_SEARCH_CROSS=");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,283 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin');
|
||||
|
||||
let tmpDir: string;
|
||||
let slugDir: string;
|
||||
let learningsFile: string;
|
||||
|
||||
function runLog(input: string, opts: { expectFail?: boolean } = {}): { stdout: string; exitCode: number } {
|
||||
const execOpts: ExecSyncOptionsWithStringEncoding = {
|
||||
cwd: ROOT,
|
||||
env: { ...process.env, GSTACK_HOME: tmpDir },
|
||||
encoding: 'utf-8',
|
||||
timeout: 15000,
|
||||
};
|
||||
try {
|
||||
const stdout = execSync(`${BIN}/gstack-learnings-log '${input.replace(/'/g, "'\\''")}'`, execOpts).trim();
|
||||
return { stdout, exitCode: 0 };
|
||||
} catch (e: any) {
|
||||
if (opts.expectFail) {
|
||||
return { stdout: e.stderr?.toString() || '', exitCode: e.status || 1 };
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
function runSearch(args: string = ''): string {
|
||||
const execOpts: ExecSyncOptionsWithStringEncoding = {
|
||||
cwd: ROOT,
|
||||
env: { ...process.env, GSTACK_HOME: tmpDir },
|
||||
encoding: 'utf-8',
|
||||
timeout: 15000,
|
||||
};
|
||||
try {
|
||||
return execSync(`${BIN}/gstack-learnings-search ${args}`, execOpts).trim();
|
||||
} catch {
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-learn-'));
|
||||
slugDir = path.join(tmpDir, 'projects');
|
||||
fs.mkdirSync(slugDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
function findLearningsFile(): string | null {
|
||||
const projectDirs = fs.readdirSync(slugDir);
|
||||
if (projectDirs.length === 0) return null;
|
||||
const f = path.join(slugDir, projectDirs[0], 'learnings.jsonl');
|
||||
return fs.existsSync(f) ? f : null;
|
||||
}
|
||||
|
||||
describe('gstack-learnings-log', () => {
|
||||
test('appends valid JSON to learnings.jsonl', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"test-key","insight":"test insight","confidence":8,"source":"observed"}';
|
||||
const result = runLog(input);
|
||||
expect(result.exitCode).toBe(0);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const content = fs.readFileSync(f!, 'utf-8').trim();
|
||||
const parsed = JSON.parse(content);
|
||||
expect(parsed.skill).toBe('review');
|
||||
expect(parsed.key).toBe('test-key');
|
||||
expect(parsed.confidence).toBe(8);
|
||||
});
|
||||
|
||||
test('auto-injects timestamp when ts is missing', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"ts-test","insight":"test","confidence":5,"source":"observed"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBeDefined();
|
||||
expect(new Date(parsed.ts).getTime()).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('rejects non-JSON input with non-zero exit code', () => {
|
||||
const result = runLog('not json at all', { expectFail: true });
|
||||
expect(result.exitCode).not.toBe(0);
|
||||
});
|
||||
|
||||
test('append-only: duplicate keys create multiple entries', () => {
|
||||
const input1 = '{"skill":"review","type":"pattern","key":"dup-key","insight":"first version","confidence":6,"source":"observed"}';
|
||||
const input2 = '{"skill":"review","type":"pattern","key":"dup-key","insight":"second version","confidence":8,"source":"observed"}';
|
||||
runLog(input1);
|
||||
runLog(input2);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const lines = fs.readFileSync(f!, 'utf-8').trim().split('\n');
|
||||
expect(lines.length).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-search', () => {
|
||||
test('returns empty and exits 0 when no learnings file exists', () => {
|
||||
const output = runSearch();
|
||||
expect(output).toBe('');
|
||||
});
|
||||
|
||||
test('returns formatted output when learnings exist', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"test-search","insight":"search test insight","confidence":7,"source":"observed"}');
|
||||
const output = runSearch();
|
||||
expect(output).toContain('LEARNINGS:');
|
||||
expect(output).toContain('test-search');
|
||||
expect(output).toContain('search test insight');
|
||||
});
|
||||
|
||||
test('deduplicates entries by key+type (latest wins)', () => {
|
||||
const old = JSON.stringify({ skill: 'review', type: 'pattern', key: 'dedup-test', insight: 'old version', confidence: 5, source: 'observed', ts: '2026-01-01T00:00:00Z' });
|
||||
const newer = JSON.stringify({ skill: 'review', type: 'pattern', key: 'dedup-test', insight: 'new version', confidence: 8, source: 'observed', ts: '2026-03-28T00:00:00Z' });
|
||||
runLog(old);
|
||||
runLog(newer);
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('new version');
|
||||
expect(output).not.toContain('old version');
|
||||
expect(output).toContain('1 loaded');
|
||||
});
|
||||
|
||||
test('filters by --type', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"p1","insight":"a pattern","confidence":7,"source":"observed"}');
|
||||
runLog('{"skill":"review","type":"pitfall","key":"p2","insight":"a pitfall","confidence":7,"source":"observed"}');
|
||||
|
||||
const patternOnly = runSearch('--type pattern');
|
||||
expect(patternOnly).toContain('p1');
|
||||
expect(patternOnly).not.toContain('p2');
|
||||
});
|
||||
|
||||
test('filters by --query', () => {
|
||||
runLog('{"skill":"review","type":"pattern","key":"auth-bypass","insight":"check session tokens","confidence":7,"source":"observed"}');
|
||||
runLog('{"skill":"review","type":"pattern","key":"n-plus-one","insight":"use includes for associations","confidence":7,"source":"observed"}');
|
||||
|
||||
const authOnly = runSearch('--query auth');
|
||||
expect(authOnly).toContain('auth-bypass');
|
||||
expect(authOnly).not.toContain('n-plus-one');
|
||||
});
|
||||
|
||||
test('respects --limit', () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
runLog(`{"skill":"review","type":"pattern","key":"limit-${i}","insight":"insight ${i}","confidence":7,"source":"observed"}`);
|
||||
}
|
||||
|
||||
const limited = runSearch('--limit 2');
|
||||
// Should show 2, not 5
|
||||
expect(limited).toContain('2 loaded');
|
||||
});
|
||||
|
||||
test('applies confidence decay for observed/inferred sources', () => {
|
||||
// Entry from 90 days ago with source=observed, confidence=8
|
||||
// Should decay to 8 - floor(90/30) = 8 - 3 = 5
|
||||
const ts = new Date(Date.now() - 90 * 86400000).toISOString();
|
||||
runLog(`{"skill":"review","type":"pattern","key":"decay-test","insight":"old observation","confidence":8,"source":"observed","ts":"${ts}"}`);
|
||||
|
||||
const output = runSearch();
|
||||
// Should show confidence 5 (decayed from 8)
|
||||
expect(output).toContain('confidence: 5/10');
|
||||
});
|
||||
|
||||
test('does NOT decay user-stated learnings', () => {
|
||||
const ts = new Date(Date.now() - 90 * 86400000).toISOString();
|
||||
runLog(`{"skill":"review","type":"preference","key":"no-decay-test","insight":"user preference","confidence":9,"source":"user-stated","ts":"${ts}"}`);
|
||||
|
||||
const output = runSearch();
|
||||
// Should still show confidence 9 (no decay for user-stated)
|
||||
expect(output).toContain('confidence: 9/10');
|
||||
});
|
||||
|
||||
test('skips malformed JSONL lines gracefully', () => {
|
||||
// Write a valid entry, then manually append a bad line
|
||||
runLog('{"skill":"review","type":"pattern","key":"valid-entry","insight":"valid","confidence":7,"source":"observed"}');
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
fs.appendFileSync(f!, '\nthis is not json\n');
|
||||
fs.appendFileSync(f!, '{"skill":"review","type":"pattern","key":"also-valid","insight":"also valid","confidence":6,"source":"observed","ts":"2026-03-28T00:00:00Z"}\n');
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('valid-entry');
|
||||
expect(output).toContain('also-valid');
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-log edge cases', () => {
|
||||
test('preserves existing timestamp when ts is present', () => {
|
||||
const input = '{"skill":"review","type":"pattern","key":"ts-preserve","insight":"test","confidence":5,"source":"observed","ts":"2025-06-15T10:00:00Z"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBe('2025-06-15T10:00:00Z');
|
||||
});
|
||||
|
||||
test('handles JSON with special characters in insight', () => {
|
||||
const input = JSON.stringify({ skill: 'review', type: 'pattern', key: 'special-chars', insight: 'Use "quotes" and \\backslashes', confidence: 7, source: 'observed' });
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.insight).toContain('quotes');
|
||||
expect(parsed.insight).toContain('backslashes');
|
||||
});
|
||||
|
||||
test('handles JSON with files array field', () => {
|
||||
const input = JSON.stringify({ skill: 'review', type: 'architecture', key: 'with-files', insight: 'test', confidence: 8, source: 'observed', files: ['src/auth.ts', 'src/db.ts'] });
|
||||
runLog(input);
|
||||
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.files).toEqual(['src/auth.ts', 'src/db.ts']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-learnings-search edge cases', () => {
|
||||
test('sorts by confidence then recency', () => {
|
||||
// Two entries: one high confidence old, one lower confidence recent
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'high-conf', insight: 'high confidence entry', confidence: 9, source: 'user-stated', ts: '2026-01-01T00:00:00Z' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'recent', insight: 'recent entry', confidence: 5, source: 'observed', ts: '2026-03-28T00:00:00Z' }));
|
||||
|
||||
const output = runSearch();
|
||||
const highIdx = output.indexOf('high-conf');
|
||||
const recentIdx = output.indexOf('recent');
|
||||
// High confidence should appear first
|
||||
expect(highIdx).toBeLessThan(recentIdx);
|
||||
});
|
||||
|
||||
test('groups output by type', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'p1', insight: 'a pattern', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'pit1', insight: 'a pitfall', confidence: 7, source: 'observed' }));
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('## Patterns');
|
||||
expect(output).toContain('## Pitfalls');
|
||||
});
|
||||
|
||||
test('combined --type and --query filtering', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'auth-token', insight: 'check token expiry', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pitfall', key: 'auth-leak', insight: 'auth token in logs', confidence: 7, source: 'observed' }));
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'cache-key', insight: 'cache invalidation', confidence: 7, source: 'observed' }));
|
||||
|
||||
const output = runSearch('--type pattern --query auth');
|
||||
expect(output).toContain('auth-token');
|
||||
expect(output).not.toContain('auth-leak'); // wrong type
|
||||
expect(output).not.toContain('cache-key'); // wrong query
|
||||
});
|
||||
|
||||
test('entries with missing key or type are skipped', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'valid', insight: 'valid entry', confidence: 7, source: 'observed' }));
|
||||
const f = findLearningsFile();
|
||||
expect(f).not.toBeNull();
|
||||
// Append entries missing key and type
|
||||
fs.appendFileSync(f!, JSON.stringify({ skill: 'review', type: 'pattern', insight: 'no key', confidence: 7, source: 'observed' }) + '\n');
|
||||
fs.appendFileSync(f!, JSON.stringify({ skill: 'review', key: 'no-type', insight: 'no type', confidence: 7, source: 'observed' }) + '\n');
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('valid');
|
||||
expect(output).not.toContain('no key');
|
||||
expect(output).not.toContain('no-type');
|
||||
});
|
||||
|
||||
test('confidence decay floors at 0 (never negative)', () => {
|
||||
// Entry from 1 year ago with confidence 3 — decay would be 12, clamped to 0
|
||||
const ts = new Date(Date.now() - 365 * 86400000).toISOString();
|
||||
runLog(JSON.stringify({ skill: 'review', type: 'pattern', key: 'ancient', insight: 'very old', confidence: 3, source: 'observed', ts }));
|
||||
|
||||
const output = runSearch();
|
||||
expect(output).toContain('confidence: 0/10');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,515 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { execSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin');
|
||||
|
||||
let tmpDir: string;
|
||||
let skillsDir: string;
|
||||
let installDir: string;
|
||||
|
||||
function run(cmd: string, env: Record<string, string> = {}, expectFail = false): string {
|
||||
try {
|
||||
return execSync(cmd, {
|
||||
cwd: ROOT,
|
||||
env: { ...process.env, GSTACK_STATE_DIR: tmpDir, ...env },
|
||||
encoding: 'utf-8',
|
||||
timeout: 10000,
|
||||
stdio: ['pipe', 'pipe', 'pipe'],
|
||||
}).trim();
|
||||
} catch (e: any) {
|
||||
if (expectFail) return (e.stderr || e.stdout || '').toString().trim();
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
// Create a mock gstack install directory with skill subdirs
|
||||
function setupMockInstall(skills: string[]): void {
|
||||
installDir = path.join(tmpDir, 'gstack-install');
|
||||
skillsDir = path.join(tmpDir, 'skills');
|
||||
fs.mkdirSync(installDir, { recursive: true });
|
||||
fs.mkdirSync(skillsDir, { recursive: true });
|
||||
|
||||
// Copy the real gstack-config and gstack-relink to the mock install
|
||||
const mockBin = path.join(installDir, 'bin');
|
||||
fs.mkdirSync(mockBin, { recursive: true });
|
||||
fs.copyFileSync(path.join(BIN, 'gstack-config'), path.join(mockBin, 'gstack-config'));
|
||||
fs.chmodSync(path.join(mockBin, 'gstack-config'), 0o755);
|
||||
if (fs.existsSync(path.join(BIN, 'gstack-relink'))) {
|
||||
fs.copyFileSync(path.join(BIN, 'gstack-relink'), path.join(mockBin, 'gstack-relink'));
|
||||
fs.chmodSync(path.join(mockBin, 'gstack-relink'), 0o755);
|
||||
}
|
||||
if (fs.existsSync(path.join(BIN, 'gstack-patch-names'))) {
|
||||
fs.copyFileSync(path.join(BIN, 'gstack-patch-names'), path.join(mockBin, 'gstack-patch-names'));
|
||||
fs.chmodSync(path.join(mockBin, 'gstack-patch-names'), 0o755);
|
||||
}
|
||||
|
||||
// Create mock skill directories with proper frontmatter
|
||||
for (const skill of skills) {
|
||||
fs.mkdirSync(path.join(installDir, skill), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(installDir, skill, 'SKILL.md'),
|
||||
`---\nname: ${skill}\ndescription: test\n---\n# ${skill}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-relink-test-'));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe('gstack-relink (#578)', () => {
|
||||
// Test 11: prefixed symlinks when skill_prefix=true
|
||||
test('creates gstack-* symlinks when skill_prefix=true', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review']);
|
||||
// Set config to prefix mode (pass install/skills env so auto-relink uses mock install)
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// Run relink with env pointing to the mock install
|
||||
const output = run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// Verify gstack-* symlinks exist
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-qa'))).toBe(true);
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-ship'))).toBe(true);
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-review'))).toBe(true);
|
||||
expect(output).toContain('gstack-');
|
||||
});
|
||||
|
||||
// Test 12: flat symlinks when skill_prefix=false
|
||||
test('creates flat symlinks when skill_prefix=false', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review']);
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
const output = run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
expect(fs.existsSync(path.join(skillsDir, 'qa'))).toBe(true);
|
||||
expect(fs.existsSync(path.join(skillsDir, 'ship'))).toBe(true);
|
||||
expect(fs.existsSync(path.join(skillsDir, 'review'))).toBe(true);
|
||||
expect(output).toContain('flat');
|
||||
});
|
||||
|
||||
// REGRESSION: unprefixed skills must be real directories, not symlinks (#761)
|
||||
// Claude Code auto-prefixes skills nested under a parent dir symlink.
|
||||
// e.g., `qa -> gstack/qa` gets discovered as "gstack-qa", not "qa".
|
||||
// The fix: create real directories with SKILL.md symlinks inside.
|
||||
test('unprefixed skills are real directories with SKILL.md symlinks, not dir symlinks', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review']);
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
for (const skill of ['qa', 'ship', 'review', 'plan-ceo-review']) {
|
||||
const skillPath = path.join(skillsDir, skill);
|
||||
const skillMdPath = path.join(skillPath, 'SKILL.md');
|
||||
// Must be a real directory, NOT a symlink
|
||||
expect(fs.lstatSync(skillPath).isDirectory()).toBe(true);
|
||||
expect(fs.lstatSync(skillPath).isSymbolicLink()).toBe(false);
|
||||
// Must contain a SKILL.md that IS a symlink
|
||||
expect(fs.existsSync(skillMdPath)).toBe(true);
|
||||
expect(fs.lstatSync(skillMdPath).isSymbolicLink()).toBe(true);
|
||||
// The SKILL.md symlink must point to the source skill's SKILL.md
|
||||
const target = fs.readlinkSync(skillMdPath);
|
||||
expect(target).toContain(skill);
|
||||
expect(target).toEndWith('/SKILL.md');
|
||||
}
|
||||
});
|
||||
|
||||
// Same invariant for prefixed mode
|
||||
test('prefixed skills are real directories with SKILL.md symlinks, not dir symlinks', () => {
|
||||
setupMockInstall(['qa', 'ship']);
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
for (const skill of ['gstack-qa', 'gstack-ship']) {
|
||||
const skillPath = path.join(skillsDir, skill);
|
||||
const skillMdPath = path.join(skillPath, 'SKILL.md');
|
||||
expect(fs.lstatSync(skillPath).isDirectory()).toBe(true);
|
||||
expect(fs.lstatSync(skillPath).isSymbolicLink()).toBe(false);
|
||||
expect(fs.lstatSync(skillMdPath).isSymbolicLink()).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
// Upgrade: old directory symlinks get replaced with real directories
|
||||
test('upgrades old directory symlinks to real directories', () => {
|
||||
setupMockInstall(['qa', 'ship']);
|
||||
// Simulate old behavior: create directory symlinks (the old pattern)
|
||||
fs.symlinkSync(path.join(installDir, 'qa'), path.join(skillsDir, 'qa'));
|
||||
fs.symlinkSync(path.join(installDir, 'ship'), path.join(skillsDir, 'ship'));
|
||||
// Verify they start as symlinks
|
||||
expect(fs.lstatSync(path.join(skillsDir, 'qa')).isSymbolicLink()).toBe(true);
|
||||
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
|
||||
// After relink: must be real directories, not symlinks
|
||||
expect(fs.lstatSync(path.join(skillsDir, 'qa')).isSymbolicLink()).toBe(false);
|
||||
expect(fs.lstatSync(path.join(skillsDir, 'qa')).isDirectory()).toBe(true);
|
||||
expect(fs.lstatSync(path.join(skillsDir, 'qa', 'SKILL.md')).isSymbolicLink()).toBe(true);
|
||||
});
|
||||
|
||||
// FIRST INSTALL: --no-prefix must create ONLY flat names, zero gstack-* pollution
|
||||
test('first install --no-prefix: only flat names exist, zero gstack-* entries', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review', 'gstack-upgrade']);
|
||||
// Simulate first install: no saved config, pass --no-prefix equivalent
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// Enumerate everything in skills dir
|
||||
const entries = fs.readdirSync(skillsDir);
|
||||
// Expected: qa, ship, review, plan-ceo-review, gstack-upgrade (its real name)
|
||||
expect(entries.sort()).toEqual(['gstack-upgrade', 'plan-ceo-review', 'qa', 'review', 'ship']);
|
||||
// No gstack-qa, gstack-ship, gstack-review, gstack-plan-ceo-review
|
||||
const leaked = entries.filter(e => e.startsWith('gstack-') && e !== 'gstack-upgrade');
|
||||
expect(leaked).toEqual([]);
|
||||
});
|
||||
|
||||
// FIRST INSTALL: --prefix must create ONLY gstack-* names, zero flat-name pollution
|
||||
test('first install --prefix: only gstack-* entries exist, zero flat names', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review', 'gstack-upgrade']);
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
const entries = fs.readdirSync(skillsDir);
|
||||
// Expected: gstack-qa, gstack-ship, gstack-review, gstack-plan-ceo-review, gstack-upgrade
|
||||
expect(entries.sort()).toEqual([
|
||||
'gstack-plan-ceo-review', 'gstack-qa', 'gstack-review', 'gstack-ship', 'gstack-upgrade',
|
||||
]);
|
||||
// No unprefixed qa, ship, review, plan-ceo-review
|
||||
const leaked = entries.filter(e => !e.startsWith('gstack-'));
|
||||
expect(leaked).toEqual([]);
|
||||
});
|
||||
|
||||
// FIRST INSTALL: non-TTY (no saved config, piped stdin) defaults to flat names
|
||||
test('non-TTY first install defaults to flat names via relink', () => {
|
||||
setupMockInstall(['qa', 'ship']);
|
||||
// Don't set any config — simulate fresh install
|
||||
// gstack-relink reads config; on fresh install config returns empty → defaults to false
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
const entries = fs.readdirSync(skillsDir);
|
||||
// Should be flat names (relink defaults to false when config returns empty)
|
||||
expect(entries.sort()).toEqual(['qa', 'ship']);
|
||||
});
|
||||
|
||||
// SWITCH: prefix → no-prefix must clean up ALL gstack-* entries
|
||||
test('switching prefix to no-prefix removes all gstack-* entries completely', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review', 'plan-ceo-review', 'gstack-upgrade']);
|
||||
// Start in prefix mode
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
let entries = fs.readdirSync(skillsDir);
|
||||
expect(entries.filter(e => !e.startsWith('gstack-'))).toEqual([]);
|
||||
|
||||
// Switch to no-prefix
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
entries = fs.readdirSync(skillsDir);
|
||||
// Only flat names + gstack-upgrade (its real name)
|
||||
expect(entries.sort()).toEqual(['gstack-upgrade', 'plan-ceo-review', 'qa', 'review', 'ship']);
|
||||
const leaked = entries.filter(e => e.startsWith('gstack-') && e !== 'gstack-upgrade');
|
||||
expect(leaked).toEqual([]);
|
||||
});
|
||||
|
||||
// SWITCH: no-prefix → prefix must clean up ALL flat entries
|
||||
test('switching no-prefix to prefix removes all flat entries completely', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review', 'gstack-upgrade']);
|
||||
// Start in no-prefix mode
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
let entries = fs.readdirSync(skillsDir);
|
||||
expect(entries.filter(e => e.startsWith('gstack-') && e !== 'gstack-upgrade')).toEqual([]);
|
||||
|
||||
// Switch to prefix
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
entries = fs.readdirSync(skillsDir);
|
||||
// Only gstack-* names
|
||||
expect(entries.sort()).toEqual([
|
||||
'gstack-qa', 'gstack-review', 'gstack-ship', 'gstack-upgrade',
|
||||
]);
|
||||
const leaked = entries.filter(e => !e.startsWith('gstack-'));
|
||||
expect(leaked).toEqual([]);
|
||||
});
|
||||
|
||||
// Test 13: cleans stale symlinks from opposite mode
|
||||
test('cleans up stale symlinks from opposite mode', () => {
|
||||
setupMockInstall(['qa', 'ship']);
|
||||
// Create prefixed symlinks first
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-qa'))).toBe(true);
|
||||
|
||||
// Switch to flat mode
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
|
||||
// Flat symlinks should exist, prefixed should be gone
|
||||
expect(fs.existsSync(path.join(skillsDir, 'qa'))).toBe(true);
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-qa'))).toBe(false);
|
||||
});
|
||||
|
||||
// Test 14: error when install dir missing
|
||||
test('prints error when install dir missing', () => {
|
||||
const output = run(`${BIN}/gstack-relink`, {
|
||||
GSTACK_INSTALL_DIR: '/nonexistent/path/gstack',
|
||||
GSTACK_SKILLS_DIR: '/nonexistent/path/skills',
|
||||
}, true);
|
||||
expect(output).toContain('setup');
|
||||
});
|
||||
|
||||
// Test: gstack-upgrade does NOT get double-prefixed
|
||||
test('does not double-prefix gstack-upgrade directory', () => {
|
||||
setupMockInstall(['qa', 'ship', 'gstack-upgrade']);
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// gstack-upgrade should keep its name, NOT become gstack-gstack-upgrade
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-upgrade'))).toBe(true);
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-gstack-upgrade'))).toBe(false);
|
||||
// Regular skills still get prefixed
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-qa'))).toBe(true);
|
||||
});
|
||||
|
||||
// Test 15: gstack-config set skill_prefix triggers relink
|
||||
test('gstack-config set skill_prefix triggers relink', () => {
|
||||
setupMockInstall(['qa', 'ship']);
|
||||
// Run gstack-config set which should auto-trigger relink
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// If relink was triggered, symlinks should exist
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-qa'))).toBe(true);
|
||||
expect(fs.existsSync(path.join(skillsDir, 'gstack-ship'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('upgrade migrations', () => {
|
||||
const MIGRATIONS_DIR = path.join(ROOT, 'gstack-upgrade', 'migrations');
|
||||
|
||||
test('migrations directory exists', () => {
|
||||
expect(fs.existsSync(MIGRATIONS_DIR)).toBe(true);
|
||||
});
|
||||
|
||||
test('all migration scripts are executable and parse without syntax errors', () => {
|
||||
const scripts = fs.readdirSync(MIGRATIONS_DIR).filter(f => f.endsWith('.sh'));
|
||||
expect(scripts.length).toBeGreaterThan(0);
|
||||
for (const script of scripts) {
|
||||
const fullPath = path.join(MIGRATIONS_DIR, script);
|
||||
// Must be executable
|
||||
const stat = fs.statSync(fullPath);
|
||||
expect(stat.mode & 0o111).toBeGreaterThan(0);
|
||||
// Must parse without syntax errors (bash -n is a syntax check, doesn't execute)
|
||||
const result = execSync(`bash -n "${fullPath}" 2>&1`, { encoding: 'utf-8', timeout: 5000 });
|
||||
// bash -n outputs nothing on success
|
||||
}
|
||||
});
|
||||
|
||||
test('migration filenames follow v{VERSION}.sh pattern', () => {
|
||||
const scripts = fs.readdirSync(MIGRATIONS_DIR).filter(f => f.endsWith('.sh'));
|
||||
for (const script of scripts) {
|
||||
expect(script).toMatch(/^v\d+\.\d+\.\d+\.\d+\.sh$/);
|
||||
}
|
||||
});
|
||||
|
||||
test('v0.15.2.0 migration runs gstack-relink', () => {
|
||||
const content = fs.readFileSync(path.join(MIGRATIONS_DIR, 'v0.15.2.0.sh'), 'utf-8');
|
||||
expect(content).toContain('gstack-relink');
|
||||
});
|
||||
|
||||
test('v0.15.2.0 migration fixes stale directory symlinks', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review']);
|
||||
// Simulate old state: directory symlinks (pre-v0.15.2.0 pattern)
|
||||
fs.symlinkSync(path.join(installDir, 'qa'), path.join(skillsDir, 'qa'));
|
||||
fs.symlinkSync(path.join(installDir, 'ship'), path.join(skillsDir, 'ship'));
|
||||
fs.symlinkSync(path.join(installDir, 'review'), path.join(skillsDir, 'review'));
|
||||
// Set no-prefix mode (suppress auto-relink so symlinks stay intact for the test)
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_SETUP_RUNNING: '1',
|
||||
});
|
||||
// Verify old state: symlinks
|
||||
expect(fs.lstatSync(path.join(skillsDir, 'qa')).isSymbolicLink()).toBe(true);
|
||||
|
||||
// Run the migration (it calls gstack-relink internally)
|
||||
run(`bash ${path.join(MIGRATIONS_DIR, 'v0.15.2.0.sh')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
|
||||
// After migration: real directories with SKILL.md symlinks
|
||||
for (const skill of ['qa', 'ship', 'review']) {
|
||||
const skillPath = path.join(skillsDir, skill);
|
||||
expect(fs.lstatSync(skillPath).isSymbolicLink()).toBe(false);
|
||||
expect(fs.lstatSync(skillPath).isDirectory()).toBe(true);
|
||||
expect(fs.lstatSync(path.join(skillPath, 'SKILL.md')).isSymbolicLink()).toBe(true);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-patch-names (#620/#578)', () => {
|
||||
// Helper to read name: from SKILL.md frontmatter
|
||||
function readSkillName(skillDir: string): string | null {
|
||||
const content = fs.readFileSync(path.join(skillDir, 'SKILL.md'), 'utf-8');
|
||||
const match = content.match(/^name:\s*(.+)$/m);
|
||||
return match ? match[1].trim() : null;
|
||||
}
|
||||
|
||||
test('prefix=true patches name: field in SKILL.md', () => {
|
||||
setupMockInstall(['qa', 'ship', 'review']);
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// Verify name: field is patched with gstack- prefix
|
||||
expect(readSkillName(path.join(installDir, 'qa'))).toBe('gstack-qa');
|
||||
expect(readSkillName(path.join(installDir, 'ship'))).toBe('gstack-ship');
|
||||
expect(readSkillName(path.join(installDir, 'review'))).toBe('gstack-review');
|
||||
});
|
||||
|
||||
test('prefix=false restores name: field in SKILL.md', () => {
|
||||
setupMockInstall(['qa', 'ship']);
|
||||
// First, prefix them
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
expect(readSkillName(path.join(installDir, 'qa'))).toBe('gstack-qa');
|
||||
// Now switch to flat mode
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix false`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// Verify name: field is restored to unprefixed
|
||||
expect(readSkillName(path.join(installDir, 'qa'))).toBe('qa');
|
||||
expect(readSkillName(path.join(installDir, 'ship'))).toBe('ship');
|
||||
});
|
||||
|
||||
test('gstack-upgrade name: not double-prefixed', () => {
|
||||
setupMockInstall(['qa', 'gstack-upgrade']);
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// gstack-upgrade should keep its name, NOT become gstack-gstack-upgrade
|
||||
expect(readSkillName(path.join(installDir, 'gstack-upgrade'))).toBe('gstack-upgrade');
|
||||
// Regular skill should be prefixed
|
||||
expect(readSkillName(path.join(installDir, 'qa'))).toBe('gstack-qa');
|
||||
});
|
||||
|
||||
test('SKILL.md without frontmatter is a no-op', () => {
|
||||
setupMockInstall(['qa']);
|
||||
// Overwrite qa SKILL.md with no frontmatter
|
||||
fs.writeFileSync(path.join(installDir, 'qa', 'SKILL.md'), '# qa\nSome content.');
|
||||
run(`${path.join(installDir, 'bin', 'gstack-config')} set skill_prefix true`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// Should not crash
|
||||
run(`${path.join(installDir, 'bin', 'gstack-relink')}`, {
|
||||
GSTACK_INSTALL_DIR: installDir,
|
||||
GSTACK_SKILLS_DIR: skillsDir,
|
||||
});
|
||||
// Content should be unchanged (no name: to patch)
|
||||
const content = fs.readFileSync(path.join(installDir, 'qa', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toBe('# qa\nSome content.');
|
||||
});
|
||||
});
|
||||
+78
-30
@@ -20,6 +20,7 @@ let tmpDir: string;
|
||||
describeIfSelected('Skill E2E tests', [
|
||||
'browse-basic', 'browse-snapshot', 'skillmd-setup-discovery',
|
||||
'skillmd-no-local-binary', 'skillmd-outside-git', 'session-awareness',
|
||||
'operational-learning',
|
||||
], () => {
|
||||
beforeAll(() => {
|
||||
testServer = startTestServer();
|
||||
@@ -177,49 +178,96 @@ Report the exact output — either "READY: <path>" or "NEEDS_SETUP".`,
|
||||
try { fs.rmSync(nonGitDir, { recursive: true, force: true }); } catch {}
|
||||
}, 60_000);
|
||||
|
||||
testConcurrentIfSelected('contributor-mode', async () => {
|
||||
const contribDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-contrib-'));
|
||||
const logsDir = path.join(contribDir, 'contributor-logs');
|
||||
fs.mkdirSync(logsDir, { recursive: true });
|
||||
testConcurrentIfSelected('operational-learning', async () => {
|
||||
const opDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-oplearn-'));
|
||||
const gstackHome = path.join(opDir, '.gstack-home');
|
||||
|
||||
// Init git repo
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: opDir, stdio: 'pipe', timeout: 5000 });
|
||||
run('git', ['init', '-b', 'main']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
fs.writeFileSync(path.join(opDir, 'app.ts'), 'console.log("hello");\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Copy bin scripts
|
||||
const binDir = path.join(opDir, 'bin');
|
||||
fs.mkdirSync(binDir, { recursive: true });
|
||||
for (const script of ['gstack-learnings-log', 'gstack-slug']) {
|
||||
fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script));
|
||||
fs.chmodSync(path.join(binDir, script), 0o755);
|
||||
}
|
||||
|
||||
// gstack-learnings-log will create the project dir automatically via gstack-slug
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in contributor mode (gstack_contributor=true). You just ran this browse command and it failed:
|
||||
prompt: `You just ran \`npm test\` in this project and it failed with this error:
|
||||
|
||||
$ /nonexistent/browse goto https://example.com
|
||||
/nonexistent/browse: No such file or directory
|
||||
Error: --experimental-vm-modules flag is required for ESM support in this project.
|
||||
Run: npm test --experimental-vm-modules
|
||||
|
||||
Per the contributor mode instructions, file a field report to ${logsDir}/browse-missing-binary.md using the Write tool. Include all required sections: title, what you tried, what happened, rating, repro steps, raw output, what would make it a 10, and the date/version footer.`,
|
||||
workingDirectory: contribDir,
|
||||
Per the Operational Self-Improvement instructions below, log an operational learning about this failure.
|
||||
|
||||
## Operational Self-Improvement
|
||||
|
||||
Before completing, reflect on this session:
|
||||
- Did any commands fail unexpectedly?
|
||||
|
||||
If yes, log an operational learning for future sessions:
|
||||
|
||||
\`\`\`bash
|
||||
GSTACK_HOME="${gstackHome}" ${binDir}/gstack-learnings-log '{"skill":"qa","type":"operational","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"observed"}'
|
||||
\`\`\`
|
||||
|
||||
Replace SHORT_KEY with a kebab-case key like "esm-vm-modules-flag".
|
||||
Replace DESCRIPTION with a one-sentence description of what you learned.
|
||||
Replace N with a confidence score 1-10.
|
||||
|
||||
Log the operational learning now. Then say what you logged.`,
|
||||
workingDirectory: opDir,
|
||||
maxTurns: 5,
|
||||
timeout: 30_000,
|
||||
testName: 'contributor-mode',
|
||||
testName: 'operational-learning',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('contributor mode', result);
|
||||
// Override passed: this test intentionally triggers a browse error (nonexistent binary)
|
||||
// so browseErrors will be non-empty — that's expected, not a failure
|
||||
recordE2E(evalCollector, 'contributor mode report', 'Skill E2E tests', result, {
|
||||
passed: result.exitReason === 'success',
|
||||
logCost('operational learning', result);
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
// Check if learnings file was created with an operational entry
|
||||
// The slug is derived from the git repo (dirname), so search all project dirs
|
||||
let hasOperational = false;
|
||||
const projectsDir = path.join(gstackHome, 'projects');
|
||||
if (fs.existsSync(projectsDir)) {
|
||||
for (const slug of fs.readdirSync(projectsDir)) {
|
||||
const lPath = path.join(projectsDir, slug, 'learnings.jsonl');
|
||||
if (fs.existsSync(lPath)) {
|
||||
const jsonl = fs.readFileSync(lPath, 'utf-8').trim();
|
||||
if (jsonl) {
|
||||
const entries = jsonl.split('\n').map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
|
||||
const opEntry = entries.find(e => e.type === 'operational');
|
||||
if (opEntry) {
|
||||
hasOperational = true;
|
||||
console.log(`Operational learning logged: key="${opEntry.key}" insight="${opEntry.insight}" (slug: ${slug})`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recordE2E(evalCollector, 'operational learning', 'Skill E2E tests', result, {
|
||||
passed: exitOk && hasOperational,
|
||||
});
|
||||
|
||||
// Verify a contributor log was created with expected format
|
||||
const logFiles = fs.readdirSync(logsDir).filter(f => f.endsWith('.md'));
|
||||
expect(logFiles.length).toBeGreaterThan(0);
|
||||
|
||||
// Verify report has key structural sections (agent may phrase differently)
|
||||
const logContent = fs.readFileSync(path.join(logsDir, logFiles[0]), 'utf-8');
|
||||
// Must have a title (# heading)
|
||||
expect(logContent).toMatch(/^#\s/m);
|
||||
// Must mention the failed command or browse
|
||||
expect(logContent).toMatch(/browse|nonexistent|not found|no such file/i);
|
||||
// Must have some kind of rating
|
||||
expect(logContent).toMatch(/rating|\/10/i);
|
||||
// Must have steps or reproduction info
|
||||
expect(logContent).toMatch(/step|repro|reproduce/i);
|
||||
expect(exitOk).toBe(true);
|
||||
expect(hasOperational).toBe(true);
|
||||
|
||||
// Clean up
|
||||
try { fs.rmSync(contribDir, { recursive: true, force: true }); } catch {}
|
||||
try { fs.rmSync(opDir, { recursive: true, force: true }); } catch {}
|
||||
}, 90_000);
|
||||
|
||||
testConcurrentIfSelected('session-awareness', async () => {
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
copyDirSync, logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-learnings');
|
||||
|
||||
// --- Learnings E2E: seed learnings, run /learn, verify output ---
|
||||
|
||||
describeIfSelected('Learnings E2E', ['learnings-show'], () => {
|
||||
let workDir: string;
|
||||
let gstackHome: string;
|
||||
|
||||
beforeAll(() => {
|
||||
workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-learnings-'));
|
||||
gstackHome = path.join(workDir, '.gstack-home');
|
||||
|
||||
// Init git repo
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
||||
run('git', ['init', '-b', 'main']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Copy the /learn skill
|
||||
copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn'));
|
||||
|
||||
// Copy bin scripts needed by /learn
|
||||
const binDir = path.join(workDir, 'bin');
|
||||
fs.mkdirSync(binDir, { recursive: true });
|
||||
for (const script of ['gstack-learnings-search', 'gstack-learnings-log', 'gstack-slug']) {
|
||||
fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script));
|
||||
fs.chmodSync(path.join(binDir, script), 0o755);
|
||||
}
|
||||
|
||||
// Seed learnings JSONL — slug must match what gstack-slug computes.
|
||||
// With no git remote, gstack-slug falls back to basename(workDir).
|
||||
const slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
|
||||
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||
fs.mkdirSync(projectDir, { recursive: true });
|
||||
|
||||
const learnings = [
|
||||
{
|
||||
skill: 'review', type: 'pattern', key: 'n-plus-one-queries',
|
||||
insight: 'ActiveRecord associations in loops cause N+1 queries. Always use includes/preload.',
|
||||
confidence: 9, source: 'observed', ts: new Date().toISOString(),
|
||||
files: ['app/models/user.rb'],
|
||||
},
|
||||
{
|
||||
skill: 'investigate', type: 'pitfall', key: 'stale-cache-after-deploy',
|
||||
insight: 'Redis cache not invalidated on deploy causes stale data for 5 minutes.',
|
||||
confidence: 7, source: 'observed', ts: new Date().toISOString(),
|
||||
files: ['config/redis.yml'],
|
||||
},
|
||||
{
|
||||
skill: 'ship', type: 'preference', key: 'always-run-rubocop',
|
||||
insight: 'User wants rubocop to run before every commit, no exceptions.',
|
||||
confidence: 10, source: 'user-stated', ts: new Date().toISOString(),
|
||||
},
|
||||
{
|
||||
skill: 'qa', type: 'operational', key: 'test-timeout-flag',
|
||||
insight: 'bun test requires --timeout 30000 for E2E tests in this project.',
|
||||
confidence: 9, source: 'observed', ts: new Date().toISOString(),
|
||||
},
|
||||
];
|
||||
|
||||
fs.writeFileSync(
|
||||
path.join(projectDir, 'learnings.jsonl'),
|
||||
learnings.map(l => JSON.stringify(l)).join('\n') + '\n',
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
finalizeEvalCollector(evalCollector);
|
||||
});
|
||||
|
||||
testConcurrentIfSelected('learnings-show', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read the file learn/SKILL.md for the /learn skill instructions.
|
||||
|
||||
Run the /learn command (no arguments — show recent learnings).
|
||||
|
||||
IMPORTANT:
|
||||
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
||||
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
||||
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
|
||||
- Replace any references to ~/.claude/skills/gstack/bin/gstack-slug with ./bin/gstack-slug.
|
||||
- Do NOT use AskUserQuestion.
|
||||
- Do NOT implement code changes.
|
||||
- Just show the learnings and summarize what you found.`,
|
||||
workingDirectory: workDir,
|
||||
maxTurns: 15,
|
||||
allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'learnings-show',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/learn show', result);
|
||||
|
||||
const output = result.output.toLowerCase();
|
||||
|
||||
// The agent should have found and displayed the seeded learnings
|
||||
const mentionsNPlusOne = output.includes('n-plus-one') || output.includes('n+1');
|
||||
const mentionsCache = output.includes('stale') || output.includes('cache');
|
||||
const mentionsRubocop = output.includes('rubocop');
|
||||
|
||||
// At least 2 of 3 learnings should appear in the output
|
||||
const foundCount = [mentionsNPlusOne, mentionsCache, mentionsRubocop].filter(Boolean).length;
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, '/learn', 'Learnings show E2E', result, {
|
||||
passed: exitOk && foundCount >= 2,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(foundCount).toBeGreaterThanOrEqual(2);
|
||||
|
||||
if (foundCount === 3) {
|
||||
console.log('All 3 seeded learnings found in output');
|
||||
} else {
|
||||
console.warn(`Only ${foundCount}/3 learnings found (N+1: ${mentionsNPlusOne}, cache: ${mentionsCache}, rubocop: ${mentionsRubocop})`);
|
||||
}
|
||||
}, 180_000);
|
||||
});
|
||||
@@ -0,0 +1,562 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, describeIfSelected, testConcurrentIfSelected,
|
||||
logCost, recordE2E, createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-review-army');
|
||||
|
||||
// Helper: create a git repo with a feature branch
|
||||
function setupRepo(prefix: string): { dir: string; run: (cmd: string, args: string[]) => void } {
|
||||
const dir = fs.mkdtempSync(path.join(os.tmpdir(), `skill-e2e-${prefix}-`));
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: dir, stdio: 'pipe', timeout: 5000 });
|
||||
run('git', ['init', '-b', 'main']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
return { dir, run };
|
||||
}
|
||||
|
||||
// Helper: copy review skill files to test dir
|
||||
function copyReviewFiles(dir: string) {
|
||||
fs.copyFileSync(path.join(ROOT, 'review', 'SKILL.md'), path.join(dir, 'review-SKILL.md'));
|
||||
fs.copyFileSync(path.join(ROOT, 'review', 'checklist.md'), path.join(dir, 'review-checklist.md'));
|
||||
fs.copyFileSync(path.join(ROOT, 'review', 'greptile-triage.md'), path.join(dir, 'review-greptile-triage.md'));
|
||||
// Copy specialist checklists
|
||||
const specDir = path.join(dir, 'review-specialists');
|
||||
fs.mkdirSync(specDir, { recursive: true });
|
||||
const specialistsRoot = path.join(ROOT, 'review', 'specialists');
|
||||
for (const f of fs.readdirSync(specialistsRoot)) {
|
||||
fs.copyFileSync(path.join(specialistsRoot, f), path.join(specDir, f));
|
||||
}
|
||||
}
|
||||
|
||||
// --- Review Army: Migration Safety ---
|
||||
|
||||
describeIfSelected('Review Army: Migration Safety', ['review-army-migration-safety'], () => {
|
||||
let dir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
const repo = setupRepo('army-migration');
|
||||
dir = repo.dir;
|
||||
|
||||
// Base commit
|
||||
fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Feature branch with unsafe migration
|
||||
repo.run('git', ['checkout', '-b', 'feature/drop-columns']);
|
||||
fs.mkdirSync(path.join(dir, 'db', 'migrate'), { recursive: true });
|
||||
const migrationContent = fs.readFileSync(
|
||||
path.join(ROOT, 'test', 'fixtures', 'review-army-migration.sql'), 'utf-8'
|
||||
);
|
||||
fs.writeFileSync(path.join(dir, 'db', 'migrate', '20260330_drop_columns.sql'), migrationContent);
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'drop email and phone columns']);
|
||||
|
||||
copyReviewFiles(dir);
|
||||
});
|
||||
|
||||
afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
|
||||
|
||||
testConcurrentIfSelected('review-army-migration-safety', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on a feature branch with a database migration that drops columns.
|
||||
Read review-SKILL.md for instructions. Also read review-checklist.md.
|
||||
The specialist checklists are in review-specialists/ (testing.md, security.md, performance.md, data-migration.md, etc.).
|
||||
|
||||
Skip the preamble, lake intro, telemetry sections.
|
||||
Run Step 4 (Critical pass) then Step 4.5 (Review Army — Specialist Dispatch).
|
||||
The base branch is main. Run gstack-diff-scope style analysis on the changed files.
|
||||
Since db/migrate/ files changed, the Data Migration specialist should activate.
|
||||
|
||||
For the specialist dispatch, instead of launching subagents, just read review-specialists/data-migration.md
|
||||
and apply it yourself against the diff (git diff main...HEAD).
|
||||
|
||||
Write your findings to ${dir}/review-output.md`,
|
||||
workingDirectory: dir,
|
||||
maxTurns: 20,
|
||||
timeout: 180_000,
|
||||
testName: 'review-army-migration-safety',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/review army migration', result);
|
||||
recordE2E(evalCollector, '/review army migration safety', 'Review Army', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
// Verify migration issues were caught
|
||||
const outputPath = path.join(dir, 'review-output.md');
|
||||
if (fs.existsSync(outputPath)) {
|
||||
const content = fs.readFileSync(outputPath, 'utf-8').toLowerCase();
|
||||
const hasMigrationFinding =
|
||||
content.includes('drop') ||
|
||||
content.includes('data loss') ||
|
||||
content.includes('reversib') ||
|
||||
content.includes('migration') ||
|
||||
content.includes('column');
|
||||
expect(hasMigrationFinding).toBe(true);
|
||||
}
|
||||
}, 210_000);
|
||||
});
|
||||
|
||||
// --- Review Army: N+1 Performance ---
|
||||
|
||||
describeIfSelected('Review Army: N+1 Performance', ['review-army-perf-n-plus-one'], () => {
|
||||
let dir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
const repo = setupRepo('army-n-plus-one');
|
||||
dir = repo.dir;
|
||||
|
||||
fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
repo.run('git', ['checkout', '-b', 'feature/add-posts-index']);
|
||||
const n1Content = fs.readFileSync(
|
||||
path.join(ROOT, 'test', 'fixtures', 'review-army-n-plus-one.rb'), 'utf-8'
|
||||
);
|
||||
fs.writeFileSync(path.join(dir, 'posts_controller.rb'), n1Content);
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'add posts controller']);
|
||||
|
||||
copyReviewFiles(dir);
|
||||
});
|
||||
|
||||
afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
|
||||
|
||||
testConcurrentIfSelected('review-army-perf-n-plus-one', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on a feature branch with a Ruby controller that has N+1 queries.
|
||||
Read review-SKILL.md for instructions. Also read review-checklist.md.
|
||||
The specialist checklists are in review-specialists/ (testing.md, performance.md, etc.).
|
||||
|
||||
Skip the preamble, lake intro, telemetry sections.
|
||||
Run Step 4 (Critical pass) then Step 4.5 (Review Army).
|
||||
The base branch is main. This is a Ruby backend file, so Performance specialist should activate.
|
||||
|
||||
For the specialist dispatch, read review-specialists/performance.md and apply it against the diff.
|
||||
|
||||
Write your findings to ${dir}/review-output.md`,
|
||||
workingDirectory: dir,
|
||||
maxTurns: 20,
|
||||
timeout: 180_000,
|
||||
testName: 'review-army-perf-n-plus-one',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/review army n+1', result);
|
||||
recordE2E(evalCollector, '/review army N+1 detection', 'Review Army', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
const outputPath = path.join(dir, 'review-output.md');
|
||||
if (fs.existsSync(outputPath)) {
|
||||
const content = fs.readFileSync(outputPath, 'utf-8').toLowerCase();
|
||||
const hasN1Finding =
|
||||
content.includes('n+1') ||
|
||||
content.includes('n + 1') ||
|
||||
content.includes('eager') ||
|
||||
content.includes('includes') ||
|
||||
content.includes('preload') ||
|
||||
content.includes('query') ||
|
||||
content.includes('loop');
|
||||
expect(hasN1Finding).toBe(true);
|
||||
}
|
||||
}, 210_000);
|
||||
});
|
||||
|
||||
// --- Review Army: Delivery Audit ---
|
||||
|
||||
describeIfSelected('Review Army: Delivery Audit', ['review-army-delivery-audit'], () => {
|
||||
let dir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
const repo = setupRepo('army-delivery');
|
||||
dir = repo.dir;
|
||||
|
||||
fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
repo.run('git', ['checkout', '-b', 'feature/three-features']);
|
||||
|
||||
// Write a plan file promising 3 features
|
||||
fs.writeFileSync(path.join(dir, 'PLAN.md'), `# Feature Plan
|
||||
|
||||
## Implementation Items
|
||||
1. Add user authentication with login/logout
|
||||
2. Add user profile page with avatar upload
|
||||
3. Add email notification system for new signups
|
||||
|
||||
## Test Items
|
||||
- Test login flow
|
||||
- Test profile page rendering
|
||||
- Test email sending
|
||||
`);
|
||||
repo.run('git', ['add', 'PLAN.md']);
|
||||
repo.run('git', ['commit', '-m', 'add plan']);
|
||||
|
||||
// Implement only 2 of 3 features
|
||||
fs.writeFileSync(path.join(dir, 'auth.rb'), `class AuthController
|
||||
def login
|
||||
# authenticate user
|
||||
session[:user_id] = user.id
|
||||
end
|
||||
|
||||
def logout
|
||||
session.delete(:user_id)
|
||||
end
|
||||
end
|
||||
`);
|
||||
fs.writeFileSync(path.join(dir, 'profile.rb'), `class ProfileController
|
||||
def show
|
||||
@user = User.find(params[:id])
|
||||
end
|
||||
|
||||
def update_avatar
|
||||
@user.avatar.attach(params[:avatar])
|
||||
end
|
||||
end
|
||||
`);
|
||||
// NOTE: email notification system is NOT implemented (intentionally missing)
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'implement auth and profile features']);
|
||||
|
||||
copyReviewFiles(dir);
|
||||
});
|
||||
|
||||
afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
|
||||
|
||||
testConcurrentIfSelected('review-army-delivery-audit', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on branch feature/three-features.
|
||||
There is a PLAN.md file that promises 3 features: auth, profile, and email notifications.
|
||||
The diff (git diff main...HEAD) only implements 2 of them (auth and profile).
|
||||
|
||||
Read review-SKILL.md for the review workflow. Focus on the Plan Completion Audit section.
|
||||
The plan file is at ./PLAN.md. Cross-reference it against the diff.
|
||||
|
||||
For each plan item, classify as DONE, PARTIAL, NOT DONE, or CHANGED.
|
||||
The email notification system should be classified as NOT DONE.
|
||||
|
||||
Write your completion audit to ${dir}/review-output.md`,
|
||||
workingDirectory: dir,
|
||||
maxTurns: 15,
|
||||
timeout: 120_000,
|
||||
testName: 'review-army-delivery-audit',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/review army delivery', result);
|
||||
recordE2E(evalCollector, '/review army delivery audit', 'Review Army', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
const outputPath = path.join(dir, 'review-output.md');
|
||||
if (fs.existsSync(outputPath)) {
|
||||
const content = fs.readFileSync(outputPath, 'utf-8').toLowerCase();
|
||||
// Should identify email notifications as NOT DONE
|
||||
const hasNotDone =
|
||||
content.includes('not done') ||
|
||||
content.includes('not_done') ||
|
||||
content.includes('missing') ||
|
||||
content.includes('not implemented');
|
||||
const mentionsEmail =
|
||||
content.includes('email') ||
|
||||
content.includes('notification');
|
||||
expect(hasNotDone).toBe(true);
|
||||
expect(mentionsEmail).toBe(true);
|
||||
}
|
||||
}, 150_000);
|
||||
});
|
||||
|
||||
// --- Review Army: Quality Score ---
|
||||
|
||||
describeIfSelected('Review Army: Quality Score', ['review-army-quality-score'], () => {
|
||||
let dir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
const repo = setupRepo('army-quality');
|
||||
dir = repo.dir;
|
||||
|
||||
fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
repo.run('git', ['checkout', '-b', 'feature/add-controller']);
|
||||
// Code with obvious issues for quality score computation
|
||||
fs.writeFileSync(path.join(dir, 'user_controller.rb'), `class UserController
|
||||
def create
|
||||
# SQL injection
|
||||
User.where("name = '#{params[:name]}'")
|
||||
# Magic number
|
||||
if users.count > 42
|
||||
raise "too many"
|
||||
end
|
||||
end
|
||||
end
|
||||
`);
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'add user controller']);
|
||||
|
||||
copyReviewFiles(dir);
|
||||
});
|
||||
|
||||
afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
|
||||
|
||||
testConcurrentIfSelected('review-army-quality-score', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo with a vulnerable user controller.
|
||||
Read review-SKILL.md and review-checklist.md.
|
||||
Skip preamble, lake intro, telemetry.
|
||||
|
||||
Run the Critical pass (Step 4) against the diff (git diff main...HEAD).
|
||||
Then compute the PR Quality Score as described in the Review Army merge step:
|
||||
quality_score = max(0, 10 - (critical_count * 2 + informational_count * 0.5))
|
||||
|
||||
Write your findings AND the computed quality score to ${dir}/review-output.md
|
||||
Include the line: "PR Quality Score: X/10" where X is the computed score.`,
|
||||
workingDirectory: dir,
|
||||
maxTurns: 15,
|
||||
timeout: 120_000,
|
||||
testName: 'review-army-quality-score',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/review army quality', result);
|
||||
recordE2E(evalCollector, '/review army quality score', 'Review Army', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
const outputPath = path.join(dir, 'review-output.md');
|
||||
if (fs.existsSync(outputPath)) {
|
||||
const content = fs.readFileSync(outputPath, 'utf-8');
|
||||
// Should contain a quality score
|
||||
const hasScore =
|
||||
content.toLowerCase().includes('quality score') ||
|
||||
content.match(/\d+\/10/);
|
||||
expect(hasScore).toBeTruthy();
|
||||
}
|
||||
}, 150_000);
|
||||
});
|
||||
|
||||
// --- Review Army: JSON Findings ---
|
||||
|
||||
describeIfSelected('Review Army: JSON Findings', ['review-army-json-findings'], () => {
|
||||
let dir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
const repo = setupRepo('army-json');
|
||||
dir = repo.dir;
|
||||
|
||||
fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
repo.run('git', ['checkout', '-b', 'feature/vuln']);
|
||||
fs.writeFileSync(path.join(dir, 'search.rb'), `class SearchController
|
||||
def index
|
||||
# SQL injection via string interpolation
|
||||
results = ActiveRecord::Base.connection.execute(
|
||||
"SELECT * FROM products WHERE name LIKE '%#{params[:q]}%'"
|
||||
)
|
||||
render json: results
|
||||
end
|
||||
end
|
||||
`);
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'add search']);
|
||||
|
||||
copyReviewFiles(dir);
|
||||
});
|
||||
|
||||
afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
|
||||
|
||||
testConcurrentIfSelected('review-army-json-findings', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are reviewing a git diff with a SQL injection vulnerability.
|
||||
Read review-specialists/security.md for the security checklist.
|
||||
|
||||
Apply the checklist against this diff (git diff main...HEAD).
|
||||
Output your findings as JSON objects, one per line, following the schema:
|
||||
{"severity":"CRITICAL","confidence":9,"path":"search.rb","line":4,"category":"injection","summary":"SQL injection via string interpolation","fix":"Use parameterized query","fingerprint":"search.rb:4:injection","specialist":"security"}
|
||||
|
||||
Write ONLY JSON findings (no preamble) to ${dir}/findings.json`,
|
||||
workingDirectory: dir,
|
||||
maxTurns: 12,
|
||||
timeout: 90_000,
|
||||
testName: 'review-army-json-findings',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/review army json', result);
|
||||
recordE2E(evalCollector, '/review army JSON findings', 'Review Army', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
const findingsPath = path.join(dir, 'findings.json');
|
||||
if (fs.existsSync(findingsPath)) {
|
||||
const content = fs.readFileSync(findingsPath, 'utf-8').trim();
|
||||
const lines = content.split('\n').filter(l => l.trim());
|
||||
// At least one finding
|
||||
expect(lines.length).toBeGreaterThanOrEqual(1);
|
||||
// Each line should be valid JSON with required fields
|
||||
for (const line of lines) {
|
||||
let parsed: any;
|
||||
try { parsed = JSON.parse(line); } catch { continue; }
|
||||
// Required fields per schema
|
||||
expect(parsed).toHaveProperty('severity');
|
||||
expect(parsed).toHaveProperty('confidence');
|
||||
expect(parsed).toHaveProperty('path');
|
||||
expect(parsed).toHaveProperty('category');
|
||||
expect(parsed).toHaveProperty('summary');
|
||||
expect(parsed).toHaveProperty('specialist');
|
||||
break; // One valid line is enough for the gate test
|
||||
}
|
||||
}
|
||||
}, 120_000);
|
||||
});
|
||||
|
||||
// --- Review Army: Red Team (periodic) ---
|
||||
|
||||
describeIfSelected('Review Army: Red Team', ['review-army-red-team'], () => {
|
||||
let dir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
const repo = setupRepo('army-redteam');
|
||||
dir = repo.dir;
|
||||
|
||||
fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
repo.run('git', ['checkout', '-b', 'feature/large-change']);
|
||||
// Create a large diff (300+ lines)
|
||||
const lines: string[] = ['class LargeController'];
|
||||
for (let i = 0; i < 100; i++) {
|
||||
lines.push(` def method_${i}`);
|
||||
lines.push(` data = params[:input_${i}]`);
|
||||
lines.push(` process(data)`);
|
||||
lines.push(' end');
|
||||
lines.push('');
|
||||
}
|
||||
lines.push('end');
|
||||
fs.writeFileSync(path.join(dir, 'large_controller.rb'), lines.join('\n'));
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'add large controller']);
|
||||
|
||||
copyReviewFiles(dir);
|
||||
});
|
||||
|
||||
afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
|
||||
|
||||
testConcurrentIfSelected('review-army-red-team', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are reviewing a large diff (300+ lines). Read review-SKILL.md.
|
||||
Skip preamble, lake intro, telemetry.
|
||||
|
||||
The diff is large enough to activate the Red Team specialist.
|
||||
Read review-specialists/red-team.md and apply it against the diff (git diff main...HEAD).
|
||||
Focus on finding issues that other specialists might miss.
|
||||
|
||||
Write your red team findings to ${dir}/review-output.md
|
||||
Start the file with "RED TEAM REVIEW" on the first line.`,
|
||||
workingDirectory: dir,
|
||||
maxTurns: 20,
|
||||
timeout: 180_000,
|
||||
testName: 'review-army-red-team',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/review army red-team', result);
|
||||
recordE2E(evalCollector, '/review army red team', 'Review Army', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
const outputPath = path.join(dir, 'review-output.md');
|
||||
if (fs.existsSync(outputPath)) {
|
||||
const content = fs.readFileSync(outputPath, 'utf-8');
|
||||
expect(content.toLowerCase()).toMatch(/red team|adversarial/);
|
||||
}
|
||||
}, 210_000);
|
||||
});
|
||||
|
||||
// --- Review Army: Consensus (periodic) ---
|
||||
|
||||
describeIfSelected('Review Army: Consensus', ['review-army-consensus'], () => {
|
||||
let dir: string;
|
||||
|
||||
beforeAll(() => {
|
||||
const repo = setupRepo('army-consensus');
|
||||
dir = repo.dir;
|
||||
|
||||
fs.writeFileSync(path.join(dir, 'app.rb'), '# base\n');
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
repo.run('git', ['checkout', '-b', 'feature/vuln-auth']);
|
||||
// SQL injection that both security AND testing specialists should flag
|
||||
fs.writeFileSync(path.join(dir, 'auth_controller.rb'), `class AuthController
|
||||
def login
|
||||
user = User.find_by("email = '#{params[:email]}' AND password = '#{params[:password]}'")
|
||||
if user
|
||||
session[:user_id] = user.id
|
||||
redirect_to root_path
|
||||
else
|
||||
flash[:error] = "Invalid credentials"
|
||||
render :login
|
||||
end
|
||||
end
|
||||
end
|
||||
`);
|
||||
repo.run('git', ['add', '.']);
|
||||
repo.run('git', ['commit', '-m', 'add auth controller']);
|
||||
|
||||
copyReviewFiles(dir);
|
||||
});
|
||||
|
||||
afterAll(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
|
||||
|
||||
testConcurrentIfSelected('review-army-consensus', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are reviewing a git diff with a SQL injection in an auth controller.
|
||||
Read review-SKILL.md, review-checklist.md, and the specialist checklists in review-specialists/.
|
||||
|
||||
This vulnerability should be caught by BOTH the security specialist (injection vector)
|
||||
AND the testing specialist (no test for auth bypass).
|
||||
|
||||
Run the review. In your output, if a finding is flagged by multiple perspectives,
|
||||
mark it as "MULTI-SPECIALIST CONFIRMED" with the confirming categories.
|
||||
|
||||
Write findings to ${dir}/review-output.md`,
|
||||
workingDirectory: dir,
|
||||
maxTurns: 20,
|
||||
timeout: 180_000,
|
||||
testName: 'review-army-consensus',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/review army consensus', result);
|
||||
recordE2E(evalCollector, '/review army consensus', 'Review Army', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
const outputPath = path.join(dir, 'review-output.md');
|
||||
if (fs.existsSync(outputPath)) {
|
||||
const content = fs.readFileSync(outputPath, 'utf-8').toLowerCase();
|
||||
// Should catch the SQL injection
|
||||
const hasSqlFinding =
|
||||
content.includes('sql') ||
|
||||
content.includes('injection') ||
|
||||
content.includes('interpolat');
|
||||
expect(hasSqlFinding).toBe(true);
|
||||
}
|
||||
}, 210_000);
|
||||
});
|
||||
|
||||
// Finalize eval collector
|
||||
afterAll(async () => {
|
||||
await finalizeEvalCollector(evalCollector);
|
||||
});
|
||||
@@ -0,0 +1,268 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
copyDirSync, logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const evalCollector = createEvalCollector('e2e-session-intelligence');
|
||||
|
||||
// --- Session Intelligence E2E ---
|
||||
// Tests the core contract: timeline events flow in, context recovery flows out,
|
||||
// checkpoints round-trip.
|
||||
|
||||
describeIfSelected('Session Intelligence E2E', [
|
||||
'timeline-event-flow', 'context-recovery-artifacts', 'checkpoint-save-resume',
|
||||
], () => {
|
||||
let workDir: string;
|
||||
let gstackHome: string;
|
||||
let slug: string;
|
||||
|
||||
beforeAll(() => {
|
||||
workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-session-intel-'));
|
||||
gstackHome = path.join(workDir, '.gstack-home');
|
||||
|
||||
// Init git repo
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
||||
run('git', ['init', '-b', 'main']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
fs.writeFileSync(path.join(workDir, 'app.ts'), 'console.log("hello");\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Copy bin scripts needed by timeline and checkpoint
|
||||
const binDir = path.join(workDir, 'bin');
|
||||
fs.mkdirSync(binDir, { recursive: true });
|
||||
for (const script of [
|
||||
'gstack-timeline-log', 'gstack-timeline-read', 'gstack-slug',
|
||||
'gstack-learnings-log', 'gstack-learnings-search',
|
||||
]) {
|
||||
const src = path.join(ROOT, 'bin', script);
|
||||
if (fs.existsSync(src)) {
|
||||
fs.copyFileSync(src, path.join(binDir, script));
|
||||
fs.chmodSync(path.join(binDir, script), 0o755);
|
||||
}
|
||||
}
|
||||
|
||||
// Compute slug (same logic as gstack-slug without git remote)
|
||||
slug = path.basename(workDir).replace(/[^a-zA-Z0-9._-]/g, '');
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
finalizeEvalCollector(evalCollector);
|
||||
});
|
||||
|
||||
// --- Test 1: Timeline event flow ---
|
||||
// Write a timeline event via gstack-timeline-log, read it back via gstack-timeline-read.
|
||||
// This is the foundational data flow test: events go in, they come back out.
|
||||
testConcurrentIfSelected('timeline-event-flow', async () => {
|
||||
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||
fs.mkdirSync(projectDir, { recursive: true });
|
||||
|
||||
// Write two events via the binary
|
||||
const logBin = path.join(workDir, 'bin', 'gstack-timeline-log');
|
||||
const readBin = path.join(workDir, 'bin', 'gstack-timeline-read');
|
||||
const env = { ...process.env, GSTACK_HOME: gstackHome };
|
||||
const opts = { cwd: workDir, env, stdio: 'pipe' as const, timeout: 10000 };
|
||||
|
||||
spawnSync(logBin, [JSON.stringify({
|
||||
skill: 'review', event: 'started', branch: 'main', session: 'test-1',
|
||||
})], opts);
|
||||
spawnSync(logBin, [JSON.stringify({
|
||||
skill: 'review', event: 'completed', branch: 'main',
|
||||
outcome: 'success', duration_s: 120, session: 'test-1',
|
||||
})], opts);
|
||||
|
||||
// Read via gstack-timeline-read
|
||||
const readResult = spawnSync(readBin, ['--branch', 'main'], opts);
|
||||
const readOutput = readResult.stdout?.toString() || '';
|
||||
|
||||
// Verify timeline.jsonl exists and has content
|
||||
const timelinePath = path.join(projectDir, 'timeline.jsonl');
|
||||
expect(fs.existsSync(timelinePath)).toBe(true);
|
||||
|
||||
const lines = fs.readFileSync(timelinePath, 'utf-8').trim().split('\n');
|
||||
expect(lines.length).toBe(2);
|
||||
|
||||
// Verify the events are valid JSON with expected fields
|
||||
const event1 = JSON.parse(lines[0]);
|
||||
expect(event1.skill).toBe('review');
|
||||
expect(event1.event).toBe('started');
|
||||
expect(event1.ts).toBeDefined();
|
||||
|
||||
const event2 = JSON.parse(lines[1]);
|
||||
expect(event2.event).toBe('completed');
|
||||
expect(event2.outcome).toBe('success');
|
||||
|
||||
// Verify gstack-timeline-read output includes the events
|
||||
expect(readOutput).toContain('review');
|
||||
|
||||
recordE2E(evalCollector, 'timeline event flow', 'Session Intelligence E2E', {
|
||||
output: readOutput,
|
||||
exitReason: 'success',
|
||||
duration: 0,
|
||||
toolCalls: [],
|
||||
browseErrors: [],
|
||||
costEstimate: { inputChars: 0, outputChars: 0, estimatedTokens: 0, estimatedCost: 0, turnsUsed: 0 },
|
||||
transcript: [],
|
||||
model: 'direct',
|
||||
firstResponseMs: 0,
|
||||
maxInterTurnMs: 0,
|
||||
}, { passed: true });
|
||||
|
||||
console.log(`Timeline flow: ${lines.length} events written, read output ${readOutput.length} chars`);
|
||||
}, 30_000);
|
||||
|
||||
// --- Test 2: Context recovery with seeded artifacts ---
|
||||
// Seed CEO plans and timeline events, then run a skill and verify the preamble
|
||||
// outputs "RECENT ARTIFACTS" and "LAST_SESSION".
|
||||
testConcurrentIfSelected('context-recovery-artifacts', async () => {
|
||||
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||
fs.mkdirSync(path.join(projectDir, 'ceo-plans'), { recursive: true });
|
||||
|
||||
// Seed a CEO plan
|
||||
fs.writeFileSync(
|
||||
path.join(projectDir, 'ceo-plans', '2026-03-31-test-feature.md'),
|
||||
'---\nstatus: ACTIVE\n---\n# CEO Plan: Test Feature\nThis is a test plan.\n',
|
||||
);
|
||||
|
||||
// Seed timeline with a completed event on main branch
|
||||
const timelineEntry = JSON.stringify({
|
||||
ts: new Date().toISOString(),
|
||||
skill: 'ship',
|
||||
event: 'completed',
|
||||
branch: 'main',
|
||||
outcome: 'success',
|
||||
duration_s: 60,
|
||||
session: 'prior-session',
|
||||
});
|
||||
fs.writeFileSync(path.join(projectDir, 'timeline.jsonl'), timelineEntry + '\n');
|
||||
|
||||
// Copy the /learn skill (lightweight, tier-2 skill that runs context recovery)
|
||||
copyDirSync(path.join(ROOT, 'learn'), path.join(workDir, 'learn'));
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read the file learn/SKILL.md for instructions.
|
||||
|
||||
Run the context recovery check — the preamble should show recent artifacts.
|
||||
|
||||
IMPORTANT:
|
||||
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
||||
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
||||
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
|
||||
- Do NOT use AskUserQuestion.
|
||||
- Just run the preamble bash block and report what you see.
|
||||
- Look for "RECENT ARTIFACTS" and "LAST_SESSION" in the output.`,
|
||||
workingDirectory: workDir,
|
||||
maxTurns: 10,
|
||||
allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'context-recovery-artifacts',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('context recovery', result);
|
||||
|
||||
const output = result.output.toLowerCase();
|
||||
|
||||
// The preamble should have found the seeded artifacts
|
||||
const foundArtifacts = output.includes('recent artifacts') || output.includes('ceo-plans');
|
||||
const foundLastSession = output.includes('last_session') || output.includes('ship');
|
||||
const foundTimeline = output.includes('timeline') || output.includes('completed');
|
||||
|
||||
// At least the CEO plan or timeline should be visible
|
||||
const foundCount = [foundArtifacts, foundLastSession, foundTimeline].filter(Boolean).length;
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'context recovery', 'Session Intelligence E2E', result, {
|
||||
passed: exitOk && foundCount >= 1,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(foundCount).toBeGreaterThanOrEqual(1);
|
||||
|
||||
console.log(`Context recovery: artifacts=${foundArtifacts}, lastSession=${foundLastSession}, timeline=${foundTimeline}`);
|
||||
}, 180_000);
|
||||
|
||||
// --- Test 3: Checkpoint save and resume ---
|
||||
// Run /checkpoint save via claude -p, verify file created. Then run /checkpoint resume
|
||||
// and verify it reads the checkpoint back.
|
||||
testConcurrentIfSelected('checkpoint-save-resume', async () => {
|
||||
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||
fs.mkdirSync(path.join(projectDir, 'checkpoints'), { recursive: true });
|
||||
|
||||
// Copy the /checkpoint skill
|
||||
copyDirSync(path.join(ROOT, 'checkpoint'), path.join(workDir, 'checkpoint'));
|
||||
|
||||
// Add a staged change so /checkpoint has something to capture
|
||||
fs.writeFileSync(path.join(workDir, 'feature.ts'), 'export function newFeature() { return true; }\n');
|
||||
spawnSync('git', ['add', 'feature.ts'], { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
// Extract the checkpoint save section from the skill template
|
||||
const full = fs.readFileSync(path.join(ROOT, 'checkpoint', 'SKILL.md'), 'utf-8');
|
||||
const saveStart = full.indexOf('## Save');
|
||||
const resumeStart = full.indexOf('## Resume');
|
||||
const saveSection = full.slice(saveStart, resumeStart > saveStart ? resumeStart : undefined);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are testing the /checkpoint skill. Follow these instructions to save a checkpoint.
|
||||
|
||||
${saveSection.slice(0, 2000)}
|
||||
|
||||
IMPORTANT:
|
||||
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
||||
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
||||
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
|
||||
- Save the checkpoint to ${projectDir}/checkpoints/ with a filename like "20260401-test-checkpoint.md".
|
||||
- Include YAML frontmatter with status, branch, and timestamp.
|
||||
- Include a summary of what's being worked on (you can see from git status).
|
||||
- Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
maxTurns: 10,
|
||||
allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
||||
timeout: 120_000,
|
||||
testName: 'checkpoint-save-resume',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('checkpoint save', result);
|
||||
|
||||
// Check that a checkpoint file was created
|
||||
const checkpointDir = path.join(projectDir, 'checkpoints');
|
||||
const checkpointFiles = fs.existsSync(checkpointDir)
|
||||
? fs.readdirSync(checkpointDir).filter(f => f.endsWith('.md'))
|
||||
: [];
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
const checkpointCreated = checkpointFiles.length > 0;
|
||||
|
||||
let checkpointContent = '';
|
||||
if (checkpointCreated) {
|
||||
checkpointContent = fs.readFileSync(path.join(checkpointDir, checkpointFiles[0]), 'utf-8');
|
||||
}
|
||||
|
||||
// Verify checkpoint has expected structure
|
||||
const hasYamlFrontmatter = checkpointContent.includes('---') && checkpointContent.includes('status:');
|
||||
const hasBranch = checkpointContent.includes('branch:') || checkpointContent.includes('main');
|
||||
|
||||
recordE2E(evalCollector, 'checkpoint save-resume', 'Session Intelligence E2E', result, {
|
||||
passed: exitOk && checkpointCreated && hasYamlFrontmatter,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(checkpointCreated).toBe(true);
|
||||
expect(hasYamlFrontmatter).toBe(true);
|
||||
|
||||
console.log(`Checkpoint: ${checkpointFiles.length} files created, YAML frontmatter: ${hasYamlFrontmatter}, branch: ${hasBranch}`);
|
||||
}, 180_000);
|
||||
});
|
||||
@@ -116,9 +116,10 @@ describeIfSelected('Sidebar URL accuracy E2E', ['sidebar-url-accuracy'], () => {
|
||||
}
|
||||
|
||||
expect(lastEntry).not.toBeNull();
|
||||
// Extension URL should be used, not the Playwright fallback
|
||||
// Extension URL should be used, not the Playwright fallback.
|
||||
// The pageUrl field carries the extension URL; the prompt itself
|
||||
// contains only the system prompt + user message (URL is metadata).
|
||||
expect(lastEntry.pageUrl).toBe(extensionUrl);
|
||||
expect(lastEntry.prompt).toContain(extensionUrl);
|
||||
expect(lastEntry.pageUrl).not.toBe('about:blank');
|
||||
|
||||
// Also test: chrome:// URL should be rejected, falling back to about:blank
|
||||
@@ -149,6 +150,197 @@ describeIfSelected('Sidebar URL accuracy E2E', ['sidebar-url-accuracy'], () => {
|
||||
}, 30_000);
|
||||
});
|
||||
|
||||
// --- Sidebar CSS Interaction E2E (real Claude + real browser) ---
|
||||
// Goes to HN, reads comments, identifies the most insightful one, highlights it.
|
||||
// Exercises: navigation, snapshot, text reading, LLM judgment, CSS style injection.
|
||||
|
||||
describeIfSelected('Sidebar CSS interaction E2E', ['sidebar-css-interaction'], () => {
|
||||
let serverProc: Subprocess | null = null;
|
||||
let agentProc: Subprocess | null = null;
|
||||
let serverPort: number = 0;
|
||||
let authToken: string = '';
|
||||
let tmpDir: string = '';
|
||||
let stateFile: string = '';
|
||||
let queueFile: string = '';
|
||||
let serverLogFile: string = '';
|
||||
let serverErrFile: string = '';
|
||||
let agentLogFile: string = '';
|
||||
let agentErrFile: string = '';
|
||||
|
||||
async function api(pathname: string, opts: RequestInit = {}): Promise<Response> {
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
...(opts.headers as Record<string, string> || {}),
|
||||
};
|
||||
if (!headers['Authorization'] && authToken) {
|
||||
headers['Authorization'] = `Bearer ${authToken}`;
|
||||
}
|
||||
return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...opts, headers });
|
||||
}
|
||||
|
||||
beforeAll(async () => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-e2e-css-'));
|
||||
stateFile = path.join(tmpDir, 'browse.json');
|
||||
queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');
|
||||
fs.mkdirSync(path.dirname(queueFile), { recursive: true });
|
||||
|
||||
// Start server WITH a real browser for CSS interaction
|
||||
const serverScript = path.resolve(ROOT, 'browse', 'src', 'server.ts');
|
||||
serverLogFile = path.join(tmpDir, 'server.log');
|
||||
serverErrFile = path.join(tmpDir, 'server.err');
|
||||
// Use 'pipe' stdio — closing file descriptors kills the child on macOS/bun
|
||||
serverProc = spawn(['bun', 'run', serverScript], {
|
||||
env: {
|
||||
...process.env,
|
||||
BROWSE_STATE_FILE: stateFile,
|
||||
BROWSE_PORT: '0',
|
||||
SIDEBAR_QUEUE_PATH: queueFile,
|
||||
BROWSE_IDLE_TIMEOUT: '600000', // 10 min in ms — test takes ~3 min
|
||||
},
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
|
||||
// Wait for state file with port/token
|
||||
const deadline = Date.now() + 30000;
|
||||
while (Date.now() < deadline) {
|
||||
if (fs.existsSync(stateFile)) {
|
||||
try {
|
||||
const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
|
||||
if (state.port && state.token) {
|
||||
serverPort = state.port;
|
||||
authToken = state.token;
|
||||
break;
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
await new Promise(r => setTimeout(r, 200));
|
||||
}
|
||||
if (!serverPort) throw new Error('Server did not start in time');
|
||||
|
||||
// Verify server is healthy before proceeding
|
||||
const healthDeadline = Date.now() + 10000;
|
||||
let healthy = false;
|
||||
while (Date.now() < healthDeadline) {
|
||||
try {
|
||||
const resp = await fetch(`http://127.0.0.1:${serverPort}/health`);
|
||||
if (resp.ok) { healthy = true; break; }
|
||||
} catch {}
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
}
|
||||
if (!healthy) throw new Error('Server started but health check failed');
|
||||
|
||||
// Start sidebar-agent with the real browse binary
|
||||
const agentScript = path.resolve(ROOT, 'browse', 'src', 'sidebar-agent.ts');
|
||||
const browseBin = path.resolve(ROOT, 'browse', 'dist', 'browse');
|
||||
agentLogFile = path.join(tmpDir, 'agent.log');
|
||||
agentErrFile = path.join(tmpDir, 'agent.err');
|
||||
// Use 'pipe' stdio — closing file descriptors kills the child on macOS/bun
|
||||
agentProc = spawn(['bun', 'run', agentScript], {
|
||||
env: {
|
||||
...process.env,
|
||||
BROWSE_SERVER_PORT: String(serverPort),
|
||||
BROWSE_STATE_FILE: stateFile,
|
||||
SIDEBAR_QUEUE_PATH: queueFile,
|
||||
SIDEBAR_AGENT_TIMEOUT: '180000', // 3 min — multi-step HN comment task
|
||||
BROWSE_BIN: fs.existsSync(browseBin) ? browseBin : 'echo',
|
||||
},
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
}, 35000);
|
||||
|
||||
afterAll(() => {
|
||||
if (agentProc) { try { agentProc.kill(); } catch {} }
|
||||
if (serverProc) { try { serverProc.kill(); } catch {} }
|
||||
finalizeEvalCollector(evalCollector);
|
||||
try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
testIfSelected('sidebar-css-interaction', async () => {
|
||||
// Fresh session + clean queue
|
||||
try { await api('/sidebar-session/new', { method: 'POST' }); } catch {}
|
||||
fs.writeFileSync(queueFile, '');
|
||||
const startTime = Date.now();
|
||||
|
||||
// Simple task: go to example.com, read the title, apply a style
|
||||
// (much faster than multi-step HN comment navigation)
|
||||
const resp = await api('/sidebar-command', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({
|
||||
message: 'Go to https://example.com. Read the page title. Add a 4px solid orange outline to the h1 element.',
|
||||
activeTabUrl: 'about:blank',
|
||||
}),
|
||||
});
|
||||
expect(resp.status).toBe(200);
|
||||
|
||||
// Poll for agent_done (4 min timeout — multi-step task with opus LLM)
|
||||
const deadline = Date.now() + 240000;
|
||||
let entries: any[] = [];
|
||||
while (Date.now() < deadline) {
|
||||
try {
|
||||
const chatResp = await api('/sidebar-chat?after=0');
|
||||
const data = await chatResp.json();
|
||||
entries = data.entries || [];
|
||||
if (entries.some((e: any) => e.type === 'agent_done')) break;
|
||||
} catch (err: any) {
|
||||
// Server may be temporarily busy or restarting — retry on connection errors
|
||||
const isConnErr = err.code === 'ConnectionRefused' || err.message?.includes('ConnectionRefused') || err.message?.includes('Unable to connect');
|
||||
if (!isConnErr) throw err;
|
||||
}
|
||||
await new Promise(r => setTimeout(r, 3000));
|
||||
}
|
||||
|
||||
const duration = Date.now() - startTime;
|
||||
const doneEntry = entries.find((e: any) => e.type === 'agent_done');
|
||||
|
||||
// Dump debug info on failure
|
||||
if (!doneEntry || entries.length === 0) {
|
||||
console.log('ENTRIES:', JSON.stringify(entries.slice(-5), null, 2));
|
||||
console.log('SERVER exitCode:', serverProc?.exitCode, 'signalCode:', serverProc?.signalCode, 'killed:', serverProc?.killed);
|
||||
console.log('AGENT exitCode:', agentProc?.exitCode, 'signalCode:', agentProc?.signalCode, 'killed:', agentProc?.killed);
|
||||
const queueContent = fs.existsSync(queueFile) ? fs.readFileSync(queueFile, 'utf-8').slice(-500) : 'NO QUEUE';
|
||||
console.log('QUEUE:', queueContent.length > 0 ? 'has entries' : 'empty');
|
||||
}
|
||||
|
||||
// Agent should have completed
|
||||
expect(doneEntry).toBeDefined();
|
||||
|
||||
// Agent should have run browse commands (look for tool_use entries)
|
||||
const toolUses = entries.filter((e: any) => e.type === 'tool_use');
|
||||
expect(toolUses.length).toBeGreaterThanOrEqual(2); // At minimum: goto + one more
|
||||
|
||||
// Agent text should mention something about the comment it found
|
||||
const agentText = entries
|
||||
.filter((e: any) => e.role === 'agent' && (e.type === 'text' || e.type === 'result'))
|
||||
.map((e: any) => e.text || '')
|
||||
.join(' ')
|
||||
.toLowerCase();
|
||||
|
||||
// Should have navigated to example.com (look for example.com in any entry text)
|
||||
const allEntryText = entries
|
||||
.map((e: any) => `${e.text || ''} ${e.input || ''} ${e.message || ''}`)
|
||||
.join(' ');
|
||||
const navigatedToTarget = allEntryText.includes('example.com') || allEntryText.includes('Example Domain');
|
||||
if (!navigatedToTarget) {
|
||||
console.log('ALL ENTRY TEXT (first 2000):', allEntryText.slice(0, 2000));
|
||||
}
|
||||
expect(navigatedToTarget).toBe(true);
|
||||
|
||||
// Should have applied a style (look for orange/outline in tool commands)
|
||||
const allText = entries.map((e: any) => e.text || '').join(' ');
|
||||
const appliedStyle = allText.includes('outline') || allText.includes('orange') || allText.includes('style');
|
||||
|
||||
evalCollector?.addTest({
|
||||
name: 'sidebar-css-interaction', suite: 'Sidebar CSS interaction E2E', tier: 'e2e',
|
||||
passed: !!doneEntry && navigatedToTarget && appliedStyle,
|
||||
duration_ms: duration,
|
||||
cost_usd: 0,
|
||||
exit_reason: doneEntry ? 'success' : 'timeout',
|
||||
});
|
||||
}, 300_000);
|
||||
});
|
||||
|
||||
// --- Sidebar Navigate (real Claude, requires ANTHROPIC_API_KEY) ---
|
||||
|
||||
describeIfSelected('Sidebar navigate E2E', ['sidebar-navigate'], () => {
|
||||
|
||||
@@ -467,8 +467,18 @@ describeIfSelected('Codex skill E2E', ['codex-review'], () => {
|
||||
run('git', ['add', 'user_controller.rb']);
|
||||
run('git', ['commit', '-m', 'add vulnerable controller']);
|
||||
|
||||
// Copy the codex skill file
|
||||
fs.copyFileSync(path.join(ROOT, 'codex', 'SKILL.md'), path.join(codexDir, 'codex-SKILL.md'));
|
||||
// Extract only the review-relevant section from codex SKILL.md (~120 lines vs 1075).
|
||||
// Full SKILL.md is 55KB / ~14K tokens — takes 8 Read calls to consume, exhausting turns.
|
||||
const full = fs.readFileSync(path.join(ROOT, 'codex', 'SKILL.md'), 'utf-8');
|
||||
const startMarker = '# /codex — Multi-AI Second Opinion';
|
||||
const endMarker = '## Plan File Review Report';
|
||||
const start = full.indexOf(startMarker);
|
||||
const end = full.indexOf(endMarker, start);
|
||||
const reviewSection = full.slice(
|
||||
start >= 0 ? start : 0,
|
||||
end > start ? end : undefined,
|
||||
);
|
||||
fs.writeFileSync(path.join(codexDir, 'codex-SKILL.md'), reviewSection);
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
@@ -485,11 +495,11 @@ describeIfSelected('Codex skill E2E', ['codex-review'], () => {
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on branch feature/add-vuln with changes against main.
|
||||
Read codex-SKILL.md for the /codex skill instructions.
|
||||
Run /codex review to review the current diff against main.
|
||||
Read codex-SKILL.md for the /codex review instructions (it's short — ~120 lines).
|
||||
Follow those instructions to run codex review against the diff on this branch.
|
||||
Write the full output (including the GATE verdict) to ${codexDir}/codex-output.md`,
|
||||
workingDirectory: codexDir,
|
||||
maxTurns: 15,
|
||||
maxTurns: 25,
|
||||
timeout: 300_000,
|
||||
testName: 'codex-review',
|
||||
runId,
|
||||
|
||||
+96
-56
@@ -325,62 +325,6 @@ Report the exact output — either "READY: <path>" or "NEEDS_SETUP".`,
|
||||
try { fs.rmSync(nonGitDir, { recursive: true, force: true }); } catch {}
|
||||
}, 60_000);
|
||||
|
||||
testIfSelected('contributor-mode', async () => {
|
||||
const contribDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-contrib-'));
|
||||
const logsDir = path.join(contribDir, 'contributor-logs');
|
||||
fs.mkdirSync(logsDir, { recursive: true });
|
||||
|
||||
// Extract contributor mode instructions from generated SKILL.md
|
||||
const skillMd = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');
|
||||
const contribStart = skillMd.indexOf('## Contributor Mode');
|
||||
const contribEnd = skillMd.indexOf('\n## ', contribStart + 1);
|
||||
const contribBlock = skillMd.slice(contribStart, contribEnd > 0 ? contribEnd : undefined);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in contributor mode (_CONTRIB=true).
|
||||
|
||||
${contribBlock}
|
||||
|
||||
OVERRIDE: Write contributor logs to ${logsDir}/ instead of ~/.gstack/contributor-logs/
|
||||
|
||||
Now try this browse command (it will fail — there is no binary at this path):
|
||||
/nonexistent/path/browse goto https://example.com
|
||||
|
||||
This is a gstack issue (the browse binary is missing/misconfigured).
|
||||
File a contributor report about this issue. Then tell me what you filed.`,
|
||||
workingDirectory: contribDir,
|
||||
maxTurns: 8,
|
||||
timeout: 60_000,
|
||||
testName: 'contributor-mode',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('contributor mode', result);
|
||||
// Override passed: this test intentionally triggers a browse error (nonexistent binary)
|
||||
// so browseErrors will be non-empty — that's expected, not a failure
|
||||
recordE2E('contributor mode report', 'Skill E2E tests', result, {
|
||||
passed: result.exitReason === 'success',
|
||||
});
|
||||
|
||||
// Verify a contributor log was created with expected format
|
||||
const logFiles = fs.readdirSync(logsDir).filter(f => f.endsWith('.md'));
|
||||
expect(logFiles.length).toBeGreaterThan(0);
|
||||
|
||||
// Verify new reflection-based format
|
||||
const logContent = fs.readFileSync(path.join(logsDir, logFiles[0]), 'utf-8');
|
||||
expect(logContent).toContain('Hey gstack team');
|
||||
expect(logContent).toContain('What I was trying to do');
|
||||
expect(logContent).toContain('What happened instead');
|
||||
expect(logContent).toMatch(/rating/i);
|
||||
// Verify report has repro steps (agent may use "Steps to reproduce", "Repro Steps", etc.)
|
||||
expect(logContent).toMatch(/repro|steps to reproduce|how to reproduce/i);
|
||||
// Verify report has date/version footer (agent may format differently)
|
||||
expect(logContent).toMatch(/date.*2026|2026.*date/i);
|
||||
|
||||
// Clean up
|
||||
try { fs.rmSync(contribDir, { recursive: true, force: true }); } catch {}
|
||||
}, 90_000);
|
||||
|
||||
testIfSelected('session-awareness', async () => {
|
||||
const sessionDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-session-'));
|
||||
|
||||
@@ -3313,6 +3257,102 @@ Write your summary to ${benefitsDir}/benefits-summary.md`,
|
||||
}, 180_000);
|
||||
});
|
||||
|
||||
// --- Ship idempotency (#649) ---
|
||||
describeIfSelected('Ship idempotency', ['ship-idempotency'], () => {
|
||||
let idempDir: string;
|
||||
const gitRun = (args: string[], cwd: string) =>
|
||||
spawnSync('git', args, { cwd, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
beforeAll(() => {
|
||||
idempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-ship-idemp-'));
|
||||
|
||||
// Create git repo with initial commit on main
|
||||
gitRun(['init', '-b', 'main'], idempDir);
|
||||
gitRun(['config', 'user.email', 'test@test.com'], idempDir);
|
||||
gitRun(['config', 'user.name', 'Test'], idempDir);
|
||||
|
||||
fs.writeFileSync(path.join(idempDir, 'app.ts'), 'console.log("v1");\n');
|
||||
fs.writeFileSync(path.join(idempDir, 'VERSION'), '0.1.0.0\n');
|
||||
fs.writeFileSync(path.join(idempDir, 'CHANGELOG.md'), '# Changelog\n');
|
||||
gitRun(['add', '.'], idempDir);
|
||||
gitRun(['commit', '-m', 'initial'], idempDir);
|
||||
|
||||
// Create feature branch with changes
|
||||
gitRun(['checkout', '-b', 'feat/my-feature'], idempDir);
|
||||
fs.writeFileSync(path.join(idempDir, 'app.ts'), 'console.log("v2");\n');
|
||||
gitRun(['add', 'app.ts'], idempDir);
|
||||
gitRun(['commit', '-m', 'feat: update to v2'], idempDir);
|
||||
|
||||
// Simulate prior /ship run: bump VERSION and write CHANGELOG entry
|
||||
fs.writeFileSync(path.join(idempDir, 'VERSION'), '0.2.0.0\n');
|
||||
fs.writeFileSync(path.join(idempDir, 'CHANGELOG.md'),
|
||||
'# Changelog\n\n## [0.2.0.0] — 2026-03-30\n\n- Updated app to v2\n');
|
||||
gitRun(['add', 'VERSION', 'CHANGELOG.md'], idempDir);
|
||||
gitRun(['commit', '-m', 'chore: bump version to 0.2.0.0'], idempDir);
|
||||
|
||||
// Extract just the idempotency-relevant sections from ship/SKILL.md
|
||||
const full = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
const step4Start = full.indexOf('## Step 4: Version bump');
|
||||
const step4End = full.indexOf('\n---\n', step4Start);
|
||||
const step7Start = full.indexOf('## Step 7: Push');
|
||||
const step8End = full.indexOf('## Step 8.5');
|
||||
const extracted = [
|
||||
full.slice(step4Start, step4End > step4Start ? step4End : step4Start + 500),
|
||||
full.slice(step7Start, step8End > step7Start ? step8End : step7Start + 500),
|
||||
].join('\n\n---\n\n');
|
||||
fs.writeFileSync(path.join(idempDir, 'ship-steps.md'), extracted);
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
try { fs.rmSync(idempDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
testIfSelected('ship-idempotency', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on branch feat/my-feature. A prior /ship run already:
|
||||
- Bumped VERSION from 0.1.0.0 to 0.2.0.0
|
||||
- Wrote a CHANGELOG entry for 0.2.0.0
|
||||
- But the push/PR step failed
|
||||
|
||||
Read ship-steps.md for the idempotency check instructions from the ship workflow.
|
||||
|
||||
Run ONLY the idempotency checks described in Steps 4 and 7. Do NOT actually push or create PRs (there is no remote).
|
||||
|
||||
After running the checks, write a report to ${idempDir}/idemp-result.md containing:
|
||||
- Whether VERSION was detected as ALREADY_BUMPED or not
|
||||
- Whether the push was detected as ALREADY_PUSHED or PUSH_NEEDED
|
||||
- The current VERSION value (should still be 0.2.0.0)
|
||||
|
||||
Do NOT modify VERSION or CHANGELOG. Only run the detection checks and report.`,
|
||||
workingDirectory: idempDir,
|
||||
maxTurns: 10,
|
||||
timeout: 60_000,
|
||||
testName: 'ship-idempotency',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('/ship idempotency', result);
|
||||
recordE2E('/ship idempotency guard', 'Ship idempotency', result);
|
||||
expect(result.exitReason).toBe('success');
|
||||
|
||||
// Verify VERSION was NOT modified
|
||||
const version = fs.readFileSync(path.join(idempDir, 'VERSION'), 'utf-8').trim();
|
||||
expect(version).toBe('0.2.0.0');
|
||||
|
||||
// Verify CHANGELOG was NOT duplicated
|
||||
const changelog = fs.readFileSync(path.join(idempDir, 'CHANGELOG.md'), 'utf-8');
|
||||
const versionEntries = (changelog.match(/## \[0\.2\.0\.0\]/g) || []).length;
|
||||
expect(versionEntries).toBe(1);
|
||||
|
||||
// Check the result report if it was written
|
||||
const reportPath = path.join(idempDir, 'idemp-result.md');
|
||||
if (fs.existsSync(reportPath)) {
|
||||
const report = fs.readFileSync(reportPath, 'utf-8');
|
||||
expect(report.toLowerCase()).toContain('already_bumped');
|
||||
}
|
||||
}, 120_000);
|
||||
});
|
||||
|
||||
// Module-level afterAll — finalize eval collector after all tests complete
|
||||
afterAll(async () => {
|
||||
if (evalCollector) {
|
||||
|
||||
@@ -93,11 +93,30 @@ function installSkills(tmpDir: string) {
|
||||
}
|
||||
}
|
||||
|
||||
// Copy CLAUDE.md so Claude has project context for skill routing.
|
||||
const claudeMdSrc = path.join(ROOT, 'CLAUDE.md');
|
||||
if (fs.existsSync(claudeMdSrc)) {
|
||||
fs.copyFileSync(claudeMdSrc, path.join(tmpDir, 'CLAUDE.md'));
|
||||
}
|
||||
// Write a CLAUDE.md with explicit routing instructions.
|
||||
// The skill descriptions in system-reminder aren't strong enough to override
|
||||
// Claude's default behavior of answering directly. A CLAUDE.md instruction
|
||||
// puts routing rules in project context which Claude weighs more heavily.
|
||||
fs.writeFileSync(path.join(tmpDir, 'CLAUDE.md'), `# Project Instructions
|
||||
|
||||
## Skill routing
|
||||
|
||||
When the user's request matches an available skill, ALWAYS invoke it using the Skill
|
||||
tool as your FIRST action. Do NOT answer directly, do NOT use other tools first.
|
||||
The skill has specialized workflows that produce better results than ad-hoc answers.
|
||||
|
||||
Key routing rules:
|
||||
- Product ideas, "is this worth building", brainstorming → invoke office-hours
|
||||
- Bugs, errors, "why is this broken", 500 errors → invoke investigate
|
||||
- Ship, deploy, push, create PR → invoke ship
|
||||
- QA, test the site, find bugs → invoke qa
|
||||
- Code review, check my diff → invoke review
|
||||
- Update docs after shipping → invoke document-release
|
||||
- Weekly retro → invoke retro
|
||||
- Design system, brand → invoke design-consultation
|
||||
- Visual audit, design polish → invoke design-review
|
||||
- Architecture review → invoke plan-eng-review
|
||||
`);
|
||||
}
|
||||
|
||||
/** Init a git repo with config */
|
||||
|
||||
@@ -721,45 +721,8 @@ describe('investigate skill structure', () => {
|
||||
}
|
||||
});
|
||||
|
||||
// --- Contributor mode preamble structure validation ---
|
||||
|
||||
describe('Contributor mode preamble structure', () => {
|
||||
const skillsWithPreamble = [
|
||||
'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
|
||||
'qa-only/SKILL.md',
|
||||
'setup-browser-cookies/SKILL.md',
|
||||
'ship/SKILL.md', 'review/SKILL.md',
|
||||
'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
|
||||
'retro/SKILL.md',
|
||||
'plan-design-review/SKILL.md',
|
||||
'design-review/SKILL.md',
|
||||
'design-consultation/SKILL.md',
|
||||
'document-release/SKILL.md',
|
||||
'canary/SKILL.md',
|
||||
'benchmark/SKILL.md',
|
||||
'land-and-deploy/SKILL.md',
|
||||
'setup-deploy/SKILL.md',
|
||||
];
|
||||
|
||||
for (const skill of skillsWithPreamble) {
|
||||
test(`${skill} has 0-10 rating in contributor mode`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
|
||||
expect(content).toContain('0-10');
|
||||
expect(content).toContain('Rating');
|
||||
});
|
||||
|
||||
test(`${skill} has "what would make this a 10" field`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
|
||||
expect(content).toContain('What would make this a 10');
|
||||
});
|
||||
|
||||
test(`${skill} uses periodic reflection (not per-command)`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
|
||||
expect(content).toContain('workflow step');
|
||||
expect(content).not.toContain('After you use gstack-provided CLIs');
|
||||
});
|
||||
}
|
||||
});
|
||||
// Contributor mode was removed in v0.13.10.0 — replaced by operational self-improvement.
|
||||
// Tests for contributor mode preamble structure are no longer applicable.
|
||||
|
||||
describe('Enum & Value Completeness in review checklist', () => {
|
||||
const checklist = fs.readFileSync(path.join(ROOT, 'review', 'checklist.md'), 'utf-8');
|
||||
@@ -1291,38 +1254,49 @@ describe('Codex skill', () => {
|
||||
expect(content).toContain('mktemp');
|
||||
});
|
||||
|
||||
test('adversarial review in /review auto-scales by diff size', () => {
|
||||
test('adversarial review in /review always runs both passes', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Adversarial review (auto-scaled)');
|
||||
// Diff size thresholds
|
||||
expect(content).toContain('< 50');
|
||||
expect(content).toContain('50–199');
|
||||
expect(content).toContain('200+');
|
||||
// All three tiers present
|
||||
expect(content).toContain('Small');
|
||||
expect(content).toContain('Medium tier');
|
||||
expect(content).toContain('Large tier');
|
||||
expect(content).toContain('Adversarial review (always-on)');
|
||||
// Always-on: both Claude and Codex adversarial
|
||||
expect(content).toContain('Claude adversarial subagent (always runs)');
|
||||
expect(content).toContain('Codex adversarial challenge (always runs when available)');
|
||||
// Claude adversarial subagent dispatch
|
||||
expect(content).toContain('Agent tool');
|
||||
expect(content).toContain('FIXABLE');
|
||||
expect(content).toContain('INVESTIGATE');
|
||||
// Codex fallback logic
|
||||
// Codex availability check
|
||||
expect(content).toContain('CODEX_NOT_AVAILABLE');
|
||||
expect(content).toContain('fall back to the Claude adversarial subagent');
|
||||
// Review log uses new skill name
|
||||
// OLD_CFG only gates Codex, not Claude
|
||||
expect(content).toContain('skip Codex passes only');
|
||||
// Review log
|
||||
expect(content).toContain('adversarial-review');
|
||||
expect(content).toContain('reasoning_effort="high"');
|
||||
expect(content).toContain('ADVERSARIAL REVIEW SYNTHESIS');
|
||||
// Large diff structured review still gated
|
||||
expect(content).toContain('Codex structured review (large diffs only');
|
||||
expect(content).toContain('200');
|
||||
});
|
||||
|
||||
test('adversarial review in /ship auto-scales by diff size', () => {
|
||||
test('adversarial review in /ship always runs both passes', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Adversarial review (auto-scaled)');
|
||||
expect(content).toContain('< 50');
|
||||
expect(content).toContain('200+');
|
||||
expect(content).toContain('Adversarial review (always-on)');
|
||||
expect(content).toContain('adversarial-review');
|
||||
expect(content).toContain('reasoning_effort="high"');
|
||||
expect(content).toContain('Investigate and fix');
|
||||
expect(content).toContain('Claude adversarial subagent (always runs)');
|
||||
});
|
||||
|
||||
test('scope drift detection in /review and /ship', () => {
|
||||
const reviewContent = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
|
||||
const shipContent = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
// Both should contain scope drift from the shared resolver
|
||||
for (const content of [reviewContent, shipContent]) {
|
||||
expect(content).toContain('Scope Check:');
|
||||
expect(content).toContain('DRIFT DETECTED');
|
||||
expect(content).toContain('SCOPE CREEP');
|
||||
expect(content).toContain('MISSING REQUIREMENTS');
|
||||
expect(content).toContain('stated intent');
|
||||
}
|
||||
});
|
||||
|
||||
test('codex-host ship/review do NOT contain adversarial review step', () => {
|
||||
@@ -1395,13 +1369,13 @@ describe('Skill trigger phrases', () => {
|
||||
];
|
||||
|
||||
for (const skill of SKILLS_REQUIRING_PROACTIVE) {
|
||||
test(`${skill}/SKILL.md has "Proactively suggest" phrase`, () => {
|
||||
test(`${skill}/SKILL.md has proactive routing phrase`, () => {
|
||||
const skillPath = path.join(ROOT, skill, 'SKILL.md');
|
||||
if (!fs.existsSync(skillPath)) return;
|
||||
const content = fs.readFileSync(skillPath, 'utf-8');
|
||||
const frontmatterEnd = content.indexOf('---', 4);
|
||||
const frontmatter = content.slice(0, frontmatterEnd);
|
||||
expect(frontmatter).toMatch(/Proactively suggest/i);
|
||||
expect(frontmatter).toMatch(/Proactively (suggest|invoke)/i);
|
||||
});
|
||||
}
|
||||
});
|
||||
@@ -1533,3 +1507,51 @@ describe('Test failure triage in ship skill', () => {
|
||||
expect(content).toContain('In-branch test failures');
|
||||
});
|
||||
});
|
||||
|
||||
describe('no compiled binaries in git', () => {
|
||||
test('git tracks no Mach-O or ELF binaries', () => {
|
||||
const result = require('child_process').execSync(
|
||||
'git ls-files -z | xargs -0 file --mime-type 2>/dev/null | grep -E "application/(x-mach-binary|x-executable|x-pie-executable|x-sharedlib)" || true',
|
||||
{ cwd: ROOT, encoding: 'utf-8' }
|
||||
).trim();
|
||||
const files = result ? result.split('\n').map((l: string) => l.split(':')[0].trim()) : [];
|
||||
expect(files).toEqual([]);
|
||||
});
|
||||
|
||||
test('git tracks no files larger than 2MB', () => {
|
||||
const result = require('child_process').execSync(
|
||||
'git ls-files -z | xargs -0 -I{} sh -c \'size=$(wc -c < "{}" 2>/dev/null | tr -d " "); [ "$size" -gt 2097152 ] 2>/dev/null && echo "{}:${size}"\' || true',
|
||||
{ cwd: ROOT, encoding: 'utf-8' }
|
||||
).trim();
|
||||
const files = result ? result.split('\n').filter(Boolean) : [];
|
||||
expect(files).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('sidebar agent (#584)', () => {
|
||||
// #584 — Sidebar Write: sidebar-agent.ts allowedTools includes Write
|
||||
test('sidebar-agent.ts allowedTools includes Write', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
|
||||
// Find the allowedTools line in the askClaude function
|
||||
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
|
||||
expect(match).not.toBeNull();
|
||||
expect(match![1]).toContain('Write');
|
||||
});
|
||||
|
||||
// #584 — Server Write: server.ts allowedTools includes Write (DRY parity)
|
||||
test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
|
||||
// Find the sidebar allowedTools in the headed-mode path
|
||||
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
|
||||
expect(match).not.toBeNull();
|
||||
expect(match![1]).toContain('Bash');
|
||||
expect(match![1]).not.toContain('Write');
|
||||
});
|
||||
|
||||
// #584 — Sidebar stderr: stderr handler is not empty
|
||||
test('sidebar-agent.ts stderr handler is not empty', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
|
||||
// The stderr handler should NOT be an empty arrow function
|
||||
expect(content).not.toContain("proc.stderr.on('data', () => {})");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,339 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const SETTINGS_HOOK = path.join(ROOT, 'bin', 'gstack-settings-hook');
|
||||
const SESSION_UPDATE = path.join(ROOT, 'bin', 'gstack-session-update');
|
||||
const TEAM_INIT = path.join(ROOT, 'bin', 'gstack-team-init');
|
||||
|
||||
function mkTmpDir(): string {
|
||||
return fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-team-test-'));
|
||||
}
|
||||
|
||||
function run(cmd: string, opts: { cwd?: string; env?: Record<string, string> } = {}): { stdout: string; stderr: string; exitCode: number } {
|
||||
try {
|
||||
const stdout = execSync(cmd, {
|
||||
cwd: opts.cwd,
|
||||
env: { ...process.env, ...opts.env },
|
||||
encoding: 'utf-8',
|
||||
timeout: 10000,
|
||||
});
|
||||
return { stdout, stderr: '', exitCode: 0 };
|
||||
} catch (e: any) {
|
||||
return { stdout: e.stdout || '', stderr: e.stderr || '', exitCode: e.status ?? 1 };
|
||||
}
|
||||
}
|
||||
|
||||
describe('gstack-settings-hook', () => {
|
||||
let tmpDir: string;
|
||||
let settingsFile: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = mkTmpDir();
|
||||
settingsFile = path.join(tmpDir, 'settings.json');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('add creates settings.json if missing', () => {
|
||||
const result = run(`${SETTINGS_HOOK} add /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
const settings = JSON.parse(fs.readFileSync(settingsFile, 'utf-8'));
|
||||
expect(settings.hooks.SessionStart).toHaveLength(1);
|
||||
expect(settings.hooks.SessionStart[0].hooks[0].command).toBe('/path/to/gstack-session-update');
|
||||
});
|
||||
|
||||
test('add preserves existing settings', () => {
|
||||
fs.writeFileSync(settingsFile, JSON.stringify({ effortLevel: 'high', permissions: { defaultMode: 'auto' } }, null, 2));
|
||||
const result = run(`${SETTINGS_HOOK} add /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
const settings = JSON.parse(fs.readFileSync(settingsFile, 'utf-8'));
|
||||
expect(settings.effortLevel).toBe('high');
|
||||
expect(settings.permissions.defaultMode).toBe('auto');
|
||||
expect(settings.hooks.SessionStart).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('add deduplicates (running twice does not double-add)', () => {
|
||||
run(`${SETTINGS_HOOK} add /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
run(`${SETTINGS_HOOK} add /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
const settings = JSON.parse(fs.readFileSync(settingsFile, 'utf-8'));
|
||||
expect(settings.hooks.SessionStart).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('remove removes the hook', () => {
|
||||
run(`${SETTINGS_HOOK} add /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
const result = run(`${SETTINGS_HOOK} remove /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
const settings = JSON.parse(fs.readFileSync(settingsFile, 'utf-8'));
|
||||
expect(settings.hooks).toBeUndefined();
|
||||
});
|
||||
|
||||
test('remove is safe when settings.json does not exist', () => {
|
||||
const result = run(`${SETTINGS_HOOK} remove /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
});
|
||||
|
||||
test('remove preserves other hooks', () => {
|
||||
fs.writeFileSync(settingsFile, JSON.stringify({
|
||||
hooks: {
|
||||
SessionStart: [
|
||||
{ hooks: [{ type: 'command', command: '/path/to/gstack-session-update' }] },
|
||||
{ hooks: [{ type: 'command', command: '/other/hook' }] },
|
||||
],
|
||||
},
|
||||
}, null, 2));
|
||||
run(`${SETTINGS_HOOK} remove /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
const settings = JSON.parse(fs.readFileSync(settingsFile, 'utf-8'));
|
||||
expect(settings.hooks.SessionStart).toHaveLength(1);
|
||||
expect(settings.hooks.SessionStart[0].hooks[0].command).toBe('/other/hook');
|
||||
});
|
||||
|
||||
test('atomic write (no partial file on success)', () => {
|
||||
run(`${SETTINGS_HOOK} add /path/to/gstack-session-update`, {
|
||||
env: { GSTACK_SETTINGS_FILE: settingsFile },
|
||||
});
|
||||
// .tmp file should not exist after successful write
|
||||
expect(fs.existsSync(settingsFile + '.tmp')).toBe(false);
|
||||
// File should be valid JSON
|
||||
expect(() => JSON.parse(fs.readFileSync(settingsFile, 'utf-8'))).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-session-update', () => {
|
||||
let tmpDir: string;
|
||||
let gstackDir: string;
|
||||
let stateDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = mkTmpDir();
|
||||
gstackDir = path.join(tmpDir, 'gstack');
|
||||
stateDir = path.join(tmpDir, 'state');
|
||||
fs.mkdirSync(gstackDir, { recursive: true });
|
||||
fs.mkdirSync(stateDir, { recursive: true });
|
||||
|
||||
// Init a git repo to pass the .git guard
|
||||
execSync('git init', { cwd: gstackDir });
|
||||
execSync('git commit --allow-empty -m "init"', { cwd: gstackDir });
|
||||
fs.writeFileSync(path.join(gstackDir, 'VERSION'), '0.1.0');
|
||||
|
||||
// Create a minimal gstack-config that returns auto_upgrade=true
|
||||
const binDir = path.join(gstackDir, 'bin');
|
||||
fs.mkdirSync(binDir, { recursive: true });
|
||||
fs.writeFileSync(path.join(binDir, 'gstack-config'), '#!/bin/bash\necho "true"');
|
||||
fs.chmodSync(path.join(binDir, 'gstack-config'), 0o755);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('exits 0 when .git is missing', () => {
|
||||
fs.rmSync(path.join(gstackDir, '.git'), { recursive: true });
|
||||
const result = run(SESSION_UPDATE, {
|
||||
env: { GSTACK_DIR: gstackDir, GSTACK_STATE_DIR: stateDir },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
});
|
||||
|
||||
test('exits 0 when auto_upgrade is not true', () => {
|
||||
// Override gstack-config to return false
|
||||
fs.writeFileSync(path.join(gstackDir, 'bin', 'gstack-config'), '#!/bin/bash\necho "false"');
|
||||
const result = run(SESSION_UPDATE, {
|
||||
env: { GSTACK_DIR: gstackDir, GSTACK_STATE_DIR: stateDir },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
});
|
||||
|
||||
test('throttle: skips when checked recently', () => {
|
||||
// Write a recent throttle timestamp
|
||||
const throttleFile = path.join(stateDir, '.last-session-update');
|
||||
fs.writeFileSync(throttleFile, String(Math.floor(Date.now() / 1000)));
|
||||
|
||||
const result = run(SESSION_UPDATE, {
|
||||
env: { GSTACK_DIR: gstackDir, GSTACK_STATE_DIR: stateDir },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
// No log file should be created (throttled before forking)
|
||||
});
|
||||
|
||||
test('always exits 0 (non-fatal)', () => {
|
||||
// Even with a broken setup, should exit 0
|
||||
const result = run(SESSION_UPDATE, {
|
||||
env: { GSTACK_DIR: '/nonexistent/path', GSTACK_STATE_DIR: stateDir },
|
||||
});
|
||||
expect(result.exitCode).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-team-init', () => {
|
||||
let tmpDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = mkTmpDir();
|
||||
execSync('git init', { cwd: tmpDir });
|
||||
execSync('git commit --allow-empty -m "init"', { cwd: tmpDir });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('errors without a mode argument', () => {
|
||||
const result = run(TEAM_INIT, { cwd: tmpDir });
|
||||
expect(result.exitCode).not.toBe(0);
|
||||
expect(result.stderr).toContain('Usage');
|
||||
});
|
||||
|
||||
test('errors outside a git repo', () => {
|
||||
const nonGitDir = mkTmpDir();
|
||||
const result = run(`${TEAM_INIT} optional`, { cwd: nonGitDir });
|
||||
expect(result.exitCode).not.toBe(0);
|
||||
expect(result.stderr).toContain('not in a git repository');
|
||||
fs.rmSync(nonGitDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('optional: creates CLAUDE.md with recommended section', () => {
|
||||
const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
expect(result.exitCode).toBe(0);
|
||||
const claude = fs.readFileSync(path.join(tmpDir, 'CLAUDE.md'), 'utf-8');
|
||||
expect(claude).toContain('## gstack (recommended)');
|
||||
expect(claude).toContain('./setup --team');
|
||||
});
|
||||
|
||||
test('required: creates CLAUDE.md with required section', () => {
|
||||
const result = run(`${TEAM_INIT} required`, { cwd: tmpDir });
|
||||
expect(result.exitCode).toBe(0);
|
||||
const claude = fs.readFileSync(path.join(tmpDir, 'CLAUDE.md'), 'utf-8');
|
||||
expect(claude).toContain('## gstack (REQUIRED');
|
||||
expect(claude).toContain('GSTACK_MISSING');
|
||||
});
|
||||
|
||||
test('required: creates enforcement hook', () => {
|
||||
run(`${TEAM_INIT} required`, { cwd: tmpDir });
|
||||
const hookPath = path.join(tmpDir, '.claude', 'hooks', 'check-gstack.sh');
|
||||
expect(fs.existsSync(hookPath)).toBe(true);
|
||||
const hook = fs.readFileSync(hookPath, 'utf-8');
|
||||
expect(hook).toContain('BLOCKED: gstack is not installed');
|
||||
// Should be executable
|
||||
const stat = fs.statSync(hookPath);
|
||||
expect(stat.mode & 0o111).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('required: creates project settings.json with PreToolUse hook', () => {
|
||||
run(`${TEAM_INIT} required`, { cwd: tmpDir });
|
||||
const settingsPath = path.join(tmpDir, '.claude', 'settings.json');
|
||||
expect(fs.existsSync(settingsPath)).toBe(true);
|
||||
const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf-8'));
|
||||
expect(settings.hooks.PreToolUse).toHaveLength(1);
|
||||
expect(settings.hooks.PreToolUse[0].matcher).toBe('Skill');
|
||||
expect(settings.hooks.PreToolUse[0].hooks[0].command).toContain('check-gstack');
|
||||
});
|
||||
|
||||
test('idempotent: running twice does not duplicate CLAUDE.md section', () => {
|
||||
run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
const claude = fs.readFileSync(path.join(tmpDir, 'CLAUDE.md'), 'utf-8');
|
||||
const matches = claude.match(/## gstack/g);
|
||||
expect(matches).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('removes vendored copy when present', () => {
|
||||
// Create a fake vendored gstack with VERSION file
|
||||
const vendoredDir = path.join(tmpDir, '.claude', 'skills', 'gstack');
|
||||
fs.mkdirSync(vendoredDir, { recursive: true });
|
||||
fs.writeFileSync(path.join(vendoredDir, 'VERSION'), '0.14.0.0');
|
||||
fs.writeFileSync(path.join(vendoredDir, 'README.md'), 'vendored');
|
||||
// Track it in git
|
||||
execSync('git add .claude/skills/gstack/', { cwd: tmpDir });
|
||||
execSync('git commit -m "add vendored gstack"', { cwd: tmpDir });
|
||||
|
||||
const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
expect(result.exitCode).toBe(0);
|
||||
expect(result.stdout).toContain('Found vendored gstack copy');
|
||||
expect(result.stdout).toContain('Removed vendored copy');
|
||||
// Vendored dir should be gone
|
||||
expect(fs.existsSync(vendoredDir)).toBe(false);
|
||||
// .gitignore should have the entry
|
||||
const gitignore = fs.readFileSync(path.join(tmpDir, '.gitignore'), 'utf-8');
|
||||
expect(gitignore).toContain('.claude/skills/gstack/');
|
||||
});
|
||||
|
||||
test('skips when no vendored copy present', () => {
|
||||
const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
expect(result.exitCode).toBe(0);
|
||||
expect(result.stdout).not.toContain('Found vendored gstack copy');
|
||||
});
|
||||
|
||||
test('skips when .claude/skills/gstack is a symlink', () => {
|
||||
// Create a symlink (not a real vendored copy)
|
||||
const skillsDir = path.join(tmpDir, '.claude', 'skills');
|
||||
fs.mkdirSync(skillsDir, { recursive: true });
|
||||
const targetDir = mkTmpDir();
|
||||
fs.writeFileSync(path.join(targetDir, 'VERSION'), '0.14.0.0');
|
||||
fs.symlinkSync(targetDir, path.join(skillsDir, 'gstack'));
|
||||
|
||||
const result = run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
expect(result.exitCode).toBe(0);
|
||||
expect(result.stdout).not.toContain('Found vendored gstack copy');
|
||||
// Symlink should still exist
|
||||
expect(fs.lstatSync(path.join(skillsDir, 'gstack')).isSymbolicLink()).toBe(true);
|
||||
fs.rmSync(targetDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('does not duplicate .gitignore entry on re-run', () => {
|
||||
// Create vendored copy
|
||||
const vendoredDir = path.join(tmpDir, '.claude', 'skills', 'gstack');
|
||||
fs.mkdirSync(vendoredDir, { recursive: true });
|
||||
fs.writeFileSync(path.join(vendoredDir, 'VERSION'), '0.14.0.0');
|
||||
execSync('git add .claude/skills/gstack/', { cwd: tmpDir });
|
||||
execSync('git commit -m "add vendored"', { cwd: tmpDir });
|
||||
|
||||
run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
|
||||
// Re-create vendored dir to simulate re-run scenario
|
||||
fs.mkdirSync(vendoredDir, { recursive: true });
|
||||
fs.writeFileSync(path.join(vendoredDir, 'VERSION'), '0.14.0.0');
|
||||
run(`${TEAM_INIT} optional`, { cwd: tmpDir });
|
||||
|
||||
const gitignore = fs.readFileSync(path.join(tmpDir, '.gitignore'), 'utf-8');
|
||||
const matches = gitignore.match(/\.claude\/skills\/gstack\//g);
|
||||
expect(matches).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('setup --team / --no-team / -q', () => {
|
||||
test('setup -q produces no stdout', () => {
|
||||
const result = run(`${path.join(ROOT, 'setup')} -q`, { cwd: ROOT });
|
||||
// -q should suppress informational output (may still have some output from build)
|
||||
// The key test is that the "Skill naming:" prompt and "gstack ready" messages are suppressed
|
||||
expect(result.stdout).not.toContain('Skill naming:');
|
||||
expect(result.stdout).not.toContain('gstack ready');
|
||||
});
|
||||
|
||||
test('setup --local prints deprecation warning', () => {
|
||||
// stderr capture: run via bash redirect so we can capture stderr
|
||||
const result = run(`bash -c '${path.join(ROOT, 'setup')} --local -q 2>&1'`, { cwd: ROOT });
|
||||
expect(result.stdout).toContain('deprecated');
|
||||
});
|
||||
});
|
||||
@@ -396,3 +396,25 @@ describe('gstack-community-dashboard', () => {
|
||||
expect(output).not.toContain('Supabase not configured');
|
||||
});
|
||||
});
|
||||
|
||||
describe('preamble telemetry gating (#467)', () => {
|
||||
test('preamble source does not write JSONL unconditionally', () => {
|
||||
const preamble = fs.readFileSync(path.join(ROOT, 'scripts', 'resolvers', 'preamble.ts'), 'utf-8');
|
||||
const lines = preamble.split('\n');
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
if (lines[i].includes('skill-usage.jsonl') && lines[i].includes('>>')) {
|
||||
// Each JSONL write must be inside a _TEL conditional (within 5 lines above)
|
||||
let foundConditional = false;
|
||||
for (let j = i - 1; j >= Math.max(0, i - 5); j--) {
|
||||
if (lines[j].includes('_TEL') && lines[j].includes('off')) {
|
||||
foundConditional = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!foundConditional) {
|
||||
throw new Error(`Unconditional JSONL write at preamble.ts line ${i + 1}: ${lines[i].trim()}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import { execSync, ExecSyncOptionsWithStringEncoding } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
const BIN = path.join(ROOT, 'bin');
|
||||
|
||||
let tmpDir: string;
|
||||
let slugDir: string;
|
||||
|
||||
function runLog(input: string, opts: { expectFail?: boolean } = {}): { stdout: string; exitCode: number } {
|
||||
const execOpts: ExecSyncOptionsWithStringEncoding = {
|
||||
cwd: ROOT,
|
||||
env: { ...process.env, GSTACK_HOME: tmpDir },
|
||||
encoding: 'utf-8',
|
||||
timeout: 15000,
|
||||
};
|
||||
try {
|
||||
const stdout = execSync(`${BIN}/gstack-timeline-log '${input.replace(/'/g, "'\\''")}'`, execOpts).trim();
|
||||
return { stdout, exitCode: 0 };
|
||||
} catch (e: any) {
|
||||
if (opts.expectFail) {
|
||||
return { stdout: e.stderr?.toString() || '', exitCode: e.status || 1 };
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
function runRead(args: string = ''): string {
|
||||
const execOpts: ExecSyncOptionsWithStringEncoding = {
|
||||
cwd: ROOT,
|
||||
env: { ...process.env, GSTACK_HOME: tmpDir },
|
||||
encoding: 'utf-8',
|
||||
timeout: 15000,
|
||||
};
|
||||
try {
|
||||
return execSync(`${BIN}/gstack-timeline-read ${args}`, execOpts).trim();
|
||||
} catch {
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-timeline-'));
|
||||
slugDir = path.join(tmpDir, 'projects');
|
||||
fs.mkdirSync(slugDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
function findTimelineFile(): string | null {
|
||||
const projectDirs = fs.readdirSync(slugDir);
|
||||
if (projectDirs.length === 0) return null;
|
||||
const f = path.join(slugDir, projectDirs[0], 'timeline.jsonl');
|
||||
return fs.existsSync(f) ? f : null;
|
||||
}
|
||||
|
||||
describe('gstack-timeline-log', () => {
|
||||
test('accepts valid JSON and appends to timeline.jsonl', () => {
|
||||
const input = '{"skill":"review","event":"started","branch":"main"}';
|
||||
const result = runLog(input);
|
||||
expect(result.exitCode).toBe(0);
|
||||
|
||||
const f = findTimelineFile();
|
||||
expect(f).not.toBeNull();
|
||||
const content = fs.readFileSync(f!, 'utf-8').trim();
|
||||
const parsed = JSON.parse(content);
|
||||
expect(parsed.skill).toBe('review');
|
||||
expect(parsed.event).toBe('started');
|
||||
expect(parsed.branch).toBe('main');
|
||||
});
|
||||
|
||||
test('rejects invalid JSON with exit 0 (non-blocking)', () => {
|
||||
const result = runLog('not json at all');
|
||||
expect(result.exitCode).toBe(0);
|
||||
|
||||
// No file should be created
|
||||
const f = findTimelineFile();
|
||||
expect(f).toBeNull();
|
||||
});
|
||||
|
||||
test('injects timestamp when ts field is missing', () => {
|
||||
const input = '{"skill":"review","event":"started","branch":"main"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findTimelineFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBeDefined();
|
||||
expect(new Date(parsed.ts).getTime()).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test('preserves timestamp when ts field is present', () => {
|
||||
const input = '{"skill":"review","event":"completed","branch":"main","ts":"2025-06-15T10:00:00Z"}';
|
||||
runLog(input);
|
||||
|
||||
const f = findTimelineFile();
|
||||
expect(f).not.toBeNull();
|
||||
const parsed = JSON.parse(fs.readFileSync(f!, 'utf-8').trim());
|
||||
expect(parsed.ts).toBe('2025-06-15T10:00:00Z');
|
||||
});
|
||||
|
||||
test('validates required fields (skill, event) - exits 0 if missing skill', () => {
|
||||
const result = runLog('{"event":"started","branch":"main"}');
|
||||
expect(result.exitCode).toBe(0);
|
||||
|
||||
const f = findTimelineFile();
|
||||
expect(f).toBeNull();
|
||||
});
|
||||
|
||||
test('validates required fields (skill, event) - exits 0 if missing event', () => {
|
||||
const result = runLog('{"skill":"review","branch":"main"}');
|
||||
expect(result.exitCode).toBe(0);
|
||||
|
||||
const f = findTimelineFile();
|
||||
expect(f).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('gstack-timeline-read', () => {
|
||||
test('returns empty output for missing file (exit 0)', () => {
|
||||
const output = runRead();
|
||||
expect(output).toBe('');
|
||||
});
|
||||
|
||||
test('filters by --branch', () => {
|
||||
runLog(JSON.stringify({ skill: 'review', event: 'completed', branch: 'feature-a', outcome: 'approved', ts: '2026-03-28T10:00:00Z' }));
|
||||
runLog(JSON.stringify({ skill: 'ship', event: 'completed', branch: 'feature-b', outcome: 'merged', ts: '2026-03-28T11:00:00Z' }));
|
||||
|
||||
const output = runRead('--branch feature-a');
|
||||
expect(output).toContain('review');
|
||||
expect(output).not.toContain('feature-b');
|
||||
});
|
||||
|
||||
test('limits output with --limit', () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
runLog(JSON.stringify({ skill: 'review', event: 'completed', branch: 'main', outcome: 'approved', ts: `2026-03-2${i}T10:00:00Z` }));
|
||||
}
|
||||
|
||||
const unlimited = runRead('--limit 20');
|
||||
const limited = runRead('--limit 2');
|
||||
|
||||
// Count event lines (lines starting with "- ")
|
||||
const unlimitedEvents = unlimited.split('\n').filter(l => l.startsWith('- ')).length;
|
||||
const limitedEvents = limited.split('\n').filter(l => l.startsWith('- ')).length;
|
||||
|
||||
expect(unlimitedEvents).toBe(5);
|
||||
expect(limitedEvents).toBe(2);
|
||||
});
|
||||
});
|
||||
@@ -101,7 +101,7 @@ describe('selectTests', () => {
|
||||
expect(result.reason).toBe('diff');
|
||||
// Should include tests that depend on gen-skill-docs.ts
|
||||
expect(result.selected).toContain('skillmd-setup-discovery');
|
||||
expect(result.selected).toContain('contributor-mode');
|
||||
expect(result.selected).toContain('session-awareness');
|
||||
expect(result.selected).toContain('journey-ideation');
|
||||
// Should NOT include tests that don't depend on it
|
||||
expect(result.selected).not.toContain('retro');
|
||||
@@ -144,7 +144,7 @@ describe('selectTests', () => {
|
||||
const result = selectTests(['SKILL.md.tmpl'], E2E_TOUCHFILES);
|
||||
// Should select the 7 tests that depend on root SKILL.md
|
||||
expect(result.selected).toContain('skillmd-setup-discovery');
|
||||
expect(result.selected).toContain('contributor-mode');
|
||||
expect(result.selected).toContain('session-awareness');
|
||||
expect(result.selected).toContain('session-awareness');
|
||||
// Also selects journey routing tests (SKILL.md.tmpl in their touchfiles)
|
||||
expect(result.selected).toContain('journey-ideation');
|
||||
|
||||
@@ -231,6 +231,9 @@ describe('WorktreeManager', () => {
|
||||
spawnSync('git', ['worktree', 'remove', '--force', oldPath], { cwd: repo, stdio: 'pipe' });
|
||||
// Recreate the directory to simulate orphaned state
|
||||
fs.mkdirSync(oldPath, { recursive: true });
|
||||
// Backdate mtime to simulate a stale worktree (> 1 hour old)
|
||||
const staleTime = new Date(Date.now() - 7200_000);
|
||||
fs.utimesSync(oldRunDir, staleTime, staleTime);
|
||||
|
||||
// New manager should prune the old run's directory
|
||||
const newMgr = new WorktreeManager(repo);
|
||||
|
||||
Reference in New Issue
Block a user