mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-05 05:05:08 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/elegance
# Conflicts: # SKILL.md # cso/SKILL.md # cso/SKILL.md.tmpl # scripts/gen-skill-docs.ts
This commit is contained in:
@@ -139,6 +139,9 @@ describeCodex('Codex E2E', () => {
|
||||
|
||||
expect(result.exitCode).toBe(0);
|
||||
expect(result.output.length).toBeGreaterThan(0);
|
||||
// Skill loading errors mean our generated SKILL.md files are broken
|
||||
expect(result.stderr).not.toContain('invalid');
|
||||
expect(result.stderr).not.toContain('Skipped loading');
|
||||
// The output should reference the skill name in some form
|
||||
const outputLower = result.output.toLowerCase();
|
||||
expect(
|
||||
|
||||
@@ -76,7 +76,7 @@ if (evalsEnabled && !process.env.EVALS_ALL) {
|
||||
/**
 * Register an individual test, skipping it when diff-based selection excludes it.
 *
 * A test runs when `selectedTests` is `null` (no selection active) or when its
 * name is listed in `selectedTests`; otherwise it is registered via `test.skip`.
 * Selected tests are registered exactly once, through `test.concurrent`.
 *
 * @param testName Name used both for registration and for the selection lookup.
 * @param fn       Async test body.
 * @param timeout  Per-test timeout forwarded to the test runner.
 */
function testIfSelected(testName: string, fn: () => Promise<void>, timeout: number) {
  const shouldRun = selectedTests === null || selectedTests.includes(testName);
  // Register once only: a second plain `test(...)` call here would run (or skip) every test twice.
  (shouldRun ? test.concurrent : test.skip)(testName, fn, timeout);
}
|
||||
|
||||
// --- Eval result collector ---
|
||||
|
||||
@@ -139,6 +139,25 @@ describe('gen-skill-docs', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test(`every Codex SKILL.md description stays within ${MAX_SKILL_DESCRIPTION_LENGTH} chars`, () => {
  const agentsDir = path.join(ROOT, '.agents', 'skills');
  // Nothing to verify when the Codex output has not been generated.
  if (!fs.existsSync(agentsDir)) return;

  // One candidate SKILL.md per sub-directory; directories without one are ignored.
  const skillFiles = fs
    .readdirSync(agentsDir, { withFileTypes: true })
    .filter((entry) => entry.isDirectory())
    .map((entry) => path.join(agentsDir, entry.name, 'SKILL.md'))
    .filter((skillMd) => fs.existsSync(skillMd));

  for (const skillMd of skillFiles) {
    const description = extractDescription(fs.readFileSync(skillMd, 'utf-8'));
    expect(description.length).toBeLessThanOrEqual(MAX_SKILL_DESCRIPTION_LENGTH);
  }
});
|
||||
|
||||
test('package.json version matches VERSION file', () => {
  // Both files live at the repository root and must carry the same version string.
  const readRootFile = (name: string) => fs.readFileSync(path.join(ROOT, name), 'utf-8');
  const manifest = JSON.parse(readRootFile('package.json'));
  const expectedVersion = readRootFile('VERSION').trim();
  expect(manifest.version).toBe(expectedVersion);
});
|
||||
|
||||
test('generated files are fresh (match --dry-run)', () => {
|
||||
const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--dry-run'], {
|
||||
cwd: ROOT,
|
||||
@@ -214,6 +233,17 @@ describe('gen-skill-docs', () => {
|
||||
expect(content).toContain('~/.gstack/analytics');
|
||||
});
|
||||
|
||||
test('preamble .pending-* glob is zsh-safe (uses find, not shell glob)', () => {
  // A `for _PF in …/.pending-*` loop relies on shell glob expansion, which errors in zsh (NOMATCH).
  const bareGlobLoop = /for _PF in [^\n]*\/\.pending-\*/;
  // The safe form passes the pattern to find's -name argument instead.
  const findInvocation = "find ~/.gstack/analytics -maxdepth 1 -name '.pending-*'";

  for (const skill of ALL_SKILLS) {
    const skillDoc = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
    // Only skills that mention pending files at all are subject to the check.
    if (skillDoc.includes('.pending-')) {
      expect(skillDoc).not.toMatch(bareGlobLoop);
      expect(skillDoc).toContain(findInvocation);
    }
  }
});
|
||||
|
||||
test('preamble-using skills have correct skill name in telemetry', () => {
|
||||
const PREAMBLE_SKILLS = [
|
||||
{ dir: '.', name: 'gstack' },
|
||||
@@ -407,6 +437,20 @@ describe('REVIEW_DASHBOARD resolver', () => {
|
||||
expect(content).toContain('REVIEW READINESS DASHBOARD');
|
||||
});
|
||||
|
||||
test('dashboard treats review as a valid Eng Review source', () => {
  const shipSkill = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
  // Every snippet must appear verbatim in the generated ship/SKILL.md.
  const requiredSnippets = [
    'plan-eng-review, review, plan-design-review',
    '`review` (diff-scoped pre-landing review)',
    '`plan-eng-review` (plan-stage architecture review)',
    'from either \\`review\\` or \\`plan-eng-review\\`',
  ];
  for (const snippet of requiredSnippets) {
    expect(shipSkill).toContain(snippet);
  }
});
|
||||
|
||||
test('shared dashboard propagates review source to plan-eng-review', () => {
  const planEngSkill = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
  // The shared dashboard text must also be present in plan-eng-review/SKILL.md.
  const requiredSnippets = [
    'plan-eng-review, review, plan-design-review',
    '`review` (diff-scoped pre-landing review)',
  ];
  for (const snippet of requiredSnippets) {
    expect(planEngSkill).toContain(snippet);
  }
});
|
||||
|
||||
test('resolver output contains key dashboard elements', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('VERDICT');
|
||||
@@ -936,6 +980,14 @@ describe('Codex generation (--host codex)', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('root gstack bundle has OpenAI metadata for Codex skill browsing', () => {
  const rootMetadata = path.join(ROOT, 'agents', 'openai.yaml');
  // Assert existence first so a missing file fails with a clear expectation rather than a read error.
  expect(fs.existsSync(rootMetadata)).toBe(true);

  const yamlText = fs.readFileSync(rootMetadata, 'utf-8');
  const requiredEntries = [
    'display_name: "gstack"',
    'Use $gstack to locate the bundled gstack skills.',
  ];
  for (const entry of requiredEntries) {
    expect(yamlText).toContain(entry);
  }
});
|
||||
|
||||
test('codexSkillName mapping: root is gstack, others are gstack-{dir}', () => {
|
||||
// Root → gstack
|
||||
expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack', 'SKILL.md'))).toBe(true);
|
||||
@@ -965,6 +1017,17 @@ describe('Codex generation (--host codex)', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('all Codex skills have agents/openai.yaml metadata', () => {
  for (const skill of CODEX_SKILLS) {
    const metadataPath = path.join(AGENTS_DIR, skill.codexName, 'agents', 'openai.yaml');
    expect(fs.existsSync(metadataPath)).toBe(true);

    // Each metadata file must name the skill and carry the description and invocation fields.
    const yamlText = fs.readFileSync(metadataPath, 'utf-8');
    const requiredEntries = [
      `display_name: "${skill.codexName}"`,
      'short_description:',
      'allow_implicit_invocation: true',
    ];
    for (const entry of requiredEntries) {
      expect(yamlText).toContain(entry);
    }
  }
});
|
||||
|
||||
test('no .claude/skills/ in Codex output', () => {
|
||||
for (const skill of CODEX_SKILLS) {
|
||||
const content = fs.readFileSync(path.join(AGENTS_DIR, skill.codexName, 'SKILL.md'), 'utf-8');
|
||||
|
||||
@@ -27,6 +27,7 @@ export interface CodexResult {
|
||||
durationMs: number; // Wall clock time
|
||||
sessionId: string | null; // Thread ID for session continuity
|
||||
rawLines: string[]; // Raw JSONL lines for debugging
|
||||
stderr: string; // Stderr output (skill loading errors, auth failures)
|
||||
}
|
||||
|
||||
// --- JSONL parser (ported from Python in codex/SKILL.md.tmpl) ---
|
||||
@@ -98,7 +99,8 @@ export function parseCodexJSONL(lines: string[]): ParsedCodexJSONL {
|
||||
|
||||
/**
|
||||
* Install a SKILL.md into a temp HOME directory for Codex to discover.
* Creates ~/.codex/skills/{skillName}/SKILL.md in the temp HOME and copies
* agents/openai.yaml when present so Codex sees the same metadata as a real install.
|
||||
*
|
||||
* Returns the temp HOME path. Caller is responsible for cleanup.
|
||||
*/
|
||||
@@ -116,6 +118,13 @@ export function installSkillToTempHome(
|
||||
fs.copyFileSync(srcSkill, path.join(destDir, 'SKILL.md'));
|
||||
}
|
||||
|
||||
const srcOpenAIYaml = path.join(skillDir, 'agents', 'openai.yaml');
|
||||
if (fs.existsSync(srcOpenAIYaml)) {
|
||||
const destAgentsDir = path.join(destDir, 'agents');
|
||||
fs.mkdirSync(destAgentsDir, { recursive: true });
|
||||
fs.copyFileSync(srcOpenAIYaml, path.join(destAgentsDir, 'openai.yaml'));
|
||||
}
|
||||
|
||||
return home;
|
||||
}
|
||||
|
||||
@@ -159,6 +168,7 @@ export async function runCodexSkill(opts: {
|
||||
durationMs: Date.now() - startTime,
|
||||
sessionId: null,
|
||||
rawLines: [],
|
||||
stderr: '',
|
||||
};
|
||||
}
|
||||
|
||||
@@ -274,6 +284,7 @@ export async function runCodexSkill(opts: {
|
||||
durationMs,
|
||||
sessionId: parsed.sessionId,
|
||||
rawLines: collectedLines,
|
||||
stderr,
|
||||
};
|
||||
} finally {
|
||||
// Clean up temp HOME
|
||||
|
||||
@@ -83,6 +83,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
// Global discover
|
||||
'global-discover': ['bin/gstack-global-discover.ts', 'test/global-discover.test.ts'],
|
||||
|
||||
// CSO
|
||||
'cso-full-audit': ['cso/**'],
|
||||
'cso-diff-mode': ['cso/**'],
|
||||
'cso-infra-scope': ['cso/**'],
|
||||
|
||||
// Document-release
|
||||
'document-release': ['document-release/**'],
|
||||
|
||||
|
||||
@@ -0,0 +1,258 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, runId, evalsEnabled,
|
||||
describeIfSelected, logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Collector handed to recordE2E by every test in this file.
const evalCollector = createEvalCollector('e2e-cso');

// Finalize the collector once, after all suites in this file have run.
afterAll(function finalizeCsoEvals() {
  finalizeEvalCollector(evalCollector);
});
|
||||
|
||||
// --- CSO v2 E2E Tests ---
|
||||
|
||||
/**
 * E2E: run /cso (full audit) against a throwaway git repo seeded with two
 * planted problems — a hardcoded API key in server.ts and a committed .env —
 * and check that the skill output reports both and includes a findings table.
 */
describeIfSelected('CSO v2 — full audit', ['cso-full-audit'], () => {
  let csoDir: string;

  beforeAll(() => {
    csoDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-'));

    // Run a setup command inside the fixture repo and fail fast if it does not
    // succeed; a silently broken fixture would surface later as a misleading
    // assertion failure on the skill output.
    const run = (cmd: string, args: string[]) => {
      const res = spawnSync(cmd, args, { cwd: csoDir, stdio: 'pipe', timeout: 5000 });
      if (res.error || res.status !== 0) {
        const detail = res.error?.message ?? res.stderr?.toString().trim() ?? '';
        throw new Error(`fixture setup failed: ${cmd} ${args.join(' ')}: ${detail}`);
      }
      return res;
    };

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Create a minimal app with a planted vulnerability
    fs.writeFileSync(path.join(csoDir, 'package.json'), JSON.stringify({
      name: 'cso-test-app',
      version: '1.0.0',
      dependencies: { express: '4.18.0' },
    }, null, 2));

    // Planted vuln: hardcoded API key
    fs.writeFileSync(path.join(csoDir, 'server.ts'), `
import express from 'express';
const app = express();
const API_KEY = "sk-1234567890abcdef1234567890abcdef";
app.get('/api/data', (req, res) => {
  const id = req.query.id;
  res.json({ data: \`result for \${id}\` });
});
app.listen(3000);
`);

    // Planted vuln: .env tracked by git
    fs.writeFileSync(path.join(csoDir, '.env'), 'DATABASE_URL=postgres://admin:secretpass@prod.db.example.com:5432/myapp\n');

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);
  });

  afterAll(() => {
    // Best-effort cleanup; never fail the suite because the temp dir is already gone.
    try { fs.rmSync(csoDir, { recursive: true, force: true }); } catch {}
  });

  test('/cso finds planted vulnerabilities', async () => {
    const result = await runSkillTest({
      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.

Run /cso on this repo (full daily audit, no flags).

IMPORTANT:
- Do NOT use AskUserQuestion — skip any interactive prompts.
- Focus on finding the planted vulnerabilities in this small repo.
- Produce the SECURITY FINDINGS table.
- Save the report to .gstack/security-reports/.`,
      workingDirectory: csoDir,
      maxTurns: 30,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'Agent'],
      timeout: 300_000,
    });

    logCost('cso', result);
    expect(result.exitReason).toBe('success');

    // All text checks below run against the lower-cased output, so the
    // needles must be lower-case too.
    const output = result.output.toLowerCase();

    // Should detect hardcoded API key
    expect(
      output.includes('sk-') || output.includes('hardcoded') || output.includes('api key') || output.includes('api_key')
    ).toBe(true);

    // Should detect .env tracked by git
    expect(
      output.includes('.env') && (output.includes('tracked') || output.includes('gitignore'))
    ).toBe(true);

    // Should produce a findings table (an upper-case needle could never match here)
    expect(output.includes('security findings')).toBe(true);

    // Should save a report.
    // NOTE(review): the check is skipped when the report directory is missing —
    // confirm whether a missing report should fail the test.
    const reportDir = path.join(csoDir, '.gstack', 'security-reports');
    if (fs.existsSync(reportDir)) {
      const reports = fs.readdirSync(reportDir).filter(f => f.endsWith('.json'));
      expect(reports.length).toBeGreaterThanOrEqual(1);
    }

    recordE2E(evalCollector, 'cso-full-audit', 'e2e-cso', result);
  }, 300_000);
});
|
||||
|
||||
/**
 * E2E: run /cso --diff against a throwaway git repo whose feature branch adds
 * a webhook handler without signature verification, and check that the skill
 * output reports the webhook together with the missing verification.
 */
describeIfSelected('CSO v2 — diff mode', ['cso-diff-mode'], () => {
  let csoDiffDir: string;

  beforeAll(() => {
    csoDiffDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-diff-'));

    // Run a setup command inside the fixture repo and fail fast if it does not
    // succeed; without the commits and the feature branch there is no diff for
    // the skill to analyse.
    const run = (cmd: string, args: string[]) => {
      const res = spawnSync(cmd, args, { cwd: csoDiffDir, stdio: 'pipe', timeout: 5000 });
      if (res.error || res.status !== 0) {
        const detail = res.error?.message ?? res.stderr?.toString().trim() ?? '';
        throw new Error(`fixture setup failed: ${cmd} ${args.join(' ')}: ${detail}`);
      }
      return res;
    };

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Clean initial commit
    fs.writeFileSync(path.join(csoDiffDir, 'package.json'), JSON.stringify({
      name: 'cso-diff-test', version: '1.0.0',
    }, null, 2));
    fs.writeFileSync(path.join(csoDiffDir, 'app.ts'), 'console.log("hello");\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    // Feature branch with a vuln
    run('git', ['checkout', '-b', 'feat/add-webhook']);
    fs.writeFileSync(path.join(csoDiffDir, 'webhook.ts'), `
import express from 'express';
const app = express();
// No signature verification!
app.post('/webhook/stripe', (req, res) => {
  const event = req.body;
  processPayment(event);
  res.sendStatus(200);
});
`);
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'feat: add webhook']);
  });

  afterAll(() => {
    // Best-effort cleanup; never fail the suite because the temp dir is already gone.
    try { fs.rmSync(csoDiffDir, { recursive: true, force: true }); } catch {}
  });

  test('/cso --diff scopes to branch changes', async () => {
    const result = await runSkillTest({
      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.

Run /cso --diff on this repo. The base branch is "main".

IMPORTANT:
- Do NOT use AskUserQuestion — skip any interactive prompts.
- Focus on changes in the current branch vs main.
- The webhook.ts file was added on this branch — it should be analyzed.`,
      workingDirectory: csoDiffDir,
      maxTurns: 25,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'Agent'],
      timeout: 240_000,
    });

    logCost('cso', result);
    expect(result.exitReason).toBe('success');

    const output = result.output.toLowerCase();
    // Should mention webhook and missing signature verification
    expect(
      output.includes('webhook') && (output.includes('signature') || output.includes('verify'))
    ).toBe(true);

    recordE2E(evalCollector, 'cso-diff-mode', 'e2e-cso', result);
  }, 240_000);
});
|
||||
|
||||
/**
 * E2E: run /cso --infra against a throwaway git repo containing a CI workflow
 * with an unpinned third-party action and a Dockerfile without a USER
 * directive, and check that the skill output mentions at least one of them.
 */
describeIfSelected('CSO v2 — infra scope', ['cso-infra-scope'], () => {
  let csoInfraDir: string;

  beforeAll(() => {
    csoInfraDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-infra-'));

    // Run a setup command inside the fixture repo and fail fast if it does not
    // succeed; a silently broken fixture would surface later as a misleading
    // assertion failure on the skill output.
    const run = (cmd: string, args: string[]) => {
      const res = spawnSync(cmd, args, { cwd: csoInfraDir, stdio: 'pipe', timeout: 5000 });
      if (res.error || res.status !== 0) {
        const detail = res.error?.message ?? res.stderr?.toString().trim() ?? '';
        throw new Error(`fixture setup failed: ${cmd} ${args.join(' ')}: ${detail}`);
      }
      return res;
    };

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // The prompt below tells the agent the repo contains package.json, so the
    // fixture has to provide one; otherwise the agent is sent after a missing file.
    fs.writeFileSync(path.join(csoInfraDir, 'package.json'), JSON.stringify({
      name: 'cso-infra-test', version: '1.0.0',
    }, null, 2));

    // CI workflow with unpinned action
    fs.mkdirSync(path.join(csoInfraDir, '.github', 'workflows'), { recursive: true });
    fs.writeFileSync(path.join(csoInfraDir, '.github', 'workflows', 'ci.yml'), `
name: CI
on: [push]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: some-third-party/action@main
      - run: echo "Building..."
`);

    // Dockerfile running as root
    fs.writeFileSync(path.join(csoInfraDir, 'Dockerfile'), `
FROM node:20
WORKDIR /app
COPY . .
RUN npm install
EXPOSE 3000
CMD ["node", "server.js"]
`);

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);
  });

  afterAll(() => {
    // Best-effort cleanup; never fail the suite because the temp dir is already gone.
    try { fs.rmSync(csoInfraDir, { recursive: true, force: true }); } catch {}
  });

  test('/cso --infra runs infrastructure phases only', async () => {
    const result = await runSkillTest({
      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.

Run /cso --infra on this repo. This should run infrastructure-only phases (0-6, 12-14).

IMPORTANT:
- Do NOT use AskUserQuestion — skip any interactive prompts.
- This is a TINY repo with only 3 files: .github/workflows/ci.yml, Dockerfile, and package.json. Do NOT waste turns exploring — just read those files directly and audit them.
- The Dockerfile has no USER directive (runs as root). The CI workflow uses an unpinned third-party GitHub Action (some-third-party/action@main).
- Focus on infrastructure findings, NOT code-level OWASP scanning.
- Skip the preamble (gstack-update-check, telemetry, etc.) — go straight to the audit.
- Do NOT use the Agent tool for exploration or verification — read the files yourself. This repo is too small to need subagents.`,
      workingDirectory: csoInfraDir,
      maxTurns: 30,
      // No 'Agent' tool here, matching the prompt's instruction not to use subagents.
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 360_000,
    });

    logCost('cso', result);
    expect(result.exitReason).toBe('success');

    const output = result.output.toLowerCase();
    // Should mention unpinned action or Dockerfile issues
    expect(
      output.includes('unpinned') || output.includes('third-party') ||
      output.includes('user directive') || output.includes('root')
    ).toBe(true);

    recordE2E(evalCollector, 'cso-infra-scope', 'e2e-cso', result);
  }, 360_000);
});
|
||||
@@ -44,7 +44,7 @@ describeIfSelected('Land-and-Deploy skill E2E', ['land-and-deploy-workflow'], ()
|
||||
try { fs.rmSync(landDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/land-and-deploy detects Fly.io platform and produces deploy report structure', async () => {
|
||||
testConcurrentIfSelected('land-and-deploy-workflow', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read land-and-deploy/SKILL.md for the /land-and-deploy skill instructions.
|
||||
|
||||
@@ -110,7 +110,7 @@ describeIfSelected('Canary skill E2E', ['canary-workflow'], () => {
|
||||
try { fs.rmSync(canaryDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/canary skill produces monitoring report structure', async () => {
|
||||
testConcurrentIfSelected('canary-workflow', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read canary/SKILL.md for the /canary skill instructions.
|
||||
|
||||
@@ -171,7 +171,7 @@ describeIfSelected('Benchmark skill E2E', ['benchmark-workflow'], () => {
|
||||
try { fs.rmSync(benchDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/benchmark skill produces performance report structure', async () => {
|
||||
testConcurrentIfSelected('benchmark-workflow', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read benchmark/SKILL.md for the /benchmark skill instructions.
|
||||
|
||||
@@ -237,7 +237,7 @@ describeIfSelected('Setup-Deploy skill E2E', ['setup-deploy-workflow'], () => {
|
||||
try { fs.rmSync(setupDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/setup-deploy detects Fly.io and writes config to CLAUDE.md', async () => {
|
||||
testConcurrentIfSelected('setup-deploy-workflow', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read setup-deploy/SKILL.md for the /setup-deploy skill instructions.
|
||||
|
||||
|
||||
@@ -560,7 +560,7 @@ describeIfSelected('Design Review E2E', ['design-review-fix'], () => {
|
||||
try { fs.rmSync(qaDesignDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('Test 7: /design-review audits and fixes design issues', async () => {
|
||||
testConcurrentIfSelected('design-review-fix', async () => {
|
||||
const serverUrl = `http://localhost:${(qaDesignServer as any)?.port}`;
|
||||
|
||||
const result = await runSkillTest({
|
||||
|
||||
@@ -66,7 +66,7 @@ We're building a new user dashboard that shows recent activity, notifications, a
|
||||
try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/plan-ceo-review produces structured review output', async () => {
|
||||
testConcurrentIfSelected('plan-ceo-review', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read plan-ceo-review/SKILL.md for the review workflow.
|
||||
|
||||
@@ -150,7 +150,7 @@ We're building a new user dashboard that shows recent activity, notifications, a
|
||||
try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/plan-ceo-review SELECTIVE EXPANSION produces structured review output', async () => {
|
||||
testConcurrentIfSelected('plan-ceo-review-selective', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read plan-ceo-review/SKILL.md for the review workflow.
|
||||
|
||||
@@ -244,7 +244,7 @@ Replace session-cookie auth with JWT tokens. Currently using express-session + R
|
||||
try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/plan-eng-review produces structured review output', async () => {
|
||||
testConcurrentIfSelected('plan-eng-review', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read plan-eng-review/SKILL.md for the review workflow.
|
||||
|
||||
@@ -364,7 +364,7 @@ export function main() { return Dashboard(); }
|
||||
} catch {}
|
||||
});
|
||||
|
||||
test('/plan-eng-review writes test-plan artifact to ~/.gstack/projects/', async () => {
|
||||
testConcurrentIfSelected('plan-eng-review-artifact', async () => {
|
||||
// Count existing test-plan files before
|
||||
const beforeFiles = fs.readdirSync(projectDir).filter(f => f.includes('test-plan'));
|
||||
|
||||
@@ -442,7 +442,7 @@ describeIfSelected('Office Hours Spec Review E2E', ['office-hours-spec-review'],
|
||||
try { fs.rmSync(ohDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/office-hours SKILL.md contains spec review loop', async () => {
|
||||
testConcurrentIfSelected('office-hours-spec-review', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read office-hours/SKILL.md. I want to understand the spec review loop.
|
||||
|
||||
@@ -502,7 +502,7 @@ describeIfSelected('Plan CEO Review Benefits-From E2E', ['plan-ceo-review-benefi
|
||||
try { fs.rmSync(benefitsDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/plan-ceo-review SKILL.md contains prerequisite skill offer', async () => {
|
||||
testConcurrentIfSelected('plan-ceo-review-benefits', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read plan-ceo-review/SKILL.md. Search for sections about "Prerequisite" or "office-hours" or "design doc found".
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import { outcomeJudge } from './helpers/llm-judge';
|
||||
import { judgePassed } from './helpers/eval-store';
|
||||
import {
|
||||
ROOT, browseBin, runId, evalsEnabled, selectedTests, hasApiKey,
|
||||
describeIfSelected, describeE2E,
|
||||
describeIfSelected, describeE2E, testConcurrentIfSelected,
|
||||
copyDirSync, setupBrowseShims, logCost, recordE2E, dumpOutcomeDiagnostic,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
@@ -172,17 +172,17 @@ CRITICAL RULES:
|
||||
}
|
||||
|
||||
// B6: Static dashboard — broken link, disabled submit, overflow, missing alt, console error
|
||||
test('/qa finds >= 2 of 5 planted bugs (static)', async () => {
|
||||
testConcurrentIfSelected('qa-b6-static', async () => {
|
||||
await runPlantedBugEval('qa-eval.html', 'qa-eval-ground-truth.json', 'b6-static');
|
||||
}, 360_000);
|
||||
|
||||
// B7: SPA — broken route, stale state, async race, missing aria, console warning
|
||||
test('/qa finds >= 2 of 5 planted SPA bugs', async () => {
|
||||
testConcurrentIfSelected('qa-b7-spa', async () => {
|
||||
await runPlantedBugEval('qa-eval-spa.html', 'qa-eval-spa-ground-truth.json', 'b7-spa');
|
||||
}, 360_000);
|
||||
|
||||
// B8: Checkout — email regex, NaN total, CC overflow, missing required, stripe error
|
||||
test('/qa finds >= 2 of 5 planted checkout bugs', async () => {
|
||||
testConcurrentIfSelected('qa-b8-checkout', async () => {
|
||||
await runPlantedBugEval('qa-eval-checkout.html', 'qa-eval-checkout-ground-truth.json', 'b8-checkout');
|
||||
}, 360_000);
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ describeIfSelected('QA skill E2E', ['qa-quick'], () => {
|
||||
try { fs.rmSync(qaDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/qa quick completes without browse errors', async () => {
|
||||
testConcurrentIfSelected('qa-quick', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `B="${browseBin}"
|
||||
|
||||
@@ -108,7 +108,7 @@ describeIfSelected('QA-Only skill E2E', ['qa-only-no-fix'], () => {
|
||||
try { fs.rmSync(qaOnlyDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/qa-only produces report without using Edit tool', async () => {
|
||||
testConcurrentIfSelected('qa-only-no-fix', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `IMPORTANT: The browse binary is already assigned below as B. Do NOT search for it or run the SKILL.md setup block — just use $B directly.
|
||||
|
||||
@@ -227,7 +227,7 @@ describeIfSelected('QA Fix Loop E2E', ['qa-fix-loop'], () => {
|
||||
try { fs.rmSync(qaFixDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/qa fix loop finds bugs and commits fixes', async () => {
|
||||
testConcurrentIfSelected('qa-fix-loop', async () => {
|
||||
const qaFixUrl = `http://127.0.0.1:${qaFixServer!.port}`;
|
||||
|
||||
const result = await runSkillTest({
|
||||
|
||||
@@ -51,7 +51,7 @@ describeIfSelected('Review skill E2E', ['review-sql-injection'], () => {
|
||||
try { fs.rmSync(reviewDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/review produces findings on SQL injection branch', async () => {
|
||||
testConcurrentIfSelected('review-sql-injection', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on a feature branch with changes against main.
|
||||
Read review-SKILL.md for the review workflow instructions.
|
||||
@@ -125,7 +125,7 @@ describeIfSelected('Review enum completeness E2E', ['review-enum-completeness'],
|
||||
try { fs.rmSync(enumDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/review catches missing enum handlers for new status value', async () => {
|
||||
testConcurrentIfSelected('review-enum-completeness', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on branch feature/add-returned-status with changes against main.
|
||||
Read review-SKILL.md for the review workflow instructions.
|
||||
@@ -200,7 +200,7 @@ describeIfSelected('Review design lite E2E', ['review-design-lite'], () => {
|
||||
try { fs.rmSync(designDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/review catches design anti-patterns in CSS/HTML diff', async () => {
|
||||
testConcurrentIfSelected('review-design-lite', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in a git repo on branch feature/add-landing-page with changes against main.
|
||||
Read review-SKILL.md for the review workflow instructions.
|
||||
@@ -497,7 +497,7 @@ describeIfSelected('Retro E2E', ['retro'], () => {
|
||||
try { fs.rmSync(retroDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/retro produces analysis from git history', async () => {
|
||||
testConcurrentIfSelected('retro', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read retro/SKILL.md for instructions on how to run a retrospective.
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ describeIfSelected('Document-Release skill E2E', ['document-release'], () => {
|
||||
try { fs.rmSync(docReleaseDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/document-release updates docs without clobbering CHANGELOG', async () => {
|
||||
testConcurrentIfSelected('document-release', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read the file document-release/SKILL.md for the document-release workflow instructions.
|
||||
|
||||
@@ -461,7 +461,7 @@ describe('processPayment', () => {
|
||||
try { fs.rmSync(coverageDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/ship Step 3.4 produces coverage diagram', async () => {
|
||||
testConcurrentIfSelected('ship-coverage-audit', async () => {
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read the file ship/SKILL.md for the ship workflow instructions.
|
||||
|
||||
@@ -544,7 +544,7 @@ describeIfSelected('Codex skill E2E', ['codex-review'], () => {
|
||||
try { fs.rmSync(codexDir, { recursive: true, force: true }); } catch {}
|
||||
});
|
||||
|
||||
test('/codex review produces findings and GATE verdict', async () => {
|
||||
testConcurrentIfSelected('codex-review', async () => {
|
||||
// Check codex is available — skip if not installed
|
||||
const codexCheck = spawnSync('which', ['codex'], { stdio: 'pipe', timeout: 3000 });
|
||||
if (codexCheck.status !== 0) {
|
||||
|
||||
@@ -56,7 +56,7 @@ function describeIfSelected(name: string, testNames: string[], fn: () => void) {
|
||||
/**
 * Register an individual test, skipping it when it is not selected
 * (for multi-test describe blocks).
 *
 * A test runs when `selectedTests` is `null` (no selection active) or when its
 * name is listed in `selectedTests`; otherwise it is registered via `test.skip`.
 * Selected tests are registered exactly once, through `test.concurrent`.
 *
 * @param testName Name used both for registration and for the selection lookup.
 * @param fn       Async test body.
 * @param timeout  Per-test timeout forwarded to the test runner.
 */
function testIfSelected(testName: string, fn: () => Promise<void>, timeout: number) {
  const shouldRun = selectedTests === null || selectedTests.includes(testName);
  // Register once only: a second plain `test(...)` call here would run (or skip) every test twice.
  (shouldRun ? test.concurrent : test.skip)(testName, fn, timeout);
}
|
||||
|
||||
describeIfSelected('LLM-as-judge quality evals', [
|
||||
|
||||
@@ -44,7 +44,11 @@ if (evalsEnabled && !process.env.EVALS_ALL) {
|
||||
|
||||
// --- Helper functions ---
|
||||
|
||||
/** Copy all SKILL.md files into tmpDir/.claude/skills/gstack/ for auto-discovery */
|
||||
/** Copy all SKILL.md files for auto-discovery.
|
||||
* Install to BOTH project-level (.claude/skills/) AND user-level (~/.claude/skills/)
|
||||
* because Claude Code discovers skills from both locations. In CI containers,
|
||||
* $HOME may differ from the working directory, so we need both paths to ensure
|
||||
* the Skill tool appears in Claude's available tools list. */
|
||||
function installSkills(tmpDir: string) {
|
||||
const skillDirs = [
|
||||
'', // root gstack SKILL.md
|
||||
@@ -54,15 +58,30 @@ function installSkills(tmpDir: string) {
|
||||
'gstack-upgrade', 'humanizer',
|
||||
];
|
||||
|
||||
// Install to both project-level and user-level skill directories
|
||||
const homeDir = process.env.HOME || os.homedir();
|
||||
const installTargets = [
|
||||
path.join(tmpDir, '.claude', 'skills'), // project-level
|
||||
path.join(homeDir, '.claude', 'skills'), // user-level (~/.claude/skills/)
|
||||
];
|
||||
|
||||
for (const skill of skillDirs) {
|
||||
const srcPath = path.join(ROOT, skill, 'SKILL.md');
|
||||
if (!fs.existsSync(srcPath)) continue;
|
||||
|
||||
const destDir = skill
|
||||
? path.join(tmpDir, '.claude', 'skills', 'gstack', skill)
|
||||
: path.join(tmpDir, '.claude', 'skills', 'gstack');
|
||||
fs.mkdirSync(destDir, { recursive: true });
|
||||
fs.copyFileSync(srcPath, path.join(destDir, 'SKILL.md'));
|
||||
const skillName = skill || 'gstack';
|
||||
|
||||
for (const targetBase of installTargets) {
|
||||
const destDir = path.join(targetBase, skillName);
|
||||
fs.mkdirSync(destDir, { recursive: true });
|
||||
fs.copyFileSync(srcPath, path.join(destDir, 'SKILL.md'));
|
||||
}
|
||||
}
|
||||
|
||||
// Copy CLAUDE.md so Claude has project context for skill routing.
|
||||
const claudeMdSrc = path.join(ROOT, 'CLAUDE.md');
|
||||
if (fs.existsSync(claudeMdSrc)) {
|
||||
fs.copyFileSync(claudeMdSrc, path.join(tmpDir, 'CLAUDE.md'));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,6 +94,31 @@ function initGitRepo(dir: string) {
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
}
|
||||
|
||||
/**
 * Create an isolated working directory for a routing test.
 *
 * Makes a fresh temp directory (`routing-<suffix>-…` under os.tmpdir()) and
 * seeds it with project context copied from the repo checkout (ROOT):
 * whichever of CLAUDE.md, README.md, package.json and ETHOS.md exist, plus
 * the skill files placed by installSkills(). The directory is then set up as
 * a git repository via initGitRepo() and everything is committed as "initial".
 *
 * A seeded copy is used instead of ROOT itself so concurrent tests don't
 * interfere with each other. In containerized CI, bare tmpDirs lack the
 * context Claude needs to make correct routing decisions.
 *
 * @param suffix - Short label embedded in the temp directory name.
 * @returns Path of the newly created, committed working directory.
 * @throws Error when a setup step fails; the partially built directory is
 *   removed before the error propagates.
 */
function createRoutingWorkDir(suffix: string): string {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), `routing-${suffix}-`));

  try {
    // Copy essential context files (missing ones are simply skipped)
    const filesToCopy = ['CLAUDE.md', 'README.md', 'package.json', 'ETHOS.md'];
    for (const f of filesToCopy) {
      const src = path.join(ROOT, f);
      if (fs.existsSync(src)) fs.copyFileSync(src, path.join(tmpDir, f));
    }

    // Copy skill files
    installSkills(tmpDir);

    // Init git and record the seeded tree as the first commit
    initGitRepo(tmpDir);
    const gitSteps: string[][] = [
      ['add', '.'],
      ['commit', '-m', 'initial'],
    ];
    for (const args of gitSteps) {
      const result = spawnSync('git', args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
      // status is null on spawn failure or timeout, non-zero when git itself fails.
      // Either way the repo would silently lack its initial commit, so fail fast.
      if (result.status !== 0) {
        const stderrText = result.stderr ? String(result.stderr).trim() : '';
        const detail = result.error ? result.error.message : stderrText;
        throw new Error(
          `git ${args.join(' ')} failed in ${tmpDir}: ${detail || `exit status ${result.status}`}`,
        );
      }
    }

    return tmpDir;
  } catch (err) {
    // Callers only start their own try/cleanup after this function returns,
    // so remove the half-built directory here instead of leaking it.
    try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
    throw err;
  }
}
|
||||
|
||||
function logCost(label: string, result: { costEstimate: { turnsUsed: number; estimatedTokens: number; estimatedCost: number }; duration: number }) {
|
||||
const { turnsUsed, estimatedTokens, estimatedCost } = result.costEstimate;
|
||||
const durationSec = Math.round(result.duration / 1000);
|
||||
@@ -104,13 +148,8 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
});
|
||||
|
||||
test.concurrent('journey-ideation', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-ideation-'));
|
||||
const tmpDir = createRoutingWorkDir('ideation');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
fs.writeFileSync(path.join(tmpDir, 'README.md'), '# New Project\n');
|
||||
spawnSync('git', ['add', '.'], { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
spawnSync('git', ['commit', '-m', 'initial'], { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
const testName = 'journey-ideation';
|
||||
const expectedSkill = 'office-hours';
|
||||
@@ -138,10 +177,8 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-plan-eng', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-plan-eng-'));
|
||||
const tmpDir = createRoutingWorkDir('plan-eng');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
fs.writeFileSync(path.join(tmpDir, 'plan.md'), `# Waitlist App Architecture
|
||||
|
||||
## Components
|
||||
@@ -190,10 +227,8 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-think-bigger', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-think-bigger-'));
|
||||
const tmpDir = createRoutingWorkDir('think-bigger');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
fs.writeFileSync(path.join(tmpDir, 'plan.md'), `# Waitlist App Architecture
|
||||
|
||||
## Components
|
||||
@@ -242,11 +277,8 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
}, 180_000);
|
||||
|
||||
test.concurrent('journey-debug', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-debug-'));
|
||||
const tmpDir = createRoutingWorkDir('debug');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
@@ -302,11 +334,8 @@ export default app;
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-qa', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-qa-'));
|
||||
const tmpDir = createRoutingWorkDir('qa');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify({ name: 'waitlist-app', scripts: { dev: 'next dev' } }, null, 2));
|
||||
fs.mkdirSync(path.join(tmpDir, 'src'), { recursive: true });
|
||||
fs.writeFileSync(path.join(tmpDir, 'src/index.html'), '<html><body><h1>Waitlist App</h1></body></html>');
|
||||
@@ -341,17 +370,14 @@ export default app;
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-code-review', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-code-review-'));
|
||||
const tmpDir = createRoutingWorkDir('code-review');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
fs.writeFileSync(path.join(tmpDir, 'app.ts'), '// base\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
run('git', ['commit', '-m', 'add base app']);
|
||||
run('git', ['checkout', '-b', 'feature/add-waitlist']);
|
||||
fs.writeFileSync(path.join(tmpDir, 'app.ts'), '// updated with waitlist feature\nimport { WaitlistService } from "./waitlist";\n');
|
||||
fs.writeFileSync(path.join(tmpDir, 'waitlist.ts'), 'export class WaitlistService {\n async addParty(name: string, size: number) {\n // TODO: implement\n }\n}\n');
|
||||
@@ -384,17 +410,14 @@ export default app;
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-ship', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-ship-'));
|
||||
const tmpDir = createRoutingWorkDir('ship');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
fs.writeFileSync(path.join(tmpDir, 'app.ts'), '// base\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
run('git', ['commit', '-m', 'add base app']);
|
||||
run('git', ['checkout', '-b', 'feature/waitlist']);
|
||||
fs.writeFileSync(path.join(tmpDir, 'app.ts'), '// waitlist feature\n');
|
||||
run('git', ['add', '.']);
|
||||
@@ -426,11 +449,8 @@ export default app;
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-docs', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-docs-'));
|
||||
const tmpDir = createRoutingWorkDir('docs');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
@@ -466,11 +486,8 @@ export default app;
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-retro', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-retro-'));
|
||||
const tmpDir = createRoutingWorkDir('retro');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
@@ -512,17 +529,8 @@ export default app;
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-design-system', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-design-system-'));
|
||||
const tmpDir = createRoutingWorkDir('design-system');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify({ name: 'waitlist-app' }, null, 2));
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
const testName = 'journey-design-system';
|
||||
const expectedSkill = 'design-consultation';
|
||||
@@ -550,11 +558,8 @@ export default app;
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-visual-qa', async () => {
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'routing-visual-qa-'));
|
||||
const tmpDir = createRoutingWorkDir('visual-qa');
|
||||
try {
|
||||
initGitRepo(tmpDir);
|
||||
installSkills(tmpDir);
|
||||
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: tmpDir, stdio: 'pipe', timeout: 5000 });
|
||||
|
||||
|
||||
@@ -1362,6 +1362,18 @@ describe('Codex skill', () => {
|
||||
expect(content).toContain('codex exec');
|
||||
});
|
||||
|
||||
// review/SKILL.md must contain every marker of the persisted review-log entry.
test('/review persists a review-log entry for ship readiness', () => {
  const reviewSkillPath = path.join(ROOT, 'review', 'SKILL.md');
  const reviewSkill = fs.readFileSync(reviewSkillPath, 'utf-8');

  const requiredSnippets = [
    '"skill":"review"',
    '"issues_found":N',
    'Persist Eng Review result',
  ];
  for (const snippet of requiredSnippets) {
    expect(reviewSkill).toContain(snippet);
  }
});
|
||||
|
||||
// ship/SKILL.md must carry the abort option that points at /review or /plan-eng-review.
test('/ship gate suggests /review or /plan-eng-review when Eng Review is missing', () => {
  const shipSkillPath = path.join(ROOT, 'ship', 'SKILL.md');
  const shipSkill = fs.readFileSync(shipSkillPath, 'utf-8');
  const abortHint = 'Abort — run /review or /plan-eng-review first';
  expect(shipSkill).toContain(abortHint);
});
|
||||
|
||||
test('Review Readiness Dashboard includes Adversarial Review row', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('Adversarial');
|
||||
|
||||
Reference in New Issue
Block a user