mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-07 05:56:41 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/gstack-eval-optimization
This commit is contained in:
@@ -343,9 +343,10 @@ describe('REVIEW_DASHBOARD resolver', () => {
|
||||
test('resolver output contains key dashboard elements', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('VERDICT');
|
||||
expect(content).toContain('CLEARED TO SHIP');
|
||||
expect(content).toContain('NOT YET RUN');
|
||||
expect(content).toContain('CLEARED');
|
||||
expect(content).toContain('Eng Review');
|
||||
expect(content).toContain('7 days');
|
||||
expect(content).toContain('Design Review');
|
||||
expect(content).toContain('skip_eng_review');
|
||||
});
|
||||
});
|
||||
|
||||
+348
-2
@@ -894,6 +894,89 @@ Focus on reviewing the plan content: architecture, error handling, security, and
|
||||
}, 420_000);
|
||||
});
|
||||
|
||||
// --- Plan CEO Review (SELECTIVE EXPANSION) E2E ---
|
||||
|
||||
describeE2E('Plan CEO Review SELECTIVE EXPANSION E2E', () => {
  // Scratch git repo holding plan.md plus a copy of plan-ceo-review/SKILL.md.
  let planDir: string;

  beforeAll(() => {
    planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-ceo-sel-'));
    // Relies on the module-level spawnSync, as the other E2E suites in this
    // file do, instead of a block-local CommonJS require().
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });

    run('git', ['init']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // The plan under review: a small standalone document, committed so the
    // repo has history before the skill runs.
    fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Add User Dashboard

## Context
We're building a new user dashboard that shows recent activity, notifications, and quick actions.

## Changes
1. New React component \`UserDashboard\` in \`src/components/\`
2. REST API endpoint \`GET /api/dashboard\` returning user stats
3. PostgreSQL query for activity aggregation
4. Redis cache layer for dashboard data (5min TTL)

## Architecture
- Frontend: React + TailwindCSS
- Backend: Express.js REST API
- Database: PostgreSQL with existing user/activity tables
- Cache: Redis for dashboard aggregates

## Open questions
- Should we use WebSocket for real-time updates?
- How do we handle users with 100k+ activity records?
`);

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'add plan']);

    // The prompt refers to the skill by a path relative to planDir, so the
    // SKILL.md is copied in after the commit.
    fs.mkdirSync(path.join(planDir, 'plan-ceo-review'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
      path.join(planDir, 'plan-ceo-review', 'SKILL.md'),
    );
  });

  afterAll(() => {
    // Best-effort cleanup: a failure to delete the temp dir must not fail the suite.
    try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
  });

  test('/plan-ceo-review SELECTIVE EXPANSION produces structured review output', async () => {
    const result = await runSkillTest({
      prompt: `Read plan-ceo-review/SKILL.md for the review workflow.

Read plan.md — that's the plan to review. This is a standalone plan document, not a codebase — skip any codebase exploration or system audit steps.

Choose SELECTIVE EXPANSION mode. Skip any AskUserQuestion calls — this is non-interactive.
For the cherry-pick ceremony, accept all expansion proposals automatically.
Write your complete review directly to ${planDir}/review-output-selective.md

Focus on reviewing the plan content: architecture, error handling, security, and performance.`,
      workingDirectory: planDir,
      maxTurns: 15,
      timeout: 360_000,
      testName: 'plan-ceo-review-selective',
      runId,
    });

    logCost('/plan-ceo-review (SELECTIVE)', result);
    recordE2E('/plan-ceo-review-selective', 'Plan CEO Review SELECTIVE EXPANSION E2E', result, {
      passed: ['success', 'error_max_turns'].includes(result.exitReason),
    });
    // Running out of turns is accepted alongside a clean success.
    expect(['success', 'error_max_turns']).toContain(result.exitReason);

    const reviewPath = path.join(planDir, 'review-output-selective.md');
    if (fs.existsSync(reviewPath)) {
      const review = fs.readFileSync(reviewPath, 'utf-8');
      expect(review.length).toBeGreaterThan(200);
    } else {
      // The review file stays optional (presumably because an accepted
      // error_max_turns run can end before it is written — confirm), but the
      // skipped length check is now visible in the test output.
      console.log(`Review output not found at ${reviewPath} — length check skipped`);
    }
  }, 420_000);
});
|
||||
|
||||
// --- Plan Eng Review E2E ---
|
||||
|
||||
describeIfSelected('Plan Eng Review E2E', ['plan-eng-review'], () => {
|
||||
@@ -962,7 +1045,7 @@ Replace session-cookie auth with JWT tokens. Currently using express-session + R
|
||||
|
||||
Read plan.md — that's the plan to review. This is a standalone plan document, not a codebase — skip any codebase exploration steps.
|
||||
|
||||
Choose SMALL CHANGE mode. Skip any AskUserQuestion calls — this is non-interactive.
|
||||
Proceed directly to the full review. Skip any AskUserQuestion calls — this is non-interactive.
|
||||
Write your complete review directly to ${planDir}/review-output.md
|
||||
|
||||
Focus on architecture, code quality, tests, and performance sections.`,
|
||||
@@ -1363,7 +1446,7 @@ export function main() { return Dashboard(); }
|
||||
|
||||
Read plan.md — that's the plan to review. This is a standalone plan with source code in app.ts and dashboard.ts.
|
||||
|
||||
Choose SMALL CHANGE mode. Skip any AskUserQuestion calls — this is non-interactive.
|
||||
Proceed directly to the full review. Skip any AskUserQuestion calls — this is non-interactive.
|
||||
|
||||
IMPORTANT: After your review, you MUST write the test-plan artifact as described in the "Test Plan Artifact" section of SKILL.md. The remote-slug shim is at ${planDir}/browse/bin/remote-slug.
|
||||
|
||||
@@ -2261,6 +2344,269 @@ Review the site at ${serverUrl}. Use --quick mode. Skip any AskUserQuestion call
|
||||
}, 420_000);
|
||||
});
|
||||
|
||||
// --- Test Bootstrap E2E ---
|
||||
|
||||
describeE2E('Test Bootstrap E2E', () => {
  // Temp project with no test framework, served over a local HTTP server.
  let bootstrapDir: string;
  let bootstrapServer: ReturnType<typeof Bun.serve>;

  beforeAll(() => {
    bootstrapDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-bootstrap-'));
    setupBrowseShims(bootstrapDir);

    // Copy qa skill files
    copyDirSync(path.join(ROOT, 'qa'), path.join(bootstrapDir, 'qa'));

    // Create a minimal Node.js project with NO test framework
    fs.writeFileSync(path.join(bootstrapDir, 'package.json'), JSON.stringify({
      name: 'test-bootstrap-app',
      version: '1.0.0',
      type: 'module',
    }, null, 2));

    // Create a simple app file with a bug
    fs.writeFileSync(path.join(bootstrapDir, 'app.js'), `
export function add(a, b) { return a + b; }
export function subtract(a, b) { return a - b; }
export function divide(a, b) { return a / b; } // BUG: no zero check
`);

    // Create a simple HTML page with a bug
    fs.writeFileSync(path.join(bootstrapDir, 'index.html'), `<!DOCTYPE html>
<html lang="en">
<head><meta charset="utf-8"><title>Bootstrap Test</title></head>
<body>
<h1>Test App</h1>
<a href="/nonexistent-page">Broken Link</a>
<script>console.error("ReferenceError: undefinedVar is not defined");</script>
</body>
</html>
`);

    // Init git repo
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: bootstrapDir, stdio: 'pipe', timeout: 5000 });
    run('git', ['init']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial commit']);

    // Serve from working directory (port 0 lets the OS choose a free port)
    bootstrapServer = Bun.serve({
      port: 0,
      hostname: '127.0.0.1',
      fetch(req) {
        const url = new URL(req.url);
        let filePath = url.pathname === '/' ? '/index.html' : url.pathname;
        filePath = filePath.replace(/^\//, '');
        const fullPath = path.join(bootstrapDir, filePath);
        // Only regular files are served. Without the isFile() check, a request
        // for a directory (e.g. /qa) would make readFileSync throw instead of
        // returning a 404.
        if (!fs.existsSync(fullPath) || !fs.statSync(fullPath).isFile()) {
          return new Response('Not Found', { status: 404 });
        }
        const content = fs.readFileSync(fullPath, 'utf-8');
        return new Response(content, {
          headers: { 'Content-Type': 'text/html' },
        });
      },
    });
  });

  afterAll(() => {
    // Optional chaining: beforeAll may have thrown before the server was assigned.
    bootstrapServer?.stop();
    // Best-effort cleanup of the temp project.
    try { fs.rmSync(bootstrapDir, { recursive: true, force: true }); } catch {}
  });

  test('/qa bootstrap + regression test on zero-test project', async () => {
    const serverUrl = `http://127.0.0.1:${bootstrapServer.port}`;

    const result = await runSkillTest({
      prompt: `You have a browse binary at ${browseBin}. Assign it to B variable like: B="${browseBin}"

Read the file qa/SKILL.md for the QA workflow instructions.

Run a Quick-tier QA test on ${serverUrl}
The source code for this page is at ${bootstrapDir}/index.html — you can fix bugs there.
Do NOT use AskUserQuestion — for any AskUserQuestion prompts, choose the RECOMMENDED option automatically.
Write your report to ${bootstrapDir}/qa-reports/qa-report.md

This project has NO test framework. When the bootstrap asks, pick vitest (option A).
This is a test+fix loop: find bugs, fix them, write regression tests, commit each fix.`,
      workingDirectory: bootstrapDir,
      maxTurns: 50,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep'],
      timeout: 420_000,
      testName: 'qa-bootstrap',
      runId,
    });

    logCost('/qa bootstrap', result);
    recordE2E('/qa bootstrap + regression test', 'Test Bootstrap E2E', result, {
      passed: ['success', 'error_max_turns'].includes(result.exitReason),
    });

    expect(['success', 'error_max_turns']).toContain(result.exitReason);

    // Diagnostics only: whether a test config was created is logged, not asserted.
    const hasTestConfig = fs.existsSync(path.join(bootstrapDir, 'vitest.config.ts'))
      || fs.existsSync(path.join(bootstrapDir, 'vitest.config.js'))
      || fs.existsSync(path.join(bootstrapDir, 'jest.config.js'))
      || fs.existsSync(path.join(bootstrapDir, 'jest.config.ts'));
    console.log(`Test config created: ${hasTestConfig}`);

    const hasTestingMd = fs.existsSync(path.join(bootstrapDir, 'TESTING.md'));
    console.log(`TESTING.md created: ${hasTestingMd}`);

    // Check for bootstrap commit
    const gitLog = spawnSync('git', ['log', '--oneline', '--grep=bootstrap'], {
      cwd: bootstrapDir, stdio: 'pipe',
    });
    const bootstrapCommits = gitLog.stdout.toString().trim();
    console.log(`Bootstrap commits: ${bootstrapCommits || 'none'}`);

    // Check for regression test commits
    const regressionLog = spawnSync('git', ['log', '--oneline', '--grep=test(qa)'], {
      cwd: bootstrapDir, stdio: 'pipe',
    });
    const regressionCommits = regressionLog.stdout.toString().trim();
    console.log(`Regression test commits: ${regressionCommits || 'none'}`);

    // Verify at least the bootstrap happened (fix commits are bonus)
    const allCommits = spawnSync('git', ['log', '--oneline'], {
      cwd: bootstrapDir, stdio: 'pipe',
    });
    // ''.split('\n') yields [''], so empty lines are dropped to keep an empty
    // log from being counted (and logged) as one commit.
    const totalCommits = allCommits.stdout.toString().trim().split('\n').filter(Boolean).length;
    console.log(`Total commits: ${totalCommits}`);
    expect(totalCommits).toBeGreaterThan(1); // At least initial + bootstrap
  }, 420_000);
});
|
||||
|
||||
// --- Test Coverage Audit E2E ---
|
||||
|
||||
describeE2E('Test Coverage Audit E2E', () => {
  // Temp project that has a vitest config but only one test for billing.ts.
  let coverageDir: string;

  beforeAll(() => {
    coverageDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-coverage-'));
    const inFixture = (...parts: string[]) => path.join(coverageDir, ...parts);

    // Skill directories copied into the fixture: ship, then review.
    for (const skill of ['ship', 'review']) {
      copyDirSync(path.join(ROOT, skill), inFixture(skill));
    }

    // Node.js project that declares a test framework; the test script is a stub.
    const manifest = {
      name: 'test-coverage-app',
      version: '1.0.0',
      type: 'module',
      scripts: { test: 'echo "no tests yet"' },
      devDependencies: { vitest: '^1.0.0' },
    };
    fs.writeFileSync(inFixture('package.json'), JSON.stringify(manifest, null, 2));

    // Minimal vitest config (two lines, trailing newline).
    const vitestConfig = [
      "import { defineConfig } from 'vitest/config';",
      'export default defineConfig({ test: {} });',
      '',
    ].join('\n');
    fs.writeFileSync(inFixture('vitest.config.ts'), vitestConfig);

    fs.writeFileSync(inFixture('VERSION'), '0.1.0.0\n');
    fs.writeFileSync(inFixture('CHANGELOG.md'), '# Changelog\n');

    // Source under audit: two functions, each with validation branches.
    fs.mkdirSync(inFixture('src'), { recursive: true });
    fs.writeFileSync(inFixture('src', 'billing.ts'), `
export function processPayment(amount: number, currency: string) {
if (amount <= 0) throw new Error('Invalid amount');
if (currency !== 'USD' && currency !== 'EUR') throw new Error('Unsupported currency');
return { status: 'success', amount, currency };
}

export function refundPayment(paymentId: string, reason: string) {
if (!paymentId) throw new Error('Payment ID required');
if (!reason) throw new Error('Reason required');
return { status: 'refunded', paymentId, reason };
}
`);

    // A single happy-path test, leaving the gaps listed in its comments.
    fs.mkdirSync(inFixture('test'), { recursive: true });
    fs.writeFileSync(inFixture('test', 'billing.test.ts'), `
import { describe, test, expect } from 'vitest';
import { processPayment } from '../src/billing';

describe('processPayment', () => {
test('processes valid payment', () => {
const result = processPayment(100, 'USD');
expect(result.status).toBe('success');
});
// GAP: no test for invalid amount
// GAP: no test for unsupported currency
// GAP: refundPayment not tested at all
});
`);

    // Git repo on main with one commit, then switch to the feature branch.
    const git = (...args: string[]) =>
      spawnSync('git', args, { cwd: coverageDir, stdio: 'pipe', timeout: 5000 });
    git('init', '-b', 'main');
    git('config', 'user.email', 'test@test.com');
    git('config', 'user.name', 'Test');
    git('add', '.');
    git('commit', '-m', 'initial commit');
    git('checkout', '-b', 'feature/billing');
  });

  afterAll(() => {
    // Cleanup is best-effort; errors while removing the temp dir are ignored.
    try {
      fs.rmSync(coverageDir, { recursive: true, force: true });
    } catch {}
  });

  test('/ship Step 3.4 produces coverage diagram', async () => {
    const prompt = `Read the file ship/SKILL.md for the ship workflow instructions.

You are on the feature/billing branch. The base branch is main.
This is a test project — there is no remote, no PR to create.

ONLY run Step 3.4 (Test Coverage Audit) from the ship workflow.
Skip all other steps (tests, evals, review, version, changelog, commit, push, PR).

The source code is in ${coverageDir}/src/billing.ts.
Existing tests are in ${coverageDir}/test/billing.test.ts.
The test command is: echo "tests pass" (mocked — just pretend tests pass).

Produce the ASCII coverage diagram showing which code paths are tested and which have gaps.
Do NOT generate new tests — just produce the diagram and coverage summary.
Output the diagram directly.`;

    const result = await runSkillTest({
      prompt,
      workingDirectory: coverageDir,
      maxTurns: 15,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep'],
      timeout: 120_000,
      testName: 'ship-coverage-audit',
      runId,
    });

    logCost('/ship coverage audit', result);
    const succeeded = result.exitReason === 'success';
    recordE2E('/ship Step 3.4 coverage audit', 'Test Coverage Audit E2E', result, {
      passed: succeeded,
    });

    expect(result.exitReason).toBe('success');

    // Diagram markers are reported for debugging only; none of them is asserted.
    const output = result.output || '';
    const mentionsAny = (markers: string[]) => markers.some((marker) => output.includes(marker));
    const hasGap = mentionsAny(['GAP', 'gap', 'NO TEST']);
    const hasTested = mentionsAny(['TESTED', 'tested', '✓']);
    const hasCoverage = mentionsAny(['COVERAGE', 'coverage', 'paths tested']);

    console.log(`Output has GAP markers: ${hasGap}`);
    console.log(`Output has TESTED markers: ${hasTested}`);
    console.log(`Output has coverage summary: ${hasCoverage}`);

    // The one hard requirement besides success: at least one Read tool call.
    const readCalls = result.toolCalls.filter(tc => tc.tool === 'Read');
    expect(readCalls.length).toBeGreaterThan(0);
  }, 180_000);
});
|
||||
|
||||
// Module-level afterAll — finalize eval collector after all tests complete
|
||||
afterAll(async () => {
|
||||
if (evalCollector) {
|
||||
|
||||
@@ -666,6 +666,36 @@ describe('Planted-bug fixture validation', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// --- CEO review mode validation ---
|
||||
|
||||
describe('CEO review mode validation', () => {
  // Read once while the suite is being defined; all four tests inspect this text.
  const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');

  // Asserts, in order, that each needle occurs in the skill text.
  const expectMentions = (...needles: string[]) => {
    needles.forEach((needle) => {
      expect(content).toContain(needle);
    });
  };

  test('has all four CEO review modes defined', () => {
    expectMentions('SCOPE EXPANSION', 'SELECTIVE EXPANSION', 'HOLD SCOPE', 'SCOPE REDUCTION');
  });

  test('has CEO plan persistence step', () => {
    expectMentions('ceo-plans', 'status: ACTIVE');
  });

  test('has docs/designs promotion section', () => {
    expectMentions('docs/designs', 'PROMOTED');
  });

  // NOTE(review): each needle here is a substring of a mode name checked in
  // the first test, so this passes whenever that one does.
  test('mode quick reference has four columns', () => {
    expectMentions('EXPANSION', 'SELECTIVE', 'HOLD SCOPE', 'REDUCTION');
  });
});
|
||||
|
||||
// --- gstack-slug helper ---
|
||||
|
||||
describe('gstack-slug', () => {
|
||||
@@ -707,3 +737,225 @@ describe('gstack-slug', () => {
|
||||
expect(lines[1]).toMatch(/^BRANCH=.+/);
|
||||
});
|
||||
});
|
||||
|
||||
// --- Test Bootstrap validation ---
|
||||
|
||||
describe('Test Bootstrap ({{TEST_BOOTSTRAP}}) integration', () => {
  // Loads <ROOT>/<skill>/SKILL.md; called inside each test so every test reads the file itself.
  const readSkill = (skill: string) =>
    fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');

  // Asserts, in order, that every needle occurs in the given text.
  const expectAll = (text: string, needles: string[]) => {
    for (const needle of needles) {
      expect(text).toContain(needle);
    }
  };

  test('TEST_BOOTSTRAP resolver produces valid content', () => {
    const qaContent = readSkill('qa');
    expectAll(qaContent, [
      'Test Framework Bootstrap',
      'RUNTIME:ruby',
      'RUNTIME:node',
      'RUNTIME:python',
      'no-test-bootstrap',
      'BOOTSTRAP_DECLINED',
    ]);
  });

  test('TEST_BOOTSTRAP appears in qa/SKILL.md', () => {
    expectAll(readSkill('qa'), ['Test Framework Bootstrap', 'TESTING.md', 'CLAUDE.md']);
  });

  test('TEST_BOOTSTRAP appears in ship/SKILL.md', () => {
    expectAll(readSkill('ship'), ['Test Framework Bootstrap', 'Step 2.5']);
  });

  test('TEST_BOOTSTRAP appears in qa-design-review/SKILL.md', () => {
    expectAll(readSkill('qa-design-review'), ['Test Framework Bootstrap']);
  });

  test('TEST_BOOTSTRAP does NOT appear in qa-only/SKILL.md', () => {
    const qaOnly = readSkill('qa-only');
    expect(qaOnly).not.toContain('Test Framework Bootstrap');
    // The bootstrap section is absent, but the pointer to /qa must be there.
    expectAll(qaOnly, ['No test framework detected', 'Run `/qa` to bootstrap']);
  });

  test('bootstrap includes framework knowledge table', () => {
    expectAll(readSkill('qa'), [
      'vitest',
      'minitest',
      'pytest',
      'cargo test',
      'phpunit',
      'ExUnit',
    ]);
  });

  test('bootstrap includes CI/CD pipeline generation', () => {
    expectAll(readSkill('qa'), ['.github/workflows/test.yml', 'GitHub Actions']);
  });

  test('bootstrap includes first real tests step', () => {
    expectAll(readSkill('qa'), [
      'First real tests',
      'git log --since=30.days',
      'Prioritize by risk',
    ]);
  });

  test('bootstrap includes vibe coding philosophy', () => {
    expectAll(readSkill('qa'), ['vibe coding', '100% test coverage']);
  });

  test('WebSearch is in allowed-tools for qa, ship, qa-design-review', () => {
    // All three files are read before any assertion runs.
    const skillTexts = ['qa', 'ship', 'qa-design-review'].map((skill) => readSkill(skill));
    for (const text of skillTexts) {
      expect(text).toContain('WebSearch');
    }
  });
});
|
||||
|
||||
// --- Phase 8e.5 regression test validation ---
|
||||
|
||||
describe('Phase 8e.5 regression test generation', () => {
  // Reads <ROOT>/<skill>/SKILL.md each time it is called.
  const loadSkill = (skill: string) =>
    fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8');

  // Checks the phrases one by one, in the order given.
  const mustContain = (text: string, ...phrases: string[]) => {
    phrases.forEach((phrase) => {
      expect(text).toContain(phrase);
    });
  };

  test('qa/SKILL.md contains Phase 8e.5', () => {
    mustContain(
      loadSkill('qa'),
      '8e.5. Regression Test',
      'test(qa): regression test',
      'WTF-likelihood exclusion',
    );
  });

  test('qa/SKILL.md Rule 13 is amended for regression tests', () => {
    const qaSkill = loadSkill('qa');
    // The amended wording must be present and the old blanket ban must be gone.
    expect(qaSkill).toContain('Only modify tests when generating regression tests in Phase 8e.5');
    expect(qaSkill).not.toContain('Never modify tests or CI configuration');
  });

  test('qa-design-review has CSS-aware Phase 8e.5 variant', () => {
    mustContain(
      loadSkill('qa-design-review'),
      '8e.5. Regression Test (design-review variant)',
      'CSS-only',
      'test(design): regression test',
    );
  });

  test('regression test includes full attribution comment format', () => {
    mustContain(
      loadSkill('qa'),
      '// Regression: ISSUE-NNN',
      '// Found by /qa on',
      '// Report: .gstack/qa-reports/',
    );
  });

  test('regression test uses auto-incrementing names', () => {
    mustContain(loadSkill('qa'), 'auto-incrementing', 'max number + 1');
  });
});
|
||||
|
||||
// --- Step 3.4 coverage audit validation ---
|
||||
|
||||
describe('Step 3.4 test coverage audit', () => {
  // Reads ship/SKILL.md on each call, so every test loads the file itself.
  const readShipSkill = () => fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');

  test('ship/SKILL.md contains Step 3.4', () => {
    const content = readShipSkill();
    expect(content).toContain('Step 3.4: Test Coverage Audit');
    expect(content).toContain('CODE PATH COVERAGE');
  });

  test('Step 3.4 includes quality scoring rubric', () => {
    const content = readShipSkill();
    expect(content).toContain('★★★');
    // A plain toContain('★★') is already satisfied by the three-star marker
    // above, so it could never fail on its own. Require a run of exactly two
    // stars (not adjacent to a third) to really check the middle tier.
    expect(content).toMatch(/(?<!★)★★(?!★)/);
    expect(content).toContain('edge cases AND error paths');
    expect(content).toContain('happy path only');
  });

  test('Step 3.4 includes before/after test count', () => {
    const content = readShipSkill();
    expect(content).toContain('Count test files before');
    expect(content).toContain('Count test files after');
  });

  test('ship PR body includes Test Coverage section', () => {
    const content = readShipSkill();
    expect(content).toContain('## Test Coverage');
  });

  test('ship rules include test generation rule', () => {
    const content = readShipSkill();
    expect(content).toContain('Step 3.4 generates coverage tests');
    expect(content).toContain('Never commit failing tests');
  });

  test('Step 3.4 includes vibe coding philosophy', () => {
    const content = readShipSkill();
    expect(content).toContain('vibe coding becomes yolo coding');
  });

  test('Step 3.4 traces actual codepaths, not just syntax', () => {
    const content = readShipSkill();
    expect(content).toContain('Trace every codepath');
    expect(content).toContain('Trace data flow');
    expect(content).toContain('Diagram the execution');
  });

  test('Step 3.4 maps user flows and interaction edge cases', () => {
    const content = readShipSkill();
    expect(content).toContain('Map user flows');
    expect(content).toContain('Interaction edge cases');
    expect(content).toContain('Double-click');
    expect(content).toContain('Navigate away');
    expect(content).toContain('Error states the user can see');
    expect(content).toContain('Empty/zero/boundary states');
  });

  test('Step 3.4 diagram includes USER FLOW COVERAGE section', () => {
    const content = readShipSkill();
    expect(content).toContain('USER FLOW COVERAGE');
    expect(content).toContain('Code paths:');
    expect(content).toContain('User flows:');
  });
});
|
||||
|
||||
// --- Retro test health validation ---
|
||||
|
||||
describe('Retro test health tracking', () => {
  // Reads retro/SKILL.md fresh for each test and checks the phrases in order.
  const expectRetroHas = (...phrases: string[]) => {
    const retroSkill = fs.readFileSync(path.join(ROOT, 'retro', 'SKILL.md'), 'utf-8');
    for (const phrase of phrases) {
      expect(retroSkill).toContain(phrase);
    }
  };

  test('retro/SKILL.md has test health data gathering commands', () => {
    expectRetroHas(
      '# 10. Test file count',
      '# 11. Regression test commits',
      '# 12. Test files changed',
    );
  });

  test('retro/SKILL.md has Test Health metrics row', () => {
    expectRetroHas('Test Health', 'regression tests');
  });

  test('retro/SKILL.md has Test Health narrative section', () => {
    expectRetroHas('### Test Health', 'Total test files', 'vibe coding safe');
  });

  test('retro JSON schema includes test_health field', () => {
    expectRetroHas('test_health', 'total_test_files', 'regression_test_commits');
  });
});
|
||||
|
||||
// --- QA report template regression tests section ---
|
||||
|
||||
describe('QA report template', () => {
  test('qa-report-template.md has Regression Tests section', () => {
    // The template lives under qa/templates, not next to SKILL.md.
    const templatePath = path.join(ROOT, 'qa', 'templates', 'qa-report-template.md');
    const template = fs.readFileSync(templatePath, 'utf-8');

    const requiredSnippets = [
      '## Regression Tests',
      'committed / deferred / skipped',
      '### Deferred Tests',
      '**Precondition:**',
    ];
    for (const snippet of requiredSnippets) {
      expect(template).toContain(snippet);
    }
  });
});
|
||||
|
||||
Reference in New Issue
Block a user