diff --git a/test/gemini-e2e.test.ts b/test/gemini-e2e.test.ts index bd69919f..18a23a8b 100644 --- a/test/gemini-e2e.test.ts +++ b/test/gemini-e2e.test.ts @@ -76,7 +76,7 @@ if (evalsEnabled && !process.env.EVALS_ALL) { /** Skip an individual test if not selected by diff-based selection. */ function testIfSelected(testName: string, fn: () => Promise, timeout: number) { const shouldRun = selectedTests === null || selectedTests.includes(testName); - (shouldRun ? test : test.skip)(testName, fn, timeout); + (shouldRun ? test.concurrent : test.skip)(testName, fn, timeout); } // --- Eval result collector --- diff --git a/test/skill-e2e-deploy.test.ts b/test/skill-e2e-deploy.test.ts index 055fada5..61a32a70 100644 --- a/test/skill-e2e-deploy.test.ts +++ b/test/skill-e2e-deploy.test.ts @@ -44,7 +44,7 @@ describeIfSelected('Land-and-Deploy skill E2E', ['land-and-deploy-workflow'], () try { fs.rmSync(landDir, { recursive: true, force: true }); } catch {} }); - test('/land-and-deploy detects Fly.io platform and produces deploy report structure', async () => { + testConcurrentIfSelected('land-and-deploy-workflow', async () => { const result = await runSkillTest({ prompt: `Read land-and-deploy/SKILL.md for the /land-and-deploy skill instructions. @@ -110,7 +110,7 @@ describeIfSelected('Canary skill E2E', ['canary-workflow'], () => { try { fs.rmSync(canaryDir, { recursive: true, force: true }); } catch {} }); - test('/canary skill produces monitoring report structure', async () => { + testConcurrentIfSelected('canary-workflow', async () => { const result = await runSkillTest({ prompt: `Read canary/SKILL.md for the /canary skill instructions. @@ -171,7 +171,7 @@ describeIfSelected('Benchmark skill E2E', ['benchmark-workflow'], () => { try { fs.rmSync(benchDir, { recursive: true, force: true }); } catch {} }); - test('/benchmark skill produces performance report structure', async () => { + testConcurrentIfSelected('benchmark-workflow', async () => { const result = await runSkillTest({ prompt: `Read benchmark/SKILL.md for the /benchmark skill instructions. @@ -237,7 +237,7 @@ describeIfSelected('Setup-Deploy skill E2E', ['setup-deploy-workflow'], () => { try { fs.rmSync(setupDir, { recursive: true, force: true }); } catch {} }); - test('/setup-deploy detects Fly.io and writes config to CLAUDE.md', async () => { + testConcurrentIfSelected('setup-deploy-workflow', async () => { const result = await runSkillTest({ prompt: `Read setup-deploy/SKILL.md for the /setup-deploy skill instructions. diff --git a/test/skill-e2e-design.test.ts b/test/skill-e2e-design.test.ts index c1e2825c..a207965f 100644 --- a/test/skill-e2e-design.test.ts +++ b/test/skill-e2e-design.test.ts @@ -560,7 +560,7 @@ describeIfSelected('Design Review E2E', ['design-review-fix'], () => { try { fs.rmSync(qaDesignDir, { recursive: true, force: true }); } catch {} }); - test('Test 7: /design-review audits and fixes design issues', async () => { + testConcurrentIfSelected('design-review-fix', async () => { const serverUrl = `http://localhost:${(qaDesignServer as any)?.port}`; const result = await runSkillTest({ diff --git a/test/skill-e2e-plan.test.ts b/test/skill-e2e-plan.test.ts index 1fc5b968..099af77b 100644 --- a/test/skill-e2e-plan.test.ts +++ b/test/skill-e2e-plan.test.ts @@ -66,7 +66,7 @@ We're building a new user dashboard that shows recent activity, notifications, a try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {} }); - test('/plan-ceo-review produces structured review output', async () => { + testConcurrentIfSelected('plan-ceo-review', async () => { const result = await runSkillTest({ prompt: `Read plan-ceo-review/SKILL.md for the review workflow. @@ -150,7 +150,7 @@ We're building a new user dashboard that shows recent activity, notifications, a try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {} }); - test('/plan-ceo-review SELECTIVE EXPANSION produces structured review output', async () => { + testConcurrentIfSelected('plan-ceo-review-selective', async () => { const result = await runSkillTest({ prompt: `Read plan-ceo-review/SKILL.md for the review workflow. @@ -244,7 +244,7 @@ Replace session-cookie auth with JWT tokens. Currently using express-session + R try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {} }); - test('/plan-eng-review produces structured review output', async () => { + testConcurrentIfSelected('plan-eng-review', async () => { const result = await runSkillTest({ prompt: `Read plan-eng-review/SKILL.md for the review workflow. @@ -364,7 +364,7 @@ export function main() { return Dashboard(); } } catch {} }); - test('/plan-eng-review writes test-plan artifact to ~/.gstack/projects/', async () => { + testConcurrentIfSelected('plan-eng-review-artifact', async () => { // Count existing test-plan files before const beforeFiles = fs.readdirSync(projectDir).filter(f => f.includes('test-plan')); @@ -442,7 +442,7 @@ describeIfSelected('Office Hours Spec Review E2E', ['office-hours-spec-review'], try { fs.rmSync(ohDir, { recursive: true, force: true }); } catch {} }); - test('/office-hours SKILL.md contains spec review loop', async () => { + testConcurrentIfSelected('office-hours-spec-review', async () => { const result = await runSkillTest({ prompt: `Read office-hours/SKILL.md. I want to understand the spec review loop. @@ -502,7 +502,7 @@ describeIfSelected('Plan CEO Review Benefits-From E2E', ['plan-ceo-review-benefi try { fs.rmSync(benefitsDir, { recursive: true, force: true }); } catch {} }); - test('/plan-ceo-review SKILL.md contains prerequisite skill offer', async () => { + testConcurrentIfSelected('plan-ceo-review-benefits', async () => { const result = await runSkillTest({ prompt: `Read plan-ceo-review/SKILL.md. Search for sections about "Prerequisite" or "office-hours" or "design doc found". diff --git a/test/skill-e2e-qa-bugs.test.ts b/test/skill-e2e-qa-bugs.test.ts index b93e97c0..f9fa8a67 100644 --- a/test/skill-e2e-qa-bugs.test.ts +++ b/test/skill-e2e-qa-bugs.test.ts @@ -4,7 +4,7 @@ import { outcomeJudge } from './helpers/llm-judge'; import { judgePassed } from './helpers/eval-store'; import { ROOT, browseBin, runId, evalsEnabled, selectedTests, hasApiKey, - describeIfSelected, describeE2E, + describeIfSelected, describeE2E, testConcurrentIfSelected, copyDirSync, setupBrowseShims, logCost, recordE2E, dumpOutcomeDiagnostic, createEvalCollector, finalizeEvalCollector, } from './helpers/e2e-helpers'; @@ -172,17 +172,17 @@ CRITICAL RULES: } // B6: Static dashboard — broken link, disabled submit, overflow, missing alt, console error - test('/qa finds >= 2 of 5 planted bugs (static)', async () => { + testConcurrentIfSelected('qa-b6-static', async () => { await runPlantedBugEval('qa-eval.html', 'qa-eval-ground-truth.json', 'b6-static'); }, 360_000); // B7: SPA — broken route, stale state, async race, missing aria, console warning - test('/qa finds >= 2 of 5 planted SPA bugs', async () => { + testConcurrentIfSelected('qa-b7-spa', async () => { await runPlantedBugEval('qa-eval-spa.html', 'qa-eval-spa-ground-truth.json', 'b7-spa'); }, 360_000); // B8: Checkout — email regex, NaN total, CC overflow, missing required, stripe error - test('/qa finds >= 2 of 5 planted checkout bugs', async () => { + testConcurrentIfSelected('qa-b8-checkout', async () => { await runPlantedBugEval('qa-eval-checkout.html', 'qa-eval-checkout-ground-truth.json', 'b8-checkout'); }, 360_000); diff --git a/test/skill-e2e-qa-workflow.test.ts b/test/skill-e2e-qa-workflow.test.ts index 840c3944..516cf178 100644 --- a/test/skill-e2e-qa-workflow.test.ts +++ b/test/skill-e2e-qa-workflow.test.ts @@ -37,7 +37,7 @@ describeIfSelected('QA skill E2E', ['qa-quick'], () => { try { fs.rmSync(qaDir, { recursive: true, force: true }); } catch {} }); - test('/qa quick completes without browse errors', async () => { + testConcurrentIfSelected('qa-quick', async () => { const result = await runSkillTest({ prompt: `B="${browseBin}" @@ -108,7 +108,7 @@ describeIfSelected('QA-Only skill E2E', ['qa-only-no-fix'], () => { try { fs.rmSync(qaOnlyDir, { recursive: true, force: true }); } catch {} }); - test('/qa-only produces report without using Edit tool', async () => { + testConcurrentIfSelected('qa-only-no-fix', async () => { const result = await runSkillTest({ prompt: `IMPORTANT: The browse binary is already assigned below as B. Do NOT search for it or run the SKILL.md setup block — just use $B directly. @@ -227,7 +227,7 @@ describeIfSelected('QA Fix Loop E2E', ['qa-fix-loop'], () => { try { fs.rmSync(qaFixDir, { recursive: true, force: true }); } catch {} }); - test('/qa fix loop finds bugs and commits fixes', async () => { + testConcurrentIfSelected('qa-fix-loop', async () => { const qaFixUrl = `http://127.0.0.1:${qaFixServer!.port}`; const result = await runSkillTest({ diff --git a/test/skill-e2e-review.test.ts b/test/skill-e2e-review.test.ts index 103c6c9c..b1d5442d 100644 --- a/test/skill-e2e-review.test.ts +++ b/test/skill-e2e-review.test.ts @@ -51,7 +51,7 @@ describeIfSelected('Review skill E2E', ['review-sql-injection'], () => { try { fs.rmSync(reviewDir, { recursive: true, force: true }); } catch {} }); - test('/review produces findings on SQL injection branch', async () => { + testConcurrentIfSelected('review-sql-injection', async () => { const result = await runSkillTest({ prompt: `You are in a git repo on a feature branch with changes against main. Read review-SKILL.md for the review workflow instructions. @@ -125,7 +125,7 @@ describeIfSelected('Review enum completeness E2E', ['review-enum-completeness'], try { fs.rmSync(enumDir, { recursive: true, force: true }); } catch {} }); - test('/review catches missing enum handlers for new status value', async () => { + testConcurrentIfSelected('review-enum-completeness', async () => { const result = await runSkillTest({ prompt: `You are in a git repo on branch feature/add-returned-status with changes against main. Read review-SKILL.md for the review workflow instructions. @@ -200,7 +200,7 @@ describeIfSelected('Review design lite E2E', ['review-design-lite'], () => { try { fs.rmSync(designDir, { recursive: true, force: true }); } catch {} }); - test('/review catches design anti-patterns in CSS/HTML diff', async () => { + testConcurrentIfSelected('review-design-lite', async () => { const result = await runSkillTest({ prompt: `You are in a git repo on branch feature/add-landing-page with changes against main. Read review-SKILL.md for the review workflow instructions. @@ -497,7 +497,7 @@ describeIfSelected('Retro E2E', ['retro'], () => { try { fs.rmSync(retroDir, { recursive: true, force: true }); } catch {} }); - test('/retro produces analysis from git history', async () => { + testConcurrentIfSelected('retro', async () => { const result = await runSkillTest({ prompt: `Read retro/SKILL.md for instructions on how to run a retrospective. diff --git a/test/skill-e2e-workflow.test.ts b/test/skill-e2e-workflow.test.ts index 70ed7311..55fb4e64 100644 --- a/test/skill-e2e-workflow.test.ts +++ b/test/skill-e2e-workflow.test.ts @@ -60,7 +60,7 @@ describeIfSelected('Document-Release skill E2E', ['document-release'], () => { try { fs.rmSync(docReleaseDir, { recursive: true, force: true }); } catch {} }); - test('/document-release updates docs without clobbering CHANGELOG', async () => { + testConcurrentIfSelected('document-release', async () => { const result = await runSkillTest({ prompt: `Read the file document-release/SKILL.md for the document-release workflow instructions. @@ -461,7 +461,7 @@ describe('processPayment', () => { try { fs.rmSync(coverageDir, { recursive: true, force: true }); } catch {} }); - test('/ship Step 3.4 produces coverage diagram', async () => { + testConcurrentIfSelected('ship-coverage-audit', async () => { const result = await runSkillTest({ prompt: `Read the file ship/SKILL.md for the ship workflow instructions. @@ -544,7 +544,7 @@ describeIfSelected('Codex skill E2E', ['codex-review'], () => { try { fs.rmSync(codexDir, { recursive: true, force: true }); } catch {} }); - test('/codex review produces findings and GATE verdict', async () => { + testConcurrentIfSelected('codex-review', async () => { // Check codex is available — skip if not installed const codexCheck = spawnSync('which', ['codex'], { stdio: 'pipe', timeout: 3000 }); if (codexCheck.status !== 0) { diff --git a/test/skill-llm-eval.test.ts b/test/skill-llm-eval.test.ts index 5208836a..ddfa963e 100644 --- a/test/skill-llm-eval.test.ts +++ b/test/skill-llm-eval.test.ts @@ -56,7 +56,7 @@ function describeIfSelected(name: string, testNames: string[], fn: () => void) { /** Skip an individual test if not selected (for multi-test describe blocks). */ function testIfSelected(testName: string, fn: () => Promise, timeout: number) { const shouldRun = selectedTests === null || selectedTests.includes(testName); - (shouldRun ? test : test.skip)(testName, fn, timeout); + (shouldRun ? test.concurrent : test.skip)(testName, fn, timeout); } describeIfSelected('LLM-as-judge quality evals', [