diff --git a/test/skill-e2e-workflow.test.ts b/test/skill-e2e-workflow.test.ts index 6165eb27..c290f6ab 100644 --- a/test/skill-e2e-workflow.test.ts +++ b/test/skill-e2e-workflow.test.ts @@ -175,9 +175,10 @@ describeIfSelected('Ship workflow E2E', ['ship-local-workflow'], () => { logCost('/ship local workflow', result); - // Check push succeeded - const remoteLog = spawnSync('git', ['log', '--oneline'], { cwd: shipRemoteDir, stdio: 'pipe' }); - const remoteCommits = remoteLog.stdout.toString().trim().split('\n').length; + // Check push succeeded — check the feature branch on the bare remote + // (bare repo HEAD points to main which only has 1 commit; the push goes to feature/ship-test) + const remoteLog = spawnSync('git', ['log', '--oneline', '--all'], { cwd: shipRemoteDir, stdio: 'pipe' }); + const remoteCommits = remoteLog.stdout.toString().trim().split('\n').filter(l => l.length > 0).length; // Check VERSION was bumped const versionContent = fs.existsSync(path.join(shipWorkDir, 'VERSION')) @@ -217,12 +218,14 @@ describeIfSelected('Setup Browser Cookies E2E', ['setup-cookies-detect'], () => const result = await runSkillTest({ prompt: `Read setup-browser-cookies/SKILL.md for the cookie import workflow. -This is a test environment. List which browsers you can detect on this system by checking for their cookie database files. -Write the detected browsers to ${cookieDir}/detected-browsers.md. +This is a test environment. Check which browsers exist on this system by looking for their cookie database files. +IMPORTANT: You MUST write a file called ${cookieDir}/detected-browsers.md with your findings. +If you find browsers, list them. If you find NO browsers, write "No browsers detected" to the file. +The file must always be created regardless of results. 
Do NOT launch the cookie picker UI — just detect and report.`, workingDirectory: cookieDir, - maxTurns: 5, - timeout: 45_000, + maxTurns: 8, + timeout: 60_000, testName: 'setup-cookies-detect', runId, }); @@ -233,17 +236,21 @@ Do NOT launch the cookie picker UI — just detect and report.`, const detectExists = fs.existsSync(detectPath); const detectContent = detectExists ? fs.readFileSync(detectPath, 'utf-8') : ''; const hasBrowserName = /chrome|arc|brave|edge|comet|safari|firefox/i.test(detectContent); + const hasNoBrowsers = /no browser|none|not found|not detected|could not|couldn't/i.test(detectContent); + + // On CI (headless Ubuntu), no browsers are installed — "no browsers detected" is valid + const contentValid = hasBrowserName || hasNoBrowsers; recordE2E(evalCollector, '/setup-browser-cookies detect', 'Setup Browser Cookies E2E', result, { - passed: detectExists && hasBrowserName && ['success', 'error_max_turns'].includes(result.exitReason), + passed: detectExists && contentValid && ['success', 'error_max_turns'].includes(result.exitReason), }); expect(['success', 'error_max_turns']).toContain(result.exitReason); expect(detectExists).toBe(true); if (detectExists) { - expect(hasBrowserName).toBe(true); + expect(contentValid).toBe(true); } - }, 60_000); + }, 90_000); }); // --- gstack-upgrade E2E --- diff --git a/test/skill-routing-e2e.test.ts b/test/skill-routing-e2e.test.ts index 375b6388..2f220270 100644 --- a/test/skill-routing-e2e.test.ts +++ b/test/skill-routing-e2e.test.ts @@ -3,7 +3,7 @@ import { runSkillTest } from './helpers/session-runner'; import type { SkillTestResult } from './helpers/session-runner'; import { EvalCollector } from './helpers/eval-store'; import type { EvalTestEntry } from './helpers/eval-store'; -import { selectTests, detectBaseBranch, getChangedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES } from './helpers/touchfiles'; +import { selectTests, detectBaseBranch, getChangedFiles, E2E_TOUCHFILES, E2E_TIERS, GLOBAL_TOUCHFILES } from 
'./helpers/touchfiles'; import { spawnSync } from 'child_process'; import * as fs from 'fs'; import * as path from 'path'; @@ -42,6 +42,21 @@ if (evalsEnabled && !process.env.EVALS_ALL) { } } +// Apply EVALS_TIER filter (same logic as e2e-helpers.ts) +if (evalsEnabled && process.env.EVALS_TIER) { + const tier = process.env.EVALS_TIER as 'gate' | 'periodic'; + const tierTests = Object.entries(E2E_TIERS) + .filter(([, t]) => t === tier) + .map(([name]) => name); + + if (selectedTests === null) { + selectedTests = tierTests; + } else { + selectedTests = selectedTests.filter(t => tierTests.includes(t)); + } + process.stderr.write(`Routing EVALS_TIER=${tier}: ${selectedTests.length} tests\n\n`); +} + // --- Helper functions --- /** Copy all SKILL.md files for auto-discovery. @@ -140,6 +155,15 @@ function recordRouting(name: string, result: SkillTestResult, expectedSkill: str }); } +// Skip individual tests based on selectedTests (diff + tier filtering) +const testIfSelected = (name: string, fn: () => Promise<void>, timeout?: number) => { + if (selectedTests !== null && !selectedTests.includes(name)) { + test.skip(name, () => {}); + } else { + test.concurrent(name, fn, timeout); + } +}; + // --- Tests --- describeE2E('Skill Routing E2E — Developer Journey', () => { @@ -147,7 +171,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => { evalCollector?.finalize(); }); - test.concurrent('journey-ideation', async () => { + testIfSelected('journey-ideation', async () => { const tmpDir = createRoutingWorkDir('ideation'); try { @@ -176,7 +200,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => { } }, 150_000); - test.concurrent('journey-plan-eng', async () => { + testIfSelected('journey-plan-eng', async () => { const tmpDir = createRoutingWorkDir('plan-eng'); try { fs.writeFileSync(path.join(tmpDir, 'plan.md'), `# Waitlist App Architecture @@ -226,7 +250,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => { } }, 150_000); -
test.concurrent('journey-think-bigger', async () => { + testIfSelected('journey-think-bigger', async () => { const tmpDir = createRoutingWorkDir('think-bigger'); try { fs.writeFileSync(path.join(tmpDir, 'plan.md'), `# Waitlist App Architecture @@ -277,7 +301,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => { } }, 180_000); - test.concurrent('journey-debug', async () => { + testIfSelected('journey-debug', async () => { const tmpDir = createRoutingWorkDir('debug'); try { const run = (cmd: string, args: string[]) => @@ -335,7 +359,7 @@ export default app; } }, 150_000); - test.concurrent('journey-qa', async () => { + testIfSelected('journey-qa', async () => { const tmpDir = createRoutingWorkDir('qa'); try { fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify({ name: 'waitlist-app', scripts: { dev: 'next dev' } }, null, 2)); @@ -371,7 +395,7 @@ export default app; } }, 150_000); - test.concurrent('journey-code-review', async () => { + testIfSelected('journey-code-review', async () => { const tmpDir = createRoutingWorkDir('code-review'); try { const run = (cmd: string, args: string[]) => @@ -411,7 +435,7 @@ export default app; } }, 150_000); - test.concurrent('journey-ship', async () => { + testIfSelected('journey-ship', async () => { const tmpDir = createRoutingWorkDir('ship'); try { const run = (cmd: string, args: string[]) => @@ -450,7 +474,7 @@ export default app; } }, 150_000); - test.concurrent('journey-docs', async () => { + testIfSelected('journey-docs', async () => { const tmpDir = createRoutingWorkDir('docs'); try { const run = (cmd: string, args: string[]) => @@ -487,7 +511,7 @@ export default app; } }, 150_000); - test.concurrent('journey-retro', async () => { + testIfSelected('journey-retro', async () => { const tmpDir = createRoutingWorkDir('retro'); try { const run = (cmd: string, args: string[]) => @@ -530,7 +554,7 @@ export default app; } }, 150_000); - test.concurrent('journey-design-system', async () => { + 
testIfSelected('journey-design-system', async () => { const tmpDir = createRoutingWorkDir('design-system'); try { @@ -559,7 +583,7 @@ export default app; } }, 150_000); - test.concurrent('journey-visual-qa', async () => { + testIfSelected('journey-visual-qa', async () => { const tmpDir = createRoutingWorkDir('visual-qa'); try { const run = (cmd: string, args: string[]) =>