mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-05 05:05:08 +02:00
fix: three flaky E2E test fixes
ship-local-workflow: Use `git log --all` on the bare remote so that we count commits on feature/ship-test, not just those reachable from HEAD (main).

setup-cookies-detect: Accept "no browsers detected" as a valid result on CI (headless Ubuntu has no browser cookie databases). Increase maxTurns from 5 to 8 and make the prompt explicit about always writing the file.

routing tests: Apply EVALS_TIER filtering. All routing tests are periodic, but the file had no tier awareness, so they ran under EVALS_TIER=gate in CI and failed non-deterministically.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -175,9 +175,10 @@ describeIfSelected('Ship workflow E2E', ['ship-local-workflow'], () => {
|
||||
|
||||
logCost('/ship local workflow', result);
|
||||
|
||||
// Check push succeeded
|
||||
const remoteLog = spawnSync('git', ['log', '--oneline'], { cwd: shipRemoteDir, stdio: 'pipe' });
|
||||
const remoteCommits = remoteLog.stdout.toString().trim().split('\n').length;
|
||||
// Check push succeeded — check the feature branch on the bare remote
|
||||
// (bare repo HEAD points to main which only has 1 commit; the push goes to feature/ship-test)
|
||||
const remoteLog = spawnSync('git', ['log', '--oneline', '--all'], { cwd: shipRemoteDir, stdio: 'pipe' });
|
||||
const remoteCommits = remoteLog.stdout.toString().trim().split('\n').filter(l => l.length > 0).length;
|
||||
|
||||
// Check VERSION was bumped
|
||||
const versionContent = fs.existsSync(path.join(shipWorkDir, 'VERSION'))
|
||||
@@ -217,12 +218,14 @@ describeIfSelected('Setup Browser Cookies E2E', ['setup-cookies-detect'], () =>
|
||||
const result = await runSkillTest({
|
||||
prompt: `Read setup-browser-cookies/SKILL.md for the cookie import workflow.
|
||||
|
||||
This is a test environment. List which browsers you can detect on this system by checking for their cookie database files.
|
||||
Write the detected browsers to ${cookieDir}/detected-browsers.md.
|
||||
This is a test environment. Check which browsers exist on this system by looking for their cookie database files.
|
||||
IMPORTANT: You MUST write a file called ${cookieDir}/detected-browsers.md with your findings.
|
||||
If you find browsers, list them. If you find NO browsers, write "No browsers detected" to the file.
|
||||
The file must always be created regardless of results.
|
||||
Do NOT launch the cookie picker UI — just detect and report.`,
|
||||
workingDirectory: cookieDir,
|
||||
maxTurns: 5,
|
||||
timeout: 45_000,
|
||||
maxTurns: 8,
|
||||
timeout: 60_000,
|
||||
testName: 'setup-cookies-detect',
|
||||
runId,
|
||||
});
|
||||
@@ -233,17 +236,21 @@ Do NOT launch the cookie picker UI — just detect and report.`,
|
||||
const detectExists = fs.existsSync(detectPath);
|
||||
const detectContent = detectExists ? fs.readFileSync(detectPath, 'utf-8') : '';
|
||||
const hasBrowserName = /chrome|arc|brave|edge|comet|safari|firefox/i.test(detectContent);
|
||||
const hasNoBrowsers = /no browser|none|not found|not detected|could not|couldn't/i.test(detectContent);
|
||||
|
||||
// On CI (headless Ubuntu), no browsers are installed — "no browsers detected" is valid
|
||||
const contentValid = hasBrowserName || hasNoBrowsers;
|
||||
|
||||
recordE2E(evalCollector, '/setup-browser-cookies detect', 'Setup Browser Cookies E2E', result, {
|
||||
passed: detectExists && hasBrowserName && ['success', 'error_max_turns'].includes(result.exitReason),
|
||||
passed: detectExists && contentValid && ['success', 'error_max_turns'].includes(result.exitReason),
|
||||
});
|
||||
|
||||
expect(['success', 'error_max_turns']).toContain(result.exitReason);
|
||||
expect(detectExists).toBe(true);
|
||||
if (detectExists) {
|
||||
expect(hasBrowserName).toBe(true);
|
||||
expect(contentValid).toBe(true);
|
||||
}
|
||||
}, 60_000);
|
||||
}, 90_000);
|
||||
});
|
||||
|
||||
// --- gstack-upgrade E2E ---
|
||||
|
||||
@@ -3,7 +3,7 @@ import { runSkillTest } from './helpers/session-runner';
|
||||
import type { SkillTestResult } from './helpers/session-runner';
|
||||
import { EvalCollector } from './helpers/eval-store';
|
||||
import type { EvalTestEntry } from './helpers/eval-store';
|
||||
import { selectTests, detectBaseBranch, getChangedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES } from './helpers/touchfiles';
|
||||
import { selectTests, detectBaseBranch, getChangedFiles, E2E_TOUCHFILES, E2E_TIERS, GLOBAL_TOUCHFILES } from './helpers/touchfiles';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
@@ -42,6 +42,21 @@ if (evalsEnabled && !process.env.EVALS_ALL) {
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the EVALS_TIER filter (same logic as e2e-helpers.ts): keep only the
// tests whose E2E_TIERS entry matches the tier named in the environment.
if (evalsEnabled && process.env.EVALS_TIER) {
  const requestedTier = process.env.EVALS_TIER as 'gate' | 'periodic';

  // Collect the name of every test registered under the requested tier.
  const namesInTier: string[] = [];
  for (const [testName, testTier] of Object.entries(E2E_TIERS)) {
    if (testTier === requestedTier) {
      namesInTier.push(testName);
    }
  }

  // With no earlier selection (null) the tier list becomes the selection;
  // otherwise the existing selection is intersected with the tier list.
  selectedTests =
    selectedTests === null
      ? namesInTier
      : selectedTests.filter(candidate => namesInTier.includes(candidate));

  process.stderr.write(`Routing EVALS_TIER=${requestedTier}: ${selectedTests.length} tests\n\n`);
}
|
||||
|
||||
// --- Helper functions ---
|
||||
|
||||
/** Copy all SKILL.md files for auto-discovery.
|
||||
@@ -140,6 +155,15 @@ function recordRouting(name: string, result: SkillTestResult, expectedSkill: str
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Registers a single routing test, honouring `selectedTests`
 * (diff + tier filtering).
 *
 * When `selectedTests` is null, or contains `name`, the test is registered
 * with `test.concurrent`; otherwise it is registered as skipped with an
 * empty body.
 *
 * @param name - Test name; also the key looked up in `selectedTests`.
 * @param fn - Async test body.
 * @param timeout - Optional timeout forwarded to `test.concurrent`.
 */
const testIfSelected = (name: string, fn: () => Promise<void>, timeout?: number) => {
  const isSelected = selectedTests === null || selectedTests.includes(name);
  if (isSelected) {
    test.concurrent(name, fn, timeout);
    return;
  }
  test.skip(name, () => {});
};
|
||||
|
||||
// --- Tests ---
|
||||
|
||||
describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
@@ -147,7 +171,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
evalCollector?.finalize();
|
||||
});
|
||||
|
||||
test.concurrent('journey-ideation', async () => {
|
||||
testIfSelected('journey-ideation', async () => {
|
||||
const tmpDir = createRoutingWorkDir('ideation');
|
||||
try {
|
||||
|
||||
@@ -176,7 +200,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-plan-eng', async () => {
|
||||
testIfSelected('journey-plan-eng', async () => {
|
||||
const tmpDir = createRoutingWorkDir('plan-eng');
|
||||
try {
|
||||
fs.writeFileSync(path.join(tmpDir, 'plan.md'), `# Waitlist App Architecture
|
||||
@@ -226,7 +250,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-think-bigger', async () => {
|
||||
testIfSelected('journey-think-bigger', async () => {
|
||||
const tmpDir = createRoutingWorkDir('think-bigger');
|
||||
try {
|
||||
fs.writeFileSync(path.join(tmpDir, 'plan.md'), `# Waitlist App Architecture
|
||||
@@ -277,7 +301,7 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||
}
|
||||
}, 180_000);
|
||||
|
||||
test.concurrent('journey-debug', async () => {
|
||||
testIfSelected('journey-debug', async () => {
|
||||
const tmpDir = createRoutingWorkDir('debug');
|
||||
try {
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
@@ -335,7 +359,7 @@ export default app;
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-qa', async () => {
|
||||
testIfSelected('journey-qa', async () => {
|
||||
const tmpDir = createRoutingWorkDir('qa');
|
||||
try {
|
||||
fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify({ name: 'waitlist-app', scripts: { dev: 'next dev' } }, null, 2));
|
||||
@@ -371,7 +395,7 @@ export default app;
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-code-review', async () => {
|
||||
testIfSelected('journey-code-review', async () => {
|
||||
const tmpDir = createRoutingWorkDir('code-review');
|
||||
try {
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
@@ -411,7 +435,7 @@ export default app;
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-ship', async () => {
|
||||
testIfSelected('journey-ship', async () => {
|
||||
const tmpDir = createRoutingWorkDir('ship');
|
||||
try {
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
@@ -450,7 +474,7 @@ export default app;
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-docs', async () => {
|
||||
testIfSelected('journey-docs', async () => {
|
||||
const tmpDir = createRoutingWorkDir('docs');
|
||||
try {
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
@@ -487,7 +511,7 @@ export default app;
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-retro', async () => {
|
||||
testIfSelected('journey-retro', async () => {
|
||||
const tmpDir = createRoutingWorkDir('retro');
|
||||
try {
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
@@ -530,7 +554,7 @@ export default app;
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-design-system', async () => {
|
||||
testIfSelected('journey-design-system', async () => {
|
||||
const tmpDir = createRoutingWorkDir('design-system');
|
||||
try {
|
||||
|
||||
@@ -559,7 +583,7 @@ export default app;
|
||||
}
|
||||
}, 150_000);
|
||||
|
||||
test.concurrent('journey-visual-qa', async () => {
|
||||
testIfSelected('journey-visual-qa', async () => {
|
||||
const tmpDir = createRoutingWorkDir('visual-qa');
|
||||
try {
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
|
||||
Reference in New Issue
Block a user