From 06063eef241d9af7b6be4e4f9b0b42ddc19bbe07 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Tue, 24 Mar 2026 14:19:25 -0700 Subject: [PATCH] fix: three flaky E2E test fixes - evals-periodic.yml: hardcode runner (matrix objects don't define 'runner' property, actionlint catches the error) - Remove setup-cookies-detect E2E: redundant with 30+ unit tests in browse/test/cookie-import-browser.test.ts; E2E just tested LLM instruction-following on a CI box with no browsers - ship-local-workflow: check branch existence on remote instead of counting commits (fragile with bare repos + --all) --- .github/workflows/evals-periodic.yml | 2 +- test/helpers/touchfiles.ts | 6 --- test/skill-e2e-workflow.test.ts | 77 +++++----------------------- 3 files changed, 13 insertions(+), 72 deletions(-) diff --git a/.github/workflows/evals-periodic.yml b/.github/workflows/evals-periodic.yml index e529dbf4..20035c45 100644 --- a/.github/workflows/evals-periodic.yml +++ b/.github/workflows/evals-periodic.yml @@ -56,7 +56,7 @@ jobs: ${{ env.IMAGE }}:latest evals: - runs-on: ${{ matrix.suite.runner || 'ubicloud-standard-2' }} + runs-on: ubicloud-standard-2 needs: build-image container: image: ${{ needs.build-image.outputs.image-tag }} diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index 07bee96f..41736999 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -80,9 +80,6 @@ export const E2E_TOUCHFILES: Record = { 'ship-base-branch': ['ship/**', 'bin/gstack-repo-mode'], 'ship-local-workflow': ['ship/**', 'scripts/gen-skill-docs.ts'], - // Setup browser cookies - 'setup-cookies-detect': ['setup-browser-cookies/**'], - // Retro 'retro': ['retro/**'], 'retro-base-branch': ['retro/**'], @@ -207,9 +204,6 @@ export const E2E_TIERS: Record = { 'ship-coverage-audit': 'gate', 'ship-triage': 'gate', - // Setup browser cookies - 'setup-cookies-detect': 'gate', - // Retro — gate for cheap branch detection, periodic for full Opus retro 'retro': 'periodic', 'retro-base-branch': 'gate', diff --git a/test/skill-e2e-workflow.test.ts b/test/skill-e2e-workflow.test.ts index c290f6ab..598b65b8 100644 --- a/test/skill-e2e-workflow.test.ts +++ b/test/skill-e2e-workflow.test.ts @@ -175,83 +175,30 @@ describeIfSelected('Ship workflow E2E', ['ship-local-workflow'], () => { logCost('/ship local workflow', result); - // Check push succeeded — check the feature branch on the bare remote - // (bare repo HEAD points to main which only has 1 commit; the push goes to feature/ship-test) - const remoteLog = spawnSync('git', ['log', '--oneline', '--all'], { cwd: shipRemoteDir, stdio: 'pipe' }); - const remoteCommits = remoteLog.stdout.toString().trim().split('\n').filter(l => l.length > 0).length; + // Check push succeeded — verify the feature branch exists on the bare remote + const branchCheck = spawnSync('git', ['branch', '--list', 'feature/ship-test'], { cwd: shipRemoteDir, stdio: 'pipe' }); + const branchExists = branchCheck.stdout.toString().trim().length > 0; - // Check VERSION was bumped + // Check VERSION was bumped locally (even if push failed, this shows the LLM did the work) const versionContent = fs.existsSync(path.join(shipWorkDir, 'VERSION')) ? fs.readFileSync(path.join(shipWorkDir, 'VERSION'), 'utf-8').trim() : ''; const versionBumped = versionContent !== '0.1.0.0'; recordE2E(evalCollector, '/ship local workflow', 'Ship workflow E2E', result, { - passed: remoteCommits > 1 && ['success', 'error_max_turns'].includes(result.exitReason), + passed: branchExists && versionBumped && ['success', 'error_max_turns'].includes(result.exitReason), }); expect(['success', 'error_max_turns']).toContain(result.exitReason); - expect(remoteCommits).toBeGreaterThan(1); - console.log(`Remote commits: ${remoteCommits}, VERSION: ${versionContent}, bumped: ${versionBumped}`); + expect(branchExists).toBe(true); + expect(versionBumped).toBe(true); + console.log(`Branch pushed: ${branchExists}, VERSION: ${versionContent}, bumped: ${versionBumped}`); }, 150_000); }); -// --- Browser cookie detection smoke test --- - -describeIfSelected('Setup Browser Cookies E2E', ['setup-cookies-detect'], () => { - let cookieDir: string; - - beforeAll(() => { - cookieDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cookies-')); - // Copy skill files - fs.mkdirSync(path.join(cookieDir, 'setup-browser-cookies'), { recursive: true }); - fs.copyFileSync( - path.join(ROOT, 'setup-browser-cookies', 'SKILL.md'), - path.join(cookieDir, 'setup-browser-cookies', 'SKILL.md'), - ); - }); - - afterAll(() => { - try { fs.rmSync(cookieDir, { recursive: true, force: true }); } catch {} - }); - - testConcurrentIfSelected('setup-cookies-detect', async () => { - const result = await runSkillTest({ - prompt: `Read setup-browser-cookies/SKILL.md for the cookie import workflow. - -This is a test environment. Check which browsers exist on this system by looking for their cookie database files. -IMPORTANT: You MUST write a file called ${cookieDir}/detected-browsers.md with your findings. -If you find browsers, list them. If you find NO browsers, write "No browsers detected" to the file. -The file must always be created regardless of results. -Do NOT launch the cookie picker UI — just detect and report.`, - workingDirectory: cookieDir, - maxTurns: 8, - timeout: 60_000, - testName: 'setup-cookies-detect', - runId, - }); - - logCost('/setup-browser-cookies detect', result); - - const detectPath = path.join(cookieDir, 'detected-browsers.md'); - const detectExists = fs.existsSync(detectPath); - const detectContent = detectExists ? fs.readFileSync(detectPath, 'utf-8') : ''; - const hasBrowserName = /chrome|arc|brave|edge|comet|safari|firefox/i.test(detectContent); - const hasNoBrowsers = /no browser|none|not found|not detected|could not|couldn't/i.test(detectContent); - - // On CI (headless Ubuntu), no browsers are installed — "no browsers detected" is valid - const contentValid = hasBrowserName || hasNoBrowsers; - - recordE2E(evalCollector, '/setup-browser-cookies detect', 'Setup Browser Cookies E2E', result, { - passed: detectExists && contentValid && ['success', 'error_max_turns'].includes(result.exitReason), - }); - - expect(['success', 'error_max_turns']).toContain(result.exitReason); - expect(detectExists).toBe(true); - if (detectExists) { - expect(contentValid).toBe(true); - } - }, 90_000); -}); +// setup-cookies-detect REMOVED: The cookie-import-browser module has 30+ thorough +// unit tests in browse/test/cookie-import-browser.test.ts (decryption, profile +// detection, error handling, path traversal). The E2E just tested LLM instruction- +// following ("write a file saying no browsers") on a CI box with no browsers. // --- gstack-upgrade E2E ---