Merge remote-tracking branch 'origin/main' into garrytan/fix-startup-noise

This commit is contained in:
Garry Tan
2026-03-24 13:41:09 -07:00
12 changed files with 429 additions and 46 deletions
+1 -1
View File
@@ -69,7 +69,7 @@ The server writes `.gstack/browse.json` (atomic write via tmp + rename, mode 0o6
{ "pid": 12345, "port": 34567, "token": "uuid-v4", "startedAt": "...", "binaryVersion": "abc123" }
```
The CLI reads this file to find the server. If the file is missing, stale, or the PID is dead, the CLI spawns a new server.
The CLI reads this file to find the server. If the file is missing or the server fails an HTTP health check, the CLI spawns a new server. On Windows, PID-based process detection is unreliable in Bun binaries, so the health check (GET /health) is the primary liveness signal on all platforms.
### Port selection
+26
View File
@@ -1,5 +1,31 @@
# Changelog
## [0.11.15.0] - 2026-03-24 — E2E Test Coverage for Plan Reviews & Codex
### Added
- **E2E tests verify plan review reports appear at the bottom of plans.** The `/plan-eng-review` review report is now tested end-to-end — if it stops writing `## GSTACK REVIEW REPORT` to the plan file, the test catches it.
- **E2E tests verify Codex is offered in every plan skill.** Four new lightweight tests confirm that `/office-hours`, `/plan-ceo-review`, `/plan-design-review`, and `/plan-eng-review` all check for Codex availability, prompt the user, and handle the fallback when Codex is unavailable.
### For contributors
- New E2E tests in `test/skill-e2e-plan.test.ts`: `plan-review-report`, `codex-offered-eng-review`, `codex-offered-ceo-review`, `codex-offered-office-hours`, `codex-offered-design-review`
- Updated touchfile mappings and selection count assertions
- Added `touchfiles` to the documented global touchfile list in CLAUDE.md
## [0.11.14.0] - 2026-03-24 — Windows Browse Fix
### Fixed
- **Browse engine now works on Windows.** Three compounding bugs blocked all Windows `/browse` users: the server process died when the CLI exited (Bun's `unref()` doesn't truly detach on Windows), the health check never ran because `process.kill(pid, 0)` is broken in Bun binaries on Windows, and Chromium's sandbox failed when spawned through the Bun→Node process chain. All three are now fixed. Credits to @fqueiro (PR #191) for identifying the `detached: true` approach.
- **Health check runs first on all platforms.** `ensureServer()` now tries an HTTP health check before falling back to PID-based detection — more reliable on every OS, not just Windows.
- **Startup errors are logged to disk.** When the server fails to start, errors are written to `~/.gstack/browse-startup-error.log` so Windows users (who lose stderr due to process detachment) can debug.
- **Chromium sandbox disabled on Windows.** Chromium's sandbox requires elevated privileges when spawned through the Bun→Node chain — now disabled on Windows only.
### For contributors
- New tests for `isServerHealthy()` and startup error logging in `browse/test/config.test.ts`
## [0.11.13.0] - 2026-03-24 — Worktree Isolation + Infrastructure Elegance
### Added
+1 -1
View File
@@ -29,7 +29,7 @@ against the previous run.
**Diff-based test selection:** `test:evals` and `test:e2e` auto-select tests based
on `git diff` against the base branch. Each test declares its file dependencies in
`test/helpers/touchfiles.ts`. Changes to global touchfiles (session-runner, eval-store,
llm-judge, gen-skill-docs) trigger all tests. Use `EVALS_ALL=1` or the `:all` script
llm-judge, gen-skill-docs, touchfiles) trigger all tests. Use `EVALS_ALL=1` or the `:all` script
variants to force all tests. Run `eval:select` to preview which tests would run.
## Testing
+1 -1
View File
@@ -1 +1 @@
0.11.13.0
0.11.15.0
+9 -1
View File
@@ -89,6 +89,10 @@ export class BrowserManager {
this.browser = await chromium.launch({
headless: useHeadless,
// On Windows, Chromium's sandbox fails when the server is spawned through
// the Bun→Node process chain (GitHub #276). Disable it — local daemon
// browsing user-specified URLs has marginal sandbox benefit.
chromiumSandbox: process.platform !== 'win32',
...(launchArgs.length > 0 ? { args: launchArgs } : {}),
});
@@ -492,7 +496,11 @@ export class BrowserManager {
// 2. Launch new headed browser (try-catch — if this fails, headless stays running)
let newBrowser: Browser;
try {
newBrowser = await chromium.launch({ headless: false, timeout: 15000 });
newBrowser = await chromium.launch({
headless: false,
timeout: 15000,
chromiumSandbox: process.platform !== 'win32',
});
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
return `ERROR: Cannot open headed browser — ${msg}. Headless browser still running.`;
+105 -39
View File
@@ -76,6 +76,13 @@ export function resolveNodeServerScript(
const NODE_SERVER_SCRIPT = IS_WINDOWS ? resolveNodeServerScript() : null;
// On Windows, hard-fail if server-node.mjs is missing — the Bun path is known broken.
if (IS_WINDOWS && !NODE_SERVER_SCRIPT) {
throw new Error(
'server-node.mjs not found. Run `bun run build` to generate the Windows server bundle.'
);
}
interface ServerState {
pid: number;
port: number;
@@ -96,6 +103,19 @@ function readState(): ServerState | null {
}
function isProcessAlive(pid: number): boolean {
if (IS_WINDOWS) {
// Bun's compiled binary can't signal Windows PIDs (always throws ESRCH).
// Use tasklist as a fallback. Only for one-shot calls — too slow for polling loops.
try {
const result = Bun.spawnSync(
['tasklist', '/FI', `PID eq ${pid}`, '/NH', '/FO', 'CSV'],
{ stdout: 'pipe', stderr: 'pipe', timeout: 3000 }
);
return result.stdout.toString().includes(`"${pid}"`);
} catch {
return false;
}
}
try {
process.kill(pid, 0);
return true;
@@ -104,10 +124,42 @@ function isProcessAlive(pid: number): boolean {
}
}
/**
 * HTTP health check — definitive proof the server is alive and responsive.
 * Used in all polling loops instead of isProcessAlive() (which is slow on Windows).
 *
 * @param port - Local TCP port the server is expected to be listening on.
 * @returns `true` only when `GET /health` on 127.0.0.1 answers with an OK
 *   status and a JSON body whose `status` field is exactly `'healthy'`.
 *   Any other response, an unparsable body, a connection error, or the
 *   2-second timeout yields `false`. Never throws.
 */
export async function isServerHealthy(port: number): Promise<boolean> {
  try {
    const resp = await fetch(`http://127.0.0.1:${port}/health`, {
      signal: AbortSignal.timeout(2000),
    });
    if (!resp.ok) return false;
    // The body is unvalidated JSON: keep it `unknown` and narrow before
    // reading `status` instead of casting the whole payload to `any`.
    const health: unknown = await resp.json();
    return (
      typeof health === 'object' &&
      health !== null &&
      (health as { status?: unknown }).status === 'healthy'
    );
  } catch {
    // Connection refused, timeout, and invalid JSON all mean "not healthy".
    return false;
  }
}
// ─── Process Management ─────────────────────────────────────────
async function killServer(pid: number): Promise<void> {
if (!isProcessAlive(pid)) return;
if (IS_WINDOWS) {
// taskkill /T /F kills the process tree (Node + Chromium)
try {
Bun.spawnSync(
['taskkill', '/PID', String(pid), '/T', '/F'],
{ stdout: 'pipe', stderr: 'pipe', timeout: 5000 }
);
} catch {}
const deadline = Date.now() + 2000;
while (Date.now() < deadline && isProcessAlive(pid)) {
await Bun.sleep(100);
}
return;
}
try { process.kill(pid, 'SIGTERM'); } catch { return; }
// Wait up to 2s for graceful shutdown
@@ -127,6 +179,10 @@ async function killServer(pid: number): Promise<void> {
* Verifies PID ownership before sending signals.
*/
function cleanupLegacyState(): void {
// No legacy state on Windows — /tmp and `ps` don't exist, and gstack
// never ran on Windows before the Node.js fallback was added.
if (IS_WINDOWS) return;
try {
const files = fs.readdirSync('/tmp').filter(f => f.startsWith('browse-server') && f.endsWith('.json'));
for (const file of files) {
@@ -164,44 +220,65 @@ function cleanupLegacyState(): void {
async function startServer(): Promise<ServerState> {
ensureStateDir(config);
// Clean up stale state file
// Clean up stale state file and error log
try { fs.unlinkSync(config.stateFile); } catch {}
try { fs.unlinkSync(path.join(config.stateDir, 'browse-startup-error.log')); } catch {}
// Start server as detached background process.
// On Windows, Bun can't launch/connect to Playwright's Chromium (oven-sh/bun#4253, #9911).
// Fall back to running the server under Node.js with Bun API polyfills.
const useNode = IS_WINDOWS && NODE_SERVER_SCRIPT;
const serverCmd = useNode
? ['node', NODE_SERVER_SCRIPT]
: ['bun', 'run', SERVER_SCRIPT];
const proc = Bun.spawn(serverCmd, {
stdio: ['ignore', 'pipe', 'pipe'],
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile },
});
let proc: any = null;
// Don't hold the CLI open
proc.unref();
if (IS_WINDOWS && NODE_SERVER_SCRIPT) {
// Windows: Bun.spawn() + proc.unref() doesn't truly detach on Windows —
// when the CLI exits, the server dies with it. Use Node's child_process.spawn
// with { detached: true } instead, which is the gold standard for Windows
// process independence. Credit: PR #191 by @fqueiro.
const launcherCode =
`const{spawn}=require('child_process');` +
`spawn(process.execPath,[${JSON.stringify(NODE_SERVER_SCRIPT)}],` +
`{detached:true,stdio:'ignore',env:Object.assign({},process.env,` +
`{BROWSE_STATE_FILE:${JSON.stringify(config.stateFile)}})}).unref()`;
Bun.spawnSync(['node', '-e', launcherCode], { stdio: 'ignore' });
} else {
// macOS/Linux: Bun.spawn + unref works correctly
proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], {
stdio: ['ignore', 'pipe', 'pipe'],
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile },
});
proc.unref();
}
// Wait for state file to appear
// Wait for server to become healthy.
// Use HTTP health check (not isProcessAlive) — it's fast (~instant ECONNREFUSED)
// and works reliably on all platforms including Windows.
const start = Date.now();
while (Date.now() - start < MAX_START_WAIT) {
const state = readState();
if (state && isProcessAlive(state.pid)) {
if (state && await isServerHealthy(state.port)) {
return state;
}
await Bun.sleep(100);
}
// If we get here, server didn't start in time
// Try to read stderr for error message
const stderr = proc.stderr;
if (stderr) {
const reader = stderr.getReader();
// Server didn't start in time — try to get error details
if (proc?.stderr) {
// macOS/Linux: read stderr from the spawned process
const reader = proc.stderr.getReader();
const { value } = await reader.read();
if (value) {
const errText = new TextDecoder().decode(value);
throw new Error(`Server failed to start:\n${errText}`);
}
} else {
// Windows: check startup error log (server writes errors to disk since
// stderr is unavailable due to stdio: 'ignore' for detachment)
const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
try {
const errorLog = fs.readFileSync(errorLogPath, 'utf-8').trim();
if (errorLog) {
throw new Error(`Server failed to start:\n${errorLog}`);
}
} catch (e: any) {
if (e.code !== 'ENOENT') throw e;
}
}
throw new Error(`Server failed to start within ${MAX_START_WAIT / 1000}s`);
}
@@ -237,7 +314,10 @@ function acquireServerLock(): (() => void) | null {
async function ensureServer(): Promise<ServerState> {
const state = readState();
if (state && isProcessAlive(state.pid)) {
// Health-check-first: HTTP is definitive proof the server is alive and responsive.
// This replaces the PID-gated approach which breaks on Windows (Bun's process.kill
// always throws ESRCH for Windows PIDs in compiled binaries).
if (state && await isServerHealthy(state.port)) {
// Check for binary version mismatch (auto-restart on update)
const currentVersion = readVersionHash();
if (currentVersion && state.binaryVersion && currentVersion !== state.binaryVersion) {
@@ -245,21 +325,7 @@ async function ensureServer(): Promise<ServerState> {
await killServer(state.pid);
return startServer();
}
// Server appears alive — do a health check
try {
const resp = await fetch(`http://127.0.0.1:${state.port}/health`, {
signal: AbortSignal.timeout(2000),
});
if (resp.ok) {
const health = await resp.json() as any;
if (health.status === 'healthy') {
return state;
}
}
} catch {
// Health check failed — server is dead or unhealthy
}
return state;
}
// Ensure state directory exists before lock acquisition (lock file lives there)
@@ -273,7 +339,7 @@ async function ensureServer(): Promise<ServerState> {
const start = Date.now();
while (Date.now() - start < MAX_START_WAIT) {
const freshState = readState();
if (freshState && isProcessAlive(freshState.pid)) return freshState;
if (freshState && await isServerHealthy(freshState.port)) return freshState;
await Bun.sleep(200);
}
throw new Error('Timed out waiting for another instance to start the server');
@@ -282,7 +348,7 @@ async function ensureServer(): Promise<ServerState> {
try {
// Re-read state under lock in case another process just started the server
const freshState = readState();
if (freshState && isProcessAlive(freshState.pid)) {
if (freshState && await isServerHealthy(freshState.port)) {
return freshState;
}
+16
View File
@@ -286,6 +286,13 @@ async function shutdown() {
// Handle signals
process.on('SIGTERM', shutdown);
process.on('SIGINT', shutdown);
// Windows: taskkill /F bypasses SIGTERM, but 'exit' fires for some shutdown paths.
// Defense-in-depth — primary cleanup is the CLI's stale-state detection via health check.
if (process.platform === 'win32') {
process.on('exit', () => {
try { fs.unlinkSync(config.stateFile); } catch {}
});
}
// ─── Start ─────────────────────────────────────────────────────
async function start() {
@@ -365,5 +372,14 @@ async function start() {
// Last-resort handler for a failed startup: report the failure on stderr,
// persist it to disk, and exit non-zero.
start().catch((err: unknown) => {
  // A rejection is not guaranteed to be an Error. Narrow it first so a
  // null/undefined/string rejection cannot throw inside this handler and
  // skip both the error log and the exit below.
  const message = err instanceof Error ? err.message : String(err);
  const stack = err instanceof Error ? err.stack ?? '' : '';
  console.error(`[browse] Failed to start: ${message}`);
  // Write error to disk for the CLI to read — on Windows, the CLI can't capture
  // stderr because the server is launched with detached: true, stdio: 'ignore'.
  try {
    const errorLogPath = path.join(config.stateDir, 'browse-startup-error.log');
    fs.mkdirSync(config.stateDir, { recursive: true });
    fs.writeFileSync(errorLogPath, `${new Date().toISOString()} ${message}\n${stack}\n`);
  } catch {
    // Best effort: the state dir could not be created or written — nothing more we can do
  }
  process.exit(1);
});
+66
View File
@@ -248,3 +248,69 @@ describe('version mismatch detection', () => {
expect(shouldRestart).toBe(false);
});
});
// Exercises isServerHealthy() against throwaway local HTTP servers.
describe('isServerHealthy', () => {
  const { isServerHealthy } = require('../src/cli');
  const http = require('http');

  /**
   * Starts a server on an OS-assigned port that answers every request with
   * the given status, headers and body, runs isServerHealthy() against it,
   * and closes the server again whatever the outcome.
   */
  async function probe(
    statusCode: number,
    body: string,
    headers: Record<string, string> = {},
  ): Promise<boolean> {
    const server = http.createServer((_req: any, res: any) => {
      res.writeHead(statusCode, headers);
      res.end(body);
    });
    await new Promise<void>(resolve => server.listen(0, resolve));
    try {
      return await isServerHealthy(server.address().port);
    } finally {
      server.close();
    }
  }

  const JSON_HEADERS = { 'Content-Type': 'application/json' };

  test('returns true for a healthy server', async () => {
    expect(await probe(200, JSON.stringify({ status: 'healthy' }), JSON_HEADERS)).toBe(true);
  });

  test('returns false for an unhealthy server', async () => {
    expect(await probe(200, JSON.stringify({ status: 'unhealthy' }), JSON_HEADERS)).toBe(false);
  });

  test('returns false when server is not running', async () => {
    // Reserve a free port from the OS and release it again, so the probe
    // targets a port that was just confirmed unused rather than a hard-coded
    // number that another process may happen to be listening on.
    const placeholder = http.createServer();
    await new Promise<void>(resolve => placeholder.listen(0, resolve));
    const port = placeholder.address().port;
    await new Promise<void>(resolve => placeholder.close(() => resolve()));
    expect(await isServerHealthy(port)).toBe(false);
  });

  test('returns false on non-200 response', async () => {
    expect(await probe(500, 'Internal Server Error')).toBe(false);
  });
});
// Round-trips the on-disk startup error log format: an ISO timestamp
// followed by the error message.
describe('startup error log', () => {
  test('write and read error log', () => {
    // mkdtempSync creates a uniquely named directory; a Date.now()-based name
    // can collide when two runs start within the same millisecond.
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'browse-error-log-test-'));
    try {
      const errorLogPath = path.join(tmpDir, 'browse-startup-error.log');
      const errorMsg = 'Cannot find module playwright';
      fs.writeFileSync(errorLogPath, `2026-03-23T00:00:00.000Z ${errorMsg}\n`);
      const content = fs.readFileSync(errorLogPath, 'utf-8').trim();
      expect(content).toContain(errorMsg);
      expect(content).toMatch(/^\d{4}-\d{2}-\d{2}T/); // ISO timestamp prefix
    } finally {
      // Remove the directory even when an assertion above fails.
      fs.rmSync(tmpDir, { recursive: true, force: true });
    }
  });
});
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "gstack",
"version": "0.11.13.0",
"version": "0.11.14.0",
"description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
"license": "MIT",
"type": "module",
+7
View File
@@ -68,6 +68,13 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
'plan-ceo-review-benefits': ['plan-ceo-review/**', 'scripts/gen-skill-docs.ts'],
'plan-eng-review': ['plan-eng-review/**'],
'plan-eng-review-artifact': ['plan-eng-review/**'],
'plan-review-report': ['plan-eng-review/**', 'scripts/gen-skill-docs.ts'],
// Codex offering verification
'codex-offered-office-hours': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
'codex-offered-ceo-review': ['plan-ceo-review/**', 'scripts/gen-skill-docs.ts'],
'codex-offered-design-review': ['plan-design-review/**', 'scripts/gen-skill-docs.ts'],
'codex-offered-eng-review': ['plan-eng-review/**', 'scripts/gen-skill-docs.ts'],
// Ship
'ship-base-branch': ['ship/**', 'bin/gstack-repo-mode'],
+193
View File
@@ -535,6 +535,199 @@ Write your summary to ${benefitsDir}/benefits-summary.md`,
}, 180_000);
});
// --- Plan Review Report E2E ---
// Verifies that plan-eng-review writes a "## GSTACK REVIEW REPORT" section
// to the bottom of the plan file (the living review status footer).
describeIfSelected('Plan Review Report E2E', ['plan-review-report'], () => {
  let planDir: string;

  beforeAll(() => {
    // Throwaway git repo containing only the plan under review and the skill file.
    planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-review-report-'));
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });
    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Add Notifications System
## Context
We're building a real-time notification system for our SaaS app.
## Changes
1. WebSocket server for push notifications
2. Notification preferences API
3. Email digest fallback for offline users
4. PostgreSQL table for notification storage
## Architecture
- WebSocket: Socket.io on Express
- Queue: Bull + Redis for email digests
- Storage: PostgreSQL notifications table
- Frontend: React toast component
## Open questions
- Retry policy for failed WebSocket delivery?
- Max notifications stored per user?
`);
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'add plan']);
    // Copy plan-eng-review skill
    fs.mkdirSync(path.join(planDir, 'plan-eng-review'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
      path.join(planDir, 'plan-eng-review', 'SKILL.md'),
    );
  });

  afterAll(() => {
    try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
  });

  test('/plan-eng-review writes GSTACK REVIEW REPORT to plan file', async () => {
    const result = await runSkillTest({
      prompt: `Read plan-eng-review/SKILL.md for the review workflow.
Read plan.md that's the plan to review. This is a standalone plan document, not a codebase skip any codebase exploration steps.
Proceed directly to the full review. Skip any AskUserQuestion calls this is non-interactive.
Skip the preamble bash block, lake intro, telemetry, and contributor mode sections.
CRITICAL REQUIREMENT: plan.md IS the plan file for this review session. After completing your review, you MUST write a "## GSTACK REVIEW REPORT" section to the END of plan.md, exactly as described in the "Plan File Review Report" section of SKILL.md. If gstack-review-read is not available or returns NO_REVIEWS, write the placeholder table with all four review rows (CEO, Codex, Eng, Design). Use the Edit tool to append to plan.md do NOT overwrite the existing plan content.
This review report at the bottom of the plan is the MOST IMPORTANT deliverable of this test.`,
      workingDirectory: planDir,
      maxTurns: 20,
      timeout: 360_000,
      testName: 'plan-review-report',
      runId,
      model: 'claude-opus-4-6',
    });
    logCost('/plan-eng-review report', result);
    recordE2E(evalCollector, '/plan-review-report', 'Plan Review Report E2E', result, {
      passed: ['success', 'error_max_turns'].includes(result.exitReason),
    });
    expect(['success', 'error_max_turns']).toContain(result.exitReason);

    // Verify the review report was written to the plan file
    const planContent = fs.readFileSync(path.join(planDir, 'plan.md'), 'utf-8');
    // Original plan content should still be present
    expect(planContent).toContain('# Plan: Add Notifications System');
    expect(planContent).toContain('WebSocket');
    // Review report section must exist
    expect(planContent).toContain('## GSTACK REVIEW REPORT');

    // Report should be at the bottom of the file: its heading has to come
    // after every heading of the original plan that is still in the file.
    // (A heading that is absent yields -1 and passes; presence is not required here.)
    const reportIndex = planContent.lastIndexOf('## GSTACK REVIEW REPORT');
    const originalHeadings = [
      '# Plan: Add Notifications System',
      '## Context',
      '## Changes',
      '## Architecture',
      '## Open questions',
    ];
    for (const heading of originalHeadings) {
      expect(planContent.indexOf(heading)).toBeLessThan(reportIndex);
    }

    const afterReport = planContent.slice(reportIndex);
    // Should contain the review table with standard rows
    expect(afterReport).toMatch(/\|\s*Review\s*\|/);
    expect(afterReport).toContain('CEO Review');
    expect(afterReport).toContain('Eng Review');
    expect(afterReport).toContain('Design Review');
    console.log('Plan review report found at bottom of plan.md');
  }, 420_000);
});
// --- Codex Offering E2E ---
// Verifies that Codex is properly offered (with availability check, user prompt,
// and fallback) in office-hours, plan-ceo-review, plan-design-review, plan-eng-review.
describeIfSelected('Codex Offering E2E', [
  'codex-offered-office-hours', 'codex-offered-ceo-review',
  'codex-offered-design-review', 'codex-offered-eng-review',
], () => {
  let testDir: string;

  // One scratch git repo shared by all four checks; each check writes its own
  // summary file, named after the test.
  beforeAll(() => {
    testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-codex-offer-'));
    const git = (...args: string[]) =>
      spawnSync('git', args, { cwd: testDir, stdio: 'pipe', timeout: 5000 });
    git('init', '-b', 'main');
    git('config', 'user.email', 'test@test.com');
    git('config', 'user.name', 'Test');
    fs.writeFileSync(path.join(testDir, 'README.md'), '# Test Project\n');
    git('add', '.');
    git('commit', '-m', 'init');

    // Copy all 4 SKILL.md files
    const skillNames = ['office-hours', 'plan-ceo-review', 'plan-design-review', 'plan-eng-review'];
    skillNames.forEach((skill) => {
      const skillDir = path.join(testDir, skill);
      fs.mkdirSync(skillDir, { recursive: true });
      fs.copyFileSync(path.join(ROOT, skill, 'SKILL.md'), path.join(skillDir, 'SKILL.md'));
    });
  });

  afterAll(() => {
    try { fs.rmSync(testDir, { recursive: true, force: true }); } catch {}
  });

  /**
   * Asks the agent to summarize the Codex integration of one skill into a
   * file, then checks the summary mentions the availability check, a
   * fallback, and that the step is optional.
   */
  async function checkCodexOffering(skill: string, testName: string, featureName: string) {
    const result = await runSkillTest({
      prompt: `Read ${skill}/SKILL.md. Search for ALL sections related to "codex", "outside voice", or "second opinion".
Summarize the Codex/${featureName} integration answer these specific questions:
1. How is Codex availability checked? (what exact bash command?)
2. How is the user prompted? (via AskUserQuestion? what are the options?)
3. What happens when Codex is NOT available? (fallback to subagent? skip entirely?)
4. Is this step blocking (gates the workflow) or optional (can be skipped)?
5. What prompt/context is sent to Codex?
Write your summary to ${testDir}/${testName}-summary.md`,
      workingDirectory: testDir,
      maxTurns: 8,
      timeout: 120_000,
      testName,
      runId,
    });
    logCost(`/${skill} codex offering`, result);
    recordE2E(evalCollector, `/${testName}`, 'Codex Offering E2E', result);
    expect(result.exitReason).toBe('success');

    const summaryFile = path.join(testDir, `${testName}-summary.md`);
    expect(fs.existsSync(summaryFile)).toBe(true);
    const summaryText = fs.readFileSync(summaryFile, 'utf-8').toLowerCase();
    // All skills should have codex availability check
    expect(summaryText).toMatch(/which codex/);
    // All skills should have fallback behavior
    expect(summaryText).toMatch(/fallback|subagent|unavailable|not available|skip/);
    // All skills should show it's optional/non-blocking
    expect(summaryText).toMatch(/optional|non.?blocking|skip|not.*required/);
    console.log(`${skill}: Codex offering verified`);
  }

  // [skill directory, test name, feature label used in the prompt]
  const offerings: Array<[string, string, string]> = [
    ['office-hours', 'codex-offered-office-hours', 'second opinion'],
    ['plan-ceo-review', 'codex-offered-ceo-review', 'outside voice'],
    ['plan-design-review', 'codex-offered-design-review', 'design outside voices'],
    ['plan-eng-review', 'codex-offered-eng-review', 'outside voice'],
  ];
  for (const [skill, testName, featureName] of offerings) {
    testConcurrentIfSelected(testName, async () => {
      await checkCodexOffering(skill, testName, featureName);
    }, 180_000);
  }
});
// Module-level afterAll — finalize eval collector after all tests complete
afterAll(async () => {
await finalizeEvalCollector(evalCollector);
+3 -2
View File
@@ -80,8 +80,9 @@ describe('selectTests', () => {
expect(result.selected).toContain('plan-ceo-review-selective');
expect(result.selected).toContain('plan-ceo-review-benefits');
expect(result.selected).toContain('autoplan-core');
expect(result.selected.length).toBe(4);
expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 4);
expect(result.selected).toContain('codex-offered-ceo-review');
expect(result.selected.length).toBe(5);
expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 5);
});
test('global touchfile triggers ALL tests', () => {