mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 11:45:20 +02:00
feat: run Gemini and Codex E2E tests in worktrees
Switch both test suites from `cwd: ROOT` to worktree isolation. Gemini (run with `--yolo`) no longer pollutes the working tree; Codex (read-only) also runs in a worktree for consistency. Useful changes are harvested as patches for cherry-picking.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+16
-6
@@ -13,12 +13,13 @@
|
||||
* Skips gracefully when prerequisites are not met.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, afterAll } from 'bun:test';
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runCodexSkill, parseCodexJSONL, installSkillToTempHome } from './helpers/codex-session-runner';
|
||||
import type { CodexResult } from './helpers/codex-session-runner';
|
||||
import { EvalCollector } from './helpers/eval-store';
|
||||
import type { EvalTestEntry } from './helpers/eval-store';
|
||||
import { selectTests, detectBaseBranch, getChangedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES } from './helpers/touchfiles';
|
||||
import { createTestWorktree, harvestAndCleanup } from './helpers/e2e-helpers';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
@@ -118,16 +119,25 @@ afterAll(async () => {
|
||||
// --- Tests ---
|
||||
|
||||
describeCodex('Codex E2E', () => {
|
||||
let testWorktree: string;
|
||||
|
||||
beforeAll(() => {
|
||||
testWorktree = createTestWorktree('codex');
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
harvestAndCleanup('codex');
|
||||
});
|
||||
|
||||
testIfSelected('codex-discover-skill', async () => {
|
||||
// Install gstack-review skill to a temp HOME and ask Codex to list skills
|
||||
const skillDir = path.join(ROOT, '.agents', 'skills', 'gstack-review');
|
||||
const skillDir = path.join(testWorktree, '.agents', 'skills', 'gstack-review');
|
||||
|
||||
const result = await runCodexSkill({
|
||||
skillDir,
|
||||
prompt: 'List any skills or instructions you have available. Just list the names.',
|
||||
timeoutMs: 60_000,
|
||||
cwd: ROOT,
|
||||
cwd: testWorktree,
|
||||
skillName: 'gstack-review',
|
||||
});
|
||||
|
||||
@@ -153,14 +163,14 @@ describeCodex('Codex E2E', () => {
|
||||
// code review, and produce structured review output with findings/issues.
|
||||
// Accepts Codex timeout (exit 124/137) as non-failure since that's a CLI perf issue.
|
||||
testIfSelected('codex-review-findings', async () => {
|
||||
// Install gstack-review skill and ask Codex to review the current repo
|
||||
const skillDir = path.join(ROOT, '.agents', 'skills', 'gstack-review');
|
||||
// Install gstack-review skill and ask Codex to review the worktree
|
||||
const skillDir = path.join(testWorktree, '.agents', 'skills', 'gstack-review');
|
||||
|
||||
const result = await runCodexSkill({
|
||||
skillDir,
|
||||
prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.',
|
||||
timeoutMs: 540_000,
|
||||
cwd: ROOT,
|
||||
cwd: testWorktree,
|
||||
skillName: 'gstack-review',
|
||||
});
|
||||
|
||||
|
||||
+15
-5
@@ -13,11 +13,12 @@
|
||||
* Skips gracefully when prerequisites are not met.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, afterAll } from 'bun:test';
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runGeminiSkill } from './helpers/gemini-session-runner';
|
||||
import type { GeminiResult } from './helpers/gemini-session-runner';
|
||||
import { EvalCollector } from './helpers/eval-store';
|
||||
import { selectTests, detectBaseBranch, getChangedFiles, GLOBAL_TOUCHFILES } from './helpers/touchfiles';
|
||||
import { createTestWorktree, harvestAndCleanup } from './helpers/e2e-helpers';
|
||||
import * as path from 'path';
|
||||
|
||||
const ROOT = path.resolve(import.meta.dir, '..');
|
||||
@@ -114,13 +115,22 @@ afterAll(async () => {
|
||||
// --- Tests ---
|
||||
|
||||
describeGemini('Gemini E2E', () => {
|
||||
let testWorktree: string;
|
||||
|
||||
beforeAll(() => {
|
||||
testWorktree = createTestWorktree('gemini');
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
harvestAndCleanup('gemini');
|
||||
});
|
||||
|
||||
testIfSelected('gemini-discover-skill', async () => {
|
||||
// Run Gemini in the repo root where .agents/skills/ exists
|
||||
// Run Gemini in an isolated worktree (has .agents/skills/ copied from ROOT)
|
||||
const result = await runGeminiSkill({
|
||||
prompt: 'List any skills or instructions you have available. Just list the names.',
|
||||
timeoutMs: 60_000,
|
||||
cwd: ROOT,
|
||||
cwd: testWorktree,
|
||||
});
|
||||
|
||||
logGeminiCost('gemini-discover-skill', result);
|
||||
@@ -139,11 +149,11 @@ describeGemini('Gemini E2E', () => {
|
||||
}, 120_000);
|
||||
|
||||
testIfSelected('gemini-review-findings', async () => {
|
||||
// Run gstack-review skill via Gemini on this repo
|
||||
// Run gstack-review skill via Gemini on worktree (isolated from main working tree)
|
||||
const result = await runGeminiSkill({
|
||||
prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.',
|
||||
timeoutMs: 540_000,
|
||||
cwd: ROOT,
|
||||
cwd: testWorktree,
|
||||
});
|
||||
|
||||
logGeminiCost('gemini-review-findings', result);
|
||||
|
||||
Reference in New Issue
Block a user