Merge remote-tracking branch 'origin/main' into garrytan/e2e-test-triage

# Conflicts:
#	CLAUDE.md
This commit is contained in:
Garry Tan
2026-03-24 08:16:27 -07:00
6 changed files with 225 additions and 4 deletions
+13
View File
@@ -1,5 +1,18 @@
# Changelog
## [0.11.15.0] - 2026-03-24 — E2E Test Coverage for Plan Reviews & Codex
### Added
- **E2E tests verify plan review reports appear at the bottom of plans.** The `/plan-eng-review` review report is now tested end-to-end — if it stops writing `## GSTACK REVIEW REPORT` to the plan file, the test catches it.
- **E2E tests verify Codex is offered in every plan skill.** Four new lightweight tests confirm that `/office-hours`, `/plan-ceo-review`, `/plan-design-review`, and `/plan-eng-review` all check for Codex availability, prompt the user, and handle the fallback when Codex is unavailable.
### For contributors
- New E2E tests in `test/skill-e2e-plan.test.ts`: `plan-review-report`, `codex-offered-eng-review`, `codex-offered-ceo-review`, `codex-offered-office-hours`, `codex-offered-design-review`
- Updated touchfile mappings and selection count assertions
- Added `touchfiles` to the documented global touchfile list in CLAUDE.md
## [0.11.14.0] - 2026-03-24 — Windows Browse Fix
### Fixed
+1 -1
View File
@@ -1 +1 @@
0.11.14.0
0.11.15.0
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "gstack",
"version": "0.11.14.0",
"version": "0.11.15.0",
"description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
"license": "MIT",
"type": "module",
+14
View File
@@ -68,6 +68,13 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
'plan-ceo-review-benefits': ['plan-ceo-review/**', 'scripts/gen-skill-docs.ts'],
'plan-eng-review': ['plan-eng-review/**'],
'plan-eng-review-artifact': ['plan-eng-review/**'],
'plan-review-report': ['plan-eng-review/**', 'scripts/gen-skill-docs.ts'],
// Codex offering verification
'codex-offered-office-hours': ['office-hours/**', 'scripts/gen-skill-docs.ts'],
'codex-offered-ceo-review': ['plan-ceo-review/**', 'scripts/gen-skill-docs.ts'],
'codex-offered-design-review': ['plan-design-review/**', 'scripts/gen-skill-docs.ts'],
'codex-offered-eng-review': ['plan-eng-review/**', 'scripts/gen-skill-docs.ts'],
// Ship
'ship-base-branch': ['ship/**', 'bin/gstack-repo-mode'],
@@ -186,6 +193,13 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
'plan-eng-review': 'periodic',
'plan-eng-review-artifact': 'periodic',
'plan-eng-coverage-audit': 'gate',
'plan-review-report': 'gate',
// Codex offering verification
'codex-offered-office-hours': 'gate',
'codex-offered-ceo-review': 'gate',
'codex-offered-design-review': 'gate',
'codex-offered-eng-review': 'gate',
// Ship — gate (end-to-end ship path)
'ship-base-branch': 'gate',
+193
View File
@@ -535,6 +535,199 @@ Write your summary to ${benefitsDir}/benefits-summary.md`,
}, 180_000);
});
// --- Plan Review Report E2E ---
// Verifies that plan-eng-review writes a "## GSTACK REVIEW REPORT" section
// to the bottom of the plan file (the living review status footer).
describeIfSelected('Plan Review Report E2E', ['plan-review-report'], () => {
let planDir: string;
beforeAll(() => {
planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-review-report-'));
const run = (cmd: string, args: string[]) =>
spawnSync(cmd, args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });
run('git', ['init', '-b', 'main']);
run('git', ['config', 'user.email', 'test@test.com']);
run('git', ['config', 'user.name', 'Test']);
fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Add Notifications System
## Context
We're building a real-time notification system for our SaaS app.
## Changes
1. WebSocket server for push notifications
2. Notification preferences API
3. Email digest fallback for offline users
4. PostgreSQL table for notification storage
## Architecture
- WebSocket: Socket.io on Express
- Queue: Bull + Redis for email digests
- Storage: PostgreSQL notifications table
- Frontend: React toast component
## Open questions
- Retry policy for failed WebSocket delivery?
- Max notifications stored per user?
`);
run('git', ['add', '.']);
run('git', ['commit', '-m', 'add plan']);
// Copy plan-eng-review skill
fs.mkdirSync(path.join(planDir, 'plan-eng-review'), { recursive: true });
fs.copyFileSync(
path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
path.join(planDir, 'plan-eng-review', 'SKILL.md'),
);
});
afterAll(() => {
try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
});
test('/plan-eng-review writes GSTACK REVIEW REPORT to plan file', async () => {
const result = await runSkillTest({
prompt: `Read plan-eng-review/SKILL.md for the review workflow.
Read plan.md — that's the plan to review. This is a standalone plan document, not a codebase — skip any codebase exploration steps.
Proceed directly to the full review. Skip any AskUserQuestion calls — this is non-interactive.
Skip the preamble bash block, lake intro, telemetry, and contributor mode sections.
CRITICAL REQUIREMENT: plan.md IS the plan file for this review session. After completing your review, you MUST write a "## GSTACK REVIEW REPORT" section to the END of plan.md, exactly as described in the "Plan File Review Report" section of SKILL.md. If gstack-review-read is not available or returns NO_REVIEWS, write the placeholder table with all four review rows (CEO, Codex, Eng, Design). Use the Edit tool to append to plan.md — do NOT overwrite the existing plan content.
This review report at the bottom of the plan is the MOST IMPORTANT deliverable of this test.`,
workingDirectory: planDir,
maxTurns: 20,
timeout: 360_000,
testName: 'plan-review-report',
runId,
model: 'claude-opus-4-6',
});
logCost('/plan-eng-review report', result);
recordE2E(evalCollector, '/plan-review-report', 'Plan Review Report E2E', result, {
passed: ['success', 'error_max_turns'].includes(result.exitReason),
});
expect(['success', 'error_max_turns']).toContain(result.exitReason);
// Verify the review report was written to the plan file
const planContent = fs.readFileSync(path.join(planDir, 'plan.md'), 'utf-8');
// Original plan content should still be present
expect(planContent).toContain('# Plan: Add Notifications System');
expect(planContent).toContain('WebSocket');
// Review report section must exist
expect(planContent).toContain('## GSTACK REVIEW REPORT');
// Report should be at the bottom of the file
const reportIndex = planContent.lastIndexOf('## GSTACK REVIEW REPORT');
const afterReport = planContent.slice(reportIndex);
// Should contain the review table with standard rows
expect(afterReport).toMatch(/\|\s*Review\s*\|/);
expect(afterReport).toContain('CEO Review');
expect(afterReport).toContain('Eng Review');
expect(afterReport).toContain('Design Review');
console.log('Plan review report found at bottom of plan.md');
}, 420_000);
});
// --- Codex Offering E2E ---
// Verifies that Codex is properly offered (with availability check, user prompt,
// and fallback) in office-hours, plan-ceo-review, plan-design-review, plan-eng-review.
describeIfSelected('Codex Offering E2E', [
'codex-offered-office-hours', 'codex-offered-ceo-review',
'codex-offered-design-review', 'codex-offered-eng-review',
], () => {
let testDir: string;
beforeAll(() => {
testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-codex-offer-'));
const run = (cmd: string, args: string[]) =>
spawnSync(cmd, args, { cwd: testDir, stdio: 'pipe', timeout: 5000 });
run('git', ['init', '-b', 'main']);
run('git', ['config', 'user.email', 'test@test.com']);
run('git', ['config', 'user.name', 'Test']);
fs.writeFileSync(path.join(testDir, 'README.md'), '# Test Project\n');
run('git', ['add', '.']);
run('git', ['commit', '-m', 'init']);
// Copy all 4 SKILL.md files
for (const skill of ['office-hours', 'plan-ceo-review', 'plan-design-review', 'plan-eng-review']) {
fs.mkdirSync(path.join(testDir, skill), { recursive: true });
fs.copyFileSync(
path.join(ROOT, skill, 'SKILL.md'),
path.join(testDir, skill, 'SKILL.md'),
);
}
});
afterAll(() => {
try { fs.rmSync(testDir, { recursive: true, force: true }); } catch {}
});
async function checkCodexOffering(skill: string, testName: string, featureName: string) {
const result = await runSkillTest({
prompt: `Read ${skill}/SKILL.md. Search for ALL sections related to "codex", "outside voice", or "second opinion".
Summarize the Codex/${featureName} integration — answer these specific questions:
1. How is Codex availability checked? (what exact bash command?)
2. How is the user prompted? (via AskUserQuestion? what are the options?)
3. What happens when Codex is NOT available? (fallback to subagent? skip entirely?)
4. Is this step blocking (gates the workflow) or optional (can be skipped)?
5. What prompt/context is sent to Codex?
Write your summary to ${testDir}/${testName}-summary.md`,
workingDirectory: testDir,
maxTurns: 8,
timeout: 120_000,
testName,
runId,
});
logCost(`/${skill} codex offering`, result);
recordE2E(evalCollector, `/${testName}`, 'Codex Offering E2E', result);
expect(result.exitReason).toBe('success');
const summaryPath = path.join(testDir, `${testName}-summary.md`);
expect(fs.existsSync(summaryPath)).toBe(true);
const summary = fs.readFileSync(summaryPath, 'utf-8').toLowerCase();
// All skills should have codex availability check
expect(summary).toMatch(/which codex/);
// All skills should have fallback behavior
expect(summary).toMatch(/fallback|subagent|unavailable|not available|skip/);
// All skills should show it's optional/non-blocking
expect(summary).toMatch(/optional|non.?blocking|skip|not.*required/);
console.log(`${skill}: Codex offering verified`);
}
testConcurrentIfSelected('codex-offered-office-hours', async () => {
await checkCodexOffering('office-hours', 'codex-offered-office-hours', 'second opinion');
}, 180_000);
testConcurrentIfSelected('codex-offered-ceo-review', async () => {
await checkCodexOffering('plan-ceo-review', 'codex-offered-ceo-review', 'outside voice');
}, 180_000);
testConcurrentIfSelected('codex-offered-design-review', async () => {
await checkCodexOffering('plan-design-review', 'codex-offered-design-review', 'design outside voices');
}, 180_000);
testConcurrentIfSelected('codex-offered-eng-review', async () => {
await checkCodexOffering('plan-eng-review', 'codex-offered-eng-review', 'outside voice');
}, 180_000);
});
// Module-level afterAll — finalize eval collector after all tests complete
afterAll(async () => {
await finalizeEvalCollector(evalCollector);
+3 -2
View File
@@ -81,8 +81,9 @@ describe('selectTests', () => {
expect(result.selected).toContain('plan-ceo-review-selective');
expect(result.selected).toContain('plan-ceo-review-benefits');
expect(result.selected).toContain('autoplan-core');
expect(result.selected.length).toBe(4);
expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 4);
expect(result.selected).toContain('codex-offered-ceo-review');
expect(result.selected.length).toBe(5);
expect(result.skipped.length).toBe(Object.keys(E2E_TOUCHFILES).length - 5);
});
test('global touchfile triggers ALL tests', () => {