diff --git a/test/skill-e2e.test.ts b/test/skill-e2e.test.ts
index 13539278..d8cbc36a 100644
--- a/test/skill-e2e.test.ts
+++ b/test/skill-e2e.test.ts
@@ -2841,6 +2841,92 @@ Output the diagram directly.`,
   }, 180_000);
 });
 
+// --- Codex skill E2E ---
+
+// Runs the /codex skill end-to-end against a throwaway git repo whose
+// feature branch adds a known-vulnerable controller on top of `main`.
+describeIfSelected('Codex skill E2E', ['codex-review'], () => {
+  let codexDir: string;
+
+  beforeAll(() => {
+    codexDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-codex-'));
+
+    // Fail fast on a broken fixture: a silently failed git step would
+    // otherwise only show up later as a confusing agent/E2E failure.
+    const run = (cmd: string, args: string[]) => {
+      const proc = spawnSync(cmd, args, { cwd: codexDir, stdio: 'pipe', timeout: 5000 });
+      if (proc.error || proc.status !== 0) {
+        const detail = proc.error ? proc.error.message : String(proc.stderr ?? '').trim();
+        throw new Error(`fixture setup failed: ${cmd} ${args.join(' ')} (status ${proc.status}): ${detail}`);
+      }
+      return proc;
+    };
+
+    run('git', ['init']);
+    run('git', ['config', 'user.email', 'test@test.com']);
+    run('git', ['config', 'user.name', 'Test']);
+    // Keep commits working on machines with global commit signing enabled
+    run('git', ['config', 'commit.gpgsign', 'false']);
+
+    // Commit a clean base on main
+    fs.writeFileSync(path.join(codexDir, 'app.rb'), '# clean base\nclass App\nend\n');
+    run('git', ['add', 'app.rb']);
+    run('git', ['commit', '-m', 'initial commit']);
+    // `git init` honors init.defaultBranch (often `master`), but the prompt
+    // reviews against `main`, so force the base branch name.
+    run('git', ['branch', '-M', 'main']);
+
+    // Create feature branch with vulnerable code (reuse review fixture)
+    run('git', ['checkout', '-b', 'feature/add-vuln']);
+    const vulnContent = fs.readFileSync(path.join(ROOT, 'test', 'fixtures', 'review-eval-vuln.rb'), 'utf-8');
+    fs.writeFileSync(path.join(codexDir, 'user_controller.rb'), vulnContent);
+    run('git', ['add', 'user_controller.rb']);
+    run('git', ['commit', '-m', 'add vulnerable controller']);
+
+    // Copy the codex skill file
+    fs.copyFileSync(path.join(ROOT, 'codex', 'SKILL.md'), path.join(codexDir, 'codex-SKILL.md'));
+  });
+
+  afterAll(() => {
+    // Best-effort cleanup: a leftover temp dir must never fail the suite
+    try { fs.rmSync(codexDir, { recursive: true, force: true }); } catch {}
+  });
+
+  test('/codex review produces findings and GATE verdict', async () => {
+    // Check codex is available — skip if not installed
+    const codexCheck = spawnSync('which', ['codex'], { stdio: 'pipe', timeout: 3000 });
+    if (codexCheck.status !== 0) {
+      console.warn('codex CLI not installed — skipping E2E test');
+      return;
+    }
+
+    const result = await runSkillTest({
+      prompt: `You are in a git repo on branch feature/add-vuln with changes against main.
+Read codex-SKILL.md for the /codex skill instructions.
+Run /codex review to review the current diff against main.
+Write the full output (including the GATE verdict) to ${codexDir}/codex-output.md`,
+      workingDirectory: codexDir,
+      maxTurns: 10,
+      timeout: 300_000,
+      testName: 'codex-review',
+      runId,
+    });
+
+    logCost('/codex review', result);
+    recordE2E('/codex review', 'Codex skill E2E', result);
+    expect(result.exitReason).toBe('success');
+
+    // The prompt requires this file, so a missing file is a failure, not a pass
+    const outputPath = path.join(codexDir, 'codex-output.md');
+    expect(fs.existsSync(outputPath)).toBe(true);
+
+    // Should contain the CODEX SAYS header or GATE verdict
+    const output = fs.readFileSync(outputPath, 'utf-8');
+    const hasCodexOutput = ['CODEX', 'GATE', 'codex'].some((marker) => output.includes(marker));
+    expect(hasCodexOutput).toBe(true);
+  }, 360_000);
+});
+
 // Module-level afterAll — finalize eval collector after all tests complete
 afterAll(async () => {
   if (evalCollector) {
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index b26f7c31..422e4932 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -1121,3 +1121,101 @@ describe('QA report template', () => {
     expect(content).toContain('**Precondition:**');
   });
 });
+
+// --- Codex skill validation ---
+
+// Static content checks: each test reads a SKILL.md from the repo and
+// asserts that the expected literal markers are present (or absent).
+describe('Codex skill', () => {
+  // Read ROOT/{skillDir}/SKILL.md as UTF-8 text
+  const readSkill = (skillDir: string): string =>
+    fs.readFileSync(path.join(ROOT, skillDir, 'SKILL.md'), 'utf-8');
+
+  test('codex/SKILL.md exists and has correct frontmatter', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('name: codex');
+    expect(content).toContain('version: 1.0.0');
+    expect(content).toContain('allowed-tools:');
+  });
+
+  test('codex/SKILL.md contains all three modes', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('Step 2A: Review Mode');
+    expect(content).toContain('Step 2B: Challenge');
+    expect(content).toContain('Step 2C: Consult Mode');
+  });
+
+  test('codex/SKILL.md contains gate verdict logic', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('[P1]');
+    expect(content).toContain('GATE: PASS');
+    expect(content).toContain('GATE: FAIL');
+  });
+
+  test('codex/SKILL.md contains session continuity', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('codex-session-id');
+    expect(content).toContain('codex exec resume');
+  });
+
+  test('codex/SKILL.md contains cost tracking', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('tokens used');
+    expect(content).toContain('Est. cost');
+  });
+
+  test('codex/SKILL.md contains cross-model comparison', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('CROSS-MODEL ANALYSIS');
+    expect(content).toContain('Agreement rate');
+  });
+
+  test('codex/SKILL.md contains review log persistence', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('codex-review');
+    expect(content).toContain('reviews.jsonl');
+  });
+
+  test('codex/SKILL.md uses which for binary discovery, not hardcoded path', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('which codex');
+    expect(content).not.toContain('/opt/homebrew/bin/codex');
+  });
+
+  test('codex/SKILL.md contains error handling for missing binary and API key', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('NOT_FOUND');
+    expect(content).toContain('OPENAI_API_KEY');
+  });
+
+  test('codex/SKILL.md uses mktemp for temp files', () => {
+    const content = readSkill('codex');
+    expect(content).toContain('mktemp');
+  });
+
+  test('codex integration in /review offers second opinion', () => {
+    const content = readSkill('review');
+    expect(content).toContain('Codex second opinion');
+    expect(content).toContain('codex review');
+    expect(content).toContain('adversarial');
+  });
+
+  test('codex integration in /ship offers review gate', () => {
+    const content = readSkill('ship');
+    expect(content).toContain('Codex');
+    expect(content).toContain('codex review');
+    expect(content).toContain('codex-review');
+  });
+
+  test('codex integration in /plan-eng-review offers plan critique', () => {
+    const content = readSkill('plan-eng-review');
+    expect(content).toContain('Codex');
+    expect(content).toContain('codex exec');
+  });
+
+  test('Review Readiness Dashboard includes Codex Review row', () => {
+    const content = readSkill('ship');
+    expect(content).toContain('Codex Review');
+    expect(content).toContain('codex-review');
+  });
+});