gstack/test/skill-e2e-cso.test.ts

import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { runSkillTest } from './helpers/session-runner';
import {
  ROOT, runId, evalsEnabled,
  describeIfSelected, logCost, recordE2E,
  createEvalCollector, finalizeEvalCollector,
} from './helpers/e2e-helpers';
import { spawnSync } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';

const evalCollector = createEvalCollector('e2e-cso');

afterAll(() => {
  finalizeEvalCollector(evalCollector);
});

// --- CSO v2 E2E Tests ---

describeIfSelected('CSO v2 — full audit', ['cso-full-audit'], () => {
  let csoDir: string;

  beforeAll(() => {
    csoDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-'));

    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: csoDir, stdio: 'pipe', timeout: 5000 });

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Create a minimal app with a planted vulnerability
    fs.writeFileSync(path.join(csoDir, 'package.json'), JSON.stringify({
      name: 'cso-test-app',
      version: '1.0.0',
      dependencies: { express: '4.18.0' },
    }, null, 2));

    // Planted vuln: hardcoded API key
    fs.writeFileSync(path.join(csoDir, 'server.ts'), `
import express from 'express';
const app = express();
const API_KEY = "sk-1234567890abcdef1234567890abcdef";
app.get('/api/data', (req, res) => {
  const id = req.query.id;
  res.json({ data: \`result for \${id}\` });
});
app.listen(3000);
`);

    // Planted vuln: .env tracked by git
    fs.writeFileSync(path.join(csoDir, '.env'), 'DATABASE_URL=postgres://admin:secretpass@prod.db.example.com:5432/myapp\n');

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);
  });

  afterAll(() => {
    try { fs.rmSync(csoDir, { recursive: true, force: true }); } catch {}
  });

  test('/cso finds planted vulnerabilities', async () => {
    const result = await runSkillTest({
      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.

Run /cso on this repo (full daily audit, no flags).

IMPORTANT:
- Do NOT use AskUserQuestion — skip any interactive prompts.
- Focus on finding the planted vulnerabilities in this small repo.
- Produce the SECURITY FINDINGS table.
- Save the report to .gstack/security-reports/.`,
      workingDirectory: csoDir,
      maxTurns: 30,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'Agent'],
      timeout: 300_000,
    });

    logCost('cso', result);
    expect(result.exitReason).toBe('success');

    // Should detect hardcoded API key
    const output = result.output.toLowerCase();
    expect(
      output.includes('sk-') || output.includes('hardcoded') || output.includes('api key') || output.includes('api_key')
    ).toBe(true);

    // Should detect .env tracked by git
    expect(
      output.includes('.env') && (output.includes('tracked') || output.includes('gitignore'))
    ).toBe(true);

    // Should produce a findings table
    expect(
      output.includes('security findings') || output.includes('SECURITY FINDINGS')
    ).toBe(true);

    // Should save a report
    const reportDir = path.join(csoDir, '.gstack', 'security-reports');
    const reportExists = fs.existsSync(reportDir);
    if (reportExists) {
      const reports = fs.readdirSync(reportDir).filter(f => f.endsWith('.json'));
      expect(reports.length).toBeGreaterThanOrEqual(1);
    }

    recordE2E(evalCollector, 'cso-full-audit', 'e2e-cso', result);
  }, 300_000);
});

describeIfSelected('CSO v2 — diff mode', ['cso-diff-mode'], () => {
  let csoDiffDir: string;

  beforeAll(() => {
    csoDiffDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-diff-'));

    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: csoDiffDir, stdio: 'pipe', timeout: 5000 });

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Clean initial commit
    fs.writeFileSync(path.join(csoDiffDir, 'package.json'), JSON.stringify({
      name: 'cso-diff-test', version: '1.0.0',
    }, null, 2));
    fs.writeFileSync(path.join(csoDiffDir, 'app.ts'), 'console.log("hello");\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);

    // Feature branch with a vuln
    run('git', ['checkout', '-b', 'feat/add-webhook']);
    fs.writeFileSync(path.join(csoDiffDir, 'webhook.ts'), `
import express from 'express';
const app = express();
// No signature verification!
app.post('/webhook/stripe', (req, res) => {
  const event = req.body;
  processPayment(event);
  res.sendStatus(200);
});
`);
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'feat: add webhook']);
  });

  afterAll(() => {
    try { fs.rmSync(csoDiffDir, { recursive: true, force: true }); } catch {}
  });

  test('/cso --diff scopes to branch changes', async () => {
    const result = await runSkillTest({
      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.

Run /cso --diff on this repo. The base branch is "main".

IMPORTANT:
- Do NOT use AskUserQuestion — skip any interactive prompts.
- Focus on changes in the current branch vs main.
- The webhook.ts file was added on this branch — it should be analyzed.`,
      workingDirectory: csoDiffDir,
      maxTurns: 25,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'Agent'],
      timeout: 240_000,
    });

    logCost('cso', result);
    expect(result.exitReason).toBe('success');

    const output = result.output.toLowerCase();
    // Should mention webhook and missing signature verification
    expect(
      output.includes('webhook') && (output.includes('signature') || output.includes('verify'))
    ).toBe(true);

    recordE2E(evalCollector, 'cso-diff-mode', 'e2e-cso', result);
  }, 240_000);
});

describeIfSelected('CSO v2 — infra scope', ['cso-infra-scope'], () => {
  let csoInfraDir: string;

  beforeAll(() => {
    csoInfraDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-infra-'));

    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: csoInfraDir, stdio: 'pipe', timeout: 5000 });

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // CI workflow with unpinned action
    fs.mkdirSync(path.join(csoInfraDir, '.github', 'workflows'), { recursive: true });
    fs.writeFileSync(path.join(csoInfraDir, '.github', 'workflows', 'ci.yml'), `
name: CI
on: [push]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: some-third-party/action@main
      - run: echo "Building..."
`);

    // Dockerfile running as root
    fs.writeFileSync(path.join(csoInfraDir, 'Dockerfile'), `
FROM node:20
WORKDIR /app
COPY . .
RUN npm install
EXPOSE 3000
CMD ["node", "server.js"]
`);

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial']);
  });

  afterAll(() => {
    try { fs.rmSync(csoInfraDir, { recursive: true, force: true }); } catch {}
  });

  test('/cso --infra runs infrastructure phases only', async () => {
    const result = await runSkillTest({
      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.

Run /cso --infra on this repo. This should run infrastructure-only phases (0-6, 12-14).

IMPORTANT:
- Do NOT use AskUserQuestion — skip any interactive prompts.
- This is a TINY repo with only 3 files: .github/workflows/ci.yml, Dockerfile, and package.json. Do NOT waste turns exploring — just read those files directly and audit them.
- The Dockerfile has no USER directive (runs as root). The CI workflow uses an unpinned third-party GitHub Action (some-third-party/action@main).
- Focus on infrastructure findings, NOT code-level OWASP scanning.
- Skip the preamble (gstack-update-check, telemetry, etc.) — go straight to the audit.
- Do NOT use the Agent tool for exploration or verification — read the files yourself. This repo is too small to need subagents.`,
      workingDirectory: csoInfraDir,
      maxTurns: 30,
      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
      timeout: 360_000,
    });

    logCost('cso', result);
    expect(result.exitReason).toBe('success');

    const output = result.output.toLowerCase();
    // Should mention unpinned action or Dockerfile issues
    expect(
      output.includes('unpinned') || output.includes('third-party') ||
      output.includes('user directive') || output.includes('root')
    ).toBe(true);

    recordE2E(evalCollector, 'cso-infra-scope', 'e2e-cso', result);
  }, 360_000);
});