chore: merge origin/main (resolve CHANGELOG/VERSION/SKILL.md conflicts)

2026-07-06 16:17:56 +02:00 · 2026-03-23 09:10:26 -07:00
parent 55f9bb46c9 ffd9ab29b9
commit ab7b6d5ce0
69 changed files with 1735 additions and 531 deletions
@@ -139,6 +139,9 @@ describeCodex('Codex E2E', () => {

    expect(result.exitCode).toBe(0);
    expect(result.output.length).toBeGreaterThan(0);
+    // Skill loading errors mean our generated SKILL.md files are broken
+    expect(result.stderr).not.toContain('invalid');
+    expect(result.stderr).not.toContain('Skipped loading');
    // The output should reference the skill name in some form
    const outputLower = result.output.toLowerCase();
    expect(
@@ -139,6 +139,25 @@ describe('gen-skill-docs', () => {
    }
  });

+  test(`every Codex SKILL.md description stays within ${MAX_SKILL_DESCRIPTION_LENGTH} chars`, () => {
+    const agentsDir = path.join(ROOT, '.agents', 'skills');
+    if (!fs.existsSync(agentsDir)) return; // skip if not generated
+    for (const entry of fs.readdirSync(agentsDir, { withFileTypes: true })) {
+      if (!entry.isDirectory()) continue;
+      const skillMd = path.join(agentsDir, entry.name, 'SKILL.md');
+      if (!fs.existsSync(skillMd)) continue;
+      const content = fs.readFileSync(skillMd, 'utf-8');
+      const description = extractDescription(content);
+      expect(description.length).toBeLessThanOrEqual(MAX_SKILL_DESCRIPTION_LENGTH);
+    }
+  });
+
+  test('package.json version matches VERSION file', () => {
+    const pkg = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf-8'));
+    const version = fs.readFileSync(path.join(ROOT, 'VERSION'), 'utf-8').trim();
+    expect(pkg.version).toBe(version);
+  });
+
  test('generated files are fresh (match --dry-run)', () => {
    const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--dry-run'], {
      cwd: ROOT,
@@ -206,6 +225,17 @@ describe('gen-skill-docs', () => {
    expect(content).toContain('~/.gstack/analytics');
  });

+  test('preamble .pending-* glob is zsh-safe (uses find, not shell glob)', () => {
+    for (const skill of ALL_SKILLS) {
+      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
+      if (!content.includes('.pending-')) continue;
+      // Must NOT have a bare shell glob ".pending-*" outside of find's -name argument
+      expect(content).not.toMatch(/for _PF in [^\n]*\/\.pending-\*/);
+      // Must use find to avoid zsh NOMATCH error on glob expansion
+      expect(content).toContain("find ~/.gstack/analytics -maxdepth 1 -name '.pending-*'");
+    }
+  });
+
  test('preamble-using skills have correct skill name in telemetry', () => {
    const PREAMBLE_SKILLS = [
      { dir: '.', name: 'gstack' },
@@ -399,6 +429,20 @@ describe('REVIEW_DASHBOARD resolver', () => {
    expect(content).toContain('REVIEW READINESS DASHBOARD');
  });

+  test('dashboard treats review as a valid Eng Review source', () => {
+    const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
+    expect(content).toContain('plan-eng-review, review, plan-design-review');
+    expect(content).toContain('`review` (diff-scoped pre-landing review)');
+    expect(content).toContain('`plan-eng-review` (plan-stage architecture review)');
+    expect(content).toContain('from either \\`review\\` or \\`plan-eng-review\\`');
+  });
+
+  test('shared dashboard propagates review source to plan-eng-review', () => {
+    const content = fs.readFileSync(path.join(ROOT, 'plan-eng-review', 'SKILL.md'), 'utf-8');
+    expect(content).toContain('plan-eng-review, review, plan-design-review');
+    expect(content).toContain('`review` (diff-scoped pre-landing review)');
+  });
+
  test('resolver output contains key dashboard elements', () => {
    const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8');
    expect(content).toContain('VERDICT');
@@ -928,6 +972,14 @@ describe('Codex generation (--host codex)', () => {
    }
  });

+  test('root gstack bundle has OpenAI metadata for Codex skill browsing', () => {
+    const rootMetadata = path.join(ROOT, 'agents', 'openai.yaml');
+    expect(fs.existsSync(rootMetadata)).toBe(true);
+    const content = fs.readFileSync(rootMetadata, 'utf-8');
+    expect(content).toContain('display_name: "gstack"');
+    expect(content).toContain('Use $gstack to locate the bundled gstack skills.');
+  });
+
  test('codexSkillName mapping: root is gstack, others are gstack-{dir}', () => {
    // Root → gstack
    expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack', 'SKILL.md'))).toBe(true);
@@ -957,6 +1009,17 @@ describe('Codex generation (--host codex)', () => {
    }
  });

+  test('all Codex skills have agents/openai.yaml metadata', () => {
+    for (const skill of CODEX_SKILLS) {
+      const metadata = path.join(AGENTS_DIR, skill.codexName, 'agents', 'openai.yaml');
+      expect(fs.existsSync(metadata)).toBe(true);
+      const content = fs.readFileSync(metadata, 'utf-8');
+      expect(content).toContain(`display_name: "${skill.codexName}"`);
+      expect(content).toContain('short_description:');
+      expect(content).toContain('allow_implicit_invocation: true');
+    }
+  });
+
  test('no .claude/skills/ in Codex output', () => {
    for (const skill of CODEX_SKILLS) {
      const content = fs.readFileSync(path.join(AGENTS_DIR, skill.codexName, 'SKILL.md'), 'utf-8');
@@ -27,6 +27,7 @@ export interface CodexResult {
  durationMs: number;       // Wall clock time
  sessionId: string | null; // Thread ID for session continuity
  rawLines: string[];       // Raw JSONL lines for debugging
+  stderr: string;           // Stderr output (skill loading errors, auth failures)
 }

 // --- JSONL parser (ported from Python in codex/SKILL.md.tmpl) ---
@@ -98,7 +99,8 @@ export function parseCodexJSONL(lines: string[]): ParsedCodexJSONL {

 /**
 * Install a SKILL.md into a temp HOME directory for Codex to discover.
- * Creates ~/.codex/skills/{skillName}/SKILL.md in the temp HOME.
+ * Creates ~/.codex/skills/{skillName}/SKILL.md in the temp HOME and copies
+ * agents/openai.yaml when present so Codex sees the same metadata as a real install.
 *
 * Returns the temp HOME path. Caller is responsible for cleanup.
 */
@@ -116,6 +118,13 @@ export function installSkillToTempHome(
    fs.copyFileSync(srcSkill, path.join(destDir, 'SKILL.md'));
  }

+  const srcOpenAIYaml = path.join(skillDir, 'agents', 'openai.yaml');
+  if (fs.existsSync(srcOpenAIYaml)) {
+    const destAgentsDir = path.join(destDir, 'agents');
+    fs.mkdirSync(destAgentsDir, { recursive: true });
+    fs.copyFileSync(srcOpenAIYaml, path.join(destAgentsDir, 'openai.yaml'));
+  }
+
  return home;
 }

@@ -159,6 +168,7 @@ export async function runCodexSkill(opts: {
      durationMs: Date.now() - startTime,
      sessionId: null,
      rawLines: [],
+      stderr: '',
    };
  }

@@ -274,6 +284,7 @@ export async function runCodexSkill(opts: {
      durationMs,
      sessionId: parsed.sessionId,
      rawLines: collectedLines,
+      stderr,
    };
  } finally {
    // Clean up temp HOME
@@ -83,6 +83,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  // Global discover
  'global-discover':   ['bin/gstack-global-discover.ts', 'test/global-discover.test.ts'],

+  // CSO
+  'cso-full-audit':   ['cso/**'],
+  'cso-diff-mode':    ['cso/**'],
+  'cso-infra-scope':  ['cso/**'],
+
  // Document-release
  'document-release': ['document-release/**'],

@@ -0,0 +1,258 @@
+import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
+import { runSkillTest } from './helpers/session-runner';
+import {
+  ROOT, runId, evalsEnabled,
+  describeIfSelected, logCost, recordE2E,
+  createEvalCollector, finalizeEvalCollector,
+} from './helpers/e2e-helpers';
+import { spawnSync } from 'child_process';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+
+const evalCollector = createEvalCollector('e2e-cso');
+
+afterAll(() => {
+  finalizeEvalCollector(evalCollector);
+});
+
+// --- CSO v2 E2E Tests ---
+
+describeIfSelected('CSO v2 — full audit', ['cso-full-audit'], () => {
+  let csoDir: string;
+
+  beforeAll(() => {
+    csoDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-'));
+
+    const run = (cmd: string, args: string[]) =>
+      spawnSync(cmd, args, { cwd: csoDir, stdio: 'pipe', timeout: 5000 });
+
+    run('git', ['init', '-b', 'main']);
+    run('git', ['config', 'user.email', 'test@test.com']);
+    run('git', ['config', 'user.name', 'Test']);
+
+    // Create a minimal app with a planted vulnerability
+    fs.writeFileSync(path.join(csoDir, 'package.json'), JSON.stringify({
+      name: 'cso-test-app',
+      version: '1.0.0',
+      dependencies: { express: '4.18.0' },
+    }, null, 2));
+
+    // Planted vuln: hardcoded API key
+    fs.writeFileSync(path.join(csoDir, 'server.ts'), `
+import express from 'express';
+const app = express();
+const API_KEY = "sk-1234567890abcdef1234567890abcdef";
+app.get('/api/data', (req, res) => {
+  const id = req.query.id;
+  res.json({ data: \`result for \${id}\` });
+});
+app.listen(3000);
+`);
+
+    // Planted vuln: .env tracked by git
+    fs.writeFileSync(path.join(csoDir, '.env'), 'DATABASE_URL=postgres://admin:secretpass@prod.db.example.com:5432/myapp\n');
+
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'initial']);
+  });
+
+  afterAll(() => {
+    try { fs.rmSync(csoDir, { recursive: true, force: true }); } catch {}
+  });
+
+  test('/cso finds planted vulnerabilities', async () => {
+    const result = await runSkillTest({
+      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.
+
+Run /cso on this repo (full daily audit, no flags).
+
+IMPORTANT:
+- Do NOT use AskUserQuestion — skip any interactive prompts.
+- Focus on finding the planted vulnerabilities in this small repo.
+- Produce the SECURITY FINDINGS table.
+- Save the report to .gstack/security-reports/.`,
+      workingDirectory: csoDir,
+      maxTurns: 30,
+      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'Agent'],
+      timeout: 300_000,
+    });
+
+    logCost('cso', result);
+    expect(result.exitReason).toBe('success');
+
+    // Should detect hardcoded API key
+    const output = result.output.toLowerCase();
+    expect(
+      output.includes('sk-') || output.includes('hardcoded') || output.includes('api key') || output.includes('api_key')
+    ).toBe(true);
+
+    // Should detect .env tracked by git
+    expect(
+      output.includes('.env') && (output.includes('tracked') || output.includes('gitignore'))
+    ).toBe(true);
+
+    // Should produce a findings table
+    expect(
+      output.includes('security findings') || output.includes('SECURITY FINDINGS')
+    ).toBe(true);
+
+    // Should save a report
+    const reportDir = path.join(csoDir, '.gstack', 'security-reports');
+    const reportExists = fs.existsSync(reportDir);
+    if (reportExists) {
+      const reports = fs.readdirSync(reportDir).filter(f => f.endsWith('.json'));
+      expect(reports.length).toBeGreaterThanOrEqual(1);
+    }
+
+    recordE2E(evalCollector, 'cso-full-audit', 'e2e-cso', result);
+  }, 300_000);
+});
+
+describeIfSelected('CSO v2 — diff mode', ['cso-diff-mode'], () => {
+  let csoDiffDir: string;
+
+  beforeAll(() => {
+    csoDiffDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-diff-'));
+
+    const run = (cmd: string, args: string[]) =>
+      spawnSync(cmd, args, { cwd: csoDiffDir, stdio: 'pipe', timeout: 5000 });
+
+    run('git', ['init', '-b', 'main']);
+    run('git', ['config', 'user.email', 'test@test.com']);
+    run('git', ['config', 'user.name', 'Test']);
+
+    // Clean initial commit
+    fs.writeFileSync(path.join(csoDiffDir, 'package.json'), JSON.stringify({
+      name: 'cso-diff-test', version: '1.0.0',
+    }, null, 2));
+    fs.writeFileSync(path.join(csoDiffDir, 'app.ts'), 'console.log("hello");\n');
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'initial']);
+
+    // Feature branch with a vuln
+    run('git', ['checkout', '-b', 'feat/add-webhook']);
+    fs.writeFileSync(path.join(csoDiffDir, 'webhook.ts'), `
+import express from 'express';
+const app = express();
+// No signature verification!
+app.post('/webhook/stripe', (req, res) => {
+  const event = req.body;
+  processPayment(event);
+  res.sendStatus(200);
+});
+`);
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'feat: add webhook']);
+  });
+
+  afterAll(() => {
+    try { fs.rmSync(csoDiffDir, { recursive: true, force: true }); } catch {}
+  });
+
+  test('/cso --diff scopes to branch changes', async () => {
+    const result = await runSkillTest({
+      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.
+
+Run /cso --diff on this repo. The base branch is "main".
+
+IMPORTANT:
+- Do NOT use AskUserQuestion — skip any interactive prompts.
+- Focus on changes in the current branch vs main.
+- The webhook.ts file was added on this branch — it should be analyzed.`,
+      workingDirectory: csoDiffDir,
+      maxTurns: 25,
+      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'Agent'],
+      timeout: 240_000,
+    });
+
+    logCost('cso', result);
+    expect(result.exitReason).toBe('success');
+
+    const output = result.output.toLowerCase();
+    // Should mention webhook and missing signature verification
+    expect(
+      output.includes('webhook') && (output.includes('signature') || output.includes('verify'))
+    ).toBe(true);
+
+    recordE2E(evalCollector, 'cso-diff-mode', 'e2e-cso', result);
+  }, 240_000);
+});
+
+describeIfSelected('CSO v2 — infra scope', ['cso-infra-scope'], () => {
+  let csoInfraDir: string;
+
+  beforeAll(() => {
+    csoInfraDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-cso-infra-'));
+
+    const run = (cmd: string, args: string[]) =>
+      spawnSync(cmd, args, { cwd: csoInfraDir, stdio: 'pipe', timeout: 5000 });
+
+    run('git', ['init', '-b', 'main']);
+    run('git', ['config', 'user.email', 'test@test.com']);
+    run('git', ['config', 'user.name', 'Test']);
+
+    // CI workflow with unpinned action
+    fs.mkdirSync(path.join(csoInfraDir, '.github', 'workflows'), { recursive: true });
+    fs.writeFileSync(path.join(csoInfraDir, '.github', 'workflows', 'ci.yml'), `
+name: CI
+on: [push]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: some-third-party/action@main
+      - run: echo "Building..."
+`);
+
+    // Dockerfile running as root
+    fs.writeFileSync(path.join(csoInfraDir, 'Dockerfile'), `
+FROM node:20
+WORKDIR /app
+COPY . .
+RUN npm install
+EXPOSE 3000
+CMD ["node", "server.js"]
+`);
+
+    run('git', ['add', '.']);
+    run('git', ['commit', '-m', 'initial']);
+  });
+
+  afterAll(() => {
+    try { fs.rmSync(csoInfraDir, { recursive: true, force: true }); } catch {}
+  });
+
+  test('/cso --infra runs infrastructure phases only', async () => {
+    const result = await runSkillTest({
+      prompt: `Read the file ${path.join(ROOT, 'cso', 'SKILL.md')} for the CSO skill instructions.
+
+Run /cso --infra on this repo. This should run infrastructure-only phases (0-6, 12-14).
+
+IMPORTANT:
+- Do NOT use AskUserQuestion — skip any interactive prompts.
+- This is a TINY repo with only 3 files: .github/workflows/ci.yml, Dockerfile, and package.json. Do NOT waste turns exploring — just read those files directly and audit them.
+- The Dockerfile has no USER directive (runs as root). The CI workflow uses an unpinned third-party GitHub Action (some-third-party/action@main).
+- Focus on infrastructure findings, NOT code-level OWASP scanning.
+- Skip the preamble (gstack-update-check, telemetry, etc.) — go straight to the audit.
+- Do NOT use the Agent tool for exploration or verification — read the files yourself. This repo is too small to need subagents.`,
+      workingDirectory: csoInfraDir,
+      maxTurns: 30,
+      allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
+      timeout: 360_000,
+    });
+
+    logCost('cso', result);
+    expect(result.exitReason).toBe('success');
+
+    const output = result.output.toLowerCase();
+    // Should mention unpinned action or Dockerfile issues
+    expect(
+      output.includes('unpinned') || output.includes('third-party') ||
+      output.includes('user directive') || output.includes('root')
+    ).toBe(true);
+
+    recordE2E(evalCollector, 'cso-infra-scope', 'e2e-cso', result);
+  }, 360_000);
+});
@@ -1369,6 +1369,18 @@ describe('Codex skill', () => {
    expect(content).toContain('codex exec');
  });

+  test('/review persists a review-log entry for ship readiness', () => {
+    const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8');
+    expect(content).toContain('"skill":"review"');
+    expect(content).toContain('"issues_found":N');
+    expect(content).toContain('Persist Eng Review result');
+  });
+
+  test('/ship gate suggests /review or /plan-eng-review when Eng Review is missing', () => {
+    const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
+    expect(content).toContain('Abort — run /review or /plan-eng-review first');
+  });
+
  test('Review Readiness Dashboard includes Adversarial Review row', () => {
    const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8');
    expect(content).toContain('Adversarial');