/**
 * E2E: /office-hours brain-writeback path under fake gbrain CLI.
 *
 * The matched-pair check for v1.50.0.0's "brain-aware planning actually
 * works under Claude Code" headline: prove that when a user runs
 * /office-hours with gbrain on PATH, the agent actually calls
 * `gbrain put` on an office-hours/ slug with valid frontmatter.
 *
 * Approach:
 *   1. Regenerate office-hours/SKILL.md with --respect-detection against
 *      a temp GSTACK_HOME whose gbrain-detection.json reports gbrain as
 *      present. Snapshot the rendered content (which now contains the
 *      compressed SAVE_RESULTS block), then restore the canonical
 *      no-gbrain version so the working tree stays clean.
 *   2. Write the snapshot into a temp workdir's office-hours/SKILL.md.
 *      Also write docs/gbrain-write-surfaces.md so the agent can read the
 *      template on demand (the compact block points to it).
 *   3. Write a fake `gbrain` shell script into workdir/bin/ with robust
 *      argv quoting (printf %q) so heredoc payloads in --content survive
 *      shell-to-shell. The fake logs every invocation and writes payloads
 *      to a per-slug file for inspection.
 *   4. Run /office-hours via runSkillTest with workdir/bin/ first on PATH.
 *      Feed a deterministic founder pitch + auto-decide instructions.
 *   5. Assert the argv log contains a `gbrain put` for the expected slug
 *      and that the payload file exists with valid YAML frontmatter.
 *      Entity stub calls are reported but are best-effort (warning only).
 *
 * Periodic tier (~$0.50-1/run via claude -p, matches nearby
 * setup-gbrain-path4-* tests at touchfiles.ts:496-498).
 *
 * NOT verified by this test (out of scope, owned by docs/gbrain-write-surfaces.md):
 *   - That gbrain itself persists what `gbrain put` is told (gbrain's
 *     own contract)
 *   - That `.gbrain-source` doesn't re-route writes (gbrain's contract)
 *   - Source-targeting (no way to fake source resolution in a stub CLI)
 */

import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { execFileSync, spawnSync } from 'child_process';
import {
  chmodSync,
  copyFileSync,
  existsSync,
  mkdirSync,
  mkdtempSync,
  readFileSync,
  readdirSync,
  rmSync,
  writeFileSync,
} from 'fs';
import { tmpdir } from 'os';
import { join, relative } from 'path';
import { runSkillTest } from './helpers/session-runner';
import {
  ROOT,
  runId,
  describeIfSelected,
  testConcurrentIfSelected,
  logCost,
  recordE2E,
  createEvalCollector,
} from './helpers/e2e-helpers';

const evalCollector = createEvalCollector('e2e-office-hours-brain-writeback');

/**
 * Render the source of the fake `gbrain` bash script.
 *
 * The script appends every invocation to `callsLogPath` (one line per call,
 * each argument quoted with `printf %q`) and, for `put <slug> ... --content
 * <body>`, writes `<body>` to `<payloadDir>/<slug>.md`. All subcommands exit 0.
 *
 * @param callsLogPath - Absolute path of the invocation log the script appends to.
 * @param payloadDir - Directory under which `put` payloads are written.
 * @returns The script text, ready to be written to disk and made executable.
 */
function buildFakeGbrainScript(callsLogPath: string, payloadDir: string): string {
  // The argument scan loops on `$#` rather than `-n "$1"` so that an
  // empty-string argument ahead of --content does not end the scan early.
  return `#!/bin/bash
# Fake gbrain CLI for E2E test. Logs every invocation with shell-safe quoting
# (printf %q) so --content "$(cat <<'EOF' ... EOF)" payloads survive intact.
{ printf 'gbrain'; for a in "$@"; do printf ' %q' "$a"; done; printf '\\n'; } \\
  >> "${callsLogPath}"

case "$1" in
  --version) echo "gbrain test-0.41.0"; exit 0 ;;
  search) echo "[]"; exit 0 ;;
  get_page) echo ""; exit 0 ;;
  put)
    SLUG="$2"
    shift 2
    while [ $# -gt 0 ]; do
      if [ "$1" = "--content" ]; then
        PAYLOAD_DIR="${payloadDir}"
        mkdir -p "$PAYLOAD_DIR/$(dirname "$SLUG")"
        printf '%s' "$2" > "$PAYLOAD_DIR/$SLUG.md"
        break
      fi
      shift
    done
    exit 0
    ;;
esac
exit 0
`;
}

/**
 * Depth-first search for the first `.md` file under `root` whose path
 * (relative to `root`) contains `needle`.
 *
 * Matching on the relative path, not just the file name, means a slug that
 * places `needle` in a directory component is still found, while the temp
 * directory's own name can never produce a false match.
 *
 * @param root - Directory to search; a missing directory yields `null`.
 * @param needle - Substring that must appear in the file's relative path.
 * @returns Absolute path of the first match, or `null` when none exists.
 */
function findPayload(root: string, needle: string): string | null {
  const walk = (dir: string): string | null => {
    if (!existsSync(dir)) return null;
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const full = join(dir, entry.name);
      if (entry.isDirectory()) {
        const nested = walk(full);
        if (nested) return nested;
      } else if (
        entry.name.endsWith('.md') &&
        relative(root, full).includes(needle)
      ) {
        return full;
      }
    }
    return null;
  };
  return walk(root);
}

describeIfSelected(
  'Office Hours Brain Writeback E2E',
  ['office-hours-brain-writeback'],
  () => {
    let workDir: string;
    let callsLogPath: string;
    let payloadDir: string;

    beforeAll(() => {
      workDir = mkdtempSync(join(tmpdir(), 'skill-e2e-brain-writeback-'));

      // Fixture commands are best-effort: a failure is reported but does not
      // abort setup, so the run itself still surfaces the real problem.
      const run = (cmd: string, args: string[]) => {
        const res = spawnSync(cmd, args, {
          cwd: workDir,
          stdio: 'pipe',
          timeout: 5000,
        });
        if (res.error || res.status !== 0) {
          const detail = res.error?.message ?? res.stderr.toString().trim();
          console.warn(
            `[fixture] ${cmd} ${args.join(' ')} failed (status ${res.status}): ${detail}`,
          );
        }
        return res;
      };

      run('git', ['init', '-b', 'main']);
      run('git', ['config', 'user.email', 'test@test.com']);
      run('git', ['config', 'user.name', 'Test']);

      // Copy the founder pitch fixture into the workdir.
      const briefSrc = join(
        ROOT,
        'test',
        'fixtures',
        'office-hours-brain-writeback',
        'brief.md',
      );
      copyFileSync(briefSrc, join(workDir, 'pitch.md'));

      // Generate a brain-aware office-hours/SKILL.md (with --respect-detection
      // against a temp GSTACK_HOME). Snapshot the content, restore the
      // canonical version, write the snapshot into the workdir.
      const tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-detect-home-'));
      writeFileSync(
        join(tmpHome, 'gbrain-detection.json'),
        JSON.stringify({
          gbrain_local_status: 'ok',
          gbrain_on_path: true,
          gbrain_version: 'test-0.41.0',
        }),
      );

      const skillPath = join(ROOT, 'office-hours', 'SKILL.md');
      const originalSkill = readFileSync(skillPath, 'utf-8');
      try {
        execFileSync(
          'bun',
          [
            'run',
            'scripts/gen-skill-docs.ts',
            '--host',
            'claude',
            '--respect-detection',
          ],
          {
            cwd: ROOT,
            env: { ...process.env, GSTACK_HOME: tmpHome },
            stdio: ['ignore', 'pipe', 'pipe'],
            timeout: 60_000,
          },
        );
        const brainAwareSkill = readFileSync(skillPath, 'utf-8');
        if (!brainAwareSkill.includes('gbrain put "office-hours/')) {
          throw new Error(
            'Regenerated office-hours/SKILL.md does not contain gbrain put block. ' +
              'Detection override may be broken — see test/gbrain-detection-override.test.ts.',
          );
        }
        mkdirSync(join(workDir, 'office-hours'), { recursive: true });
        writeFileSync(join(workDir, 'office-hours', 'SKILL.md'), brainAwareSkill);
      } finally {
        // Always restore the canonical SKILL.md so the working tree stays
        // clean; the temp home is removed even if the restore itself throws.
        // NOTE(review): only office-hours/SKILL.md is restored here. If
        // gen-skill-docs rewrites other files under ROOT they would stay
        // modified — confirm against the generator.
        try {
          writeFileSync(skillPath, originalSkill);
        } finally {
          rmSync(tmpHome, { recursive: true, force: true });
        }
      }

      // Copy docs/gbrain-write-surfaces.md so the compact resolver block's
      // on-demand reference resolves (the agent may read it for the full
      // template; we don't require this read but make it available).
      const docsSrc = join(ROOT, 'docs', 'gbrain-write-surfaces.md');
      const docsDst = join(workDir, 'docs', 'gbrain-write-surfaces.md');
      mkdirSync(join(workDir, 'docs'), { recursive: true });
      copyFileSync(docsSrc, docsDst);

      // Set up the fake gbrain CLI with robust argv quoting + payload capture.
      callsLogPath = join(workDir, 'gbrain-calls.log');
      payloadDir = join(workDir, 'gbrain-payloads');
      mkdirSync(payloadDir, { recursive: true });

      const binDir = join(workDir, 'bin');
      mkdirSync(binDir, { recursive: true });
      const fakePath = join(binDir, 'gbrain');
      writeFileSync(fakePath, buildFakeGbrainScript(callsLogPath, payloadDir));
      chmodSync(fakePath, 0o755);

      run('git', ['add', '.']);
      run('git', ['commit', '-m', 'fixture']);
    });

    afterAll(() => {
      try {
        rmSync(workDir, { recursive: true, force: true });
      } catch {
        // best effort
      }
    });

    testConcurrentIfSelected(
      'office-hours-brain-writeback',
      async () => {
        // NOTE(review): two backtick-quoted placeholders in the prompt below
        // (the empty one and the one ending in "/pixel-fund") look as if an
        // angle-bracket token was stripped from them — confirm the intended
        // text against the SAVE_RESULTS template before relying on it.
        const result = await runSkillTest({
          prompt: `Read office-hours/SKILL.md for the workflow.

Read pitch.md — that's a founder pitch coming to office hours. Select Startup Mode. Skip any AskUserQuestion — this is non-interactive; auto-decide the recommended option for any question.

For the diagnostic, assume the founder confirmed Q1 (strongest evidence = "230 from a single tweet + 51 paying creators in 6 weeks"), Q2 (status quo = "creators write ad-hoc checks or use opaque Patreon-style platforms"), and Q3 (forcing question already asked).

Generate the design doc per Phase 5. The feature-slug value to substitute into the SAVE_RESULTS template's \`\` placeholder is exactly 'pixel-fund' (no path prefix — the template already provides the prefix).

The \`gbrain\` binary is on PATH at ${workDir}/bin/gbrain. Apply the SAVE_RESULTS template literally: the slug should land at \`/pixel-fund\` per the resolver shape, with the actual design doc markdown body in the --content payload. Then enrich entity stubs for any named people or companies mentioned in the pitch.

This is a test of the brain-writeback path. Do NOT skip the gbrain save step under any circumstance — the runtime guard ("skip if gbrain not on PATH") does NOT apply here because gbrain IS available. Do NOT explore gbrain --help; follow the SAVE_RESULTS template's exact CLI shape. If you encounter any AskUserQuestion, auto-decide recommended.`,
          workingDirectory: workDir,
          maxTurns: 12,
          timeout: 360_000,
          testName: 'office-hours-brain-writeback',
          runId,
          model: 'claude-sonnet-4-6',
          extraEnv: {
            // Fake CLI directory goes first so it shadows any real gbrain.
            PATH: `${join(workDir, 'bin')}:${process.env.PATH ?? ''}`,
          },
        });

        logCost('/office-hours (BRAIN WRITEBACK)', result);
        recordE2E(
          evalCollector,
          '/office-hours-brain-writeback',
          'Office Hours Brain Writeback E2E',
          result,
          {
            passed: ['success', 'error_max_turns'].includes(result.exitReason),
          },
        );

        // Running out of turns is tolerated: the writeback may already have
        // happened, and the assertions below check for it directly.
        expect(['success', 'error_max_turns']).toContain(result.exitReason);

        // The headline assertion: agent actually called gbrain put on the
        // expected slug.
        if (!existsSync(callsLogPath)) {
          throw new Error(
            `No gbrain calls log at ${callsLogPath}. ` +
              `Agent likely did NOT invoke gbrain at all. ` +
              `Check that office-hours/SKILL.md in the workdir contains the gbrain put block.`,
          );
        }
        const callsLog = readFileSync(callsLogPath, 'utf-8');
        console.log('--- gbrain calls log ---');
        console.log(callsLog);
        console.log('--- end calls log ---');

        expect(callsLog).toContain('gbrain put');

        // Agent obedience: the slug should contain 'pixel-fund' somewhere
        // (preferably under the office-hours/ prefix). The strict slug
        // shape is already pinned by the resolver unit test
        // (test/resolvers-gbrain-save-results.test.ts); this E2E proves the
        // agent actually invokes gbrain put with the payload, not the
        // resolver's literal output shape.
        expect(callsLog).toMatch(/gbrain put .*pixel-fund/);

        // Payload file exists. Agent may write to office-hours/pixel-fund.md
        // (resolver-faithful) OR pixel-fund.md (agent dropped prefix); both
        // are acceptable here because the YAML frontmatter is the real
        // contract test. Search the payload tree for any *.md file that
        // contains 'pixel-fund' in its path.
        const payloadPath = findPayload(payloadDir, 'pixel-fund');
        if (!payloadPath) {
          throw new Error(
            `Agent called gbrain put but no payload file with 'pixel-fund' ` +
              `in its path was written to ${payloadDir}. Check the fake gbrain ` +
              `--content parser for argv quoting issues.`,
          );
        }

        const payload = readFileSync(payloadPath, 'utf-8');
        expect(payload).toMatch(/^---\s*\n/);
        expect(payload).toContain('title:');
        expect(payload).toContain('tags:');
        expect(payload.length).toBeGreaterThan(200);

        // Entity stubs: agents are inconsistent about whether they use
        // 'entities/' (resolver doc) or 'entity/' (singular). Either form is
        // counted, and a missing stub only warns — entity extraction is
        // best-effort, so it never fails the test.
        const entityCallMatches =
          callsLog.match(/gbrain put entit(?:y|ies)\//g) || [];
        if (entityCallMatches.length === 0) {
          console.warn(
            'No entity stub calls in gbrain calls log. Resolver instructs ' +
              'entity extraction but it is best-effort.',
          );
        } else {
          console.log(
            `Entity stub calls observed: ${entityCallMatches.length}`,
          );
        }
      },
      420_000,
    );
  },
);