diff --git a/test/gbrain-init-voyage-code-3.test.ts b/test/gbrain-init-voyage-code-3.test.ts new file mode 100644 index 000000000..9eb84b198 --- /dev/null +++ b/test/gbrain-init-voyage-code-3.test.ts @@ -0,0 +1,184 @@ +/** + * Tests the voyage-code-3 default contract in setup-gbrain's PGLite init + * sequences. The contract lives in the skill TEMPLATE (.tmpl), not in a TS + * helper — the skill follows AI-readable instructions. + * + * Contract (asserted here): + * 1. When VOYAGE_API_KEY is set, gstack's PGLite init passes + * --embedding-model voyage:voyage-code-3 --embedding-dimensions 1024 + * 2. When VOYAGE_API_KEY is unset, those flags are omitted (gbrain's + * auto-selected provider chain takes over) + * + * Why a separate file from gbrain-init-rollback.test.ts: that file owns the + * .bak-rollback contract (Step 1.5 / 4.5 plan D7). This file owns the + * embedding-model selection contract. Both extract bash from the skill + * template and execute it against a fake gbrain. + * + * The fake gbrain records argv to a sentinel file so the test can assert + * exact flags. No Voyage API calls are made. 
+ */ + +import { describe, it, expect } from "bun:test"; +import { + mkdtempSync, + mkdirSync, + writeFileSync, + readFileSync, + existsSync, + rmSync, + chmodSync, +} from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; +import { spawnSync } from "child_process"; + +interface FakeEnv { + tmp: string; + home: string; + bindir: string; + argvLog: string; + cleanup: () => void; +} + +function makeFakeEnv(): FakeEnv { + const tmp = mkdtempSync(join(tmpdir(), "gbrain-voyage-init-")); + const home = join(tmp, "home"); + const bindir = join(tmp, "bin"); + const argvLog = join(tmp, "gbrain-argv.log"); + mkdirSync(join(home, ".gbrain"), { recursive: true }); + mkdirSync(bindir, { recursive: true }); + + // Fake gbrain logs every argv invocation to argvLog (one line per call), + // succeeds on init (writes a sentinel pglite config), and returns canned + // output for --version. Nothing else is needed for the shape test. + const fake = `#!/bin/sh +echo "$@" >> "${argvLog}" +case "$1" in + --version) + echo "gbrain 0.37.1.0" + exit 0 + ;; + init) + cat > "${home}/.gbrain/config.json" < rmSync(tmp, { recursive: true, force: true }), + }; +} + +/** + * Verbatim reimplementation of the skill template's voyage-code-3 + * conditional. The template (setup-gbrain/SKILL.md.tmpl Path 3, Step 1.5 + * inside the rollback wrapper, Step 4.5 Path 4 Yes branch) instructs the + * model to execute this bash; we execute the same bash here and assert the + * argv passed to gbrain matches the contract. + * + * If the template changes the flag set or the env-var name, this test + * should fail until the shell here is updated too — by design. 
+ */
+function runInitWithVoyageGate(env: FakeEnv, voyageKey: string | undefined): string[] {
+  // Bash under test. `\${...}` is escaped so the expansion happens in bash,
+  // not in this template literal. $GBRAIN_EMBED_FLAGS is deliberately left
+  // unquoted so it word-splits into separate argv entries (and disappears
+  // entirely when empty).
+  const script = `
+set -u
+GBRAIN_EMBED_FLAGS=""
+if [ -n "\${VOYAGE_API_KEY:-}" ]; then
+  GBRAIN_EMBED_FLAGS="--embedding-model voyage:voyage-code-3 --embedding-dimensions 1024"
+fi
+gbrain init --pglite --json $GBRAIN_EMBED_FLAGS
+`;
+  // Values are `string | undefined` because that is what process.env holds.
+  // env.bindir comes first on PATH so `gbrain` resolves to the fake binary
+  // expected to live there.
+  const baseEnv: Record<string, string | undefined> = {
+    ...process.env,
+    HOME: env.home,
+    PATH: `${env.bindir}:/usr/bin:/bin`,
+  };
+  // Set or scrub the key explicitly so a VOYAGE_API_KEY inherited from the
+  // developer's own environment cannot leak into the scenario under test.
+  if (voyageKey === undefined) {
+    delete baseEnv.VOYAGE_API_KEY;
+  } else {
+    baseEnv.VOYAGE_API_KEY = voyageKey;
+  }
+  const result = spawnSync("bash", ["-c", script], {
+    encoding: "utf-8",
+    env: baseEnv,
+  });
+  // A spawn failure (e.g. bash missing) leaves status/stderr null; report the
+  // underlying error instead of "exited null: null".
+  if (result.error) {
+    throw new Error(`failed to spawn bash: ${result.error.message}`);
+  }
+  if (result.status !== 0) {
+    throw new Error(`init script exited ${result.status}: ${result.stderr}`);
+  }
+  // The fake gbrain appends one line per invocation to argvLog.
+  return readFileSync(env.argvLog, "utf-8").trim().split("\n");
+}
+
+describe("voyage-code-3 default for gstack-driven PGLite init", () => {
+  it("passes voyage-code-3 flags when VOYAGE_API_KEY is set", () => {
+    const env = makeFakeEnv();
+    try {
+      const calls = runInitWithVoyageGate(env, "vk_test_set");
+      expect(calls.length).toBe(1);
+      const argv = calls[0];
+      expect(argv).toContain("init --pglite --json");
+      expect(argv).toContain("--embedding-model voyage:voyage-code-3");
+      expect(argv).toContain("--embedding-dimensions 1024");
+    } finally {
+      env.cleanup();
+    }
+  });
+
+  it("omits voyage flags when VOYAGE_API_KEY is unset", () => {
+    const env = makeFakeEnv();
+    try {
+      const calls = runInitWithVoyageGate(env, undefined);
+      expect(calls.length).toBe(1);
+      const argv = calls[0];
+      expect(argv).toContain("init --pglite --json");
+      expect(argv).not.toContain("voyage");
+      expect(argv).not.toContain("--embedding-model");
+      expect(argv).not.toContain("--embedding-dimensions");
+    } finally {
+      env.cleanup();
+    }
+  });
+
+  it("treats empty-string VOYAGE_API_KEY the same as unset (no false positive)", () => {
+    const env = makeFakeEnv();
+    try {
+      const calls = runInitWithVoyageGate(env, "");
+      expect(calls.length).toBe(1);
+      expect(calls[0]).not.toContain("voyage");
+    } finally {
+      env.cleanup();
+    }
+  });
+});
+
+describe("template alignment: the .tmpl actually contains the voyage gate", () => {
+  // Belt-and-suspenders: if someone edits the template and drops the
+  // VOYAGE_API_KEY conditional without updating the test above, this catches
+  // it. The shell snippet under test must literally appear in the .tmpl.
+  const TEMPLATE_PATH = join(import.meta.dir, "..", "setup-gbrain", "SKILL.md.tmpl");
+  const tmpl = readFileSync(TEMPLATE_PATH, "utf-8");
+
+  it("setup-gbrain template gates the embedding-model flag on VOYAGE_API_KEY", () => {
+    // Should appear at least once (currently 3 init sites use the same gate).
+    expect(tmpl).toContain('if [ -n "${VOYAGE_API_KEY:-}" ]; then');
+    expect(tmpl).toContain("--embedding-model voyage:voyage-code-3");
+    expect(tmpl).toContain("--embedding-dimensions 1024");
+  });
+
+  it("setup-gbrain template uses the conditional gate at all 3 PGLite init sites", () => {
+    // Count the gate occurrences. If a future edit adds/removes a PGLite
+    // init site, update this expectation deliberately.
+    const matches = tmpl.match(/if \[ -n "\$\{VOYAGE_API_KEY:-\}" \]; then/g);
+    expect(matches?.length).toBe(3);
+  });
+});
diff --git a/test/gbrain-sync-voyage-code-3-integration.test.ts b/test/gbrain-sync-voyage-code-3-integration.test.ts
new file mode 100644
index 000000000..268e5ec5b
--- /dev/null
+++ b/test/gbrain-sync-voyage-code-3-integration.test.ts
@@ -0,0 +1,328 @@
+/**
+ * Real integration: gbrain PGLite + voyage-code-3 end-to-end.
+ *
+ * Inits a sandboxed PGLite engine with voyage-code-3 embeddings, registers a
+ * tiny code fixture as a source, syncs it (which triggers Voyage embedding
+ * generation), and reads the results back through the sync output,
+ * `gbrain sources list`, and `gbrain code-def`.
+ * The whole point is to catch the failure modes that hit us in real life:
+ *
+ *   - dimension mismatch between the configured embedding column and the
+ *     model's actual output dim (the 1280-vs-1536 trap that gbrain doctor
+ *     surfaces but `gbrain init` silently sets up)
+ *   - voyage-code-3 unavailable via gbrain's openai-compat adapter
+ *   - sync completes but embedding generation silently fails (0 chunks)
+ *
+ * We intentionally do NOT call `gbrain query` here — it produces correct
+ * output but doesn't exit cleanly on a fresh PGLite (~2 min hang after
+ * results print). The smoking-gun assertion for "embeddings worked" is the
+ * "N pages embedded" line from sync output: if that's >= 1, voyage-code-3
+ * returned 1024-dim vectors and gbrain persisted them. Symbol-aware
+ * functionality is covered separately by the code-def test.
+ *
+ * Skips when:
+ *   - `gbrain` is not on PATH (dev machine without it installed)
+ *   - VOYAGE_API_KEY is unset (the test makes real Voyage API calls)
+ *
+ * Cost: ~$0.001 per run. The fixture is 3 tiny files, ~500 tokens total.
+ * Not gated on EVALS=1 because it's not an LLM eval — it's a deterministic
+ * integration test of the embedding pipeline. Always runs when the env
+ * supports it.
+ *
+ * Runtime: ~30-60s (gbrain init schema migrations + sync + Voyage round-trip).
+ * Long enough that each test sets its own timeout below (120s for init,
+ * 300s for the tests that sync).
+ */
+
+import { describe, test, expect } from "bun:test";
+import {
+  mkdtempSync,
+  mkdirSync,
+  writeFileSync,
+  rmSync,
+  existsSync,
+} from "fs";
+import { tmpdir } from "os";
+import { join } from "path";
+import { spawnSync } from "child_process";
+
+// `stdout` can be null when the spawn itself fails (e.g. no `which` binary);
+// treat that the same as "gbrain not found" instead of crashing at load time.
+const gbrainPath = (spawnSync("which", ["gbrain"], { encoding: "utf-8" }).stdout ?? "").trim();
+const gbrainAvailable = gbrainPath.length > 0;
+const voyageKey = process.env.VOYAGE_API_KEY?.trim() ?? "";
+const voyageKeyPresent = voyageKey.length > 0;
+
+const shouldRun = gbrainAvailable && voyageKeyPresent;
+const skipReason = !gbrainAvailable
+  ? "gbrain not on PATH"
+  : !voyageKeyPresent
+    ? "VOYAGE_API_KEY not set (real Voyage API calls required)"
+    : "";
+
+if (!shouldRun) {
+  console.log(`[gbrain-sync-voyage-code-3-integration] SKIP: ${skipReason}`);
+}
+
+// argv for a PGLite init pinned to voyage-code-3 at 1024 dimensions; shared
+// by every test so the three init calls cannot drift apart.
+const VOYAGE_INIT_ARGS: string[] = [
+  "init",
+  "--pglite",
+  "--json",
+  "--embedding-model",
+  "voyage:voyage-code-3",
+  "--embedding-dimensions",
+  "1024",
+];
+
+/** Paths of one throwaway sandbox plus the callback that deletes it. */
+interface SandboxEnv {
+  root: string;
+  gbrainHome: string;
+  fixtureDir: string;
+  cleanup: () => void;
+}
+
+/**
+ * Creates a temp directory holding a three-file fixture repo with a single
+ * git commit. Throws (after removing the temp directory) if any git step
+ * fails, because callers only register `cleanup` once this has returned.
+ */
+function makeSandbox(): SandboxEnv {
+  const root = mkdtempSync(join(tmpdir(), "gbrain-voyage-int-"));
+  // GBRAIN_HOME points at the PARENT of .gbrain (per gbrain's configDir());
+  // setting GBRAIN_HOME=/x means gbrain looks at /x/.gbrain/.
+  const gbrainHome = root;
+  const fixtureDir = join(root, "fixture-repo");
+  mkdirSync(fixtureDir, { recursive: true });
+
+  // Tiny realistic fixture: three files exercising different file types so
+  // gbrain's code stage has something to extract symbols + embeddings from.
+  writeFileSync(
+    join(fixtureDir, "math.ts"),
+    `export function fibonacci(n: number): number {
+  if (n <= 1) return n;
+  return fibonacci(n - 1) + fibonacci(n - 2);
+}
+
+export function isPrime(n: number): boolean {
+  if (n < 2) return false;
+  for (let i = 2; i * i <= n; i++) {
+    if (n % i === 0) return false;
+  }
+  return true;
+}
+`,
+  );
+  writeFileSync(
+    join(fixtureDir, "queue.ts"),
+    `export class JobQueue<T> {
+  private items: T[] = [];
+  enqueue(item: T): void { this.items.push(item); }
+  dequeue(): T | undefined { return this.items.shift(); }
+  size(): number { return this.items.length; }
+}
+`,
+  );
+  writeFileSync(
+    join(fixtureDir, "README.md"),
+    `# Fixture repo
+
+Sample code for testing the voyage-code-3 embedding pipeline.
+The math module exposes fibonacci and primality helpers.
+The queue module is a simple FIFO job queue.
+`,
+  );
+
+  // Make it a git repo because gbrain's code-sync strategy expects one.
+  // Every step is checked: a silently failed `git commit` would otherwise
+  // only show up later as a confusing sync failure.
+  const gitSteps: string[][] = [
+    ["init", "-q"],
+    ["config", "user.email", "test@example.invalid"],
+    ["config", "user.name", "test"],
+    ["add", "."],
+    ["commit", "-q", "-m", "fixture"],
+  ];
+  for (const args of gitSteps) {
+    const git = spawnSync("git", args, { cwd: fixtureDir, encoding: "utf-8" });
+    if (git.status !== 0) {
+      // The caller has no SandboxEnv yet, so nothing else will remove root.
+      rmSync(root, { recursive: true, force: true });
+      throw new Error(`git ${args[0]} failed: ${git.stderr}`);
+    }
+  }
+
+  return {
+    root,
+    gbrainHome,
+    fixtureDir,
+    cleanup: () => rmSync(root, { recursive: true, force: true }),
+  };
+}
+
+/** Parent environment with GBRAIN_HOME pointed at the sandbox and the trimmed Voyage key. */
+function gbrainEnv(s: SandboxEnv): NodeJS.ProcessEnv {
+  return {
+    ...process.env,
+    GBRAIN_HOME: s.gbrainHome,
+    VOYAGE_API_KEY: voyageKey,
+  };
+}
+
+/**
+ * Runs `gbrain <args>` synchronously inside the sandbox and returns the
+ * spawnSync result (utf-8 stdout/stderr). `opts.timeout` is in milliseconds
+ * and defaults to 120s.
+ */
+function runGbrain(s: SandboxEnv, args: string[], opts: { timeout?: number } = {}) {
+  // cwd MUST be the sandbox root, not the test's parent CWD. If gbrain runs
+  // from inside the gstack worktree, it picks up the worktree's
+  // `.gbrain-source` pin and tries to sync that source too — which won't
+  // exist in the sandbox PGLite, and the resulting "not found" exits 1.
+  return spawnSync("gbrain", args, {
+    encoding: "utf-8",
+    env: gbrainEnv(s),
+    cwd: s.root,
+    timeout: opts.timeout ?? 120_000,
+  });
+}
+
+describe.skipIf(!shouldRun)(
+  "gbrain PGLite + voyage-code-3 end-to-end (real Voyage API)",
+  () => {
+    test(
+      "init with voyage-code-3 produces a 1024-dim-aligned PGLite config",
+      () => {
+        const s = makeSandbox();
+        try {
+          const init = runGbrain(s, VOYAGE_INIT_ARGS);
+          expect(init.status).toBe(0);
+          // init prints JSON status line at the end; just sniff for success.
+          const out = (init.stdout || "") + (init.stderr || "");
+          expect(out).toContain('"status":"success"');
+          expect(out).toContain('"engine":"pglite"');
+
+          // doctor must agree the column width matches the live probe dim.
+          const doctor = runGbrain(s, ["doctor"]);
+          const dout = (doctor.stdout || "") + (doctor.stderr || "");
+          // Doctor exits non-zero on error rows; warnings are OK. The
+          // critical assertion is no dimension mismatch.
+          expect(dout).not.toContain("DB dimension mismatch");
+          // Should explicitly mention voyage-code-3 as the live provider.
+          expect(dout).toMatch(/voyage-code-3/);
+          // Width consistency check should be green for 1024d.
+          expect(dout).toMatch(/Schema width \(1024d\)/);
+        } finally {
+          s.cleanup();
+        }
+      },
+      120_000,
+    );
+
+    test(
+      "sync --strategy code generates Voyage embeddings and registers pages + chunks",
+      () => {
+        const s = makeSandbox();
+        try {
+          // 1. init voyage-code-3 PGLite
+          const init = runGbrain(s, VOYAGE_INIT_ARGS);
+          expect(init.status).toBe(0);
+
+          // 2. register the fixture as a code source
+          const add = runGbrain(s, [
+            "sources",
+            "add",
+            "fixture-code",
+            "--path",
+            s.fixtureDir,
+          ]);
+          expect(add.status).toBe(0);
+
+          // 3. sync with code strategy — this is where Voyage embeddings get
+          //    generated. Use --skip-failed so a single oversized file (which
+          //    can happen in real repos) doesn't block the assertion.
+          const sync = runGbrain(
+            s,
+            [
+              "sync",
+              "--source",
+              "fixture-code",
+              "--strategy",
+              "code",
+              "--skip-failed",
+            ],
+            { timeout: 180_000 },
+          );
+          if (sync.status !== 0) {
+            console.error(`[sync FAILED exit=${sync.status}]`);
+            console.error(`STDOUT:\n${sync.stdout}`);
+            console.error(`STDERR:\n${sync.stderr}`);
+          }
+          expect(sync.status).toBe(0);
+          const sout = (sync.stdout || "") + (sync.stderr || "");
+          // The fixture has 3 files; gbrain should import at least the 2 .ts
+          // files (README.md may or may not be picked up by --strategy code
+          // depending on gbrain's file-type heuristics).
+          expect(sout).toMatch(/imported=[1-9]/);
+          // The "pages embedded" line is the smoking gun: if it's 0,
+          // embedding generation silently failed (voyage adapter broken,
+          // dimension mismatch, etc). Anything > 0 means voyage-code-3
+          // returned 1024-dim vectors and gbrain wrote them.
+          expect(sout).toMatch(/[1-9]\d* pages embedded/);
+
+          // 4. verify the source has pages and chunks
+          const list = runGbrain(s, ["sources", "list", "--json"]);
+          expect(list.status).toBe(0);
+          const sources = JSON.parse(list.stdout) as {
+            sources: Array<{ id: string; page_count: number }>;
+          };
+          const fixture = sources.sources.find((x) => x.id === "fixture-code");
+          expect(fixture).toBeDefined();
+          expect(fixture!.page_count).toBeGreaterThanOrEqual(2);
+        } finally {
+          s.cleanup();
+        }
+      },
+      300_000,
+    );
+
+    test(
+      "code-def finds symbols defined in the embedded fixture",
+      () => {
+        const s = makeSandbox();
+        try {
+          // Assert each setup step so a broken init/add/sync is reported as
+          // such, not as a JSON.parse error on empty code-def output.
+          const init = runGbrain(s, VOYAGE_INIT_ARGS);
+          expect(init.status).toBe(0);
+          const add = runGbrain(s, ["sources", "add", "fixture-code", "--path", s.fixtureDir]);
+          expect(add.status).toBe(0);
+          const sync = runGbrain(
+            s,
+            ["sync", "--source", "fixture-code", "--strategy", "code", "--skip-failed"],
+            { timeout: 180_000 },
+          );
+          expect(sync.status).toBe(0);
+
+          // code-def is the symbol-aware path. It doesn't strictly need
+          // embeddings (symbols are extracted by tree-sitter), but the JSON
+          // shape it returns is the contract gstack's CLAUDE.md guidance
+          // points the agent at. Verify it works against our PGLite + Voyage
+          // setup.
+          const result = runGbrain(s, ["code-def", "fibonacci"]);
+          expect(result.status).toBe(0);
+          const parsed = JSON.parse(result.stdout) as {
+            symbol: string;
+            count: number;
+            results: Array<{ file: string; symbol_type: string }>;
+          };
+          expect(parsed.symbol).toBe("fibonacci");
+          expect(parsed.count).toBeGreaterThanOrEqual(1);
+          expect(parsed.results[0].file).toContain("math.ts");
+        } finally {
+          s.cleanup();
+        }
+      },
+      300_000,
+    );
+  },
+);
+
+// Lightweight always-on guard: even without the integration test running, we
+// can still assert that the test file's `describe.skipIf` gate is correctly
+// formed. This catches a future edit that accidentally inverts the gate.
+test("integration test gate uses the correct skip predicate", () => {
+  // shouldRun must be the boolean AND of the two pre-checks. If a refactor
+  // makes it true when either piece is missing, the test below would attempt
+  // real API calls without a key — undefined behavior.
+  expect(shouldRun).toBe(gbrainAvailable && voyageKeyPresent);
+  // The logged reason must agree with shouldRun in both directions: present
+  // when skipping, empty when running.
+  if (!shouldRun) {
+    expect(skipReason.length).toBeGreaterThan(0);
+  } else {
+    expect(skipReason).toBe("");
+  }
+});