mirror of
https://github.com/garrytan/gstack.git
synced 2026-06-19 16:20:09 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/cairo-v3
# Conflicts: # CHANGELOG.md # VERSION # package.json
This commit is contained in:
@@ -0,0 +1,184 @@
|
||||
/**
 * Tests the voyage-code-3 default contract in setup-gbrain's PGLite init
 * sequences. The contract lives in the skill TEMPLATE (.tmpl), not in a TS
 * helper — the skill follows AI-readable instructions.
 *
 * Contract (asserted here):
 *   1. When VOYAGE_API_KEY is set, gstack's PGLite init passes
 *      --embedding-model voyage:voyage-code-3 --embedding-dimensions 1024
 *   2. When VOYAGE_API_KEY is unset, those flags are omitted (gbrain's
 *      auto-selected provider chain takes over)
 *
 * Why a separate file from gbrain-init-rollback.test.ts: that file owns the
 * .bak-rollback contract (Step 1.5 / 4.5 plan D7). This file owns the
 * embedding-model selection contract. Both extract bash from the skill
 * template and execute it against a fake gbrain.
 *
 * The fake gbrain records argv to a sentinel file so the test can assert
 * exact flags. No Voyage API calls are made.
 */
|
||||
|
||||
import { describe, it, expect } from "bun:test";
|
||||
import {
|
||||
mkdtempSync,
|
||||
mkdirSync,
|
||||
writeFileSync,
|
||||
readFileSync,
|
||||
existsSync,
|
||||
rmSync,
|
||||
chmodSync,
|
||||
} from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { spawnSync } from "child_process";
|
||||
|
||||
/** Handle to a throwaway sandbox that contains a stub `gbrain` executable. */
interface FakeEnv {
  /** Root of the temporary directory; everything else lives underneath it. */
  tmp: string;
  /** Directory used as `$HOME` for the script under test. */
  home: string;
  /** Directory holding the stub `gbrain`; it is prepended to `PATH`. */
  bindir: string;
  /** File the stub appends its argv to, one line per invocation. */
  argvLog: string;
  /** Recursively deletes `tmp`. */
  cleanup: () => void;
}
|
||||
|
||||
/**
 * Creates a temporary sandbox with a stub `gbrain` on disk.
 *
 * Layout under a fresh `mkdtemp` directory:
 *   - `home/.gbrain/`    — where the stub writes its config on `init`
 *   - `bin/gbrain`       — the executable stub (mode 0755)
 *   - `gbrain-argv.log`  — created lazily by the stub on first invocation
 *
 * @returns paths into the sandbox plus a `cleanup` that removes it.
 */
function makeFakeEnv(): FakeEnv {
  const tmp = mkdtempSync(join(tmpdir(), "gbrain-voyage-init-"));
  const home = join(tmp, "home");
  const bindir = join(tmp, "bin");
  const argvLog = join(tmp, "gbrain-argv.log");
  mkdirSync(join(home, ".gbrain"), { recursive: true });
  mkdirSync(bindir, { recursive: true });

  // Fake gbrain logs every argv invocation to argvLog (one line per call),
  // succeeds on init (writes a sentinel pglite config), and returns canned
  // output for --version. Nothing else is needed for the shape test.
  //
  // The heredoc delimiter is quoted so the shell copies the JSON verbatim;
  // the paths are already substituted by the JS template literal.
  const fake = `#!/bin/sh
echo "$@" >> "${argvLog}"
case "$1" in
  --version)
    echo "gbrain 0.37.1.0"
    exit 0
    ;;
  init)
    cat > "${home}/.gbrain/config.json" <<'JSON'
{"engine":"pglite","database_path":"${home}/.gbrain/brain.pglite"}
JSON
    echo '{"status":"success","engine":"pglite","pages":0}'
    exit 0
    ;;
esac
exit 0
`;
  const stubPath = join(bindir, "gbrain");
  writeFileSync(stubPath, fake);
  chmodSync(stubPath, 0o755);

  return {
    tmp,
    home,
    bindir,
    argvLog,
    cleanup: () => rmSync(tmp, { recursive: true, force: true }),
  };
}
|
||||
|
||||
/**
 * Verbatim reimplementation of the skill template's voyage-code-3
 * conditional. The template (setup-gbrain/SKILL.md.tmpl Path 3, Step 1.5
 * inside the rollback wrapper, Step 4.5 Path 4 Yes branch) instructs the
 * model to execute this bash; we execute the same bash here and assert the
 * argv passed to gbrain matches the contract.
 *
 * If the template changes the flag set or the env-var name, this test
 * should fail until the shell here is updated too — by design.
 *
 * @param env sandbox whose `bindir` holds the stub `gbrain` and whose
 *   `argvLog` the stub appends to.
 * @param voyageKey value for `VOYAGE_API_KEY` in the child process;
 *   `undefined` removes the variable entirely (even if the parent has it).
 * @returns one string per recorded `gbrain` invocation (its argv joined by
 *   spaces); an empty array when nothing was recorded.
 * @throws Error when bash cannot be launched or the script exits non-zero.
 */
function runInitWithVoyageGate(env: FakeEnv, voyageKey: string | undefined): string[] {
  const script = `
set -u
GBRAIN_EMBED_FLAGS=""
if [ -n "\${VOYAGE_API_KEY:-}" ]; then
  GBRAIN_EMBED_FLAGS="--embedding-model voyage:voyage-code-3 --embedding-dimensions 1024"
fi
gbrain init --pglite --json $GBRAIN_EMBED_FLAGS
`;

  // process.env values are `string | undefined`, so the child environment is
  // typed as ProcessEnv rather than Record<string, string>.
  const childEnv: NodeJS.ProcessEnv = {
    ...process.env,
    HOME: env.home,
    PATH: `${env.bindir}:/usr/bin:/bin`,
  };
  if (voyageKey === undefined) {
    delete childEnv.VOYAGE_API_KEY;
  } else {
    childEnv.VOYAGE_API_KEY = voyageKey;
  }

  const result = spawnSync("bash", ["-c", script], {
    encoding: "utf-8",
    env: childEnv,
  });
  if (result.error) {
    // Spawn failure (e.g. bash missing): status is null, so report the cause.
    throw new Error(`failed to launch bash: ${result.error.message}`);
  }
  if (result.status !== 0) {
    throw new Error(`init script exited ${result.status}: ${result.stderr}`);
  }

  // The stub creates the log lazily. A missing or empty log means "zero
  // calls", not ENOENT and not a single empty-string call.
  if (!existsSync(env.argvLog)) {
    return [];
  }
  const recorded = readFileSync(env.argvLog, "utf-8").trim();
  return recorded === "" ? [] : recorded.split("\n");
}
|
||||
|
||||
describe("voyage-code-3 default for gstack-driven PGLite init", () => {
  // Exact argv lines the stub gbrain is expected to record. Whole-line
  // equality (rather than substring checks) also catches unexpected extra
  // flags.
  const BASE_ARGV = "init --pglite --json";
  const VOYAGE_ARGV = `${BASE_ARGV} --embedding-model voyage:voyage-code-3 --embedding-dimensions 1024`;

  /** Runs the gated init in a fresh sandbox and always removes the sandbox. */
  function recordedCalls(voyageKey: string | undefined): string[] {
    const env = makeFakeEnv();
    try {
      return runInitWithVoyageGate(env, voyageKey);
    } finally {
      env.cleanup();
    }
  }

  it("passes voyage-code-3 flags when VOYAGE_API_KEY is set", () => {
    expect(recordedCalls("vk_test_set")).toEqual([VOYAGE_ARGV]);
  });

  it("omits voyage flags when VOYAGE_API_KEY is unset", () => {
    const calls = recordedCalls(undefined);
    expect(calls).toEqual([BASE_ARGV]);
    // Redundant with the equality above, but states the contract explicitly.
    expect(calls.join("\n")).not.toContain("voyage");
    expect(calls.join("\n")).not.toContain("--embedding-model");
    expect(calls.join("\n")).not.toContain("--embedding-dimensions");
  });

  it("treats empty-string VOYAGE_API_KEY the same as unset (no false positive)", () => {
    expect(recordedCalls("")).toEqual([BASE_ARGV]);
  });
});
|
||||
|
||||
describe("template alignment: the .tmpl actually contains the voyage gate", () => {
  // Belt-and-suspenders: if someone edits the template and drops the
  // VOYAGE_API_KEY conditional without updating the test above, this catches
  // it. The shell snippet under test must literally appear in the .tmpl.
  const TEMPLATE_PATH = join(import.meta.dir, "..", "setup-gbrain", "SKILL.md.tmpl");
  const tmpl = readFileSync(TEMPLATE_PATH, "utf-8");

  // Single source of truth for the gate line, so the presence check and the
  // occurrence count cannot drift apart. Single quotes keep `${...}` literal.
  const GATE_LINE = 'if [ -n "${VOYAGE_API_KEY:-}" ]; then';

  it("setup-gbrain template gates the embedding-model flag on VOYAGE_API_KEY", () => {
    // Should appear at least once (currently 3 init sites use the same gate).
    expect(tmpl).toContain(GATE_LINE);
    expect(tmpl).toContain("--embedding-model voyage:voyage-code-3");
    expect(tmpl).toContain("--embedding-dimensions 1024");
  });

  it("setup-gbrain template uses the conditional gate at all 3 PGLite init sites", () => {
    // Count the gate occurrences. If a future edit adds/removes a PGLite
    // init site, update this expectation deliberately.
    // Splitting on the literal gives a plain number (0 when absent) instead
    // of `undefined` from a failed regex match.
    const gateCount = tmpl.split(GATE_LINE).length - 1;
    expect(gateCount).toBe(3);
  });
});
|
||||
@@ -0,0 +1,328 @@
|
||||
/**
 * Real integration: gbrain PGLite + voyage-code-3 end-to-end.
 *
 * Inits a sandboxed PGLite engine with voyage-code-3 embeddings, registers a
 * tiny code fixture as a source, syncs it (which triggers Voyage embedding
 * generation), and reads the results back via `sources list` and `code-def`.
 * The whole point is to catch the failure modes that hit us in real life:
 *
 *   - dimension mismatch between the configured embedding column and the
 *     model's actual output dim (the 1280-vs-1536 trap that gbrain doctor
 *     surfaces but `gbrain init` silently sets up)
 *   - voyage-code-3 unavailable via gbrain's openai-compat adapter
 *   - sync completes but embedding generation silently fails (0 chunks)
 *
 * We intentionally do NOT call `gbrain query` here — it produces correct
 * output but doesn't exit cleanly on a fresh PGLite (~2 min hang after
 * results print). The smoking-gun assertion for "embeddings worked" is the
 * "N pages embedded" line from sync output: if that's >= 1, voyage-code-3
 * returned 1024-dim vectors and gbrain persisted them. Symbol-aware
 * functionality is covered separately by the code-def test.
 *
 * Skips when:
 *   - `gbrain` is not on PATH (dev machine without it installed)
 *   - VOYAGE_API_KEY is unset (the test makes real Voyage API calls)
 *
 * Cost: ~$0.001 per run. The fixture is 3 tiny files, ~500 tokens total.
 * Not gated on EVALS=1 because it's not an LLM eval — it's a deterministic
 * integration test of the embedding pipeline. Always runs when the env
 * supports it.
 *
 * Runtime: ~30-60s (gbrain init schema migrations + sync + Voyage round-trip).
 * Long enough that `bun test` runs it serially, with per-test timeouts of
 * 120s (init) and 300s (sync, code-def).
 */
|
||||
|
||||
import { describe, test, expect } from "bun:test";
|
||||
import {
|
||||
mkdtempSync,
|
||||
mkdirSync,
|
||||
writeFileSync,
|
||||
rmSync,
|
||||
existsSync,
|
||||
} from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { spawnSync } from "child_process";
|
||||
|
||||
// Locate gbrain on PATH. When `which` itself cannot be spawned, spawnSync
// reports `stdout: null`, so guard before trimming; otherwise the whole file
// would crash at import time instead of skipping cleanly.
const whichGbrain = spawnSync("which", ["gbrain"], { encoding: "utf-8" });
const gbrainPath = whichGbrain.status === 0 ? (whichGbrain.stdout ?? "").trim() : "";
const gbrainAvailable = gbrainPath.length > 0;

// A whitespace-only key counts as "not set".
const voyageKey = process.env.VOYAGE_API_KEY?.trim() ?? "";
const voyageKeyPresent = voyageKey.length > 0;

// The integration suite needs BOTH a real gbrain binary and a real API key.
const shouldRun = gbrainAvailable && voyageKeyPresent;
const skipReason = !gbrainAvailable
  ? "gbrain not on PATH"
  : !voyageKeyPresent
    ? "VOYAGE_API_KEY not set (real Voyage API calls required)"
    : "";

if (!shouldRun) {
  console.log(`[gbrain-sync-voyage-code-3-integration] SKIP: ${skipReason}`);
}
|
||||
|
||||
/** Handle to a throwaway directory holding a gbrain home and a fixture repo. */
interface SandboxEnv {
  /** Root temporary directory; also used as the working directory for gbrain. */
  root: string;
  /** Value exported as `GBRAIN_HOME` when running gbrain. */
  gbrainHome: string;
  /** Git repository containing the small code fixture. */
  fixtureDir: string;
  /** Recursively deletes `root`. */
  cleanup: () => void;
}
|
||||
|
||||
/**
 * Creates a temporary sandbox containing a three-file fixture committed to a
 * fresh git repository.
 *
 * @returns sandbox paths plus a `cleanup` that removes the whole directory.
 * @throws Error when any git step fails; the temporary directory is removed
 *   before the error propagates so failed setups do not leak it.
 */
function makeSandbox(): SandboxEnv {
  const root = mkdtempSync(join(tmpdir(), "gbrain-voyage-int-"));
  const cleanup = (): void => rmSync(root, { recursive: true, force: true });

  try {
    // GBRAIN_HOME points at the PARENT of .gbrain (per gbrain's configDir());
    // setting GBRAIN_HOME=/x means gbrain looks at /x/.gbrain/.
    const gbrainHome = root;
    const fixtureDir = join(root, "fixture-repo");
    mkdirSync(fixtureDir, { recursive: true });

    // Tiny realistic fixture: three files exercising different file types so
    // gbrain's code stage has something to extract symbols + embeddings from.
    const fixtureFiles: Record<string, string> = {
      "math.ts": `export function fibonacci(n: number): number {
  if (n <= 1) return n;
  return fibonacci(n - 1) + fibonacci(n - 2);
}

export function isPrime(n: number): boolean {
  if (n < 2) return false;
  for (let i = 2; i * i <= n; i++) {
    if (n % i === 0) return false;
  }
  return true;
}
`,
      "queue.ts": `export class JobQueue<T> {
  private items: T[] = [];
  enqueue(item: T): void { this.items.push(item); }
  dequeue(): T | undefined { return this.items.shift(); }
  size(): number { return this.items.length; }
}
`,
      "README.md": `# Fixture repo

Sample code for testing the voyage-code-3 embedding pipeline.
The math module exposes fibonacci and primality helpers.
The queue module is a simple FIFO job queue.
`,
    };
    for (const [name, contents] of Object.entries(fixtureFiles)) {
      writeFileSync(join(fixtureDir, name), contents);
    }

    // Make it a git repo because gbrain's code-sync strategy expects one.
    // Every step is checked: a silently failed commit would leave an empty
    // repo and surface much later as a confusing sync failure.
    const git = (step: string, args: string[]): void => {
      const run = spawnSync("git", args, { cwd: fixtureDir, encoding: "utf-8" });
      if (run.error) {
        throw new Error(`git ${step} failed: ${run.error.message}`);
      }
      if (run.status !== 0) {
        throw new Error(`git ${step} failed: ${run.stderr}`);
      }
    };
    git("init", ["init", "-q"]);
    git("config user.email", ["config", "user.email", "test@example.invalid"]);
    git("config user.name", ["config", "user.name", "test"]);
    git("add", ["add", "."]);
    // Disable signing so a developer's global commit.gpgsign cannot make the
    // fixture commit prompt or fail.
    git("commit", ["-c", "commit.gpgsign=false", "commit", "-q", "-m", "fixture"]);

    return { root, gbrainHome, fixtureDir, cleanup };
  } catch (err) {
    cleanup();
    throw err;
  }
}
|
||||
|
||||
/**
 * Builds the environment for a sandboxed gbrain process: the parent
 * environment with `GBRAIN_HOME` redirected into the sandbox and
 * `VOYAGE_API_KEY` set to the trimmed module-level key.
 *
 * @param s sandbox whose `gbrainHome` becomes `GBRAIN_HOME`.
 * @returns a fresh environment object; `process.env` itself is not mutated.
 */
function gbrainEnv(s: SandboxEnv): NodeJS.ProcessEnv {
  return {
    ...process.env,
    GBRAIN_HOME: s.gbrainHome,
    VOYAGE_API_KEY: voyageKey,
  };
}
|
||||
|
||||
/**
 * Runs `gbrain` synchronously inside the sandbox.
 *
 * @param s sandbox providing the working directory and environment.
 * @param args argv passed to gbrain (without the program name).
 * @param opts.timeout kill timeout in milliseconds; defaults to 120 000.
 * @returns the raw `spawnSync` result with string (utf-8) stdout/stderr.
 */
function runGbrain(s: SandboxEnv, args: string[], opts: { timeout?: number } = {}) {
  // cwd MUST be the sandbox root, not the test's parent CWD. If gbrain runs
  // from inside the gstack worktree, it picks up the worktree's
  // `.gbrain-source` pin and tries to sync that source too — which won't
  // exist in the sandbox PGLite, and the resulting "not found" exits 1.
  const timeout = opts.timeout ?? 120_000;
  return spawnSync("gbrain", args, {
    encoding: "utf-8",
    env: gbrainEnv(s),
    cwd: s.root,
    timeout,
  });
}
|
||||
|
||||
describe.skipIf(!shouldRun)(
  "gbrain PGLite + voyage-code-3 end-to-end (real Voyage API)",
  () => {
    // Shared argv so the three tests cannot drift apart.
    const INIT_ARGS = [
      "init",
      "--pglite",
      "--json",
      "--embedding-model",
      "voyage:voyage-code-3",
      "--embedding-dimensions",
      "1024",
    ];
    // --skip-failed: a single oversized file (which can happen in real
    // repos) must not block the assertion.
    const SYNC_ARGS = [
      "sync",
      "--source",
      "fixture-code",
      "--strategy",
      "code",
      "--skip-failed",
    ];
    const addSourceArgs = (fixtureDir: string): string[] => [
      "sources",
      "add",
      "fixture-code",
      "--path",
      fixtureDir,
    ];

    type GbrainResult = ReturnType<typeof runGbrain>;

    /** stdout and stderr concatenated; gbrain writes status lines to either. */
    const combinedOutput = (r: GbrainResult): string => (r.stdout || "") + (r.stderr || "");

    /** Asserts exit code 0, dumping both streams first when it is not. */
    const expectSuccess = (step: string, r: GbrainResult): void => {
      if (r.status !== 0) {
        console.error(`[${step} FAILED exit=${r.status}]`);
        console.error(`STDOUT:\n${r.stdout}`);
        console.error(`STDERR:\n${r.stderr}`);
      }
      expect(r.status).toBe(0);
    };

    test(
      "init with voyage-code-3 produces a 1024-dim-aligned PGLite config",
      () => {
        const s = makeSandbox();
        try {
          const init = runGbrain(s, INIT_ARGS);
          expectSuccess("init", init);
          // init prints JSON status line at the end; just sniff for success.
          const out = combinedOutput(init);
          expect(out).toContain('"status":"success"');
          expect(out).toContain('"engine":"pglite"');

          // doctor must agree the column width matches the live probe dim.
          // Doctor exits non-zero on error rows; warnings are OK, so its exit
          // status is deliberately not asserted. The critical assertion is
          // no dimension mismatch.
          const dout = combinedOutput(runGbrain(s, ["doctor"]));
          expect(dout).not.toContain("DB dimension mismatch");
          // Should explicitly mention voyage-code-3 as the live provider.
          expect(dout).toMatch(/voyage-code-3/);
          // Width consistency check should be green for 1024d.
          expect(dout).toMatch(/Schema width \(1024d\)/);
        } finally {
          s.cleanup();
        }
      },
      120_000,
    );

    test(
      "sync --strategy code generates Voyage embeddings and registers pages + chunks",
      () => {
        const s = makeSandbox();
        try {
          // 1. init voyage-code-3 PGLite
          expectSuccess("init", runGbrain(s, INIT_ARGS));

          // 2. register the fixture as a code source
          expectSuccess("sources add", runGbrain(s, addSourceArgs(s.fixtureDir)));

          // 3. sync with code strategy — this is where Voyage embeddings get
          //    generated.
          const sync = runGbrain(s, SYNC_ARGS, { timeout: 180_000 });
          expectSuccess("sync", sync);
          const sout = combinedOutput(sync);
          // The fixture has 3 files; gbrain should import at least the 2 .ts
          // files (README.md may or may not be picked up by --strategy code
          // depending on gbrain's file-type heuristics).
          expect(sout).toMatch(/imported=[1-9]/);
          // The "pages embedded" line is the smoking gun: if it's 0,
          // embedding generation silently failed (voyage adapter broken,
          // dimension mismatch, etc). Anything > 0 means voyage-code-3
          // returned 1024-dim vectors and gbrain wrote them.
          expect(sout).toMatch(/[1-9]\d* pages embedded/);

          // 4. verify the source has pages and chunks
          const list = runGbrain(s, ["sources", "list", "--json"]);
          expectSuccess("sources list", list);
          const listed = JSON.parse(list.stdout) as {
            sources: Array<{ id: string; page_count: number }>;
          };
          const fixture = listed.sources.find((x) => x.id === "fixture-code");
          expect(fixture).toBeDefined();
          expect(fixture?.page_count).toBeGreaterThanOrEqual(2);
        } finally {
          s.cleanup();
        }
      },
      300_000,
    );

    test(
      "code-def finds symbols defined in the embedded fixture",
      () => {
        const s = makeSandbox();
        try {
          // Check every setup step: otherwise a failed init/add/sync only
          // shows up below as an unrelated JSON.parse or lookup error.
          expectSuccess("init", runGbrain(s, INIT_ARGS));
          expectSuccess("sources add", runGbrain(s, addSourceArgs(s.fixtureDir)));
          expectSuccess("sync", runGbrain(s, SYNC_ARGS, { timeout: 180_000 }));

          // code-def is the symbol-aware path. It doesn't strictly need
          // embeddings (symbols are extracted by tree-sitter), but the JSON
          // shape it returns is the contract gstack's CLAUDE.md guidance
          // points the agent at. Verify it works against our PGLite + Voyage
          // setup.
          const result = runGbrain(s, ["code-def", "fibonacci"]);
          expectSuccess("code-def", result);
          const parsed = JSON.parse(result.stdout) as {
            symbol: string;
            count: number;
            results: Array<{ file: string; symbol_type: string }>;
          };
          expect(parsed.symbol).toBe("fibonacci");
          expect(parsed.count).toBeGreaterThanOrEqual(1);
          // Guard the index so an empty result list fails as an assertion,
          // not as a TypeError.
          expect(parsed.results.length).toBeGreaterThanOrEqual(1);
          expect(parsed.results[0]?.file).toContain("math.ts");
        } finally {
          s.cleanup();
        }
      },
      300_000,
    );
  },
);
|
||||
|
||||
// Lightweight always-on guard: even without the integration test running, we
// can still assert that the test file's `describe.skipIf` gate is correctly
// formed. This catches a future edit that accidentally inverts the gate.
test("integration test gate uses the correct skip predicate", () => {
  // shouldRun must be the boolean AND of the two pre-checks. If a refactor
  // makes it true when either piece is missing, the suite above would attempt
  // real API calls without a key — undefined behavior.
  expect(shouldRun).toBe(gbrainAvailable && voyageKeyPresent);
  // A skip reason must be present exactly when we skip: non-empty when
  // skipping, empty when the suite runs.
  expect(skipReason.length > 0).toBe(!shouldRun);
});
|
||||
Reference in New Issue
Block a user