mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
Merge remote-tracking branch 'origin/main' into garrytan/pty-plan-mode-e2e
# Conflicts: # CHANGELOG.md # VERSION # package.json
This commit is contained in:
@@ -0,0 +1,440 @@
|
||||
/**
|
||||
* gstack-gbrain-source-wireup — unit tests with mocked gbrain CLI.
|
||||
*
|
||||
* The helper registers the gstack brain repo as a gbrain federated source
|
||||
* via `git worktree`, runs an initial sync, and exposes --uninstall + --probe.
|
||||
*
|
||||
* Strategy: put a fake `gbrain` binary on PATH that records every call into
|
||||
* a log file and reads/writes its "registered sources" state from a JSON
|
||||
* file in the test's tmp dir. The helper sees a consistent gbrain-CLI surface
|
||||
* but no real database, no real gbrain.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
// Repo root (tests sit one directory below it) and the helper binary under test.
const ROOT = path.resolve(import.meta.dir, '..');
const BIN_DIR = path.join(ROOT, 'bin');
const WIREUP_BIN = path.join(BIN_DIR, 'gstack-gbrain-source-wireup');

// Per-test scratch state — created fresh in beforeEach, removed in afterEach.
let tmpHome: string;         // fake $HOME passed to the helper
let gstackHome: string;      // git repo the helper registers as a gbrain source
let worktreeDir: string;     // where the helper is expected to create its worktree
let fakeBinDir: string;      // prepended to PATH; holds the fake `gbrain` stub
let gbrainCallLog: string;   // fake gbrain appends one line per invocation here
let gbrainStateFile: string; // fake gbrain's "registered sources" JSON state
|
||||
|
||||
function makeFakeGbrain(opts: {
|
||||
version?: string | null; // null = "binary missing" (don't write the file)
|
||||
syncFails?: boolean;
|
||||
}) {
|
||||
const version = opts.version ?? '0.18.2';
|
||||
if (version === null) return; // simulate missing binary by NOT writing one
|
||||
const syncFails = opts.syncFails ?? false;
|
||||
|
||||
// Stub gbrain reads/writes state from a JSON file. Fields:
|
||||
// sources: [{id, local_path, federated}]
|
||||
fs.writeFileSync(gbrainStateFile, JSON.stringify({ sources: [] }, null, 2));
|
||||
|
||||
const script = `#!/bin/bash
|
||||
LOG="${gbrainCallLog}"
|
||||
STATE="${gbrainStateFile}"
|
||||
# Record the call AND any GBRAIN_DATABASE_URL that the parent passed via env.
|
||||
# Format: "gbrain <args> [GBRAIN_DATABASE_URL=<url>]" so tests can assert
|
||||
# the wireup helper exported the locked URL into our env.
|
||||
LINE="gbrain $@"
|
||||
[ -n "\${GBRAIN_DATABASE_URL:-}" ] && LINE="\$LINE [GBRAIN_DATABASE_URL=\$GBRAIN_DATABASE_URL]"
|
||||
echo "\$LINE" >> "$LOG"
|
||||
|
||||
# --version
|
||||
if [ "$1" = "--version" ]; then
|
||||
echo "gbrain ${version}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# sources list --json → emits state
|
||||
if [ "$1" = "sources" ] && [ "$2" = "list" ]; then
|
||||
cat "$STATE"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# sources add <id> --path <p> --federated → adds entry
|
||||
if [ "$1" = "sources" ] && [ "$2" = "add" ]; then
|
||||
shift 2
|
||||
ID="$1"; shift
|
||||
PATH_VAL=""
|
||||
FED="false"
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--path) PATH_VAL="$2"; shift 2 ;;
|
||||
--federated) FED="true"; shift ;;
|
||||
*) shift ;;
|
||||
esac
|
||||
done
|
||||
python3 -c "
|
||||
import json, sys
|
||||
state = json.load(open('$STATE'))
|
||||
state['sources'].append({'id': '$ID', 'local_path': '$PATH_VAL', 'federated': '$FED' == 'true'})
|
||||
json.dump(state, open('$STATE','w'), indent=2)
|
||||
" || exit 1
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# sources remove <id> --yes → drops entry
|
||||
if [ "$1" = "sources" ] && [ "$2" = "remove" ]; then
|
||||
shift 2
|
||||
ID="$1"
|
||||
python3 -c "
|
||||
import json
|
||||
state = json.load(open('$STATE'))
|
||||
state['sources'] = [s for s in state['sources'] if s['id'] != '$ID']
|
||||
json.dump(state, open('$STATE','w'), indent=2)
|
||||
"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# sync --repo <p> → records, optionally fails
|
||||
if [ "$1" = "sync" ]; then
|
||||
${syncFails ? 'echo "sync failed: connection error" >&2; exit 1' : 'echo "1 page imported"; exit 0'}
|
||||
fi
|
||||
|
||||
echo "fake gbrain: unhandled subcommand: $@" >&2
|
||||
exit 99
|
||||
`;
|
||||
const gbrainPath = path.join(fakeBinDir, 'gbrain');
|
||||
fs.writeFileSync(gbrainPath, script, { mode: 0o755 });
|
||||
}
|
||||
|
||||
function run(
|
||||
argv: string[],
|
||||
opts: { env?: Record<string, string> } = {}
|
||||
) {
|
||||
const env = {
|
||||
PATH: `${fakeBinDir}:${process.env.PATH || '/usr/bin:/bin:/opt/homebrew/bin'}`,
|
||||
HOME: tmpHome,
|
||||
GSTACK_HOME: gstackHome,
|
||||
GSTACK_BRAIN_WORKTREE: worktreeDir,
|
||||
GSTACK_BRAIN_NO_SYNC: '0',
|
||||
...(opts.env || {}),
|
||||
};
|
||||
return spawnSync(WIREUP_BIN, argv, {
|
||||
env,
|
||||
encoding: 'utf-8',
|
||||
cwd: ROOT,
|
||||
});
|
||||
}
|
||||
|
||||
function readState(): { sources: Array<{ id: string; local_path: string; federated: boolean }> } {
|
||||
if (!fs.existsSync(gbrainStateFile)) return { sources: [] };
|
||||
return JSON.parse(fs.readFileSync(gbrainStateFile, 'utf-8'));
|
||||
}
|
||||
|
||||
function gbrainCalls(): string[] {
|
||||
if (!fs.existsSync(gbrainCallLog)) return [];
|
||||
return fs.readFileSync(gbrainCallLog, 'utf-8')
|
||||
.split('\n')
|
||||
.filter((l) => l.trim());
|
||||
}
|
||||
|
||||
function setupGstackRepo(remoteUrl: string) {
|
||||
// Real git repo at gstackHome with at least one commit + an origin remote.
|
||||
fs.mkdirSync(gstackHome, { recursive: true });
|
||||
spawnSync('git', ['-C', gstackHome, 'init', '-q', '-b', 'main'], { stdio: 'pipe' });
|
||||
spawnSync('git', ['-C', gstackHome, 'config', 'user.email', 'test@example.com'], { stdio: 'pipe' });
|
||||
spawnSync('git', ['-C', gstackHome, 'config', 'user.name', 'test'], { stdio: 'pipe' });
|
||||
fs.writeFileSync(path.join(gstackHome, '.brain-allowlist'), '# allowlist\n');
|
||||
spawnSync('git', ['-C', gstackHome, 'add', '.'], { stdio: 'pipe' });
|
||||
spawnSync('git', ['-C', gstackHome, 'commit', '-q', '-m', 'init'], { stdio: 'pipe' });
|
||||
spawnSync('git', ['-C', gstackHome, 'remote', 'add', 'origin', remoteUrl], { stdio: 'pipe' });
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-wireup-test-'));
|
||||
gstackHome = path.join(tmpHome, '.gstack');
|
||||
worktreeDir = path.join(tmpHome, '.gstack-brain-worktree');
|
||||
fakeBinDir = path.join(tmpHome, 'fake-bin');
|
||||
fs.mkdirSync(fakeBinDir, { recursive: true });
|
||||
gbrainCallLog = path.join(tmpHome, 'gbrain-calls.log');
|
||||
gbrainStateFile = path.join(tmpHome, 'gbrain-state.json');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
try {
|
||||
fs.rmSync(tmpHome, { recursive: true, force: true });
|
||||
} catch {}
|
||||
});
|
||||
|
||||
// Core wireup behavior: registration, idempotence, drift recovery, version
// gating, source-id derivation, read-only probe, and sync-failure handling.
describe('gstack-gbrain-source-wireup — wireup mode', () => {
  test('fresh state: registers source + creates worktree + syncs', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    const r = run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    expect(r.status).toBe(0);
    expect(fs.existsSync(worktreeDir)).toBe(true);
    // Exactly one federated source, pointing at the worktree, id from origin URL.
    const state = readState();
    expect(state.sources).toHaveLength(1);
    expect(state.sources[0].id).toBe('gstack-brain-user');
    expect(state.sources[0].local_path).toBe(worktreeDir);
    expect(state.sources[0].federated).toBe(true);
  });

  test('idempotent re-run after success: no new sources add call', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    const callsAfterFirst = gbrainCalls().filter((c) => c.startsWith('gbrain sources add')).length;
    expect(callsAfterFirst).toBe(1);
    // Second run against the same state must not re-register.
    run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    const callsAfterSecond = gbrainCalls().filter((c) => c.startsWith('gbrain sources add')).length;
    expect(callsAfterSecond).toBe(1); // no new add
  });

  test('drift recovery: existing source with different path triggers remove + add', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    // Pre-seed the fake gbrain state with a source at the wrong path
    fs.writeFileSync(
      gbrainStateFile,
      JSON.stringify({
        sources: [{ id: 'gstack-brain-user', local_path: '/old/stale/path', federated: true }],
      })
    );
    const r = run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    expect(r.status).toBe(0);
    // Helper must re-point the source: remove the stale entry, add the real one.
    const calls = gbrainCalls();
    expect(calls.some((c) => c.startsWith('gbrain sources remove gstack-brain-user'))).toBe(true);
    expect(calls.some((c) => c.includes(`gbrain sources add gstack-brain-user --path ${worktreeDir}`))).toBe(true);
    const state = readState();
    expect(state.sources[0].local_path).toBe(worktreeDir);
  });

  test('--strict + gbrain too old: exits 2', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    // 0.17.0 is below the helper's minimum (0.18.0 per the asserted message).
    makeFakeGbrain({ version: '0.17.0' });
    const r = run(['--strict']);
    expect(r.status).toBe(2);
    expect(r.stderr).toContain('< 0.18.0');
  });

  test('non-strict + gbrain too old: warn + exit 0', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({ version: '0.17.0' });
    const r = run([]);
    expect(r.status).toBe(0);
    expect(r.stderr).toContain('benign skip');
  });

  test('--strict + gbrain missing on PATH: exits 2', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    // Don't make a fake gbrain — fakeBinDir is empty. Keep system dirs on PATH
    // so basic commands (git, awk, sed, etc.) work; only `gbrain` is absent.
    const r = run(['--strict'], {
      env: { PATH: `${fakeBinDir}:/usr/bin:/bin:/opt/homebrew/bin` },
    });
    expect(r.status).toBe(2);
  });

  test('source-id derived from origin URL', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-alice.git');
    makeFakeGbrain({});
    const r = run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    expect(r.status).toBe(0);
    // Repo basename (minus .git) becomes the source id.
    expect(readState().sources[0].id).toBe('gstack-brain-alice');
  });

  test('source-id fallback to ~/.gstack-brain-remote.txt when .git is gone', () => {
    // No git repo at gstackHome; just the remote-file
    fs.mkdirSync(tmpHome, { recursive: true });
    fs.writeFileSync(
      path.join(tmpHome, '.gstack-brain-remote.txt'),
      'git@github.com:user/gstack-brain-bob.git\n'
    );
    makeFakeGbrain({});
    // No --strict: helper should benign-skip because .gstack/.git is missing
    const r = run([]);
    // ensure_worktree returns 2 → benign skip, exit 0
    expect(r.status).toBe(0);
  });

  test('source-id from --source-id flag overrides everything', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-different.git');
    makeFakeGbrain({});
    run(['--source-id', 'custom-id'], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    const state = readState();
    expect(state.sources[0].id).toBe('custom-id');
  });

  test('--probe: read-only, prints state without mutating', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    const r = run(['--probe']);
    expect(r.status).toBe(0);
    // Probe reports derived id, worktree, gbrain health, and source status.
    expect(r.stdout).toContain('source_id=gstack-brain-user');
    expect(r.stdout).toContain('worktree=');
    expect(r.stdout).toContain('gbrain=ok');
    expect(r.stdout).toContain('source_status=absent');
    // Probe should NOT call sources add / sync
    const calls = gbrainCalls();
    expect(calls.some((c) => c.startsWith('gbrain sources add'))).toBe(false);
    expect(calls.some((c) => c.startsWith('gbrain sync'))).toBe(false);
  });

  test('gbrain sync failure: exits 1 with stderr', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({ syncFails: true });
    const r = run([]);
    expect(r.status).toBe(1);
    expect(r.stderr).toContain('sync failed');
  });
});
|
||||
|
||||
// Precedence of the locked database URL: --database-url flag > env
// GBRAIN_DATABASE_URL > $HOME/.gbrain/config.json. The fake gbrain appends
// "[GBRAIN_DATABASE_URL=…]" to each logged call, so these tests assert on
// the log lines rather than on gbrain state.
describe('gstack-gbrain-source-wireup — --database-url lock (defends against external config rewrites)', () => {
  test('--database-url flag is exported as GBRAIN_DATABASE_URL to child gbrain calls', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    const TARGET = 'postgresql://postgres.abc:pw@aws.pooler.supabase.com:5432/postgres';
    const r = run(['--database-url', TARGET], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    expect(r.status).toBe(0);
    const calls = gbrainCalls();
    // every gbrain invocation should carry the locked URL
    const writingCalls = calls.filter((c) => c.includes('sources') || c.includes('sync'));
    expect(writingCalls.length).toBeGreaterThan(0);
    for (const c of writingCalls) {
      expect(c).toContain(`[GBRAIN_DATABASE_URL=${TARGET}]`);
    }
  });

  test('falls back to ~/.gbrain/config.json database_url when no flag and no env', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    const FILE_URL = 'postgresql://postgres.xyz:pw@aws.pooler.supabase.com:5432/postgres';
    fs.mkdirSync(path.join(tmpHome, '.gbrain'), { recursive: true });
    fs.writeFileSync(
      path.join(tmpHome, '.gbrain', 'config.json'),
      JSON.stringify({ engine: 'postgres', database_url: FILE_URL })
    );
    // Important: don't pass GBRAIN_DATABASE_URL or DATABASE_URL in env; helper
    // should read from $HOME/.gbrain/config.json (HOME is tmpHome here).
    const r = run([], {
      env: {
        GSTACK_BRAIN_NO_SYNC: '1',
        GBRAIN_DATABASE_URL: '',
        DATABASE_URL: '',
      },
    });
    expect(r.status).toBe(0);
    const calls = gbrainCalls();
    const writingCalls = calls.filter((c) => c.includes('sources add'));
    expect(writingCalls.length).toBe(1);
    expect(writingCalls[0]).toContain(`[GBRAIN_DATABASE_URL=${FILE_URL}]`);
  });

  test('--database-url overrides env GBRAIN_DATABASE_URL and config.json', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    // Three distinct URLs so a wrong precedence is unambiguous in the log.
    const FLAG_URL = 'postgresql://postgres.flag:pw@a.b:5432/postgres';
    const ENV_URL = 'postgresql://postgres.env:pw@x.y:5432/postgres';
    const FILE_URL = 'postgresql://postgres.file:pw@p.q:5432/postgres';
    fs.mkdirSync(path.join(tmpHome, '.gbrain'), { recursive: true });
    fs.writeFileSync(
      path.join(tmpHome, '.gbrain', 'config.json'),
      JSON.stringify({ engine: 'postgres', database_url: FILE_URL })
    );
    const r = run(['--database-url', FLAG_URL], {
      env: {
        GSTACK_BRAIN_NO_SYNC: '1',
        GBRAIN_DATABASE_URL: ENV_URL,
      },
    });
    expect(r.status).toBe(0);
    const calls = gbrainCalls();
    const writingCalls = calls.filter((c) => c.includes('sources add'));
    expect(writingCalls.length).toBe(1);
    expect(writingCalls[0]).toContain(`[GBRAIN_DATABASE_URL=${FLAG_URL}]`);
    expect(writingCalls[0]).not.toContain(ENV_URL);
    expect(writingCalls[0]).not.toContain(FILE_URL);
  });
});
|
||||
|
||||
// --uninstall: removes the registered source and the worktree. Best-effort
// with respect to gbrain (exit 0 even when the binary is gone), but exit 3
// when no source-id can be derived at all.
describe('gstack-gbrain-source-wireup — uninstall mode', () => {
  test('after wireup: removes source + worktree', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    // Sanity: wireup actually registered + created the worktree first.
    expect(readState().sources).toHaveLength(1);
    expect(fs.existsSync(worktreeDir)).toBe(true);
    const r = run(['--uninstall']);
    expect(r.status).toBe(0);
    expect(readState().sources).toHaveLength(0);
    expect(fs.existsSync(worktreeDir)).toBe(false);
  });

  test('with no prior state: exits 3 (cannot derive id)', () => {
    // No git repo, no remote file. --uninstall must fail with code 3.
    fs.mkdirSync(tmpHome, { recursive: true });
    makeFakeGbrain({});
    const r = run(['--uninstall']);
    expect(r.status).toBe(3);
  });

  test('--uninstall when gbrain is missing: exits 0 (best-effort), still removes worktree', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    // First wireup with fake gbrain to create the worktree + register source
    makeFakeGbrain({});
    run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    expect(fs.existsSync(worktreeDir)).toBe(true);
    // Now remove the fake gbrain so uninstall sees gbrain missing
    fs.rmSync(path.join(fakeBinDir, 'gbrain'), { force: true });
    const r = run(['--uninstall'], {
      env: { PATH: `${fakeBinDir}:/usr/bin:/bin:/opt/homebrew/bin` },
    });
    expect(r.status).toBe(0); // best-effort, never fails on gbrain absence
    expect(fs.existsSync(worktreeDir)).toBe(false); // worktree still cleaned up
  });
});
|
||||
|
||||
// Defensive behaviors around the worktree itself: --no-pull freezes HEAD,
// and a stray non-git directory at the worktree path is replaced.
describe('gstack-gbrain-source-wireup — defensive paths', () => {
  test('--no-pull skips HEAD advance on existing worktree', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    // First run to create worktree
    run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    // Make a new commit on parent so worktree HEAD is "behind"
    fs.writeFileSync(path.join(gstackHome, 'newfile.md'), 'new');
    spawnSync('git', ['-C', gstackHome, 'add', '.'], { stdio: 'pipe' });
    spawnSync('git', ['-C', gstackHome, 'commit', '-q', '-m', 'second commit'], { stdio: 'pipe' });
    const parentHeadAfter = spawnSync('git', ['-C', gstackHome, 'rev-parse', 'HEAD'], {
      encoding: 'utf-8',
    }).stdout.trim();
    const worktreeHeadBefore = spawnSync('git', ['-C', worktreeDir, 'rev-parse', 'HEAD'], {
      encoding: 'utf-8',
    }).stdout.trim();
    expect(parentHeadAfter).not.toBe(worktreeHeadBefore); // sanity: parent advanced
    // --no-pull should leave worktree HEAD where it was
    const r = run(['--no-pull'], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    expect(r.status).toBe(0);
    const worktreeHeadAfter = spawnSync('git', ['-C', worktreeDir, 'rev-parse', 'HEAD'], {
      encoding: 'utf-8',
    }).stdout.trim();
    expect(worktreeHeadAfter).toBe(worktreeHeadBefore);
    expect(worktreeHeadAfter).not.toBe(parentHeadAfter);
  });

  test('stray non-git directory at worktree path is cleaned up + worktree created', () => {
    setupGstackRepo('git@github.com:user/gstack-brain-user.git');
    makeFakeGbrain({});
    // Plant a stray non-git directory at the worktree path
    fs.mkdirSync(worktreeDir, { recursive: true });
    fs.writeFileSync(path.join(worktreeDir, 'unrelated.txt'), 'not a worktree');
    expect(fs.existsSync(path.join(worktreeDir, 'unrelated.txt'))).toBe(true);
    expect(fs.existsSync(path.join(worktreeDir, '.git'))).toBe(false);
    // Helper should remove the stray dir + create a real worktree
    const r = run([], { env: { GSTACK_BRAIN_NO_SYNC: '1' } });
    expect(r.status).toBe(0);
    expect(fs.existsSync(path.join(worktreeDir, '.git'))).toBe(true); // real worktree
    expect(fs.existsSync(path.join(worktreeDir, 'unrelated.txt'))).toBe(false); // stray gone
  });
});
|
||||
@@ -0,0 +1,151 @@
|
||||
/**
|
||||
* gstack-upgrade/migrations/v1.17.0.0.sh — migration script unit tests.
|
||||
*
|
||||
* The migration runs on /gstack-upgrade for users with brain-sync configured but
|
||||
* never wired up to gbrain. It has 4 skip conditions and one happy path.
|
||||
*
|
||||
* Strategy: stub gstack-config and gstack-gbrain-source-wireup binaries on PATH
|
||||
* so each skip condition can be triggered independently. The migration script
|
||||
* itself is plain bash — we exercise it directly.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
// Repo root and the migration script under test (plain bash, run directly).
const ROOT = path.resolve(import.meta.dir, '..');
const MIGRATION = path.join(ROOT, 'gstack-upgrade', 'migrations', 'v1.17.0.0.sh');

// Per-test scratch state — created in beforeEach, removed in afterEach.
let tmpHome: string;   // fake $HOME; stubs live under .claude/skills/gstack/bin
let fakeBinDir: string; // created per test (currently unreferenced by stubs)
let stubLog: string;    // both stub binaries append their invocations here
|
||||
|
||||
function makeFakeStubs(opts: {
|
||||
configValue?: string; // value gstack-config returns for gbrain_sync_mode
|
||||
configMissing?: boolean; // gstack-config binary itself missing (test edge)
|
||||
wireupMissing?: boolean; // wireup binary missing
|
||||
wireupExitCode?: number;
|
||||
}) {
|
||||
const skillsBin = path.join(tmpHome, '.claude', 'skills', 'gstack', 'bin');
|
||||
fs.mkdirSync(skillsBin, { recursive: true });
|
||||
|
||||
if (!opts.configMissing) {
|
||||
const cfg = `#!/bin/bash
|
||||
echo "gstack-config $@" >> "${stubLog}"
|
||||
[ "$1" = "get" ] && [ "$2" = "gbrain_sync_mode" ] && echo "${opts.configValue ?? ''}"
|
||||
exit 0
|
||||
`;
|
||||
fs.writeFileSync(path.join(skillsBin, 'gstack-config'), cfg, { mode: 0o755 });
|
||||
}
|
||||
|
||||
if (!opts.wireupMissing) {
|
||||
const wu = `#!/bin/bash
|
||||
echo "gstack-gbrain-source-wireup $@" >> "${stubLog}"
|
||||
exit ${opts.wireupExitCode ?? 0}
|
||||
`;
|
||||
fs.writeFileSync(path.join(skillsBin, 'gstack-gbrain-source-wireup'), wu, { mode: 0o755 });
|
||||
}
|
||||
}
|
||||
|
||||
function makeBrainGitRepo() {
|
||||
const gstackHome = path.join(tmpHome, '.gstack');
|
||||
fs.mkdirSync(path.join(gstackHome, '.git'), { recursive: true });
|
||||
}
|
||||
|
||||
function run(opts: { env?: Record<string, string> } = {}) {
|
||||
const env = {
|
||||
PATH: '/usr/bin:/bin:/opt/homebrew/bin',
|
||||
HOME: tmpHome,
|
||||
...(opts.env || {}),
|
||||
};
|
||||
return spawnSync('bash', [MIGRATION], {
|
||||
env,
|
||||
encoding: 'utf-8',
|
||||
cwd: tmpHome,
|
||||
});
|
||||
}
|
||||
|
||||
function stubCalls(): string[] {
|
||||
if (!fs.existsSync(stubLog)) return [];
|
||||
return fs.readFileSync(stubLog, 'utf-8').split('\n').filter((l) => l.trim());
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-migration-test-'));
|
||||
fakeBinDir = path.join(tmpHome, 'fake-bin');
|
||||
fs.mkdirSync(fakeBinDir, { recursive: true });
|
||||
stubLog = path.join(tmpHome, 'stub-calls.log');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
try {
|
||||
fs.rmSync(tmpHome, { recursive: true, force: true });
|
||||
} catch {}
|
||||
});
|
||||
|
||||
// Migration contract: every skip condition and failure mode exits 0 (the
// batch upgrade must never block); only the happy path invokes the helper,
// and always without --strict.
describe('migrations/v1.17.0.0.sh', () => {
  test('HOME unset: prints message + exit 0 (defensive)', () => {
    // Override HOME to empty string. Bash's [ -z "${HOME:-}" ] guard should fire.
    const r = run({ env: { HOME: '' } });
    expect(r.status).toBe(0);
    expect(r.stderr).toContain('HOME is unset or empty');
  });

  test('gbrain_sync_mode = off: exit 0 silently (no helper invoked)', () => {
    makeFakeStubs({ configValue: 'off' });
    const r = run();
    expect(r.status).toBe(0);
    // Helper should not have been invoked
    const calls = stubCalls();
    expect(calls.some((c) => c.startsWith('gstack-gbrain-source-wireup'))).toBe(false);
  });

  test('gbrain_sync_mode unset/empty: exit 0 silently', () => {
    makeFakeStubs({ configValue: '' }); // empty string return
    const r = run();
    expect(r.status).toBe(0);
    const calls = stubCalls();
    expect(calls.some((c) => c.startsWith('gstack-gbrain-source-wireup'))).toBe(false);
  });

  test('no ~/.gstack/.git: exit 0 silently (no brain-sync configured)', () => {
    makeFakeStubs({ configValue: 'full' });
    // Do NOT call makeBrainGitRepo() — no .gstack/.git directory exists
    const r = run();
    expect(r.status).toBe(0);
    const calls = stubCalls();
    expect(calls.some((c) => c.startsWith('gstack-gbrain-source-wireup'))).toBe(false);
  });

  test('helper missing on PATH: prints warning, exit 0 (defensive)', () => {
    makeFakeStubs({ configValue: 'full', wireupMissing: true });
    makeBrainGitRepo();
    const r = run();
    expect(r.status).toBe(0);
    expect(r.stderr).toContain('missing or non-executable');
  });

  test('happy path: invokes the helper', () => {
    makeFakeStubs({ configValue: 'full' });
    makeBrainGitRepo();
    const r = run();
    expect(r.status).toBe(0);
    const calls = stubCalls();
    expect(calls.some((c) => c.startsWith('gstack-gbrain-source-wireup'))).toBe(true);
    // Note: migration invokes WITHOUT --strict (benign-skip semantics for batch upgrade)
    const helperCall = calls.find((c) => c.startsWith('gstack-gbrain-source-wireup'));
    expect(helperCall).not.toContain('--strict');
  });

  test('helper exits non-zero: migration prints retry hint, exit 0 (non-blocking)', () => {
    // The migration uses `|| { echo retry-hint; }` so non-zero helper still
    // exits 0 and prints a retry hint to stderr.
    makeFakeStubs({ configValue: 'full', wireupExitCode: 2 });
    makeBrainGitRepo();
    const r = run();
    expect(r.status).toBe(0); // migration is non-blocking
    expect(r.stderr).toContain('Wireup exited non-zero');
  });
});
|
||||
@@ -251,6 +251,29 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||
// Multi-provider benchmark adapters — live API smoke against real claude/codex/gemini CLIs
|
||||
'benchmark-providers-live': ['bin/gstack-model-benchmark', 'test/helpers/providers/**', 'test/helpers/benchmark-runner.ts', 'test/helpers/pricing.ts'],
|
||||
|
||||
// Browser-skills Phase 2a — /scrape + /skillify (v1.19.0.0). Gate-tier
|
||||
// E2E covers the D1 (provenance guard), D3 (atomic write) contracts plus
|
||||
// the basic loop. Shared deps: both skill templates, the D3 helper, the
|
||||
// Phase 1 runtime, and the bundled hackernews-frontpage reference (the
|
||||
// match-path test relies on it).
|
||||
'scrape-match-path': [
|
||||
'scrape/**', 'browse/src/browser-skills.ts', 'browse/src/browser-skill-commands.ts',
|
||||
'browser-skills/hackernews-frontpage/**',
|
||||
],
|
||||
'scrape-prototype-path': [
|
||||
'scrape/**', 'browse/src/browser-skills.ts', 'browse/src/browser-skill-commands.ts',
|
||||
],
|
||||
'skillify-happy-path': [
|
||||
'skillify/**', 'scrape/**', 'browse/src/browser-skill-write.ts',
|
||||
'browse/src/browser-skills.ts', 'browse/src/browser-skill-commands.ts',
|
||||
],
|
||||
'skillify-provenance-refusal': [
|
||||
'skillify/**', 'browse/src/browser-skill-write.ts',
|
||||
],
|
||||
'skillify-approval-reject': [
|
||||
'skillify/**', 'scrape/**', 'browse/src/browser-skill-write.ts',
|
||||
],
|
||||
|
||||
// Skill routing — journey-stage tests (depend on ALL skill descriptions)
|
||||
'journey-ideation': ['*/SKILL.md.tmpl', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
'journey-plan-eng': ['*/SKILL.md.tmpl', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
|
||||
@@ -496,6 +519,13 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||
// Multi-provider benchmark — periodic (requires external CLIs + auth, paid)
|
||||
'benchmark-providers-live': 'periodic',
|
||||
|
||||
// Browser-skills Phase 2a — gate (D1/D3 contracts must not silently break)
|
||||
'scrape-match-path': 'gate',
|
||||
'scrape-prototype-path': 'gate',
|
||||
'skillify-happy-path': 'gate',
|
||||
'skillify-provenance-refusal': 'gate',
|
||||
'skillify-approval-reject': 'gate',
|
||||
|
||||
// Skill routing — periodic (LLM routing is non-deterministic)
|
||||
'journey-ideation': 'periodic',
|
||||
'journey-plan-eng': 'periodic',
|
||||
|
||||
@@ -0,0 +1,452 @@
|
||||
/**
|
||||
* Browser-skills Phase 2a — gate-tier E2E for /scrape and /skillify.
|
||||
*
|
||||
* Five scenarios cover the productivity loop and the contracts locked
|
||||
* during the v1.19.0.0 plan review:
|
||||
*
|
||||
* D1 — /skillify provenance guard (scenario 4)
|
||||
* D2 — synthesis input slice (covered indirectly by scenario 3 — the
|
||||
* committed SKILL.md must not contain conversation prose)
|
||||
* D3 — atomic write discipline (scenarios 3 and 5)
|
||||
*
|
||||
* 1. scrape-match-path — /scrape with intent matching bundled
|
||||
* hackernews-frontpage routes via $B skill run, no prototype.
|
||||
* 2. scrape-prototype-path — /scrape against a local file:// fixture
|
||||
* (no matching skill) drives $B primitives, returns JSON, suggests
|
||||
* /skillify.
|
||||
* 3. skillify-happy-path — /scrape then /skillify in one session.
|
||||
* Skill written to ~/.gstack/browser-skills/<name>/ with full
|
||||
* file tree, $B skill test passes.
|
||||
* 4. skillify-provenance-refusal — cold /skillify with no prior
|
||||
* /scrape refuses with the D1 message; nothing on disk.
|
||||
* 5. skillify-approval-reject — /scrape then /skillify but reject in
|
||||
* the approval gate; temp dir is removed, nothing at final path.
|
||||
*
|
||||
* All five run gate-tier (~$0.50–$1.50 each, ~$5 total per CI).
|
||||
* Set EVALS=1 to enable. Set EVALS_MODEL to override (default sonnet-4-6).
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { runSkillTest } from './helpers/session-runner';
|
||||
import {
|
||||
ROOT, browseBin, runId,
|
||||
describeIfSelected, testConcurrentIfSelected,
|
||||
setupBrowseShims, copyDirSync, logCost, recordE2E,
|
||||
createEvalCollector, finalizeEvalCollector,
|
||||
} from './helpers/e2e-helpers';
|
||||
import { spawnSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
|
||||
// Suite-wide eval collector; presumably finalized via finalizeEvalCollector
// in an afterAll outside this view — TODO confirm.
const evalCollector = createEvalCollector('e2e-skillify');

// ─── Shared workdir setup ───────────────────────────────────────

// Paths returned by setupSkillifyWorkdir for per-test assertions.
interface Workdir {
  workDir: string;    // tmp git repo the agent session runs in
  gstackHome: string; // scoped GSTACK_HOME under workDir
  skillsDir: string;  // <workDir>/.claude/skills
}
|
||||
|
||||
/**
|
||||
* Build a working directory that has:
|
||||
* - The /scrape and /skillify skills installed under .claude/skills/
|
||||
* - The browse binary symlinked + find-browse shim (via setupBrowseShims)
|
||||
* - bin/ scripts referenced by the preamble
|
||||
* - A scoped GSTACK_HOME under the workdir so on-disk artifacts are
|
||||
* contained and assertable
|
||||
* - A CLAUDE.md routing block instructing Skill-tool invocation
|
||||
*
|
||||
* `installSkills` lets each test pick the minimum surface (e.g., the
|
||||
* provenance-refusal scenario doesn't need /scrape).
|
||||
*/
|
||||
function setupSkillifyWorkdir(suffix: string, installSkills: string[] = ['scrape', 'skillify']): Workdir {
  // Unique tmp dir per scenario; suffix keeps parallel runs distinguishable.
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), `skill-e2e-skillify-${suffix}-`));
  const gstackHome = path.join(workDir, '.gstack-home');
  fs.mkdirSync(gstackHome, { recursive: true });

  // Minimal real git repo with one commit (some bin scripts assume a repo).
  // 5s timeout guards against a hung git invocation stalling the suite.
  const run = (cmd: string, args: string[]) =>
    spawnSync(cmd, args, { cwd: workDir, stdio: 'pipe', timeout: 5000 });
  run('git', ['init', '-b', 'main']);
  run('git', ['config', 'user.email', 'test@test.com']);
  run('git', ['config', 'user.name', 'Test']);
  fs.writeFileSync(path.join(workDir, 'README.md'), '# test\n');
  run('git', ['add', '.']);
  run('git', ['commit', '-m', 'initial']);

  setupBrowseShims(workDir);

  // Install requested skills.
  const skillsDir = path.join(workDir, '.claude', 'skills');
  for (const skill of installSkills) {
    const destDir = path.join(skillsDir, skill);
    fs.mkdirSync(destDir, { recursive: true });
    // Only SKILL.md is copied; any sibling skill assets stay behind.
    fs.copyFileSync(path.join(ROOT, skill, 'SKILL.md'), path.join(destDir, 'SKILL.md'));
  }

  // bin/ scripts — preamble references several of these.
  // Missing scripts are skipped silently (existsSync guard) so the workdir
  // builds even on a partial checkout.
  const binDir = path.join(workDir, 'bin');
  fs.mkdirSync(binDir, { recursive: true });
  for (const script of [
    'gstack-timeline-log', 'gstack-slug', 'gstack-config',
    'gstack-update-check', 'gstack-repo-mode',
    'gstack-learnings-log', 'gstack-learnings-search',
  ]) {
    const src = path.join(ROOT, 'bin', script);
    if (fs.existsSync(src)) {
      fs.copyFileSync(src, path.join(binDir, script));
      fs.chmodSync(path.join(binDir, script), 0o755);
    }
  }

  // Routing instructions the agent session reads; interpolates the scoped
  // GSTACK_HOME and the shimmed browse binary path.
  fs.writeFileSync(path.join(workDir, 'CLAUDE.md'), `# Project Instructions

## Skill routing

When the user's request matches an available skill, ALWAYS invoke it via
the Skill tool as your FIRST action.

Key routing rules:
- /scrape, "scrape", "get data from", "extract from" → invoke scrape
- /skillify, "skillify", "codify this scrape" → invoke skillify

Environment:
- GSTACK_HOME="${gstackHome}" for all gstack bin scripts.
- bin scripts are at ./bin/ relative to this directory.
- Browse binary is at ${browseBin} — assign to $B (e.g., \`B=${browseBin}\`).
`);

  return { workDir, gstackHome, skillsDir };
}
|
||||
|
||||
/**
|
||||
* Install the bundled hackernews-frontpage browser-skill into the workdir's
|
||||
* project-tier (so $B skill list finds it for match-path tests). The skill
|
||||
* has to live under <workdir>/.gstack/browser-skills/ for the project-tier
|
||||
* lookup to find it (gstack's bundled tier resolves from the install dir,
|
||||
* which the test workdir doesn't have).
|
||||
*/
|
||||
function installBundledHackernewsSkill(workDir: string) {
|
||||
const src = path.join(ROOT, 'browser-skills', 'hackernews-frontpage');
|
||||
const dst = path.join(workDir, '.gstack', 'browser-skills', 'hackernews-frontpage');
|
||||
copyDirSync(src, dst);
|
||||
}
|
||||
|
||||
/** Helper: every Bash invocation's command string from the agent. */
|
||||
function bashCommands(result: { toolCalls: Array<{ tool: string; input: any }> }): string[] {
|
||||
return result.toolCalls
|
||||
.filter((tc) => tc.tool === 'Bash')
|
||||
.map((tc) => String(tc.input?.command ?? ''))
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/** Helper: the union of agent text + every tool input/output for matching. */
|
||||
function fullSurface(result: any): string {
|
||||
const parts: string[] = [];
|
||||
if (result.output) parts.push(String(result.output));
|
||||
for (const tc of result.toolCalls || []) {
|
||||
parts.push(JSON.stringify(tc.input || {}));
|
||||
if (tc.output) parts.push(String(tc.output));
|
||||
}
|
||||
for (const entry of result.transcript || []) {
|
||||
try { parts.push(JSON.stringify(entry)); } catch { /* skip */ }
|
||||
}
|
||||
return parts.join('\n');
|
||||
}
|
||||
|
||||
// ─── Test fixtures ──────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Tiny HTML fixture for the prototype-path test. Stable structure with three
|
||||
* "items" the agent should be able to extract via $B html + parse.
|
||||
*/
|
||||
const PROTOTYPE_FIXTURE_HTML = `<!doctype html>
|
||||
<html><body>
|
||||
<h1>Test Items</h1>
|
||||
<ul id="items">
|
||||
<li class="item"><a href="/a">First Title</a><span class="score">42</span></li>
|
||||
<li class="item"><a href="/b">Second Title</a><span class="score">17</span></li>
|
||||
<li class="item"><a href="/c">Third Title</a><span class="score">8</span></li>
|
||||
</ul>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
// ─── Live-fire suite ────────────────────────────────────────────
|
||||
|
||||
describeIfSelected('Browser-skills Phase 2a E2E (/scrape + /skillify)', [
|
||||
'scrape-match-path',
|
||||
'scrape-prototype-path',
|
||||
'skillify-happy-path',
|
||||
'skillify-provenance-refusal',
|
||||
'skillify-approval-reject',
|
||||
], () => {
|
||||
afterAll(() => { finalizeEvalCollector(evalCollector); });
|
||||
|
||||
// ── 1. /scrape match path: bundled hackernews-frontpage matches ──────
|
||||
testConcurrentIfSelected('scrape-match-path', async () => {
|
||||
const { workDir, gstackHome } = setupSkillifyWorkdir('match', ['scrape']);
|
||||
installBundledHackernewsSkill(workDir);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /scrape latest hacker news stories. Invoke /scrape via the Skill tool.
|
||||
You MUST follow the skill's match-phase logic:
|
||||
1. Run \`$B skill list\` to see what browser-skills are available
|
||||
2. Recognize that "latest hacker news stories" matches the bundled
|
||||
hackernews-frontpage skill's triggers
|
||||
3. Run \`$B skill run hackernews-frontpage\` and emit the JSON
|
||||
Do NOT enter the prototype phase. Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 12,
|
||||
allowedTools: ['Skill', 'Bash', 'Read'],
|
||||
timeout: 120_000,
|
||||
testName: 'scrape-match-path',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('scrape-match-path', result);
|
||||
|
||||
const cmds = bashCommands(result);
|
||||
const listedSkills = cmds.some(c => /\bskill\s+list\b/.test(c));
|
||||
const ranBundledSkill = cmds.some(c => /\bskill\s+run\s+hackernews-frontpage\b/.test(c));
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'scrape match-path routes to bundled skill', 'Phase 2a E2E', result, {
|
||||
passed: exitOk && listedSkills && ranBundledSkill,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(listedSkills).toBe(true);
|
||||
expect(ranBundledSkill).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 180_000);
|
||||
|
||||
// ── 2. /scrape prototype path: drive $B primitives against fixture ────
|
||||
testConcurrentIfSelected('scrape-prototype-path', async () => {
|
||||
const { workDir, gstackHome } = setupSkillifyWorkdir('prototype', ['scrape']);
|
||||
|
||||
// Stage a local HTML fixture the agent can goto via file://
|
||||
const fixturePath = path.join(workDir, 'fixture.html');
|
||||
fs.writeFileSync(fixturePath, PROTOTYPE_FIXTURE_HTML);
|
||||
const fileUrl = `file://${fixturePath}`;
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /scrape titles and scores from ${fileUrl}.
|
||||
Invoke /scrape via the Skill tool. Follow the skill's prototype-phase logic:
|
||||
1. \`$B skill list\` finds NO matching skill
|
||||
2. Drive: \`$B goto ${fileUrl}\` then \`$B html\` (or \`$B text\`)
|
||||
3. Parse the items (each has a title and a score)
|
||||
4. Emit JSON of the form {"items": [{"title": "...", "score": N}, ...], "count": N}
|
||||
5. Suggest /skillify in one line
|
||||
Do NOT use AskUserQuestion.`,
|
||||
workingDirectory: workDir,
|
||||
env: { GSTACK_HOME: gstackHome },
|
||||
maxTurns: 18,
|
||||
allowedTools: ['Skill', 'Bash', 'Read'],
|
||||
timeout: 180_000,
|
||||
testName: 'scrape-prototype-path',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('scrape-prototype-path', result);
|
||||
|
||||
const cmds = bashCommands(result);
|
||||
const wentToFixture = cmds.some(c => c.includes(fileUrl));
|
||||
const fetchedHtml = cmds.some(c => /\bgoto\b|\bhtml\b|\btext\b/.test(c));
|
||||
const surface = fullSurface(result);
|
||||
const mentionsSkillify = /skillify/i.test(surface);
|
||||
const hasJsonItems = /"items"\s*:\s*\[/.test(surface) || /'items'\s*:/.test(surface);
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'scrape prototype-path drives $B + emits JSON + nudges skillify', 'Phase 2a E2E', result, {
|
||||
passed: exitOk && wentToFixture && fetchedHtml && hasJsonItems && mentionsSkillify,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(wentToFixture).toBe(true);
|
||||
expect(fetchedHtml).toBe(true);
|
||||
expect(hasJsonItems).toBe(true);
|
||||
expect(mentionsSkillify).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 240_000);
|
||||
|
||||
// ── 3. /skillify happy path: scrape then skillify in one session ─────
|
||||
testConcurrentIfSelected('skillify-happy-path', async () => {
|
||||
const { workDir, gstackHome } = setupSkillifyWorkdir('happy', ['scrape', 'skillify']);
|
||||
const fixturePath = path.join(workDir, 'fixture.html');
|
||||
fs.writeFileSync(fixturePath, PROTOTYPE_FIXTURE_HTML);
|
||||
const fileUrl = `file://${fixturePath}`;
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Two steps in this session:
|
||||
|
||||
1. Run /scrape titles and scores from ${fileUrl} via the Skill tool.
|
||||
Drive the prototype path; return JSON with items[].
|
||||
|
||||
2. Run /skillify via the Skill tool. Follow ALL 11 steps including:
|
||||
- D1 provenance guard (you have a recent /scrape, proceed)
|
||||
- D2 synthesis: include ONLY the final-attempt $B calls (goto + html)
|
||||
- D3 atomic write: stage to temp dir, run test, then commit on approval
|
||||
- When AskUserQuestion fires, choose the recommended option (A)
|
||||
for both the name/tier question AND the approval gate.
|
||||
|
||||
Use HOME=${workDir} so all skill writes land under the test workdir
|
||||
(translates to ~/.gstack/browser-skills/<name>/ via $HOME).
|
||||
|
||||
Do NOT halt for clarification.`,
|
||||
workingDirectory: workDir,
|
||||
env: {
|
||||
GSTACK_HOME: gstackHome,
|
||||
HOME: workDir, // /skillify writes to $HOME/.gstack/browser-skills/
|
||||
},
|
||||
maxTurns: 40,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Write'],
|
||||
timeout: 360_000,
|
||||
testName: 'skillify-happy-path',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('skillify-happy-path', result);
|
||||
|
||||
// The skill should land in $HOME/.gstack/browser-skills/<name>/
|
||||
const skillsRoot = path.join(workDir, '.gstack', 'browser-skills');
|
||||
const writtenSkills = fs.existsSync(skillsRoot)
|
||||
? fs.readdirSync(skillsRoot).filter(d => !d.startsWith('.') && d !== 'hackernews-frontpage')
|
||||
: [];
|
||||
const skillName = writtenSkills[0];
|
||||
const skillDir = skillName ? path.join(skillsRoot, skillName) : '';
|
||||
const hasAllFiles = !!skillDir
|
||||
&& fs.existsSync(path.join(skillDir, 'SKILL.md'))
|
||||
&& fs.existsSync(path.join(skillDir, 'script.ts'))
|
||||
&& fs.existsSync(path.join(skillDir, 'script.test.ts'))
|
||||
&& fs.existsSync(path.join(skillDir, '_lib', 'browse-client.ts'))
|
||||
&& fs.existsSync(path.join(skillDir, 'fixtures'));
|
||||
|
||||
// D2 enforcement: the SKILL.md prose body MUST NOT contain conversation
|
||||
// fragments. Cheap heuristic: it shouldn't have "I" or "Let me" or other
|
||||
// first-person/agent-narration markers.
|
||||
let prosesClean = false;
|
||||
if (hasAllFiles) {
|
||||
const skillMd = fs.readFileSync(path.join(skillDir, 'SKILL.md'), 'utf-8');
|
||||
const body = skillMd.split(/\n---\n/)[1] || '';
|
||||
prosesClean = !/^I /m.test(body)
|
||||
&& !/Let me /i.test(body)
|
||||
&& !/^I'll /m.test(body);
|
||||
}
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'skillify happy path writes well-formed skill on disk', 'Phase 2a E2E', result, {
|
||||
passed: exitOk && hasAllFiles && prosesClean,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(writtenSkills.length).toBeGreaterThan(0);
|
||||
expect(hasAllFiles).toBe(true);
|
||||
expect(prosesClean).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 420_000);
|
||||
|
||||
// ── 4. /skillify provenance refusal: D1 contract ─────────────────────
|
||||
testConcurrentIfSelected('skillify-provenance-refusal', async () => {
|
||||
const { workDir, gstackHome } = setupSkillifyWorkdir('refusal', ['skillify']);
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Run /skillify via the Skill tool. There has been NO prior /scrape
|
||||
in this conversation. Follow the skill's Step 1 (D1 provenance guard) literally:
|
||||
walk back through agent turns, find no /scrape result, refuse with the exact
|
||||
message the skill specifies, and stop. Do NOT synthesize anything. Do NOT
|
||||
write any files.`,
|
||||
workingDirectory: workDir,
|
||||
env: {
|
||||
GSTACK_HOME: gstackHome,
|
||||
HOME: workDir,
|
||||
},
|
||||
maxTurns: 8,
|
||||
allowedTools: ['Skill', 'Bash', 'Read'],
|
||||
timeout: 90_000,
|
||||
testName: 'skillify-provenance-refusal',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('skillify-provenance-refusal', result);
|
||||
|
||||
const surface = fullSurface(result);
|
||||
const refusalText = /no recent \/?scrape result|run \/scrape.*first|no prior \/?scrape/i.test(surface);
|
||||
|
||||
// Critical: nothing on disk. No staged dir, no committed skill.
|
||||
const skillsRoot = path.join(workDir, '.gstack', 'browser-skills');
|
||||
const stagingRoot = path.join(workDir, '.gstack', '.tmp');
|
||||
const noSkillsWritten = !fs.existsSync(skillsRoot)
|
||||
|| fs.readdirSync(skillsRoot).filter(d => !d.startsWith('.')).length === 0;
|
||||
const noStaging = !fs.existsSync(stagingRoot)
|
||||
|| fs.readdirSync(stagingRoot).filter(d => d.startsWith('skillify-')).length === 0;
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'skillify D1 refusal — no on-disk write', 'Phase 2a E2E', result, {
|
||||
passed: exitOk && refusalText && noSkillsWritten && noStaging,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(refusalText).toBe(true);
|
||||
expect(noSkillsWritten).toBe(true);
|
||||
expect(noStaging).toBe(true);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 120_000);
|
||||
|
||||
// ── 5. /skillify approval-gate reject: D3 cleanup ────────────────────
|
||||
testConcurrentIfSelected('skillify-approval-reject', async () => {
|
||||
const { workDir, gstackHome } = setupSkillifyWorkdir('reject', ['scrape', 'skillify']);
|
||||
const fixturePath = path.join(workDir, 'fixture.html');
|
||||
fs.writeFileSync(fixturePath, PROTOTYPE_FIXTURE_HTML);
|
||||
const fileUrl = `file://${fixturePath}`;
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `Two steps:
|
||||
|
||||
1. Run /scrape titles and scores from ${fileUrl} via the Skill tool.
|
||||
|
||||
2. Run /skillify via the Skill tool. Follow steps 1-9. When the approval
|
||||
gate AskUserQuestion fires (Step 9), choose option C (Discard) instead
|
||||
of A (Commit). The D3 contract says the temp dir must be removed and
|
||||
nothing should land at the final tier path.
|
||||
|
||||
Use HOME=${workDir}. Do NOT commit the skill.`,
|
||||
workingDirectory: workDir,
|
||||
env: {
|
||||
GSTACK_HOME: gstackHome,
|
||||
HOME: workDir,
|
||||
},
|
||||
maxTurns: 35,
|
||||
allowedTools: ['Skill', 'Bash', 'Read', 'Write'],
|
||||
timeout: 360_000,
|
||||
testName: 'skillify-approval-reject',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('skillify-approval-reject', result);
|
||||
|
||||
// D3 contract: nothing at the final tier path; staging dir is gone.
|
||||
const skillsRoot = path.join(workDir, '.gstack', 'browser-skills');
|
||||
const writtenSkills = fs.existsSync(skillsRoot)
|
||||
? fs.readdirSync(skillsRoot).filter(d => !d.startsWith('.'))
|
||||
: [];
|
||||
const stagingRoot = path.join(workDir, '.gstack', '.tmp');
|
||||
const stagingLeftovers = fs.existsSync(stagingRoot)
|
||||
? fs.readdirSync(stagingRoot).filter(d => d.startsWith('skillify-'))
|
||||
: [];
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
recordE2E(evalCollector, 'skillify approval-reject leaves no on-disk artifact', 'Phase 2a E2E', result, {
|
||||
passed: exitOk && writtenSkills.length === 0 && stagingLeftovers.length === 0,
|
||||
});
|
||||
|
||||
expect(exitOk).toBe(true);
|
||||
expect(writtenSkills.length).toBe(0);
|
||||
expect(stagingLeftovers.length).toBe(0);
|
||||
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch {}
|
||||
}, 420_000);
|
||||
});
|
||||
@@ -1682,3 +1682,83 @@ describe('no compiled binaries in git', () => {
|
||||
// claude PTY (terminal-agent.ts); these assertions had no target file.
|
||||
// Terminal-pane invariants are covered by browse/test/sidebar-tabs.test.ts
|
||||
// and browse/test/terminal-agent.test.ts.
|
||||
|
||||
// ─── Browser-skills validation ──────────────────────────────────
|
||||
//
|
||||
// Browser-skills are bundled in <gstack-root>/browser-skills/<name>/. Each
|
||||
// must have a SKILL.md whose frontmatter satisfies the contract enforced by
|
||||
// browse/src/browser-skills.ts:parseSkillFile (host required, args + triggers
|
||||
// parseable as the right shape). This test catches malformed bundled skills
|
||||
// at CI time, before they ship.
|
||||
|
||||
describe('Bundled browser-skills frontmatter contract', () => {
|
||||
const browserSkillsRoot = path.join(ROOT, 'browser-skills');
|
||||
|
||||
function listBundledSkillDirs(): string[] {
|
||||
if (!fs.existsSync(browserSkillsRoot)) return [];
|
||||
return fs.readdirSync(browserSkillsRoot)
|
||||
.filter(name => !name.startsWith('.'))
|
||||
.map(name => path.join(browserSkillsRoot, name))
|
||||
.filter(dir => {
|
||||
try { return fs.statSync(dir).isDirectory(); } catch { return false; }
|
||||
});
|
||||
}
|
||||
|
||||
test('each bundled skill has a SKILL.md', () => {
|
||||
for (const dir of listBundledSkillDirs()) {
|
||||
const skillFile = path.join(dir, 'SKILL.md');
|
||||
expect(fs.existsSync(skillFile)).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
test('each bundled skill SKILL.md frontmatter parses with required fields', async () => {
|
||||
const { parseSkillFile } = await import('../browse/src/browser-skills');
|
||||
for (const dir of listBundledSkillDirs()) {
|
||||
const name = path.basename(dir);
|
||||
const content = fs.readFileSync(path.join(dir, 'SKILL.md'), 'utf-8');
|
||||
// parseSkillFile throws on missing required fields; we just want to
|
||||
// make sure none of our shipped skills tripwire it.
|
||||
const { frontmatter } = parseSkillFile(content, { skillName: name });
|
||||
expect(frontmatter.name).toBe(name);
|
||||
expect(typeof frontmatter.host).toBe('string');
|
||||
expect(frontmatter.host.length).toBeGreaterThan(0);
|
||||
expect(Array.isArray(frontmatter.triggers)).toBe(true);
|
||||
expect(Array.isArray(frontmatter.args)).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
test('each bundled skill has a script.ts', () => {
|
||||
for (const dir of listBundledSkillDirs()) {
|
||||
expect(fs.existsSync(path.join(dir, 'script.ts'))).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
test('each bundled skill ships a sibling SDK at _lib/browse-client.ts', () => {
|
||||
for (const dir of listBundledSkillDirs()) {
|
||||
expect(fs.existsSync(path.join(dir, '_lib', 'browse-client.ts'))).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
test('each bundled skill has a script.test.ts', () => {
|
||||
for (const dir of listBundledSkillDirs()) {
|
||||
expect(fs.existsSync(path.join(dir, 'script.test.ts'))).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
test("each bundled skill's _lib/browse-client.ts matches the canonical SDK", () => {
|
||||
// If the canonical SDK changes, the bundled copy must be updated. This
|
||||
// test enforces that — the _lib copy should be byte-identical.
|
||||
const canonical = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'browse-client.ts'), 'utf-8');
|
||||
for (const dir of listBundledSkillDirs()) {
|
||||
const sibling = fs.readFileSync(path.join(dir, '_lib', 'browse-client.ts'), 'utf-8');
|
||||
expect(sibling).toBe(canonical);
|
||||
}
|
||||
});
|
||||
|
||||
test('script.ts imports browse from ./_lib/browse-client', () => {
|
||||
for (const dir of listBundledSkillDirs()) {
|
||||
const content = fs.readFileSync(path.join(dir, 'script.ts'), 'utf-8');
|
||||
expect(content).toMatch(/from\s+['"]\.\/_lib\/browse-client['"]/);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user