Merge origin/main into garrytan/browserharness

Resolves 52 conflicts from the merge:

VERSION + CHANGELOG + package.json: kept v1.16.0.0 (next slot above
main's v1.15.0.0). CHANGELOG entry for v1.16.0.0 (browser-skills) sits
above v1.15.0.0 (slim preamble + plan-mode E2E harness) and the rest
of main's history.

TODOS.md: kept browser-skills phases (P1 Phase 2, P2 Phase 3, P2
Phase 4) AND main's new entries (Sidebar Terminal v1.1, Structural
STOP-Ask forcing function P1).

README.md: took main's GBrain section (newer /setup-gbrain story).

browse/src/server.ts: took main's chat-queue refactor (sidebar agent
ripped in favor of interactive PTY) and re-applied browser-skills'
LOCAL_LISTEN_PORT module-level state + daemonPort plumbing through
MetaCommandOpts.

scripts/resolvers/preamble.ts: took main's reorder of AskUserQuestion
Format ahead of model overlay (v1.6.4.0 fix).

scripts/resolvers/preamble/generate-brain-sync-block.ts: took main's
slimmer version (slim preamble v1.15.0.0).

bin/gstack-brain-{init,sync}, bin/gstack-config, test/brain-sync.test.ts:
took main's mature versions (gbrain-sync shipped via #1151).

test/skill-validation.test.ts: took main's known-large-fixtures form +
removed sidebar-agent #584 assertions (file was deleted in main); kept
my Bundled browser-skills frontmatter contract block.

SKILL.md files (37 of them) + golden fixtures: took main's, then ran
`bun run gen:skill-docs --host all` to re-add the new $B skill +
domain-skill + cdp commands to the generated docs.

All 805 tests pass across browser-skills + skill-validation + gen-skill-docs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-26 14:24:50 -07:00
167 changed files with 23453 additions and 20217 deletions
+60 -47
View File
@@ -566,10 +566,21 @@ describe('v0.4.1 preamble features', () => {
const skillsWithPreamble = [...tier1Skills, ...tier2PlusSkills];
for (const skill of tier2PlusSkills) {
test(`${skill} contains RECOMMENDATION format`, () => {
test(`${skill} contains AskUserQuestion Pros/Cons format`, () => {
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
expect(content).toContain('RECOMMENDATION: Choose');
// v1.7.0.0 Pros/Cons format tokens. The preamble resolver
// (generate-ask-user-format.ts) injects all of these into every
// tier-2+ skill. Drop any of them and the test catches it on the
// next `bun test` run.
expect(content).toContain('AskUserQuestion');
expect(content).toContain('Pros / cons:');
expect(content).toContain('Recommendation: <choice>');
expect(content).toContain('Net:');
expect(content).toContain('ELI10');
expect(content).toContain('Stakes if we pick wrong:');
// Concrete format markers must be documented in the resolver text
expect(content).toMatch(/✅/);
expect(content).toMatch(/❌/);
});
}
@@ -789,9 +800,8 @@ describe('Enum & Value Completeness in review checklist', () => {
describe('Completeness Principle in generated SKILL.md files', () => {
const skillsWithPreamble = [
'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
'qa/SKILL.md',
'qa-only/SKILL.md',
'setup-browser-cookies/SKILL.md',
'ship/SKILL.md', 'review/SKILL.md',
'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
'retro/SKILL.md',
@@ -809,11 +819,12 @@ describe('Completeness Principle in generated SKILL.md files', () => {
});
}
test('Completeness Principle includes compression table in tier 2+ skills', () => {
// Root is tier 1 (no completeness). Check tier 2+ skill.
test('Completeness Principle keeps compact scoring guidance in tier 2+ skills', () => {
const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
expect(content).toContain('CC+gstack');
expect(content).toContain('Compression');
expect(content).toContain('Completeness: X/10');
expect(content).toContain('10 = all edge cases');
expect(content).toContain('Note: options differ in kind, not coverage');
expect(content).toContain('Do not fabricate scores');
});
});
@@ -1457,12 +1468,16 @@ describe('Codex skill validation', () => {
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
});
// Discover all Claude skills with templates (except /codex which is Claude-only)
// Discover all shared skills with templates.
// Host-exclusive outside-voice skills are intentionally omitted here:
// - /codex is Claude-only
// - /claude is external-host-only
const CLAUDE_SKILLS_WITH_TEMPLATES = (() => {
const skills: string[] = [];
for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue;
if (entry.name === 'codex') continue; // Claude-only skill
if (entry.name === 'claude') continue; // External-host-only skill
if (fs.existsSync(path.join(ROOT, entry.name, 'SKILL.md.tmpl'))) {
skills.push(entry.name);
}
@@ -1493,6 +1508,13 @@ describe('Codex skill validation', () => {
expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-codex', 'SKILL.md'))).toBe(false);
});
test('/claude skill is external-host-only — no Claude-host variant', () => {
// Claude host should not get an outside-voice skill that shells into Claude.
expect(fs.existsSync(path.join(ROOT, 'claude', 'SKILL.md'))).toBe(false);
// Codex/external hosts should get the generated wrapper.
expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-claude', 'SKILL.md'))).toBe(true);
});
test('Codex skill names follow gstack-{name} convention', () => {
const codexDirs = fs.readdirSync(AGENTS_DIR);
for (const dir of codexDirs) {
@@ -1620,55 +1642,46 @@ describe('no compiled binaries in git', () => {
expect(binaries).toEqual([]);
});
test('git tracks no files larger than 2MB', () => {
// Pure fs.statSync — no shell spawn per file.
test('warns about tracked files larger than 2MB', () => {
// Large fixtures can be legitimate test infrastructure. Keep visibility on
// repository size without blocking those fixtures from living in git.
// Known-good fixtures are exempted from the warning to keep CI logs clean.
const MAX_BYTES = 2 * 1024 * 1024;
// Exempt fixtures that are deliberately tracked at large size (security
// benchmark replay data). Add additions to this list with a justification
// in the test review trail.
const LARGE_FIXTURE_EXEMPTIONS = new Set([
const knownLargeFixtures = new Set([
// Deterministic replay fixture for BrowseSafe-Bench. The live bench is
// expensive; this file is intentionally committed so the gate is free.
'browse/test/fixtures/security-bench-haiku-responses.json',
]);
const oversized = trackedFiles.filter((f: string) => {
if (LARGE_FIXTURE_EXEMPTIONS.has(f)) return false;
const oversized = trackedFiles.flatMap((f: string) => {
if (knownLargeFixtures.has(f)) return [];
const full = path.join(ROOT, f);
try {
return fs.statSync(full).size > MAX_BYTES;
const size = fs.statSync(full).size;
return size > MAX_BYTES ? [{ file: f, size }] : [];
} catch {
return false;
return [];
}
});
expect(oversized).toEqual([]);
if (oversized.length > 0) {
const formatted = oversized
.map(({ file, size }: { file: string; size: number }) => {
const mib = (size / (1024 * 1024)).toFixed(1);
return `${file} (${mib} MiB)`;
})
.join(', ');
console.warn(`[size-warning] tracked files over 2 MiB: ${formatted}`);
}
expect(Array.isArray(oversized)).toBe(true);
});
});
describe('sidebar agent (#584)', () => {
// #584 — Sidebar Write: sidebar-agent.ts allowedTools includes Write
test('sidebar-agent.ts allowedTools includes Write', () => {
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
// Find the allowedTools line in the askClaude function
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
expect(match).not.toBeNull();
expect(match![1]).toContain('Write');
});
// #584 — Server Write: server.ts allowedTools includes Write (DRY parity)
test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
// Find the sidebar allowedTools in the headed-mode path
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
expect(match).not.toBeNull();
expect(match![1]).toContain('Bash');
expect(match![1]).not.toContain('Write');
});
// #584 — Sidebar stderr: stderr handler is not empty
test('sidebar-agent.ts stderr handler is not empty', () => {
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
// The stderr handler should NOT be an empty arrow function
expect(content).not.toContain("proc.stderr.on('data', () => {})");
});
});
// `sidebar agent (#584)` describe block was here. sidebar-agent.ts and
// the entire chat-queue path were ripped in favor of the interactive
// claude PTY (terminal-agent.ts); these assertions had no target file.
// Terminal-pane invariants are covered by browse/test/sidebar-tabs.test.ts
// and browse/test/terminal-agent.test.ts.
// ─── Browser-skills validation ──────────────────────────────────
//