mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-08 14:34:49 +02:00
Merge origin/main into garrytan/browserharness
Resolves 52 conflicts from the merge:
VERSION + CHANGELOG + package.json: kept v1.16.0.0 (next slot above
main's v1.15.0.0). CHANGELOG entry for v1.16.0.0 (browser-skills) sits
above v1.15.0.0 (slim preamble + plan-mode E2E harness) and the rest
of main's history.
TODOS.md: kept browser-skills phases (P1 Phase 2, P2 Phase 3, P2
Phase 4) AND main's new entries (Sidebar Terminal v1.1, Structural
STOP-Ask forcing function P1).
README.md: took main's GBrain section (newer /setup-gbrain story).
browse/src/server.ts: took main's chat-queue refactor (sidebar agent
ripped out in favor of the interactive PTY) and re-applied browser-skills'
LOCAL_LISTEN_PORT module-level state + daemonPort plumbing through
MetaCommandOpts.
scripts/resolvers/preamble.ts: took main's reorder of AskUserQuestion
Format ahead of model overlay (v1.6.4.0 fix).
scripts/resolvers/preamble/generate-brain-sync-block.ts: took main's
slimmer version (slim preamble v1.15.0.0).
bin/gstack-brain-{init,sync}, bin/gstack-config, test/brain-sync.test.ts:
took main's mature versions (gbrain-sync shipped via #1151).
test/skill-validation.test.ts: took main's known-large-fixtures form +
removed sidebar-agent #584 assertions (sidebar-agent.ts was deleted in main); kept
my Bundled browser-skills frontmatter contract block.
SKILL.md files (37 of them) + golden fixtures: took main's, then ran
`bun run gen:skill-docs --host all` to re-add the new $B skill +
domain-skill + cdp commands to the generated docs.
All 805 tests pass across browser-skills + skill-validation + gen-skill-docs.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -566,10 +566,21 @@ describe('v0.4.1 preamble features', () => {
|
||||
const skillsWithPreamble = [...tier1Skills, ...tier2PlusSkills];
|
||||
|
||||
for (const skill of tier2PlusSkills) {
|
||||
test(`${skill} contains RECOMMENDATION format`, () => {
|
||||
test(`${skill} contains AskUserQuestion Pros/Cons format`, () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
|
||||
expect(content).toContain('RECOMMENDATION: Choose');
|
||||
// v1.7.0.0 Pros/Cons format tokens. The preamble resolver
|
||||
// (generate-ask-user-format.ts) injects all of these into every
|
||||
// tier-2+ skill. Drop any of them and the test catches it on the
|
||||
// next `bun test` run.
|
||||
expect(content).toContain('AskUserQuestion');
|
||||
expect(content).toContain('Pros / cons:');
|
||||
expect(content).toContain('Recommendation: <choice>');
|
||||
expect(content).toContain('Net:');
|
||||
expect(content).toContain('ELI10');
|
||||
expect(content).toContain('Stakes if we pick wrong:');
|
||||
// Concrete format markers must be documented in the resolver text
|
||||
expect(content).toMatch(/✅/);
|
||||
expect(content).toMatch(/❌/);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -789,9 +800,8 @@ describe('Enum & Value Completeness in review checklist', () => {
|
||||
|
||||
describe('Completeness Principle in generated SKILL.md files', () => {
|
||||
const skillsWithPreamble = [
|
||||
'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
|
||||
'qa/SKILL.md',
|
||||
'qa-only/SKILL.md',
|
||||
'setup-browser-cookies/SKILL.md',
|
||||
'ship/SKILL.md', 'review/SKILL.md',
|
||||
'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
|
||||
'retro/SKILL.md',
|
||||
@@ -809,11 +819,12 @@ describe('Completeness Principle in generated SKILL.md files', () => {
|
||||
});
|
||||
}
|
||||
|
||||
test('Completeness Principle includes compression table in tier 2+ skills', () => {
|
||||
// Root is tier 1 (no completeness). Check tier 2+ skill.
|
||||
test('Completeness Principle keeps compact scoring guidance in tier 2+ skills', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
|
||||
expect(content).toContain('CC+gstack');
|
||||
expect(content).toContain('Compression');
|
||||
expect(content).toContain('Completeness: X/10');
|
||||
expect(content).toContain('10 = all edge cases');
|
||||
expect(content).toContain('Note: options differ in kind, not coverage');
|
||||
expect(content).toContain('Do not fabricate scores');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1457,12 +1468,16 @@ describe('Codex skill validation', () => {
|
||||
cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
|
||||
});
|
||||
|
||||
// Discover all Claude skills with templates (except /codex which is Claude-only)
|
||||
// Discover all shared skills with templates.
|
||||
// Host-exclusive outside-voice skills are intentionally omitted here:
|
||||
// - /codex is Claude-only
|
||||
// - /claude is external-host-only
|
||||
const CLAUDE_SKILLS_WITH_TEMPLATES = (() => {
|
||||
const skills: string[] = [];
|
||||
for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
|
||||
if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue;
|
||||
if (entry.name === 'codex') continue; // Claude-only skill
|
||||
if (entry.name === 'claude') continue; // External-host-only skill
|
||||
if (fs.existsSync(path.join(ROOT, entry.name, 'SKILL.md.tmpl'))) {
|
||||
skills.push(entry.name);
|
||||
}
|
||||
@@ -1493,6 +1508,13 @@ describe('Codex skill validation', () => {
|
||||
expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-codex', 'SKILL.md'))).toBe(false);
|
||||
});
|
||||
|
||||
test('/claude skill is external-host-only — no Claude-host variant', () => {
|
||||
// Claude host should not get an outside-voice skill that shells into Claude.
|
||||
expect(fs.existsSync(path.join(ROOT, 'claude', 'SKILL.md'))).toBe(false);
|
||||
// Codex/external hosts should get the generated wrapper.
|
||||
expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-claude', 'SKILL.md'))).toBe(true);
|
||||
});
|
||||
|
||||
test('Codex skill names follow gstack-{name} convention', () => {
|
||||
const codexDirs = fs.readdirSync(AGENTS_DIR);
|
||||
for (const dir of codexDirs) {
|
||||
@@ -1620,55 +1642,46 @@ describe('no compiled binaries in git', () => {
|
||||
expect(binaries).toEqual([]);
|
||||
});
|
||||
|
||||
test('git tracks no files larger than 2MB', () => {
|
||||
// Pure fs.statSync — no shell spawn per file.
|
||||
test('warns about tracked files larger than 2MB', () => {
|
||||
// Large fixtures can be legitimate test infrastructure. Keep visibility on
|
||||
// repository size without blocking those fixtures from living in git.
|
||||
// Known-good fixtures are exempted from the warning to keep CI logs clean.
|
||||
const MAX_BYTES = 2 * 1024 * 1024;
|
||||
// Exempt fixtures that are deliberately tracked at large size (security
|
||||
// benchmark replay data). Add additions to this list with a justification
|
||||
// in the test review trail.
|
||||
const LARGE_FIXTURE_EXEMPTIONS = new Set([
|
||||
const knownLargeFixtures = new Set([
|
||||
// Deterministic replay fixture for BrowseSafe-Bench. The live bench is
|
||||
// expensive; this file is intentionally committed so the gate is free.
|
||||
'browse/test/fixtures/security-bench-haiku-responses.json',
|
||||
]);
|
||||
const oversized = trackedFiles.filter((f: string) => {
|
||||
if (LARGE_FIXTURE_EXEMPTIONS.has(f)) return false;
|
||||
const oversized = trackedFiles.flatMap((f: string) => {
|
||||
if (knownLargeFixtures.has(f)) return [];
|
||||
const full = path.join(ROOT, f);
|
||||
try {
|
||||
return fs.statSync(full).size > MAX_BYTES;
|
||||
const size = fs.statSync(full).size;
|
||||
return size > MAX_BYTES ? [{ file: f, size }] : [];
|
||||
} catch {
|
||||
return false;
|
||||
return [];
|
||||
}
|
||||
});
|
||||
expect(oversized).toEqual([]);
|
||||
|
||||
if (oversized.length > 0) {
|
||||
const formatted = oversized
|
||||
.map(({ file, size }: { file: string; size: number }) => {
|
||||
const mib = (size / (1024 * 1024)).toFixed(1);
|
||||
return `${file} (${mib} MiB)`;
|
||||
})
|
||||
.join(', ');
|
||||
console.warn(`[size-warning] tracked files over 2 MiB: ${formatted}`);
|
||||
}
|
||||
|
||||
expect(Array.isArray(oversized)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('sidebar agent (#584)', () => {
|
||||
// #584 — Sidebar Write: sidebar-agent.ts allowedTools includes Write
|
||||
test('sidebar-agent.ts allowedTools includes Write', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
|
||||
// Find the allowedTools line in the askClaude function
|
||||
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
|
||||
expect(match).not.toBeNull();
|
||||
expect(match![1]).toContain('Write');
|
||||
});
|
||||
|
||||
// #584 — Server Write: server.ts allowedTools includes Write (DRY parity)
|
||||
test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
|
||||
// Find the sidebar allowedTools in the headed-mode path
|
||||
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
|
||||
expect(match).not.toBeNull();
|
||||
expect(match![1]).toContain('Bash');
|
||||
expect(match![1]).not.toContain('Write');
|
||||
});
|
||||
|
||||
// #584 — Sidebar stderr: stderr handler is not empty
|
||||
test('sidebar-agent.ts stderr handler is not empty', () => {
|
||||
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
|
||||
// The stderr handler should NOT be an empty arrow function
|
||||
expect(content).not.toContain("proc.stderr.on('data', () => {})");
|
||||
});
|
||||
});
|
||||
// `sidebar agent (#584)` describe block was here. sidebar-agent.ts and
|
||||
// the entire chat-queue path were ripped in favor of the interactive
|
||||
// claude PTY (terminal-agent.ts); these assertions had no target file.
|
||||
// Terminal-pane invariants are covered by browse/test/sidebar-tabs.test.ts
|
||||
// and browse/test/terminal-agent.test.ts.
|
||||
|
||||
// ─── Browser-skills validation ──────────────────────────────────
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user