Merge origin/main into garrytan/browserharness

Resolves 52 conflicts from the merge:

VERSION + CHANGELOG + package.json: kept v1.16.0.0 (next slot above main's v1.15.0.0). CHANGELOG entry for v1.16.0.0 (browser-skills) sits above v1.15.0.0 (slim preamble + plan-mode E2E harness) and the rest of main's history.
TODOS.md: kept browser-skills phases (P1 Phase 2, P2 Phase 3, P2 Phase 4) AND main's new entries (Sidebar Terminal v1.1, Structural STOP-Ask forcing function P1).
README.md: took main's GBrain section (newer /setup-gbrain story).
browse/src/server.ts: took main's chat-queue refactor (sidebar agent ripped in favor of interactive PTY) and re-applied browser-skills' LOCAL_LISTEN_PORT module-level state + daemonPort plumbing through MetaCommandOpts.
scripts/resolvers/preamble.ts: took main's reorder of AskUserQuestion Format ahead of model overlay (v1.6.4.0 fix).
scripts/resolvers/preamble/generate-brain-sync-block.ts: took main's slimmer version (slim preamble v1.15.0.0).
bin/gstack-brain-{init,sync}, bin/gstack-config, test/brain-sync.test.ts: took main's mature versions (gbrain-sync shipped via #1151).
test/skill-validation.test.ts: took main's known-large-fixtures form + removed sidebar-agent #584 assertions (file was deleted in main); kept my Bundled browser-skills frontmatter contract block.
SKILL.md files (37 of them) + golden fixtures: took main's, then ran `bun run gen:skill-docs --host all` to re-add the new $B skill + domain-skill + cdp commands to the generated docs.

All 805 tests pass across browser-skills + skill-validation + gen-skill-docs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 14:34:49 +02:00 · 2026-04-26 14:24:50 -07:00
parent bfaa923437 dde55103fc
commit a3df475707
167 changed files with 23453 additions and 20217 deletions
@@ -566,10 +566,21 @@ describe('v0.4.1 preamble features', () => {
  const skillsWithPreamble = [...tier1Skills, ...tier2PlusSkills];

  for (const skill of tier2PlusSkills) {
-    test(`${skill} contains RECOMMENDATION format`, () => {
+    test(`${skill} contains AskUserQuestion Pros/Cons format`, () => {
      const content = fs.readFileSync(path.join(ROOT, skill), 'utf-8');
-      expect(content).toContain('RECOMMENDATION: Choose');
+      // v1.7.0.0 Pros/Cons format tokens. The preamble resolver
+      // (generate-ask-user-format.ts) injects all of these into every
+      // tier-2+ skill. Drop any of them and the test catches it on the
+      // next `bun test` run.
      expect(content).toContain('AskUserQuestion');
+      expect(content).toContain('Pros / cons:');
+      expect(content).toContain('Recommendation: <choice>');
+      expect(content).toContain('Net:');
+      expect(content).toContain('ELI10');
+      expect(content).toContain('Stakes if we pick wrong:');
+      // Concrete format markers must be documented in the resolver text
+      expect(content).toMatch(/✅/);
+      expect(content).toMatch(/❌/);
    });
  }

@@ -789,9 +800,8 @@ describe('Enum & Value Completeness in review checklist', () => {

 describe('Completeness Principle in generated SKILL.md files', () => {
  const skillsWithPreamble = [
-    'SKILL.md', 'browse/SKILL.md', 'qa/SKILL.md',
+    'qa/SKILL.md',
    'qa-only/SKILL.md',
-    'setup-browser-cookies/SKILL.md',
    'ship/SKILL.md', 'review/SKILL.md',
    'plan-ceo-review/SKILL.md', 'plan-eng-review/SKILL.md',
    'retro/SKILL.md',
@@ -809,11 +819,12 @@ describe('Completeness Principle in generated SKILL.md files', () => {
    });
  }

-  test('Completeness Principle includes compression table in tier 2+ skills', () => {
-    // Root is tier 1 (no completeness). Check tier 2+ skill.
+  test('Completeness Principle keeps compact scoring guidance in tier 2+ skills', () => {
    const content = fs.readFileSync(path.join(ROOT, 'cso', 'SKILL.md'), 'utf-8');
-    expect(content).toContain('CC+gstack');
-    expect(content).toContain('Compression');
+    expect(content).toContain('Completeness: X/10');
+    expect(content).toContain('10 = all edge cases');
+    expect(content).toContain('Note: options differ in kind, not coverage');
+    expect(content).toContain('Do not fabricate scores');
  });
 });

@@ -1457,12 +1468,16 @@ describe('Codex skill validation', () => {
    cwd: ROOT, stdout: 'pipe', stderr: 'pipe',
  });

-  // Discover all Claude skills with templates (except /codex which is Claude-only)
+  // Discover all shared skills with templates.
+  // Host-exclusive outside-voice skills are intentionally omitted here:
+  // - /codex is Claude-only
+  // - /claude is external-host-only
  const CLAUDE_SKILLS_WITH_TEMPLATES = (() => {
    const skills: string[] = [];
    for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) {
      if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue;
      if (entry.name === 'codex') continue; // Claude-only skill
+      if (entry.name === 'claude') continue; // External-host-only skill
      if (fs.existsSync(path.join(ROOT, entry.name, 'SKILL.md.tmpl'))) {
        skills.push(entry.name);
      }
@@ -1493,6 +1508,13 @@ describe('Codex skill validation', () => {
    expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-codex', 'SKILL.md'))).toBe(false);
  });

+  test('/claude skill is external-host-only — no Claude-host variant', () => {
+    // Claude host should not get an outside-voice skill that shells into Claude.
+    expect(fs.existsSync(path.join(ROOT, 'claude', 'SKILL.md'))).toBe(false);
+    // Codex/external hosts should get the generated wrapper.
+    expect(fs.existsSync(path.join(AGENTS_DIR, 'gstack-claude', 'SKILL.md'))).toBe(true);
+  });
+
  test('Codex skill names follow gstack-{name} convention', () => {
    const codexDirs = fs.readdirSync(AGENTS_DIR);
    for (const dir of codexDirs) {
@@ -1620,55 +1642,46 @@ describe('no compiled binaries in git', () => {
    expect(binaries).toEqual([]);
  });

-  test('git tracks no files larger than 2MB', () => {
-    // Pure fs.statSync — no shell spawn per file.
+  test('warns about tracked files larger than 2MB', () => {
+    // Large fixtures can be legitimate test infrastructure. Keep visibility on
+    // repository size without blocking those fixtures from living in git.
+    // Known-good fixtures are exempted from the warning to keep CI logs clean.
    const MAX_BYTES = 2 * 1024 * 1024;
-    // Exempt fixtures that are deliberately tracked at large size (security
-    // benchmark replay data). Add additions to this list with a justification
-    // in the test review trail.
-    const LARGE_FIXTURE_EXEMPTIONS = new Set([
+    const knownLargeFixtures = new Set([
+      // Deterministic replay fixture for BrowseSafe-Bench. The live bench is
+      // expensive; this file is intentionally committed so the gate is free.
      'browse/test/fixtures/security-bench-haiku-responses.json',
    ]);
-    const oversized = trackedFiles.filter((f: string) => {
-      if (LARGE_FIXTURE_EXEMPTIONS.has(f)) return false;
+    const oversized = trackedFiles.flatMap((f: string) => {
+      if (knownLargeFixtures.has(f)) return [];
      const full = path.join(ROOT, f);
      try {
-        return fs.statSync(full).size > MAX_BYTES;
+        const size = fs.statSync(full).size;
+        return size > MAX_BYTES ? [{ file: f, size }] : [];
      } catch {
-        return false;
+        return [];
      }
    });
-    expect(oversized).toEqual([]);
+
+    if (oversized.length > 0) {
+      const formatted = oversized
+        .map(({ file, size }: { file: string; size: number }) => {
+          const mib = (size / (1024 * 1024)).toFixed(1);
+          return `${file} (${mib} MiB)`;
+        })
+        .join(', ');
+      console.warn(`[size-warning] tracked files over 2 MiB: ${formatted}`);
+    }
+
+    expect(Array.isArray(oversized)).toBe(true);
  });
 });

-describe('sidebar agent (#584)', () => {
-  // #584 — Sidebar Write: sidebar-agent.ts allowedTools includes Write
-  test('sidebar-agent.ts allowedTools includes Write', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
-    // Find the allowedTools line in the askClaude function
-    const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
-    expect(match).not.toBeNull();
-    expect(match![1]).toContain('Write');
-  });
-
-  // #584 — Server Write: server.ts allowedTools includes Write (DRY parity)
-  test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
-    // Find the sidebar allowedTools in the headed-mode path
-    const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
-    expect(match).not.toBeNull();
-    expect(match![1]).toContain('Bash');
-    expect(match![1]).not.toContain('Write');
-  });
-
-  // #584 — Sidebar stderr: stderr handler is not empty
-  test('sidebar-agent.ts stderr handler is not empty', () => {
-    const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'sidebar-agent.ts'), 'utf-8');
-    // The stderr handler should NOT be an empty arrow function
-    expect(content).not.toContain("proc.stderr.on('data', () => {})");
-  });
-});
+// `sidebar agent (#584)` describe block was here. sidebar-agent.ts and
+// the entire chat-queue path were ripped in favor of the interactive
+// claude PTY (terminal-agent.ts); these assertions had no target file.
+// Terminal-pane invariants are covered by browse/test/sidebar-tabs.test.ts
+// and browse/test/terminal-agent.test.ts.

 // ─── Browser-skills validation ──────────────────────────────────
 //