From 27962738dbaffd0f7516124efa1100eaceabacc9 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Sat, 25 Apr 2026 13:30:48 -0700
Subject: [PATCH] test(browse): E2E gate-tier tests for domain-skills + CDP

domain-skills-e2e.test.ts (4 tests):
- save derives host from active tab top-level origin (T3)
- save lands quarantined; list surfaces it
- readSkill returns null until 3 uses without flag promote to active (T6)
- save without an active page errors with structured guidance

cdp-e2e.test.ts (8 tests):
- Accessibility.getFullAXTree returns wrapped JSON (allowed, untrusted-output)
- Performance.getMetrics returns plain JSON (allowed, trusted-output)
- Runtime.evaluate DENIED with structured guidance (T2 RCE block)
- Page.navigate DENIED (must use $B goto for blocklist routing)
- Network.getResponseBody DENIED (exfil block)
- malformed JSON params surfaces clear error
- non Domain.method format surfaces clear error
- $B cdp help returns help text

Both files boot a real Chromium via BrowserManager.launch() and exercise
the dispatch handlers end-to-end. Total 12 E2E tests in <2s.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 browse/test/cdp-e2e.test.ts           | 106 +++++++++++++++++++++++++
 browse/test/domain-skills-e2e.test.ts | 109 ++++++++++++++++++++++++++
 2 files changed, 215 insertions(+)
 create mode 100644 browse/test/cdp-e2e.test.ts
 create mode 100644 browse/test/domain-skills-e2e.test.ts

diff --git a/browse/test/cdp-e2e.test.ts b/browse/test/cdp-e2e.test.ts
new file mode 100644
index 00000000..c6b2c8a8
--- /dev/null
+++ b/browse/test/cdp-e2e.test.ts
@@ -0,0 +1,106 @@
+/**
+ * E2E (gate tier): boots a real Chromium via BrowserManager.launch(), navigates
+ * to the fixture server, exercises $B cdp end-to-end against a Playwright-owned
+ * CDPSession (Path A from the spike).
+ *
+ * Verifies (T2 + T7):
+ *   - allowed methods (Accessibility, Performance) succeed
+ *   - dangerous methods are DENIED with structured error
+ *   - untrusted-output methods get UNTRUSTED envelope
+ *   - bad JSON params, non Domain.method input, and `help` are handled
+ */
+
+import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
+import * as path from 'path';
+import * as os from 'os';
+import { promises as fs } from 'fs';
+import { startTestServer } from './test-server';
+import { BrowserManager } from '../src/browser-manager';
+
+const TMP_HOME = path.join(os.tmpdir(), `gstack-cdp-e2e-${process.pid}-${Date.now()}`);
+process.env.GSTACK_HOME = TMP_HOME;
+process.env.GSTACK_TELEMETRY_OFF = '1'; // don't pollute analytics during tests
+
+let testServer: ReturnType<typeof startTestServer>;
+let bm: BrowserManager;
+let baseUrl: string;
+
+beforeAll(async () => {
+  await fs.rm(TMP_HOME, { recursive: true, force: true });
+  await fs.mkdir(TMP_HOME, { recursive: true });
+  testServer = startTestServer(0);
+  baseUrl = testServer.url;
+  bm = new BrowserManager();
+  await bm.launch();
+  await bm.getPage().goto(baseUrl + '/basic.html');
+});
+
+afterAll(async () => {
+  try { await bm.cleanup?.(); } catch {} // best-effort teardown: errors are ignored
+  try { testServer.server.stop(); } catch {} // best-effort teardown: errors are ignored
+  await fs.rm(TMP_HOME, { recursive: true, force: true });
+});
+
+describe('$B cdp (E2E gate tier)', () => {
+  test('Accessibility.getFullAXTree (allowed, untrusted-output) returns wrapped JSON', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    const out = await handleCdpCommand(['Accessibility.getFullAXTree', '{}'], bm);
+    // Untrusted-output methods get the envelope
+    expect(out).toContain('--- BEGIN UNTRUSTED EXTERNAL CONTENT');
+    expect(out).toContain('--- END UNTRUSTED EXTERNAL CONTENT ---');
+    // Strip the BEGIN/END marker lines; what remains must parse as the JSON tree
+    const inner = out.replace(/--- BEGIN .*?\n/s, '').replace(/\n--- END .*$/s, '');
+    const parsed = JSON.parse(inner);
+    expect(parsed).toHaveProperty('nodes');
+    expect(Array.isArray(parsed.nodes)).toBe(true);
+  });
+
+  test('Performance.getMetrics (allowed, trusted-output) returns plain JSON', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    // Performance domain needs to be enabled first
+    await handleCdpCommand(['Performance.enable', '{}'], bm);
+    const out = await handleCdpCommand(['Performance.getMetrics', '{}'], bm);
+    // Trusted-output = no envelope
+    expect(out).not.toContain('UNTRUSTED');
+    const parsed = JSON.parse(out);
+    expect(parsed).toHaveProperty('metrics');
+    expect(Array.isArray(parsed.metrics)).toBe(true);
+  });
+
+  test('Runtime.evaluate (DENIED) errors with structured guidance', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    await expect(handleCdpCommand(['Runtime.evaluate', '{"expression":"1+1"}'], bm))
+      .rejects.toThrow(/DENIED.*Runtime\.evaluate/);
+  });
+
+  test('Page.navigate (DENIED — must use $B goto for blocklist routing)', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    await expect(handleCdpCommand(['Page.navigate', '{"url":"http://example.com"}'], bm))
+      .rejects.toThrow(/DENIED.*Page\.navigate/);
+  });
+
+  test('Network.getResponseBody (DENIED — exfil surface)', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    await expect(handleCdpCommand(['Network.getResponseBody', '{}'], bm))
+      .rejects.toThrow(/DENIED.*Network\.getResponseBody/);
+  });
+
+  test('malformed JSON params surfaces a clear error', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    await expect(handleCdpCommand(['Accessibility.getFullAXTree', 'not-json'], bm))
+      .rejects.toThrow(/Cannot parse params as JSON/);
+  });
+
+  test('non Domain.method format surfaces a clear error', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    await expect(handleCdpCommand(['justOneWord'], bm))
+      .rejects.toThrow(/Domain\.method format/);
+  });
+
+  test('help returns the help text', async () => {
+    const { handleCdpCommand } = await import('../src/cdp-commands');
+    const out = await handleCdpCommand(['help'], bm);
+    expect(out).toContain('deny-default escape hatch');
+    expect(out).toContain('cdp-allowlist.ts');
+  });
+});
diff --git a/browse/test/domain-skills-e2e.test.ts b/browse/test/domain-skills-e2e.test.ts
new file mode 100644
index 00000000..4c26ac56
--- /dev/null
+++ b/browse/test/domain-skills-e2e.test.ts
@@ -0,0 +1,109 @@
+/**
+ * E2E (gate tier): boots a real Chromium via BrowserManager.launch(), navigates
+ * to the fixture server, exercises $B domain-skill save/list end-to-end.
+ *
+ * Verifies (T3 + T4 + T6):
+ *   - host derives from active tab top-level origin (not agent-supplied)
+ *   - save lands in JSONL state:"quarantined"
+ *   - `list` surfaces the saved row; save with no top-level URL is rejected
+ *   - 3 successful uses promote to active; readSkill then returns it
+ */
+
+import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
+import { promises as fs } from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { startTestServer } from './test-server';
+import { BrowserManager } from '../src/browser-manager';
+
+const TMP_HOME = path.join(os.tmpdir(), `gstack-domain-e2e-${process.pid}-${Date.now()}`);
+process.env.GSTACK_HOME = TMP_HOME;
+process.env.GSTACK_PROJECT_SLUG = 'e2e-test-slug';
+
+let testServer: ReturnType<typeof startTestServer>;
+let bm: BrowserManager;
+let baseUrl: string;
+
+async function fakeBodyPipe(body: string): Promise<string> {
+  // Stand-in for piping the body on stdin: write it to a temp file for --from-file and return the path.
+  const tmpFile = path.join(os.tmpdir(), `e2e-body-${process.pid}-${Date.now()}.md`);
+  await fs.writeFile(tmpFile, body, 'utf8');
+  return tmpFile;
+}
+
+beforeAll(async () => {
+  await fs.rm(TMP_HOME, { recursive: true, force: true });
+  await fs.mkdir(path.join(TMP_HOME, 'projects', 'e2e-test-slug'), { recursive: true });
+  testServer = startTestServer(0);
+  baseUrl = testServer.url;
+  bm = new BrowserManager();
+  await bm.launch();
+});
+
+afterAll(async () => {
+  try { await bm.cleanup?.(); } catch {} // best-effort teardown: errors are ignored
+  try { testServer.server.stop(); } catch {} // best-effort teardown: errors are ignored
+  await fs.rm(TMP_HOME, { recursive: true, force: true });
+});
+
+describe('$B domain-skill (E2E gate tier)', () => {
+  test('save: derives host from active tab, writes quarantined row, list surfaces it', async () => {
+    const { handleDomainSkillCommand } = await import('../src/domain-skill-commands');
+    // Navigate to a test page (host: 127.0.0.1 in this fixture server)
+    await bm.getPage().goto(baseUrl + '/basic.html');
+
+    const bodyFile = await fakeBodyPipe('# Test skill\n\nThis page is the basic fixture.');
+    const out = await handleDomainSkillCommand(['save', '--from-file', bodyFile], bm);
+
+    // Output is structured per DX D5
+    expect(out).toContain('Saved');
+    expect(out).toContain('quarantined');
+    expect(out).toContain('127.0.0.1');
+    expect(out).toContain('Next:');
+
+    // Check the JSONL file actually has it
+    const jsonl = await fs.readFile(
+      path.join(TMP_HOME, 'projects', 'e2e-test-slug', 'learnings.jsonl'),
+      'utf8',
+    );
+    const lines = jsonl.trim().split('\n').map((l) => JSON.parse(l));
+    const skill = lines.find((r: any) => r.type === 'domain' && r.host === '127.0.0.1');
+    expect(skill).toBeTruthy();
+    expect(skill.state).toBe('quarantined');
+    expect(skill.scope).toBe('project');
+    expect(skill.body).toContain('Test skill');
+    expect(skill.source).toBe('agent');
+
+    await fs.unlink(bodyFile).catch(() => {});
+  });
+
+  test('list: shows the saved skill with state', async () => {
+    const { handleDomainSkillCommand } = await import('../src/domain-skill-commands');
+    const out = await handleDomainSkillCommand(['list'], bm);
+    expect(out).toContain('Project (per-project):');
+    expect(out).toContain('[quarantined] 127.0.0.1');
+  });
+
+  test('readSkill returns null until the skill is promoted to active (T6)', async () => {
+    const { readSkill, recordSkillUse } = await import('../src/domain-skills');
+    // While quarantined, readSkill returns null
+    expect(await readSkill('127.0.0.1', 'e2e-test-slug')).toBeNull();
+    // Three uses without flag triggers auto-promote
+    await recordSkillUse('127.0.0.1', 'e2e-test-slug', false);
+    await recordSkillUse('127.0.0.1', 'e2e-test-slug', false);
+    await recordSkillUse('127.0.0.1', 'e2e-test-slug', false);
+    const result = await readSkill('127.0.0.1', 'e2e-test-slug');
+    expect(result).not.toBeNull();
+    expect(result!.row.state).toBe('active');
+    expect(result!.source).toBe('project');
+  });
+
+  test('save without an active page errors with structured guidance', async () => {
+    const { handleDomainSkillCommand } = await import('../src/domain-skill-commands');
+    // Navigate to about:blank — domain-skill save must refuse
+    await bm.getPage().goto('about:blank');
+    const bodyFile = await fakeBodyPipe('# Should fail');
+    await expect(handleDomainSkillCommand(['save', '--from-file', bodyFile], bm)).rejects.toThrow(/no top-level URL/);
+    await fs.unlink(bodyFile).catch(() => {});
+  });
+});