mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
test(browse): E2E gate-tier tests for domain-skills + CDP
domain-skills-e2e.test.ts (4 tests): - save derives host from active tab top-level origin (T3) - save lands quarantined; list surfaces it - readSkill returns null until 3 uses without flag promote to active (T6) - save without an active page errors with structured guidance cdp-e2e.test.ts (8 tests): - Accessibility.getFullAXTree returns wrapped JSON (allowed, untrusted-output) - Performance.getMetrics returns plain JSON (allowed, trusted-output) - Runtime.evaluate DENIED with structured guidance (T2 RCE block) - Page.navigate DENIED (must use $B goto for blocklist routing) - Network.getResponseBody DENIED (exfil block) - malformed JSON params surfaces clear error - non Domain.method format surfaces clear error - $B cdp help returns help text Both files boot a real Chromium via BrowserManager.launch() and exercise the dispatch handlers end-to-end. Total 12 E2E tests in <2s. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
/**
|
||||
* E2E (gate tier): boots a real Chromium via BrowserManager.launch(), navigates
|
||||
* to the fixture server, exercises $B cdp end-to-end against a Playwright-owned
|
||||
* CDPSession (Path A from the spike).
|
||||
*
|
||||
* Verifies (T2 + T7):
|
||||
* - allowed methods (Accessibility, Performance, DOM, CSS read-only) succeed
|
||||
* - dangerous methods are DENIED with structured error
|
||||
* - untrusted-output methods get UNTRUSTED envelope
|
||||
* - mutex works against a real CDPSession
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { promises as fs } from 'fs';
|
||||
import { startTestServer } from './test-server';
|
||||
import { BrowserManager } from '../src/browser-manager';
|
||||
|
||||
const TMP_HOME = path.join(os.tmpdir(), `gstack-cdp-e2e-${process.pid}-${Date.now()}`);
|
||||
process.env.GSTACK_HOME = TMP_HOME;
|
||||
process.env.GSTACK_TELEMETRY_OFF = '1'; // don't pollute analytics during tests
|
||||
|
||||
let testServer: ReturnType<typeof startTestServer>;
|
||||
let bm: BrowserManager;
|
||||
let baseUrl: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
await fs.rm(TMP_HOME, { recursive: true, force: true });
|
||||
await fs.mkdir(TMP_HOME, { recursive: true });
|
||||
testServer = startTestServer(0);
|
||||
baseUrl = testServer.url;
|
||||
bm = new BrowserManager();
|
||||
await bm.launch();
|
||||
await bm.getPage().goto(baseUrl + '/basic.html');
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
try { await bm.cleanup?.(); } catch {}
|
||||
try { testServer.server.stop(); } catch {}
|
||||
await fs.rm(TMP_HOME, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe('$B cdp (E2E gate tier)', () => {
|
||||
test('Accessibility.getFullAXTree (allowed, untrusted-output) returns wrapped JSON', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
const out = await handleCdpCommand(['Accessibility.getFullAXTree', '{}'], bm);
|
||||
// Untrusted-output methods get the envelope
|
||||
expect(out).toContain('--- BEGIN UNTRUSTED EXTERNAL CONTENT');
|
||||
expect(out).toContain('--- END UNTRUSTED EXTERNAL CONTENT ---');
|
||||
// The envelope wraps a JSON tree
|
||||
const inner = out.replace(/--- BEGIN .*?\n/s, '').replace(/\n--- END .*$/s, '');
|
||||
const parsed = JSON.parse(inner);
|
||||
expect(parsed).toHaveProperty('nodes');
|
||||
expect(Array.isArray(parsed.nodes)).toBe(true);
|
||||
});
|
||||
|
||||
test('Performance.getMetrics (allowed, trusted-output) returns plain JSON', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
// Performance domain needs to be enabled first
|
||||
await handleCdpCommand(['Performance.enable', '{}'], bm);
|
||||
const out = await handleCdpCommand(['Performance.getMetrics', '{}'], bm);
|
||||
// Trusted-output = no envelope
|
||||
expect(out).not.toContain('UNTRUSTED');
|
||||
const parsed = JSON.parse(out);
|
||||
expect(parsed).toHaveProperty('metrics');
|
||||
expect(Array.isArray(parsed.metrics)).toBe(true);
|
||||
});
|
||||
|
||||
test('Runtime.evaluate (DENIED) errors with structured guidance', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
await expect(handleCdpCommand(['Runtime.evaluate', '{"expression":"1+1"}'], bm))
|
||||
.rejects.toThrow(/DENIED.*Runtime\.evaluate/);
|
||||
});
|
||||
|
||||
test('Page.navigate (DENIED — must use $B goto for blocklist routing)', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
await expect(handleCdpCommand(['Page.navigate', '{"url":"http://example.com"}'], bm))
|
||||
.rejects.toThrow(/DENIED.*Page\.navigate/);
|
||||
});
|
||||
|
||||
test('Network.getResponseBody (DENIED — exfil surface)', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
await expect(handleCdpCommand(['Network.getResponseBody', '{}'], bm))
|
||||
.rejects.toThrow(/DENIED.*Network\.getResponseBody/);
|
||||
});
|
||||
|
||||
test('malformed JSON params surfaces a clear error', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
await expect(handleCdpCommand(['Accessibility.getFullAXTree', 'not-json'], bm))
|
||||
.rejects.toThrow(/Cannot parse params as JSON/);
|
||||
});
|
||||
|
||||
test('non Domain.method format surfaces a clear error', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
await expect(handleCdpCommand(['justOneWord'], bm))
|
||||
.rejects.toThrow(/Domain\.method format/);
|
||||
});
|
||||
|
||||
test('--help returns the help text', async () => {
|
||||
const { handleCdpCommand } = await import('../src/cdp-commands');
|
||||
const out = await handleCdpCommand(['help'], bm);
|
||||
expect(out).toContain('deny-default escape hatch');
|
||||
expect(out).toContain('cdp-allowlist.ts');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* E2E (gate tier): boots a real Chromium via BrowserManager.launch(), navigates
|
||||
* to the fixture server, exercises $B domain-skill save/show/list end-to-end.
|
||||
*
|
||||
* Verifies (T3 + T4 + T6):
|
||||
* - host derives from active tab top-level origin (not agent-supplied)
|
||||
* - save lands in JSONL state:"quarantined"
|
||||
* - listSkills surfaces the saved row
|
||||
* - 3 successful uses promote to active; readSkill then returns it
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { promises as fs } from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { startTestServer } from './test-server';
|
||||
import { BrowserManager } from '../src/browser-manager';
|
||||
|
||||
const TMP_HOME = path.join(os.tmpdir(), `gstack-domain-e2e-${process.pid}-${Date.now()}`);
|
||||
process.env.GSTACK_HOME = TMP_HOME;
|
||||
process.env.GSTACK_PROJECT_SLUG = 'e2e-test-slug';
|
||||
|
||||
let testServer: ReturnType<typeof startTestServer>;
|
||||
let bm: BrowserManager;
|
||||
let baseUrl: string;
|
||||
|
||||
async function fakeBodyPipe(body: string): Promise<string> {
|
||||
// Some subcommands read from stdin or --from-file. We use --from-file with a tmp.
|
||||
const tmpFile = path.join(os.tmpdir(), `e2e-body-${process.pid}-${Date.now()}.md`);
|
||||
await fs.writeFile(tmpFile, body, 'utf8');
|
||||
return tmpFile;
|
||||
}
|
||||
|
||||
beforeAll(async () => {
|
||||
await fs.rm(TMP_HOME, { recursive: true, force: true });
|
||||
await fs.mkdir(path.join(TMP_HOME, 'projects', 'e2e-test-slug'), { recursive: true });
|
||||
testServer = startTestServer(0);
|
||||
baseUrl = testServer.url;
|
||||
bm = new BrowserManager();
|
||||
await bm.launch();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
try { await bm.cleanup?.(); } catch {}
|
||||
try { testServer.server.stop(); } catch {}
|
||||
await fs.rm(TMP_HOME, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe('$B domain-skill (E2E gate tier)', () => {
|
||||
test('save: derives host from active tab, writes quarantined row, list surfaces it', async () => {
|
||||
const { handleDomainSkillCommand } = await import('../src/domain-skill-commands');
|
||||
// Navigate to a test page (host: 127.0.0.1 in this fixture server)
|
||||
await bm.getPage().goto(baseUrl + '/basic.html');
|
||||
|
||||
const bodyFile = await fakeBodyPipe('# Test skill\n\nThis page is the basic fixture.');
|
||||
const out = await handleDomainSkillCommand(['save', '--from-file', bodyFile], bm);
|
||||
|
||||
// Output is structured per DX D5
|
||||
expect(out).toContain('Saved');
|
||||
expect(out).toContain('quarantined');
|
||||
expect(out).toContain('127.0.0.1');
|
||||
expect(out).toContain('Next:');
|
||||
|
||||
// Check the JSONL file actually has it
|
||||
const jsonl = await fs.readFile(
|
||||
path.join(TMP_HOME, 'projects', 'e2e-test-slug', 'learnings.jsonl'),
|
||||
'utf8',
|
||||
);
|
||||
const lines = jsonl.trim().split('\n').map((l) => JSON.parse(l));
|
||||
const skill = lines.find((r: any) => r.type === 'domain' && r.host === '127.0.0.1');
|
||||
expect(skill).toBeTruthy();
|
||||
expect(skill.state).toBe('quarantined');
|
||||
expect(skill.scope).toBe('project');
|
||||
expect(skill.body).toContain('Test skill');
|
||||
expect(skill.source).toBe('agent');
|
||||
|
||||
await fs.unlink(bodyFile).catch(() => {});
|
||||
});
|
||||
|
||||
test('list: shows the saved skill with state', async () => {
|
||||
const { handleDomainSkillCommand } = await import('../src/domain-skill-commands');
|
||||
const out = await handleDomainSkillCommand(['list'], bm);
|
||||
expect(out).toContain('Project (per-project):');
|
||||
expect(out).toContain('[quarantined] 127.0.0.1');
|
||||
});
|
||||
|
||||
test('readSkill returns null until the skill is promoted to active (T6)', async () => {
|
||||
const { readSkill, recordSkillUse } = await import('../src/domain-skills');
|
||||
// While quarantined, readSkill returns null
|
||||
expect(await readSkill('127.0.0.1', 'e2e-test-slug')).toBeNull();
|
||||
// Three uses without flag triggers auto-promote
|
||||
await recordSkillUse('127.0.0.1', 'e2e-test-slug', false);
|
||||
await recordSkillUse('127.0.0.1', 'e2e-test-slug', false);
|
||||
await recordSkillUse('127.0.0.1', 'e2e-test-slug', false);
|
||||
const result = await readSkill('127.0.0.1', 'e2e-test-slug');
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.row.state).toBe('active');
|
||||
expect(result!.source).toBe('project');
|
||||
});
|
||||
|
||||
test('save without an active page errors with structured guidance', async () => {
|
||||
const { handleDomainSkillCommand } = await import('../src/domain-skill-commands');
|
||||
// Navigate to about:blank — domain-skill save must refuse
|
||||
await bm.getPage().goto('about:blank');
|
||||
const bodyFile = await fakeBodyPipe('# Should fail');
|
||||
await expect(handleDomainSkillCommand(['save', '--from-file', bodyFile], bm)).rejects.toThrow(/no top-level URL/);
|
||||
await fs.unlink(bodyFile).catch(() => {});
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user