diff --git a/browse/src/network-capture.ts b/browse/src/network-capture.ts new file mode 100644 index 00000000..48dc5a3b --- /dev/null +++ b/browse/src/network-capture.ts @@ -0,0 +1,179 @@ +/** + * Network response body capture — SizeCappedBuffer + capture lifecycle. + * + * Architecture: + * page.on('response') listener → filter by URL pattern → store body + * SizeCappedBuffer: evicts oldest entries when total size exceeds cap + * Export: writes JSONL file (one response per line) + * + * Memory management: + * - 50MB total buffer cap (configurable) + * - 5MB per-entry body cap (larger responses stored as metadata only) + * - Binary responses stored as base64 + * - Text responses stored as-is + */ + +import * as fs from 'fs'; +import type { Response as PlaywrightResponse } from 'playwright'; + +export interface CapturedResponse { + url: string; + status: number; + headers: Record; + body: string; + contentType: string; + timestamp: number; + size: number; + bodyTruncated: boolean; +} + +const MAX_BUFFER_SIZE = 50 * 1024 * 1024; // 50MB total +const MAX_ENTRY_SIZE = 5 * 1024 * 1024; // 5MB per response body + +export class SizeCappedBuffer { + private entries: CapturedResponse[] = []; + private totalSize = 0; + private readonly maxSize: number; + + constructor(maxSize = MAX_BUFFER_SIZE) { + this.maxSize = maxSize; + } + + push(entry: CapturedResponse): void { + // Evict oldest entries until we have room + while (this.entries.length > 0 && this.totalSize + entry.size > this.maxSize) { + const evicted = this.entries.shift()!; + this.totalSize -= evicted.size; + } + this.entries.push(entry); + this.totalSize += entry.size; + } + + toArray(): CapturedResponse[] { + return [...this.entries]; + } + + get length(): number { + return this.entries.length; + } + + get byteSize(): number { + return this.totalSize; + } + + clear(): void { + this.entries = []; + this.totalSize = 0; + } + + /** Export to JSONL file. */ + exportToFile(filePath: string): number { + const lines = this.entries.map(e => JSON.stringify(e)); + fs.writeFileSync(filePath, lines.join('\n') + '\n'); + return this.entries.length; + } + + /** Summary of captured responses (URL, status, size). */ + summary(): string { + if (this.entries.length === 0) return 'No captured responses.'; + const lines = this.entries.map((e, i) => + ` [${i + 1}] ${e.status} ${e.url.slice(0, 100)} (${Math.round(e.size / 1024)}KB${e.bodyTruncated ? ', truncated' : ''})` + ); + return `${this.entries.length} responses (${Math.round(this.totalSize / 1024)}KB total):\n${lines.join('\n')}`; + } +} + +/** Global capture state. */ +let captureBuffer = new SizeCappedBuffer(); +let captureActive = false; +let captureFilter: RegExp | null = null; +let captureListener: ((response: PlaywrightResponse) => Promise) | null = null; + +export function isCaptureActive(): boolean { + return captureActive; +} + +export function getCaptureBuffer(): SizeCappedBuffer { + return captureBuffer; +} + +/** Create the response listener function. */ +function createResponseListener(filter: RegExp | null): (response: PlaywrightResponse) => Promise { + return async (response: PlaywrightResponse) => { + const url = response.url(); + if (filter && !filter.test(url)) return; + + // Skip non-content responses (redirects, 204, etc.) + const status = response.status(); + if (status === 204 || status === 301 || status === 302 || status === 304) return; + + const contentType = response.headers()['content-type'] || ''; + let body = ''; + let bodySize = 0; + let truncated = false; + + try { + const rawBody = await response.body(); + bodySize = rawBody.length; + + if (bodySize > MAX_ENTRY_SIZE) { + truncated = true; + body = ''; + } else if (contentType.includes('json') || contentType.includes('text') || contentType.includes('xml') || contentType.includes('html')) { + body = rawBody.toString('utf-8'); + } else { + body = rawBody.toString('base64'); + } + } catch { + // Response body may be unavailable (e.g., streaming, aborted) + body = ''; + truncated = true; + } + + const entry: CapturedResponse = { + url, + status, + headers: response.headers(), + body, + contentType, + timestamp: Date.now(), + size: bodySize, + bodyTruncated: truncated, + }; + + captureBuffer.push(entry); + }; +} + +/** Start capturing response bodies. */ +export function startCapture(filterPattern?: string): { filter: string | null } { + captureFilter = filterPattern ? new RegExp(filterPattern) : null; + captureActive = true; + captureListener = createResponseListener(captureFilter); + return { filter: filterPattern || null }; +} + +/** Get the active listener (to attach to page). */ +export function getCaptureListener(): ((response: PlaywrightResponse) => Promise) | null { + return captureListener; +} + +/** Stop capturing. */ +export function stopCapture(): { count: number; sizeKB: number } { + captureActive = false; + captureListener = null; + return { + count: captureBuffer.length, + sizeKB: Math.round(captureBuffer.byteSize / 1024), + }; +} + +/** Clear the capture buffer. */ +export function clearCapture(): void { + captureBuffer.clear(); +} + +/** Export captured responses to JSONL file. */ +export function exportCapture(filePath: string): number { + return captureBuffer.exportToFile(filePath); +} diff --git a/browse/src/read-commands.ts b/browse/src/read-commands.ts index 7d4d8bf7..cdf99136 100644 --- a/browse/src/read-commands.ts +++ b/browse/src/read-commands.ts @@ -225,6 +225,50 @@ export async function handleReadCommand( networkBuffer.clear(); return 'Network buffer cleared.'; } + + // Network capture extensions + if (args[0] === '--capture') { + const { + startCapture, stopCapture, getCaptureListener, isCaptureActive, + } = await import('./network-capture'); + + if (args[1] === 'stop') { + // Detach listener from current page + const page = bm.getPage(); + const listener = getCaptureListener(); + if (listener) page.removeListener('response', listener); + const result = stopCapture(); + return `Network capture stopped. ${result.count} responses captured (${result.sizeKB}KB).`; + } + + // Start capture + if (isCaptureActive()) return 'Capture already active. Use --capture stop first.'; + const filterIdx = args.indexOf('--filter'); + const filterPattern = filterIdx >= 0 ? args[filterIdx + 1] : undefined; + const info = startCapture(filterPattern); + // Attach listener to current page + const page = bm.getPage(); + const listener = getCaptureListener(); + if (listener) page.on('response', listener); + return `Network capture started${info.filter ? ` (filter: ${info.filter})` : ''}. Use --capture stop to stop.`; + } + + if (args[0] === '--export') { + const { exportCapture } = await import('./network-capture'); + const { validateOutputPath: vop } = await import('./path-security'); + const exportPath = args[1]; + if (!exportPath) throw new Error('Usage: network --export '); + vop(exportPath); + const count = exportCapture(exportPath); + return `Exported ${count} captured responses to ${exportPath}`; + } + + if (args[0] === '--bodies') { + const { getCaptureBuffer } = await import('./network-capture'); + return getCaptureBuffer().summary(); + } + + // Default: show request metadata if (networkBuffer.length === 0) return '(no network requests)'; return networkBuffer.toArray().map(e => `${e.method} ${e.url} → ${e.status || 'pending'} (${e.duration || '?'}ms, ${e.size || '?'}B)`