From b7cf46d6e6cba830a7f8b08a9c70f74b19468eed Mon Sep 17 00:00:00 2001 From: gus Date: Thu, 16 Apr 2026 19:47:19 -0300 Subject: [PATCH] security: gate download + scrape through validateNavigationUrl (SSRF) The `goto` command was correctly wired through validateNavigationUrl, but `download` and `scrape` called page.request.fetch(url, ...) directly. A caller with the default write scope could hit the /command endpoint and ask the daemon to fetch http://169.254.169.254/latest/meta-data/ (AWS IMDSv1) or the GCP/Azure/internal equivalents. The response body comes back as base64 or lands on disk where GET /file serves it. Fix: call validateNavigationUrl(url) immediately before each page.request.fetch() call site in download and in the scrape loop. Same blocklist that already protects `goto`: file://, javascript:, data:, chrome://, cloud metadata (IPv4 all encodings, IPv6 ULA, metadata.*.internal). Tests: extend browse/test/url-validation.test.ts with a source-level guard that walks every `await page.request.fetch(` call site and asserts a validateNavigationUrl call precedes it within the same branch. Regression trips before code review if a future refactor drops the gate. --- browse/src/write-commands.ts | 15 +++++- browse/test/url-validation.test.ts | 74 ++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts index 7548db79..f74f045e 100644 --- a/browse/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -1188,7 +1188,16 @@ export async function handleWriteCommand( contentType = match[1]; buffer = Buffer.from(match[2], 'base64'); } else { - // Strategy 1: Direct URL via page.request.fetch() + // Strategy 1: Direct URL via page.request.fetch(). + // Gate the URL through the same validator `goto` uses.
Without + // this check, download + scrape bypass the navigation + // blocklist and a caller with write scope can read + // http://169.254.169.254/latest/meta-data/ (AWS IMDSv1), the + // GCP/Azure metadata equivalents, or any internal IPv4/IPv6 + // the server happens to route to. The response body is then + // returned to the caller (base64) or written to disk where + // GET /file serves it back. + await validateNavigationUrl(url); const response = await page.request.fetch(url, { timeout: 30000 }); const status = response.status(); if (status >= 400) { @@ -1286,6 +1295,10 @@ for (let i = 0; i < toDownload.length; i++) { const { url, type } = toDownload[i]; try { + // Same gate as the download command — page.request.fetch + // must not reach cloud metadata, ULA ranges, or the rest of + // the blocklist. See url-validation.ts for the full list. + await validateNavigationUrl(url); const response = await page.request.fetch(url, { timeout: 30000 }); if (response.status() >= 400) throw new Error(`HTTP ${response.status()}`); const ct = response.headers()['content-type'] || 'application/octet-stream'; diff --git a/browse/test/url-validation.test.ts b/browse/test/url-validation.test.ts index cdeb2b05..55af0af8 100644 --- a/browse/test/url-validation.test.ts +++ b/browse/test/url-validation.test.ts @@ -221,3 +221,77 @@ describe('validateNavigationUrl — file:// URL-encoding', () => { ).rejects.toThrow(/encoded \/|Path must be within/i); }); }); + +// --------------------------------------------------------------------------- +// download + scrape must gate page.request.fetch through validateNavigationUrl +// +// Regression: the `goto` command was correctly wired through +// validateNavigationUrl, but the `download` and `scrape` commands +// called page.request.fetch(url, ...) directly.
A caller with the +// default write scope could hit the /command endpoint and ask the +// daemon to fetch http://169.254.169.254/latest/meta-data/ (AWS +// IMDSv1) or the GCP/Azure/internal equivalents; the body comes back +// as base64 or lands on disk where GET /file serves it. +// +// Source-level check: both page.request.fetch call sites must have a +// validateNavigationUrl invocation immediately before them. +// --------------------------------------------------------------------------- +import { readFileSync } from 'fs'; +import { join } from 'path'; + +describe('download + scrape SSRF gate', () => { + const WRITE_COMMANDS_SRC = readFileSync( + join(import.meta.dir, '..', 'src', 'write-commands.ts'), + 'utf-8', + ); + + function callsitesOf(needle: string): number[] { + const idxs: number[] = []; + let at = 0; + while ((at = WRITE_COMMANDS_SRC.indexOf(needle, at)) !== -1) { + idxs.push(at); + at += needle.length; + } + return idxs; + } + + it('every page.request.fetch sits under a preceding validateNavigationUrl', () => { + // Match the actual call site (`await page.request.fetch(`), not the + // token when it appears inside a code comment. + const fetches = callsitesOf('await page.request.fetch('); + expect(fetches.length).toBeGreaterThan(0); + for (const idx of fetches) { + // Look at the 400 chars preceding the call — the gate must live + // within the same branch / try block. 400 covers the comment + + // await invocation without letting an unrelated upstream gate + // pass as evidence.
+ const lead = WRITE_COMMANDS_SRC.slice(Math.max(0, idx - 400), idx); + expect(lead).toMatch(/validateNavigationUrl\s*\(/); + } + }); + + it('download command validates the URL before fetch', () => { + const block = WRITE_COMMANDS_SRC.slice( + WRITE_COMMANDS_SRC.indexOf("case 'download'"), + WRITE_COMMANDS_SRC.indexOf("case 'scrape'"), + ); + const vIdx = block.indexOf('validateNavigationUrl'); + const fIdx = block.indexOf('await page.request.fetch('); + expect(vIdx).toBeGreaterThan(-1); + expect(fIdx).toBeGreaterThan(-1); + expect(vIdx).toBeLessThan(fIdx); + }); + + it('scrape command validates each URL before fetch in the loop', () => { + const block = WRITE_COMMANDS_SRC.slice( + WRITE_COMMANDS_SRC.indexOf("case 'scrape'"), + ); + // find the first actual `await page.request.fetch(` call site in scrape + // and the nearest preceding validateNavigationUrl + const fIdx = block.indexOf('await page.request.fetch('); + expect(fIdx).toBeGreaterThan(-1); + const preFetch = block.slice(0, fIdx); + const vIdx = preFetch.lastIndexOf('validateNavigationUrl'); + expect(vIdx).toBeGreaterThan(-1); + }); +});