security: gate download + scrape through validateNavigationUrl (SSRF)

The `goto` command was correctly wired through validateNavigationUrl,
but `download` and `scrape` called page.request.fetch(url, ...) directly.
A caller with the default write scope could hit the /command endpoint
and ask the daemon to fetch http://169.254.169.254/latest/meta-data/
(AWS IMDSv1) or the GCP/Azure/internal equivalents. The response body
comes back as base64 or lands on disk where GET /file serves it.

Fix: call validateNavigationUrl(url) immediately before each
page.request.fetch() call site in download and in the scrape loop.
Same blocklist that already protects `goto`: file://, javascript:,
data:, chrome://, cloud metadata (IPv4 all encodings, IPv6 ULA,
metadata.*.internal).

Tests: extend browse/test/url-validation.test.ts with a source-level
guard that walks every `await page.request.fetch(` call site and
asserts a validateNavigationUrl call precedes it within the same
branch. The regression test trips before code review if a future
refactor drops the gate.
This commit is contained in:
gus
2026-04-16 19:47:19 -03:00
committed by Garry Tan
parent de972b514f
commit b7cf46d6e6
2 changed files with 88 additions and 1 deletions
+14 -1
View File
@@ -1188,7 +1188,16 @@ export async function handleWriteCommand(
contentType = match[1];
buffer = Buffer.from(match[2], 'base64');
} else {
// Strategy 1: Direct URL via page.request.fetch()
// Strategy 1: Direct URL via page.request.fetch().
// Gate the URL through the same validator `goto` uses. Without
// this check, download + scrape bypass the navigation
// blocklist and a caller with write scope can read
// http://169.254.169.254/latest/meta-data/ (AWS IMDSv1), the
// GCP/Azure metadata equivalents, or any internal IPv4/IPv6
// the server happens to route to. The response body is then
// returned to the caller (base64) or written to disk where
// GET /file serves it back.
await validateNavigationUrl(url);
const response = await page.request.fetch(url, { timeout: 30000 });
const status = response.status();
if (status >= 400) {
@@ -1286,6 +1295,10 @@ export async function handleWriteCommand(
for (let i = 0; i < toDownload.length; i++) {
const { url, type } = toDownload[i];
try {
// Same gate as the download command — page.request.fetch
// must not reach cloud metadata, ULA ranges, or the rest of
// the blocklist. See url-validation.ts for the full list.
await validateNavigationUrl(url);
const response = await page.request.fetch(url, { timeout: 30000 });
if (response.status() >= 400) throw new Error(`HTTP ${response.status()}`);
const ct = response.headers()['content-type'] || 'application/octet-stream';
+74
View File
@@ -221,3 +221,77 @@ describe('validateNavigationUrl — file:// URL-encoding', () => {
).rejects.toThrow(/encoded \/|Path must be within/i);
});
});
// ---------------------------------------------------------------------------
// download + scrape must gate page.request.fetch through validateNavigationUrl
//
// Regression: the `goto` command was correctly wired through
// validateNavigationUrl, but the `download` and `scrape` commands
// called page.request.fetch(url, ...) directly. A caller with the
// default write scope could hit the /command endpoint and ask the
// daemon to fetch http://169.254.169.254/latest/meta-data/ (AWS
// IMDSv1) or the GCP/Azure/internal equivalents; the body comes back
// as base64 or lands on disk where GET /file serves it.
//
// Source-level check: both page.request.fetch call sites must have a
// validateNavigationUrl invocation immediately before them.
// ---------------------------------------------------------------------------
import { readFileSync } from 'fs';
import { join } from 'path';
describe('download + scrape SSRF gate', () => {
  const WRITE_COMMANDS_SRC = readFileSync(
    join(import.meta.dir, '..', 'src', 'write-commands.ts'),
    'utf-8',
  );

  // Needles must match the actual *call sites*, never the bare token.
  // The SSRF fix added code comments that name both validateNavigationUrl
  // and page.request.fetch near the call sites; a bare-token search would
  // let one of those comments satisfy the check even after a refactor
  // removed the real gate — exactly the regression this suite exists to
  // catch.
  const FETCH_CALL = 'await page.request.fetch(';
  const GATE_CALL = 'await validateNavigationUrl(';

  /** Byte offsets of every occurrence of `needle` in write-commands.ts. */
  function callsitesOf(needle: string): number[] {
    const idxs: number[] = [];
    let at = 0;
    while ((at = WRITE_COMMANDS_SRC.indexOf(needle, at)) !== -1) {
      idxs.push(at);
      at += needle.length;
    }
    return idxs;
  }

  /**
   * indexOf that fails the test instead of returning -1 when an anchor
   * is missing. Without this, a renamed `case 'download'` label would
   * make `slice(-1, …)` produce a nonsense block that can pass or fail
   * for the wrong reason.
   */
  function mustFind(haystack: string, needle: string): number {
    const at = haystack.indexOf(needle);
    expect(at).toBeGreaterThan(-1);
    return at;
  }

  it('every page.request.fetch sits under a preceding validateNavigationUrl', () => {
    const fetches = callsitesOf(FETCH_CALL);
    expect(fetches.length).toBeGreaterThan(0);
    for (const idx of fetches) {
      // Look at the 400 chars preceding the call — the gate must live
      // within the same branch / try block. 400 covers the comment +
      // await invocation without letting an unrelated upstream gate
      // pass as evidence.
      const lead = WRITE_COMMANDS_SRC.slice(Math.max(0, idx - 400), idx);
      expect(lead).toContain(GATE_CALL);
    }
  });

  it('download command validates the URL before fetch', () => {
    // The download branch is everything between the two case labels;
    // both anchors must exist for the slice to be meaningful.
    const start = mustFind(WRITE_COMMANDS_SRC, "case 'download'");
    const end = mustFind(WRITE_COMMANDS_SRC, "case 'scrape'");
    const block = WRITE_COMMANDS_SRC.slice(start, end);
    const vIdx = mustFind(block, GATE_CALL);
    const fIdx = mustFind(block, FETCH_CALL);
    expect(vIdx).toBeLessThan(fIdx);
  });

  it('scrape command validates each URL before fetch in the loop', () => {
    const start = mustFind(WRITE_COMMANDS_SRC, "case 'scrape'");
    const block = WRITE_COMMANDS_SRC.slice(start);
    // First actual fetch call site in scrape, and the nearest gate
    // preceding it within the same branch.
    const fIdx = mustFind(block, FETCH_CALL);
    const vIdx = block.slice(0, fIdx).lastIndexOf(GATE_CALL);
    expect(vIdx).toBeGreaterThan(-1);
  });
});