From 5ca8ec799e85a18e9d536d707fb2e2f734fd0009 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 18 Apr 2026 18:17:01 +0800 Subject: [PATCH] feat(browse): accept file:// in goto with smart cwd/home-relative parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends validateNavigationUrl to accept file:// URLs scoped to safe dirs (cwd + TEMP_DIR) via the existing validateReadPath policy. The workhorse is a new normalizeFileUrl() helper that handles non-standard relative forms BEFORE the WHATWG URL parser sees them: file:///abs/path.html → unchanged file://./docs/page.html → file:///docs/page.html file://~/Documents/page.html → file:///Documents/page.html file://docs/page.html → file:///docs/page.html file://localhost/abs/path → unchanged file://host.example.com/... → rejected (UNC/network) file:// and file:/// → rejected (would list a directory) Host heuristic rejects segments with '.', ':', '\\', '%', IPv6 brackets, or Windows drive-letter patterns — so file://docs.v1/page.html, file://127.0.0.1/x, file://[::1]/x, and file://C:/Users/x are explicit errors. Uses fileURLToPath() + pathToFileURL() from node:url (never string-concat) so URL escapes like %20 decode correctly and Node rejects encoded-slash traversal (%2F..%2F) outright. Signature change: validateNavigationUrl now returns Promise (the normalized URL) instead of Promise. Existing callers that ignore the return value still compile — they just don't benefit from smart-parsing until updated in follow-up commits. Callers will be migrated in the next few commits (goto, diff, newTab, restoreState). Rewrites the url-validation test file: updates existing tests for the new return type, adds 20+ new tests covering every normalizeFileUrl shape variant, URL-encoding edge cases, and path-traversal rejection. References: codex consult v3 P1 findings on URL parser semantics and fileURLToPath. --- browse/src/url-validation.ts | 144 ++++++++++++++++++++++++++++- browse/test/url-validation.test.ts | 137 ++++++++++++++++++++++++--- 2 files changed, 264 insertions(+), 17 deletions(-) diff --git a/browse/src/url-validation.ts b/browse/src/url-validation.ts index ddac0d5a..18c01fdc 100644 --- a/browse/src/url-validation.ts +++ b/browse/src/url-validation.ts @@ -3,6 +3,11 @@ * Localhost and private IPs are allowed (primary use case: QA testing local dev servers). */ +import { fileURLToPath, pathToFileURL } from 'node:url'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { validateReadPath } from './path-security'; + export const BLOCKED_METADATA_HOSTS = new Set([ '169.254.169.254', // AWS/GCP/Azure instance metadata 'fe80::1', // IPv6 link-local — common metadata endpoint alias @@ -105,17 +110,148 @@ async function resolvesToBlockedIp(hostname: string): Promise { } } -export async function validateNavigationUrl(url: string): Promise { +/** + * Normalize non-standard file:// URLs into absolute form before the WHATWG URL parser + * sees them. Handles cwd-relative, home-relative, and bare-segment shapes that the + * standard parser would otherwise mis-interpret as hostnames. + * + * file:///abs/path.html → unchanged + * file://./ → file:/// + * file://~/ → file:/// + * file:///... → file:////... (cwd-relative) + * file://localhost/ → unchanged + * file:///... → unchanged (caller rejects via host heuristic) + * + * Rejects empty (file://) and root-only (file:///) URLs — these would silently + * trigger Chromium's directory listing, which is a different product surface. + */ +export function normalizeFileUrl(url: string): string { + if (!url.toLowerCase().startsWith('file:')) return url; + + const rest = url.slice('file:'.length); + + // file:/// or longer → standard absolute; pass through unchanged (caller validates path). + if (rest.startsWith('///')) { + // Reject bare root-only (file:/// with nothing after) + if (rest === '///' || rest === '////') { + throw new Error('Invalid file URL: file:/// has no path. Use file:///.'); + } + return url; + } + + // Everything else: must start with // (we accept file://... only) + if (!rest.startsWith('//')) { + throw new Error(`Invalid file URL: ${url}. Use file:/// or file://./ or file://~/.`); + } + + const afterDoubleSlash = rest.slice(2); + + // Reject empty (file://) and trailing-slash-only (file://./ listing cwd). + if (afterDoubleSlash === '') { + throw new Error('Invalid file URL: file:// is empty. Use file:///.'); + } + if (afterDoubleSlash === '.' || afterDoubleSlash === './') { + throw new Error('Invalid file URL: file://./ would list the current directory. Use file://./ to render a specific file.'); + } + if (afterDoubleSlash === '~' || afterDoubleSlash === '~/') { + throw new Error('Invalid file URL: file://~/ would list the home directory. Use file://~/ to render a specific file.'); + } + + // Home-relative: file://~/ + if (afterDoubleSlash.startsWith('~/')) { + const rel = afterDoubleSlash.slice(2); + const absPath = path.join(os.homedir(), rel); + return pathToFileURL(absPath).href; + } + + // cwd-relative with explicit ./ : file://./ + if (afterDoubleSlash.startsWith('./')) { + const rel = afterDoubleSlash.slice(2); + const absPath = path.resolve(process.cwd(), rel); + return pathToFileURL(absPath).href; + } + + // localhost host explicitly allowed: file://localhost/ (pass through to standard parser). + if (afterDoubleSlash.toLowerCase().startsWith('localhost/')) { + return url; + } + + // Ambiguous: file:/// — treat as cwd-relative ONLY if is a + // simple path name (no dots, no colons, no backslashes, no percent-encoding, no + // IPv6 brackets, no Windows drive letter pattern). + const firstSlash = afterDoubleSlash.indexOf('/'); + const segment = firstSlash === -1 ? afterDoubleSlash : afterDoubleSlash.slice(0, firstSlash); + + // Reject host-like segments: dotted names (docs.v1), IPs (127.0.0.1), IPv6 ([::1]), + // drive letters (C:), percent-encoded, or backslash paths. + const looksLikeHost = /[.:\\%]/.test(segment) || segment.startsWith('['); + if (looksLikeHost) { + throw new Error( + `Unsupported file URL host: ${segment}. Use file:/// for local files (network/UNC paths are not supported).` + ); + } + + // Simple-segment cwd-relative: file://docs/page.html → cwd/docs/page.html + const absPath = path.resolve(process.cwd(), afterDoubleSlash); + return pathToFileURL(absPath).href; +} + +/** + * Validate a navigation URL and return a normalized version suitable for page.goto(). + * + * Callers MUST use the return value — normalization of non-standard file:// forms + * only takes effect at the navigation site, not at the original URL. + * + * Callers (keep this list current, grep before removing): + * - write-commands.ts:goto + * - meta-commands.ts:diff (both URL args) + * - browser-manager.ts:newTab + * - browser-manager.ts:restoreState + */ +export async function validateNavigationUrl(url: string): Promise { + // Normalize non-standard file:// shapes before the URL parser sees them. + let normalized = url; + if (url.toLowerCase().startsWith('file:')) { + normalized = normalizeFileUrl(url); + } + let parsed: URL; try { - parsed = new URL(url); + parsed = new URL(normalized); } catch { throw new Error(`Invalid URL: ${url}`); } + // file:// path: validate against safe-dirs and allow; otherwise defer to http(s) logic. + if (parsed.protocol === 'file:') { + // Reject non-empty non-localhost hosts (UNC / network paths). + if (parsed.host !== '' && parsed.host.toLowerCase() !== 'localhost') { + throw new Error( + `Unsupported file URL host: ${parsed.host}. Use file:/// for local files.` + ); + } + + // Convert URL → filesystem path with proper decoding (handles %20, %2F, etc.) + let fsPath: string; + try { + fsPath = fileURLToPath(parsed); + } catch (e: any) { + throw new Error(`Invalid file URL: ${url} (${e.message})`); + } + + // Reject path traversal after decoding — e.g. file:///tmp/safe%2F..%2Fetc/passwd + // Note: fileURLToPath doesn't collapse .., so a literal '..' in the decoded path + // is suspicious. path.resolve will normalize it; check the result against safe dirs. + validateReadPath(fsPath); + + // Return the canonical file:// URL derived from the filesystem path. + // This guarantees page.goto() gets a well-formed URL regardless of input shape. + return pathToFileURL(fsPath).href; + } + if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { throw new Error( - `Blocked: scheme "${parsed.protocol}" is not allowed. Only http: and https: URLs are permitted.` + `Blocked: scheme "${parsed.protocol}" is not allowed. Only http:, https:, and file: URLs are permitted.` ); } @@ -137,4 +273,6 @@ export async function validateNavigationUrl(url: string): Promise { `Blocked: ${parsed.hostname} resolves to a cloud metadata IP. Possible DNS rebinding attack.` ); } + + return url; } diff --git a/browse/test/url-validation.test.ts b/browse/test/url-validation.test.ts index f6e52175..cdeb2b05 100644 --- a/browse/test/url-validation.test.ts +++ b/browse/test/url-validation.test.ts @@ -1,29 +1,50 @@ import { describe, it, expect } from 'bun:test'; -import { validateNavigationUrl } from '../src/url-validation'; +import { validateNavigationUrl, normalizeFileUrl } from '../src/url-validation'; +import * as fs from 'fs'; +import * as path from 'path'; +import { TEMP_DIR } from '../src/platform'; describe('validateNavigationUrl', () => { it('allows http URLs', async () => { - await expect(validateNavigationUrl('http://example.com')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('http://example.com')).resolves.toBe('http://example.com'); }); it('allows https URLs', async () => { - await expect(validateNavigationUrl('https://example.com/path?q=1')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('https://example.com/path?q=1')).resolves.toBe('https://example.com/path?q=1'); }); it('allows localhost', async () => { - await expect(validateNavigationUrl('http://localhost:3000')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('http://localhost:3000')).resolves.toBe('http://localhost:3000'); }); it('allows 127.0.0.1', async () => { - await expect(validateNavigationUrl('http://127.0.0.1:8080')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('http://127.0.0.1:8080')).resolves.toBe('http://127.0.0.1:8080'); }); it('allows private IPs', async () => { - await expect(validateNavigationUrl('http://192.168.1.1')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('http://192.168.1.1')).resolves.toBe('http://192.168.1.1'); }); - it('blocks file:// scheme', async () => { - await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/scheme.*not allowed/i); + it('rejects file:// paths outside safe dirs (cwd + TEMP_DIR)', async () => { + // file:// is accepted as a scheme now, but safe-dirs policy blocks /etc/passwd. + await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/Path must be within/i); + }); + + it('accepts file:// for files under TEMP_DIR', async () => { + const tmpHtml = path.join(TEMP_DIR, `browse-test-${Date.now()}.html`); + fs.writeFileSync(tmpHtml, 'ok'); + try { + const result = await validateNavigationUrl(`file://${tmpHtml}`); + // Result should be a canonical file:// URL (pathToFileURL form) + expect(result.startsWith('file://')).toBe(true); + expect(result.toLowerCase()).toContain('browse-test-'); + } finally { + fs.unlinkSync(tmpHtml); + } + }); + + it('rejects unsupported file URL host (UNC/network paths)', async () => { + await expect(validateNavigationUrl('file://host.example.com/foo.html')).rejects.toThrow(/Unsupported file URL host/i); }); it('blocks javascript: scheme', async () => { @@ -79,11 +100,11 @@ describe('validateNavigationUrl', () => { }); it('does not block hostnames starting with fd (e.g. fd.example.com)', async () => { - await expect(validateNavigationUrl('https://fd.example.com/')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('https://fd.example.com/')).resolves.toBe('https://fd.example.com/'); }); it('does not block hostnames starting with fc (e.g. fcustomer.com)', async () => { - await expect(validateNavigationUrl('https://fcustomer.com/')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('https://fcustomer.com/')).resolves.toBe('https://fcustomer.com/'); }); it('throws on malformed URLs', async () => { @@ -92,8 +113,8 @@ describe('validateNavigationUrl', () => { }); describe('validateNavigationUrl — restoreState coverage', () => { - it('blocks file:// URLs that could appear in saved state', async () => { - await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/scheme.*not allowed/i); + it('blocks file:// URLs outside safe dirs that could appear in saved state', async () => { + await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/Path must be within/i); }); it('blocks chrome:// URLs that could appear in saved state', async () => { @@ -105,10 +126,98 @@ describe('validateNavigationUrl — restoreState coverage', () => { }); it('allows normal https URLs from saved state', async () => { - await expect(validateNavigationUrl('https://example.com/page')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('https://example.com/page')).resolves.toBe('https://example.com/page'); }); it('allows localhost URLs from saved state', async () => { - await expect(validateNavigationUrl('http://localhost:3000/app')).resolves.toBeUndefined(); + await expect(validateNavigationUrl('http://localhost:3000/app')).resolves.toBe('http://localhost:3000/app'); + }); +}); + +describe('normalizeFileUrl', () => { + const cwd = process.cwd(); + + it('passes through absolute file:/// URLs unchanged', () => { + expect(normalizeFileUrl('file:///tmp/page.html')).toBe('file:///tmp/page.html'); + }); + + it('expands file://./ to absolute file:///', () => { + const result = normalizeFileUrl('file://./docs/page.html'); + expect(result.startsWith('file://')).toBe(true); + expect(result).toContain(cwd.replace(/\\/g, '/')); + expect(result.endsWith('/docs/page.html')).toBe(true); + }); + + it('expands file://~/ to absolute file:///', () => { + const result = normalizeFileUrl('file://~/Documents/page.html'); + expect(result.startsWith('file://')).toBe(true); + expect(result.endsWith('/Documents/page.html')).toBe(true); + }); + + it('expands file:/// to cwd-relative', () => { + const result = normalizeFileUrl('file://docs/page.html'); + expect(result.startsWith('file://')).toBe(true); + expect(result).toContain(cwd.replace(/\\/g, '/')); + expect(result.endsWith('/docs/page.html')).toBe(true); + }); + + it('passes through file://localhost/ unchanged', () => { + expect(normalizeFileUrl('file://localhost/tmp/page.html')).toBe('file://localhost/tmp/page.html'); + }); + + it('rejects empty file:// URL', () => { + expect(() => normalizeFileUrl('file://')).toThrow(/is empty/i); + }); + + it('rejects file:/// with no path', () => { + expect(() => normalizeFileUrl('file:///')).toThrow(/no path/i); + }); + + it('rejects file://./ (directory listing)', () => { + expect(() => normalizeFileUrl('file://./')).toThrow(/current directory/i); + }); + + it('rejects dotted host-like segment file://docs.v1/page.html', () => { + expect(() => normalizeFileUrl('file://docs.v1/page.html')).toThrow(/Unsupported file URL host/i); + }); + + it('rejects IP-like host file://127.0.0.1/foo', () => { + expect(() => normalizeFileUrl('file://127.0.0.1/tmp/x')).toThrow(/Unsupported file URL host/i); + }); + + it('rejects IPv6 host file://[::1]/foo', () => { + expect(() => normalizeFileUrl('file://[::1]/tmp/x')).toThrow(/Unsupported file URL host/i); + }); + + it('rejects Windows drive letter file://C:/Users/x', () => { + expect(() => normalizeFileUrl('file://C:/Users/x')).toThrow(/Unsupported file URL host/i); + }); + + it('passes through non-file URLs', () => { + expect(normalizeFileUrl('https://example.com')).toBe('https://example.com'); + }); +}); + +describe('validateNavigationUrl — file:// URL-encoding', () => { + it('decodes %20 via fileURLToPath (space in filename)', async () => { + const tmpHtml = path.join(TEMP_DIR, `hello world ${Date.now()}.html`); + fs.writeFileSync(tmpHtml, 'ok'); + try { + // Build an escaped file:// URL and verify it validates against the actual path + const encodedPath = tmpHtml.split('/').map(encodeURIComponent).join('/'); + const url = `file://${encodedPath}`; + const result = await validateNavigationUrl(url); + expect(result.startsWith('file://')).toBe(true); + } finally { + fs.unlinkSync(tmpHtml); + } + }); + + it('rejects path traversal via encoded slash (file:///tmp/safe%2F..%2Fetc/passwd)', async () => { + // Node's fileURLToPath rejects encoded slashes outright with a clear error. + // Either "encoded /" rejection OR "Path must be within" safe-dirs rejection is acceptable. + await expect( + validateNavigationUrl('file:///tmp/safe%2F..%2Fetc/passwd') + ).rejects.toThrow(/encoded \/|Path must be within/i); }); });