feat(browse): accept file:// in goto with smart cwd/home-relative parsing

Extends validateNavigationUrl to accept file:// URLs scoped to safe dirs
(cwd + TEMP_DIR) via the existing validateReadPath policy. The workhorse is a
new normalizeFileUrl() helper that handles non-standard relative forms BEFORE
the WHATWG URL parser sees them:

    file:///abs/path.html       → unchanged
    file://./docs/page.html     → file://<cwd>/docs/page.html
    file://~/Documents/page.html → file://<HOME>/Documents/page.html
    file://docs/page.html       → file://<cwd>/docs/page.html
    file://localhost/abs/path   → unchanged
    file://host.example.com/... → rejected (UNC/network)
    file:// and file:///        → rejected (would list a directory)

Host heuristic rejects segments with '.', ':', '\\', '%', IPv6 brackets, or
Windows drive-letter patterns — so file://docs.v1/page.html, file://127.0.0.1/x,
file://[::1]/x, and file://C:/Users/x are explicit errors.

Uses fileURLToPath() + pathToFileURL() from node:url (never string-concat) so
URL escapes like %20 decode correctly and Node rejects encoded-slash traversal
(%2F..%2F) outright.

Signature change: validateNavigationUrl now returns Promise<string> (the
normalized URL) instead of Promise<void>. Existing callers that ignore the
return value still compile — they just don't benefit from smart-parsing until
updated in follow-up commits. Callers will be migrated in the next few commits
(goto, diff, newTab, restoreState).

Rewrites the url-validation test file: updates existing tests for the new
return type, adds 20+ new tests covering every normalizeFileUrl shape variant,
URL-encoding edge cases, and path-traversal rejection.

References: codex consult v3 P1 findings on URL parser semantics and fileURLToPath.
This commit is contained in:
Garry Tan
2026-04-18 18:17:01 +08:00
parent 57e3ed174d
commit 5ca8ec799e
2 changed files with 264 additions and 17 deletions
+141 -3
View File
@@ -3,6 +3,11 @@
* Localhost and private IPs are allowed (primary use case: QA testing local dev servers).
*/
import { fileURLToPath, pathToFileURL } from 'node:url';
import * as path from 'node:path';
import * as os from 'node:os';
import { validateReadPath } from './path-security';
export const BLOCKED_METADATA_HOSTS = new Set([
'169.254.169.254', // AWS/GCP/Azure instance metadata
'fe80::1', // IPv6 link-local — common metadata endpoint alias
@@ -105,17 +110,148 @@ async function resolvesToBlockedIp(hostname: string): Promise<boolean> {
}
}
export async function validateNavigationUrl(url: string): Promise<void> {
/**
* Normalize non-standard file:// URLs into absolute form before the WHATWG URL parser
* sees them. Handles cwd-relative, home-relative, and bare-segment shapes that the
* standard parser would otherwise mis-interpret as hostnames.
*
* file:///abs/path.html → unchanged
* file://./<rel> → file://<cwd>/<rel>
* file://~/<rel> → file://<HOME>/<rel>
* file://<single-segment>/... → file://<cwd>/<single-segment>/... (cwd-relative)
* file://localhost/<abs> → unchanged
* file://<host-like>/... → unchanged (caller rejects via host heuristic)
*
* Rejects empty (file://) and root-only (file:///) URLs — these would silently
* trigger Chromium's directory listing, which is a different product surface.
*/
export function normalizeFileUrl(url: string): string {
if (!url.toLowerCase().startsWith('file:')) return url;
const rest = url.slice('file:'.length);
// file:/// or longer → standard absolute; pass through unchanged (caller validates path).
if (rest.startsWith('///')) {
// Reject bare root-only (file:/// with nothing after)
if (rest === '///' || rest === '////') {
throw new Error('Invalid file URL: file:/// has no path. Use file:///<absolute-path>.');
}
return url;
}
// Everything else: must start with // (we accept file://... only)
if (!rest.startsWith('//')) {
throw new Error(`Invalid file URL: ${url}. Use file:///<absolute-path> or file://./<rel> or file://~/<rel>.`);
}
const afterDoubleSlash = rest.slice(2);
// Reject empty (file://) and trailing-slash-only (file://./ listing cwd).
if (afterDoubleSlash === '') {
throw new Error('Invalid file URL: file:// is empty. Use file:///<absolute-path>.');
}
if (afterDoubleSlash === '.' || afterDoubleSlash === './') {
throw new Error('Invalid file URL: file://./ would list the current directory. Use file://./<filename> to render a specific file.');
}
if (afterDoubleSlash === '~' || afterDoubleSlash === '~/') {
throw new Error('Invalid file URL: file://~/ would list the home directory. Use file://~/<filename> to render a specific file.');
}
// Home-relative: file://~/<rel>
if (afterDoubleSlash.startsWith('~/')) {
const rel = afterDoubleSlash.slice(2);
const absPath = path.join(os.homedir(), rel);
return pathToFileURL(absPath).href;
}
// cwd-relative with explicit ./ : file://./<rel>
if (afterDoubleSlash.startsWith('./')) {
const rel = afterDoubleSlash.slice(2);
const absPath = path.resolve(process.cwd(), rel);
return pathToFileURL(absPath).href;
}
// localhost host explicitly allowed: file://localhost/<abs> (pass through to standard parser).
if (afterDoubleSlash.toLowerCase().startsWith('localhost/')) {
return url;
}
// Ambiguous: file://<segment>/<rest> — treat as cwd-relative ONLY if <segment> is a
// simple path name (no dots, no colons, no backslashes, no percent-encoding, no
// IPv6 brackets, no Windows drive letter pattern).
const firstSlash = afterDoubleSlash.indexOf('/');
const segment = firstSlash === -1 ? afterDoubleSlash : afterDoubleSlash.slice(0, firstSlash);
// Reject host-like segments: dotted names (docs.v1), IPs (127.0.0.1), IPv6 ([::1]),
// drive letters (C:), percent-encoded, or backslash paths.
const looksLikeHost = /[.:\\%]/.test(segment) || segment.startsWith('[');
if (looksLikeHost) {
throw new Error(
`Unsupported file URL host: ${segment}. Use file:///<absolute-path> for local files (network/UNC paths are not supported).`
);
}
// Simple-segment cwd-relative: file://docs/page.html → cwd/docs/page.html
const absPath = path.resolve(process.cwd(), afterDoubleSlash);
return pathToFileURL(absPath).href;
}
/**
* Validate a navigation URL and return a normalized version suitable for page.goto().
*
* Callers MUST use the return value — normalization of non-standard file:// forms
* only takes effect at the navigation site, not at the original URL.
*
* Callers (keep this list current, grep before removing):
* - write-commands.ts:goto
* - meta-commands.ts:diff (both URL args)
* - browser-manager.ts:newTab
* - browser-manager.ts:restoreState
*/
export async function validateNavigationUrl(url: string): Promise<string> {
// Normalize non-standard file:// shapes before the URL parser sees them.
let normalized = url;
if (url.toLowerCase().startsWith('file:')) {
normalized = normalizeFileUrl(url);
}
let parsed: URL;
try {
parsed = new URL(url);
parsed = new URL(normalized);
} catch {
throw new Error(`Invalid URL: ${url}`);
}
// file:// path: validate against safe-dirs and allow; otherwise defer to http(s) logic.
if (parsed.protocol === 'file:') {
// Reject non-empty non-localhost hosts (UNC / network paths).
if (parsed.host !== '' && parsed.host.toLowerCase() !== 'localhost') {
throw new Error(
`Unsupported file URL host: ${parsed.host}. Use file:///<absolute-path> for local files.`
);
}
// Convert URL → filesystem path with proper decoding (handles %20, %2F, etc.)
let fsPath: string;
try {
fsPath = fileURLToPath(parsed);
} catch (e: any) {
throw new Error(`Invalid file URL: ${url} (${e.message})`);
}
// Reject path traversal after decoding — e.g. file:///tmp/safe%2F..%2Fetc/passwd
// Note: fileURLToPath doesn't collapse .., so a literal '..' in the decoded path
// is suspicious. path.resolve will normalize it; check the result against safe dirs.
validateReadPath(fsPath);
// Return the canonical file:// URL derived from the filesystem path.
// This guarantees page.goto() gets a well-formed URL regardless of input shape.
return pathToFileURL(fsPath).href;
}
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
throw new Error(
`Blocked: scheme "${parsed.protocol}" is not allowed. Only http: and https: URLs are permitted.`
`Blocked: scheme "${parsed.protocol}" is not allowed. Only http:, https:, and file: URLs are permitted.`
);
}
@@ -137,4 +273,6 @@ export async function validateNavigationUrl(url: string): Promise<void> {
`Blocked: ${parsed.hostname} resolves to a cloud metadata IP. Possible DNS rebinding attack.`
);
}
return url;
}
+123 -14
View File
@@ -1,29 +1,50 @@
import { describe, it, expect } from 'bun:test';
import { validateNavigationUrl } from '../src/url-validation';
import { validateNavigationUrl, normalizeFileUrl } from '../src/url-validation';
import * as fs from 'fs';
import * as path from 'path';
import { TEMP_DIR } from '../src/platform';
describe('validateNavigationUrl', () => {
it('allows http URLs', async () => {
await expect(validateNavigationUrl('http://example.com')).resolves.toBeUndefined();
await expect(validateNavigationUrl('http://example.com')).resolves.toBe('http://example.com');
});
it('allows https URLs', async () => {
await expect(validateNavigationUrl('https://example.com/path?q=1')).resolves.toBeUndefined();
await expect(validateNavigationUrl('https://example.com/path?q=1')).resolves.toBe('https://example.com/path?q=1');
});
it('allows localhost', async () => {
await expect(validateNavigationUrl('http://localhost:3000')).resolves.toBeUndefined();
await expect(validateNavigationUrl('http://localhost:3000')).resolves.toBe('http://localhost:3000');
});
it('allows 127.0.0.1', async () => {
await expect(validateNavigationUrl('http://127.0.0.1:8080')).resolves.toBeUndefined();
await expect(validateNavigationUrl('http://127.0.0.1:8080')).resolves.toBe('http://127.0.0.1:8080');
});
it('allows private IPs', async () => {
await expect(validateNavigationUrl('http://192.168.1.1')).resolves.toBeUndefined();
await expect(validateNavigationUrl('http://192.168.1.1')).resolves.toBe('http://192.168.1.1');
});
it('blocks file:// scheme', async () => {
await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/scheme.*not allowed/i);
it('rejects file:// paths outside safe dirs (cwd + TEMP_DIR)', async () => {
// file:// is accepted as a scheme now, but safe-dirs policy blocks /etc/passwd.
await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/Path must be within/i);
});
it('accepts file:// for files under TEMP_DIR', async () => {
const tmpHtml = path.join(TEMP_DIR, `browse-test-${Date.now()}.html`);
fs.writeFileSync(tmpHtml, '<html><body>ok</body></html>');
try {
const result = await validateNavigationUrl(`file://${tmpHtml}`);
// Result should be a canonical file:// URL (pathToFileURL form)
expect(result.startsWith('file://')).toBe(true);
expect(result.toLowerCase()).toContain('browse-test-');
} finally {
fs.unlinkSync(tmpHtml);
}
});
it('rejects unsupported file URL host (UNC/network paths)', async () => {
await expect(validateNavigationUrl('file://host.example.com/foo.html')).rejects.toThrow(/Unsupported file URL host/i);
});
it('blocks javascript: scheme', async () => {
@@ -79,11 +100,11 @@ describe('validateNavigationUrl', () => {
});
it('does not block hostnames starting with fd (e.g. fd.example.com)', async () => {
await expect(validateNavigationUrl('https://fd.example.com/')).resolves.toBeUndefined();
await expect(validateNavigationUrl('https://fd.example.com/')).resolves.toBe('https://fd.example.com/');
});
it('does not block hostnames starting with fc (e.g. fcustomer.com)', async () => {
await expect(validateNavigationUrl('https://fcustomer.com/')).resolves.toBeUndefined();
await expect(validateNavigationUrl('https://fcustomer.com/')).resolves.toBe('https://fcustomer.com/');
});
it('throws on malformed URLs', async () => {
@@ -92,8 +113,8 @@ describe('validateNavigationUrl', () => {
});
describe('validateNavigationUrl — restoreState coverage', () => {
it('blocks file:// URLs that could appear in saved state', async () => {
await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/scheme.*not allowed/i);
it('blocks file:// URLs outside safe dirs that could appear in saved state', async () => {
await expect(validateNavigationUrl('file:///etc/passwd')).rejects.toThrow(/Path must be within/i);
});
it('blocks chrome:// URLs that could appear in saved state', async () => {
@@ -105,10 +126,98 @@ describe('validateNavigationUrl — restoreState coverage', () => {
});
it('allows normal https URLs from saved state', async () => {
await expect(validateNavigationUrl('https://example.com/page')).resolves.toBeUndefined();
await expect(validateNavigationUrl('https://example.com/page')).resolves.toBe('https://example.com/page');
});
it('allows localhost URLs from saved state', async () => {
await expect(validateNavigationUrl('http://localhost:3000/app')).resolves.toBeUndefined();
await expect(validateNavigationUrl('http://localhost:3000/app')).resolves.toBe('http://localhost:3000/app');
});
});
describe('normalizeFileUrl', () => {
const cwd = process.cwd();
it('passes through absolute file:/// URLs unchanged', () => {
expect(normalizeFileUrl('file:///tmp/page.html')).toBe('file:///tmp/page.html');
});
it('expands file://./<rel> to absolute file://<cwd>/<rel>', () => {
const result = normalizeFileUrl('file://./docs/page.html');
expect(result.startsWith('file://')).toBe(true);
expect(result).toContain(cwd.replace(/\\/g, '/'));
expect(result.endsWith('/docs/page.html')).toBe(true);
});
it('expands file://~/<rel> to absolute file://<homedir>/<rel>', () => {
const result = normalizeFileUrl('file://~/Documents/page.html');
expect(result.startsWith('file://')).toBe(true);
expect(result.endsWith('/Documents/page.html')).toBe(true);
});
it('expands file://<simple-segment>/<rest> to cwd-relative', () => {
const result = normalizeFileUrl('file://docs/page.html');
expect(result.startsWith('file://')).toBe(true);
expect(result).toContain(cwd.replace(/\\/g, '/'));
expect(result.endsWith('/docs/page.html')).toBe(true);
});
it('passes through file://localhost/<abs> unchanged', () => {
expect(normalizeFileUrl('file://localhost/tmp/page.html')).toBe('file://localhost/tmp/page.html');
});
it('rejects empty file:// URL', () => {
expect(() => normalizeFileUrl('file://')).toThrow(/is empty/i);
});
it('rejects file:/// with no path', () => {
expect(() => normalizeFileUrl('file:///')).toThrow(/no path/i);
});
it('rejects file://./ (directory listing)', () => {
expect(() => normalizeFileUrl('file://./')).toThrow(/current directory/i);
});
it('rejects dotted host-like segment file://docs.v1/page.html', () => {
expect(() => normalizeFileUrl('file://docs.v1/page.html')).toThrow(/Unsupported file URL host/i);
});
it('rejects IP-like host file://127.0.0.1/foo', () => {
expect(() => normalizeFileUrl('file://127.0.0.1/tmp/x')).toThrow(/Unsupported file URL host/i);
});
it('rejects IPv6 host file://[::1]/foo', () => {
expect(() => normalizeFileUrl('file://[::1]/tmp/x')).toThrow(/Unsupported file URL host/i);
});
it('rejects Windows drive letter file://C:/Users/x', () => {
expect(() => normalizeFileUrl('file://C:/Users/x')).toThrow(/Unsupported file URL host/i);
});
it('passes through non-file URLs', () => {
expect(normalizeFileUrl('https://example.com')).toBe('https://example.com');
});
});
describe('validateNavigationUrl — file:// URL-encoding', () => {
it('decodes %20 via fileURLToPath (space in filename)', async () => {
const tmpHtml = path.join(TEMP_DIR, `hello world ${Date.now()}.html`);
fs.writeFileSync(tmpHtml, '<html>ok</html>');
try {
// Build an escaped file:// URL and verify it validates against the actual path
const encodedPath = tmpHtml.split('/').map(encodeURIComponent).join('/');
const url = `file://${encodedPath}`;
const result = await validateNavigationUrl(url);
expect(result.startsWith('file://')).toBe(true);
} finally {
fs.unlinkSync(tmpHtml);
}
});
it('rejects path traversal via encoded slash (file:///tmp/safe%2F..%2Fetc/passwd)', async () => {
// Node's fileURLToPath rejects encoded slashes outright with a clear error.
// Either "encoded /" rejection OR "Path must be within" safe-dirs rejection is acceptable.
await expect(
validateNavigationUrl('file:///tmp/safe%2F..%2Fetc/passwd')
).rejects.toThrow(/encoded \/|Path must be within/i);
});
});