Merge remote-tracking branch 'origin/main' into garrytan/boston-v2

# Conflicts:
#	CHANGELOG.md
#	VERSION
#	browse/src/server.ts
#	package.json
This commit is contained in:
Garry Tan
2026-05-14 21:50:22 -07:00
25 changed files with 1250 additions and 18 deletions
@@ -0,0 +1,68 @@
// Unit test for buildCommandResponse — the exported response builder that
// sanitizes lone Unicode surrogates at the HTTP boundary (#1440, D7 + D13).
//
// The function is exported from server.ts specifically so we can test it
// without spinning up a Bun server. Codex flagged in D13 finding 14 that
// "mock cr.result" wasn't testable when handleCommand was the only entry
// point; this refactor solves that.
import { describe, expect, test } from 'bun:test';
import { buildCommandResponse } from '../src/server';
// Suite for buildCommandResponse. Each test hands it a hand-built command
// result ({ status, result, json, headers? }) and inspects the returned
// response object: content-type, status, extra headers, and the body after
// surrogate sanitization.
describe('buildCommandResponse', () => {
  test('sanitizes lone surrogates in text/plain body', async () => {
    const cr = { status: 200, result: `pre\uD800post`, json: false };
    const res = buildCommandResponse(cr as any);
    expect(res.headers.get('content-type')).toBe('text/plain');
    // The unpaired high surrogate is replaced by U+FFFD, mirroring the
    // \uFFFD substitution asserted for JSON bodies below. The expectation is
    // spelled as an escape so it cannot be lost to editor/tooling re-encoding.
    expect(await res.text()).toBe('pre\uFFFDpost');
  });

  test('sanitizes lone escape sequences in application/json body', async () => {
    // cr.result is already JSON-stringified by handleCommand callers when
    // cr.json=true, so the surrogate shows up as the six-character text
    // "\uD800" rather than as a raw code unit. That escape text must be
    // neutralized too.
    const cr = { status: 200, result: '{"name":"\\uD800"}', json: true };
    const res = buildCommandResponse(cr as any);
    expect(res.headers.get('content-type')).toBe('application/json');
    expect(await res.text()).toBe('{"name":"\\uFFFD"}');
  });

  test('non-string cr.result passes through unchanged', async () => {
    // Some commands return Buffers or other ArrayBuffer-shaped bodies (e.g.
    // screenshots). Sanitizer must NOT touch them.
    const buf = new Uint8Array([1, 2, 3, 4]);
    const cr = { status: 200, result: buf, json: false };
    const res = buildCommandResponse(cr as any);
    // Compare every byte, not just the first and last, so a corrupted middle
    // byte cannot slip through.
    const out = new Uint8Array(await res.arrayBuffer());
    expect(Array.from(out)).toEqual([1, 2, 3, 4]);
  });

  test('clean text passes through unchanged', async () => {
    const cr = { status: 200, result: 'Hello, world!', json: false };
    const res = buildCommandResponse(cr as any);
    expect(await res.text()).toBe('Hello, world!');
  });

  test('status code propagates', async () => {
    const cr = { status: 404, result: 'Not found', json: false };
    const res = buildCommandResponse(cr as any);
    expect(res.status).toBe(404);
  });

  test('extra headers propagate', async () => {
    const cr = { status: 200, result: 'ok', json: false, headers: { 'X-Custom': 'value' } };
    const res = buildCommandResponse(cr as any);
    // Looked up in lower case; header names are case-insensitive.
    expect(res.headers.get('x-custom')).toBe('value');
  });

  test('JSON error body with lone surrogate is sanitized', async () => {
    // Errors set cr.json=true; a stringified error containing surrogates would
    // still crash the API without this sanitization.
    const cr = { status: 500, result: '{"error":"crash at \\uDC00 byte"}', json: true };
    const res = buildCommandResponse(cr as any);
    expect(await res.text()).toBe('{"error":"crash at \\uFFFD byte"}');
  });
});
+11 -1
View File
@@ -584,7 +584,17 @@ describe('Envelope sentinel escape', () => {
test('scoped snapshot branch applies escapeEnvelopeSentinels to untrusted lines', () => {
const branchStart = SNAPSHOT_SRC.indexOf('splitForScoped');
expect(branchStart).toBeGreaterThan(-1);
const branchEnd = SNAPSHOT_SRC.indexOf("return output.join('\\n');", branchStart);
// Match either the original return (pre-#1440) or the surrogate-sanitized
// form (post-#1440) — both end the scoped branch.
const candidates = [
"return output.join('\\n');",
"return stripLoneSurrogates(output.join('\\n'));",
];
let branchEnd = -1;
for (const c of candidates) {
const idx = SNAPSHOT_SRC.indexOf(c, branchStart);
if (idx > branchStart) { branchEnd = idx; break; }
}
expect(branchEnd).toBeGreaterThan(branchStart);
const branch = SNAPSHOT_SRC.slice(branchStart, branchEnd);
// The escape helper must be invoked on the untrusted lines, and
+112
View File
@@ -0,0 +1,112 @@
// Unit tests for browse/src/sanitize.ts (#1440).
// Covers stripLoneSurrogates (raw UTF-16) and stripLoneSurrogateEscapes
// (\uXXXX escape text) used by the response chokepoints.
import { describe, expect, test } from 'bun:test';
import { stripLoneSurrogates, stripLoneSurrogateEscapes, sanitizeBody } from '../src/sanitize';
// Suite for stripLoneSurrogates, which works on raw UTF-16 strings. The
// tests pin two properties: an unpaired surrogate code unit comes back as
// U+FFFD, and a well-formed high+low pair is left alone. All non-ASCII
// characters are written as \u escapes so the expectations do not depend on
// how the file is encoded or rendered.
describe('stripLoneSurrogates', () => {
  test('replaces lone high surrogate with U+FFFD', () => {
    const lone = '\uD800x';
    const out = stripLoneSurrogates(lone);
    expect(out).toBe('\uFFFDx');
  });

  test('replaces lone low surrogate with U+FFFD', () => {
    const lone = 'x\uDC00';
    expect(stripLoneSurrogates(lone)).toBe('x\uFFFD');
  });

  test('leaves valid surrogate pairs (emoji) unchanged', () => {
    const smiley = '\uD83D\uDE00'; // U+1F600 as its high+low surrogate pair
    expect(stripLoneSurrogates(smiley)).toBe(smiley);
  });

  test('empty string is unchanged', () => {
    expect(stripLoneSurrogates('')).toBe('');
  });

  test('mixed valid + lone surrogates', () => {
    // Lone high, then a valid pair (U+1F600), then a lone low: only the two
    // unpaired units may change.
    const input = `a\uD800b\uD83D\uDE00c\uDC00d`;
    const out = stripLoneSurrogates(input);
    expect(out).toBe(`a\uFFFDb\uD83D\uDE00c\uFFFDd`);
  });

  test('clean text passes through unchanged', () => {
    const text = 'The quick brown fox jumps over 13 lazy dogs.';
    expect(stripLoneSurrogates(text)).toBe(text);
  });

  test('high surrogate immediately followed by high surrogate replaces both individually', () => {
    const input = '\uD800\uD801'; // two lone highs in a row, neither paired
    const out = stripLoneSurrogates(input);
    // One U+FFFD per unpaired unit: two in, two out.
    expect(out).toBe('\uFFFD\uFFFD');
  });
});
// Suite for stripLoneSurrogateEscapes, which works on escape TEXT: the
// six-character sequence backslash-u-XXXX as it appears inside serialized
// JSON, not on raw UTF-16 code units. Every case has the same shape (feed a
// string, compare the result), so the cases live in one table.
describe('stripLoneSurrogateEscapes', () => {
  // Each row: [test title, input text, expected output text].
  const scenarios: ReadonlyArray<readonly [string, string, string]> = [
    [
      'replaces lone high surrogate ESCAPE with \\uFFFD',
      '{"name":"\\uD800"}',
      '{"name":"\\uFFFD"}',
    ],
    [
      'replaces lone low surrogate ESCAPE with \\uFFFD',
      '{"name":"\\uDC00"}',
      '{"name":"\\uFFFD"}',
    ],
    [
      // An escaped high+low pair (here U+1F600) is well-formed and must NOT
      // be touched.
      'leaves valid escape pair unchanged',
      '{"emoji":"\\uD83D\\uDE00"}',
      '{"emoji":"\\uD83D\\uDE00"}',
    ],
    [
      'mixed escape pairs and lone escapes',
      '{"a":"\\uD800","b":"\\uD83D\\uDE00","c":"\\uDC00"}',
      '{"a":"\\uFFFD","b":"\\uD83D\\uDE00","c":"\\uFFFD"}',
    ],
    [
      'clean JSON passes through unchanged',
      '{"results":[{"status":200,"command":"text"}]}',
      '{"results":[{"status":200,"command":"text"}]}',
    ],
    [
      // Lower-case hex digits in the escape are matched as well.
      'case-insensitive matching: \\uD8aa works like \\uD8AA',
      '\\uD8aa',
      '\\uFFFD',
    ],
  ];

  for (const [title, input, expected] of scenarios) {
    test(title, () => {
      expect(stripLoneSurrogateEscapes(input)).toBe(expected);
    });
  }
});
// Suite for sanitizeBody(body, isJson). The second argument selects the
// mode exercised here: false for a text/plain body, true for a JSON body.
describe('sanitizeBody', () => {
  test('text/plain body: applies raw-surrogate strip only', () => {
    const input = `pre\uD800post`;
    // The raw lone surrogate is replaced by U+FFFD (written as an escape so
    // the expectation is independent of file encoding).
    expect(sanitizeBody(input, false)).toBe('pre\uFFFDpost');
  });

  test('JSON body: applies both raw and escape passes', () => {
    // Both variants in the same body: "raw" holds an actual lone code unit,
    // "esc" holds the escape text backslash-u-D800.
    const input = `{"raw":"\uD800","esc":"\\uD800"}`;
    const out = sanitizeBody(input, true);
    // Raw unit -> the U+FFFD character; escape text -> the \uFFFD escape text.
    expect(out).toBe(`{"raw":"\uFFFD","esc":"\\uFFFD"}`);
  });

  test('clean text/plain body unchanged', () => {
    const text = 'Hello world\nLine 2';
    expect(sanitizeBody(text, false)).toBe(text);
  });

  test('clean JSON body unchanged', () => {
    const json = '{"ok":true}';
    expect(sanitizeBody(json, true)).toBe(json);
  });
});
// Coarse performance guard: running stripLoneSurrogates over 1 MiB of plain
// ASCII must finish within a generous wall-clock budget and must not change
// the string's length.
describe('perf smoke', () => {
  test('1MB of clean text sanitizes in <500ms', () => {
    const payload = 'A'.repeat(1024 * 1024);

    // Time only the sanitizer call itself.
    const startedAt = performance.now();
    const sanitized = stripLoneSurrogates(payload);
    const durationMs = performance.now() - startedAt;

    expect(sanitized.length).toBe(payload.length);
    expect(durationMs).toBeLessThan(500);
  });
});