From 0098d574e68397141d072551f475ca587b99e8e7 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 20 Apr 2026 04:42:20 +0800 Subject: [PATCH] test(security): assert tool-result ML scan surface (Read/Glob/Grep ingress) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4 new tests (12 expect calls) in sidebar-security.test.ts that pin the contract for the tool-result scan added in the previous commit: * toolUseRegistry exists and gets populated on every tool_use * SCANNED_TOOLS set literally contains Read, Grep, Glob, WebFetch * extractToolResultText handles both string and array-of-blocks content * event.type === 'user' + block.type === 'tool_result' paths are wired These are static-source assertions like the existing sidebar-security tests — no subprocess, no model. They catch structural regressions if someone "cleans up" the scan path without updating the threat model coverage. sidebar-security.test.ts now 16 tests / 42 expect calls. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/test/sidebar-security.test.ts | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/browse/test/sidebar-security.test.ts b/browse/test/sidebar-security.test.ts index 329c5989..2f8338a1 100644 --- a/browse/test/sidebar-security.test.ts +++ b/browse/test/sidebar-security.test.ts @@ -122,4 +122,42 @@ describe('Sidebar prompt injection defense', () => { // Backward compatibility: if old queue entries lack args, use defaults expect(AGENT_SRC).toContain("'--allowedTools', 'Bash,Read,Glob,Grep,Write'"); }); + + // --- Tool-result ML scan (Read/Glob/Grep ingress coverage) --- + + test('sidebar-agent registers tool_use IDs for later correlation', () => { + // Tool results arrive in user-role messages with tool_use_id pointing + // back to the original tool_use block. We need a registry to know which + // tool produced the content we're scanning. 
+ expect(AGENT_SRC).toContain('toolUseRegistry'); + expect(AGENT_SRC).toContain('toolUseRegistry.set'); + }); + + test('sidebar-agent scans Read/Glob/Grep/WebFetch tool outputs', () => { + // Codex review gap: untrusted content read via these tools enters + // Claude's context without passing through content-security.ts. + // Verify the SCANNED_TOOLS set includes each. + const scannedToolsMatch = AGENT_SRC.match(/SCANNED_TOOLS = new Set\(\[([^\]]+)\]\)/); + expect(scannedToolsMatch).toBeTruthy(); + const toolList = scannedToolsMatch![1]; + expect(toolList).toContain("'Read'"); + expect(toolList).toContain("'Grep'"); + expect(toolList).toContain("'Glob'"); + expect(toolList).toContain("'WebFetch'"); + }); + + test('sidebar-agent extracts text from tool_result content (string or blocks)', () => { + // Content can be a string OR an array of content blocks (text, image). + // Only text blocks matter for injection detection. + expect(AGENT_SRC).toContain('extractToolResultText'); + expect(AGENT_SRC).toContain('typeof content === \'string\''); + expect(AGENT_SRC).toContain('b.type === \'text\''); + }); + + test('sidebar-agent handles user-role messages for tool_result events', () => { + // Tool results come in user-role messages. Without this handler the + // entire ingress gap stays open. + expect(AGENT_SRC).toContain("event.type === 'user'"); + expect(AGENT_SRC).toContain("block.type === 'tool_result'"); + }); });