diff --git a/browse/src/sidebar-agent.ts b/browse/src/sidebar-agent.ts index d6838fac..9b7447c0 100644 --- a/browse/src/sidebar-agent.ts +++ b/browse/src/sidebar-agent.ts @@ -624,22 +624,26 @@ async function askClaude(queueEntry: QueueEntry): Promise { scan: async (toolName: string, text: string) => { if (toolResultBlockFired) return; // Parallel L4 + L4c ensemble scan (DeBERTa no-op when disabled). - const [contentSignal, debertaSignal] = await Promise.all([ + // We run L4/L4c AND Haiku in parallel on tool outputs regardless of + // L4's score, because BrowseSafe-Bench shows L4 (TestSavantAI) has + // low recall on browser-agent-specific attacks (~15% at v1). Gating + // Haiku on L4 meant our best signal almost never ran. The cost is + // ~$0.002 + ~300ms per tool output, bounded by the Haiku timeout + // and offset by Haiku actually seeing the real attack context. + // + // Haiku only runs when the Claude CLI is available (checkHaikuAvailable + // caches the probe). In environments without it, the call returns a + // degraded signal and the verdict falls back to L4 alone. + const [contentSignal, debertaSignal, transcriptSignal] = await Promise.all([ scanPageContent(text), scanPageContentDeberta(text), - ]); - // Short-circuit if neither content layer crossed WARN — no point - // spinning up Haiku for a clean scan. - const maxContent = Math.max(contentSignal.confidence, debertaSignal.confidence); - if (maxContent < THRESHOLDS.WARN) return; - const signals: LayerSignal[] = [contentSignal, debertaSignal]; - if (shouldRunTranscriptCheck(signals)) { - signals.push(await checkTranscript({ + checkTranscript({ user_message: queueEntry.message ?? '', tool_calls: [{ tool_name: toolName, tool_input: {} }], tool_output: text, - })); - } + }), + ]); + const signals: LayerSignal[] = [contentSignal, debertaSignal, transcriptSignal]; const result = combineVerdict(signals, { toolOutput: true }); if (result.verdict !== 'block') return; toolResultBlockFired = true; diff --git a/browse/test/security-source-contracts.test.ts b/browse/test/security-source-contracts.test.ts index 97d933fc..2811c3f4 100644 --- a/browse/test/security-source-contracts.test.ts +++ b/browse/test/security-source-contracts.test.ts @@ -116,8 +116,14 @@ describe('askClaude — pre-spawn + tool-result defense wiring', () => { expect(AGENT_SRC).toContain('}, 2000);'); }); - test('tool-result scan short-circuits when both content layers below WARN', () => { - expect(AGENT_SRC).toMatch(/maxContent < THRESHOLDS\.WARN/); + test('tool-result scan runs all three classifiers in parallel (no L4 gate)', () => { + // Regression guard for the Haiku-always change. Previously the scan + // short-circuited when L4/L4c both returned below WARN, which meant + // Haiku (our best signal per BrowseSafe-Bench) rarely ran. Now we run + // all three in parallel and let combineVerdict decide. + expect(AGENT_SRC).toMatch(/scanPageContent\(text\),[\s\S]*scanPageContentDeberta\(text\),[\s\S]*checkTranscript\(/); + // The old short-circuit must be gone. + expect(AGENT_SRC).not.toMatch(/if \(maxContent < THRESHOLDS\.WARN\) return;/); }); test('onCanaryLeaked fires both security_event and agent_error for legacy clients', () => {