diff --git a/browse/src/server.ts b/browse/src/server.ts index 3a825c1e..e3f24fa0 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -25,6 +25,7 @@ import { runContentFilters, type ContentFilterResult, markHiddenElements, getCleanTextWithStripping, cleanupHiddenMarkers, } from './content-security'; +import { generateCanary, injectCanary } from './security'; import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot'; import { initRegistry, validateToken as validateScopedToken, checkScope, checkDomain, @@ -551,7 +552,13 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId const escapeXml = (s: string) => s.replace(/&/g, '&').replace(//g, '>'); const escapedMessage = escapeXml(userMessage); - const systemPrompt = [ + // Fresh canary per message. The sidebar-agent checks every outbound channel + // (stream text, tool_use arguments, URLs, file writes) for this token. + // If Claude echoes it anywhere, that's evidence a prompt injection overrode + // the system prompt — session is killed, user sees the banner. + const canary = generateCanary(); + + const baseSystemPrompt = [ '', `Browser co-pilot. Binary: ${B}`, 'Run `' + B + ' url` first to check the actual page. NEVER assume the URL.', @@ -576,6 +583,10 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId '', ].join('\n'); + // Append the canary instruction. injectCanary() tells Claude never to + // output the token on any channel. + const systemPrompt = injectCanary(baseSystemPrompt, canary); + const prompt = `${systemPrompt}\n\n\n${escapedMessage}\n`; // Never resume — each message is a fresh context. Resuming carries stale // page URLs and old navigation state that makes the agent fight the user. @@ -607,6 +618,7 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId sessionId: sidebarSession?.claudeSessionId || null, pageUrl: pageUrl, tabId: agentTabId, + canary, // sidebar-agent scans all outbound channels for this token }); try { fs.mkdirSync(gstackDir, { recursive: true, mode: 0o700 });