mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
feat(security): wire canary injection into sidebar spawnClaude
Every sidebar message now gets a fresh CANARY-XXXXXXXXXXXX token embedded in the system prompt with an instruction for Claude to never output it on any channel. The token flows through the queue entry so sidebar-agent.ts can check every outbound operation for leaks. If Claude echoes the canary into any outbound channel (text stream, tool arguments, URLs, file write paths), the sidebar-agent terminates the session and the user sees the approved canary leak banner. This operation is pure string manipulation — safe in the compiled browse binary. The actual output-stream check (which also has to be safe in compiled contexts) lives in sidebar-agent.ts (next commit). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+13
-1
@@ -25,6 +25,7 @@ import {
|
||||
runContentFilters, type ContentFilterResult,
|
||||
markHiddenElements, getCleanTextWithStripping, cleanupHiddenMarkers,
|
||||
} from './content-security';
|
||||
import { generateCanary, injectCanary } from './security';
|
||||
import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot';
|
||||
import {
|
||||
initRegistry, validateToken as validateScopedToken, checkScope, checkDomain,
|
||||
@@ -551,7 +552,13 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId
|
||||
/**
 * Escapes the XML-significant characters `&`, `<` and `>` in `s`.
 *
 * `&` is replaced first so the ampersands introduced by the `&lt;` / `&gt;`
 * replacements are not escaped a second time.
 */
const escapeXml = (s: string): string =>
  s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
|
||||
const escapedMessage = escapeXml(userMessage);
|
||||
|
||||
const systemPrompt = [
|
||||
// Fresh canary per message. The sidebar-agent checks every outbound channel
|
||||
// (stream text, tool_use arguments, URLs, file writes) for this token.
|
||||
// If Claude echoes it anywhere, that's evidence a prompt injection overrode
|
||||
// the system prompt — session is killed, user sees the banner.
|
||||
const canary = generateCanary();
|
||||
|
||||
const baseSystemPrompt = [
|
||||
'<system>',
|
||||
`Browser co-pilot. Binary: ${B}`,
|
||||
'Run `' + B + ' url` first to check the actual page. NEVER assume the URL.',
|
||||
@@ -576,6 +583,10 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId
|
||||
'</system>',
|
||||
].join('\n');
|
||||
|
||||
// Append the canary instruction. injectCanary() tells Claude never to
|
||||
// output the token on any channel.
|
||||
const systemPrompt = injectCanary(baseSystemPrompt, canary);
|
||||
|
||||
const prompt = `${systemPrompt}\n\n<user-message>\n${escapedMessage}\n</user-message>`;
|
||||
// Never resume — each message is a fresh context. Resuming carries stale
|
||||
// page URLs and old navigation state that makes the agent fight the user.
|
||||
@@ -607,6 +618,7 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId
|
||||
sessionId: sidebarSession?.claudeSessionId || null,
|
||||
pageUrl: pageUrl,
|
||||
tabId: agentTabId,
|
||||
canary, // sidebar-agent scans all outbound channels for this token
|
||||
});
|
||||
try {
|
||||
fs.mkdirSync(gstackDir, { recursive: true, mode: 0o700 });
|
||||
|
||||
Reference in New Issue
Block a user