mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
feat(security): wire canary injection into sidebar spawnClaude
Every sidebar message now gets a fresh CANARY-XXXXXXXXXXXX token embedded in the system prompt with an instruction for Claude to never output it on any channel. The token flows through the queue entry so sidebar-agent.ts can check every outbound operation for leaks. If Claude echoes the canary into any outbound channel (text stream, tool arguments, URLs, file write paths), the sidebar-agent terminates the session and the user sees the approved canary leak banner. This operation is pure string manipulation — safe in the compiled browse binary. The actual output-stream check (which also has to be safe in compiled contexts) lives in sidebar-agent.ts (next commit). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+13
-1
@@ -25,6 +25,7 @@ import {
|
|||||||
runContentFilters, type ContentFilterResult,
|
runContentFilters, type ContentFilterResult,
|
||||||
markHiddenElements, getCleanTextWithStripping, cleanupHiddenMarkers,
|
markHiddenElements, getCleanTextWithStripping, cleanupHiddenMarkers,
|
||||||
} from './content-security';
|
} from './content-security';
|
||||||
|
import { generateCanary, injectCanary } from './security';
|
||||||
import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot';
|
import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot';
|
||||||
import {
|
import {
|
||||||
initRegistry, validateToken as validateScopedToken, checkScope, checkDomain,
|
initRegistry, validateToken as validateScopedToken, checkScope, checkDomain,
|
||||||
@@ -551,7 +552,13 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId
|
|||||||
/**
 * Escape the XML-significant characters `&`, `<` and `>` in `s` so the text
 * can be embedded between XML-style tags without being read as markup.
 *
 * `&` is replaced first; otherwise the ampersands introduced by the `&lt;` /
 * `&gt;` replacements would themselves be escaped a second time.
 */
const escapeXml = (s: string): string =>
  s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
|
/**
 * Escape the XML-significant characters `&`, `<` and `>` in `s` so the text
 * can be embedded between XML-style tags without being read as markup.
 *
 * `&` is replaced first; otherwise the ampersands introduced by the `&lt;` /
 * `&gt;` replacements would themselves be escaped a second time.
 */
const escapeXml = (s: string): string =>
  s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
|
||||||
const escapedMessage = escapeXml(userMessage);
|
const escapedMessage = escapeXml(userMessage);
|
||||||
|
|
||||||
const systemPrompt = [
|
// Fresh canary per message. The sidebar-agent checks every outbound channel
|
||||||
|
// (stream text, tool_use arguments, URLs, file writes) for this token.
|
||||||
|
// If Claude echoes it anywhere, that's evidence a prompt injection overrode
|
||||||
|
// the system prompt — session is killed, user sees the banner.
|
||||||
|
const canary = generateCanary();
|
||||||
|
|
||||||
|
const baseSystemPrompt = [
|
||||||
'<system>',
|
'<system>',
|
||||||
`Browser co-pilot. Binary: ${B}`,
|
`Browser co-pilot. Binary: ${B}`,
|
||||||
'Run `' + B + ' url` first to check the actual page. NEVER assume the URL.',
|
'Run `' + B + ' url` first to check the actual page. NEVER assume the URL.',
|
||||||
@@ -576,6 +583,10 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId
|
|||||||
'</system>',
|
'</system>',
|
||||||
].join('\n');
|
].join('\n');
|
||||||
|
|
||||||
|
// Append the canary instruction. injectCanary() tells Claude never to
|
||||||
|
// output the token on any channel.
|
||||||
|
const systemPrompt = injectCanary(baseSystemPrompt, canary);
|
||||||
|
|
||||||
const prompt = `${systemPrompt}\n\n<user-message>\n${escapedMessage}\n</user-message>`;
|
const prompt = `${systemPrompt}\n\n<user-message>\n${escapedMessage}\n</user-message>`;
|
||||||
// Never resume — each message is a fresh context. Resuming carries stale
|
// Never resume — each message is a fresh context. Resuming carries stale
|
||||||
// page URLs and old navigation state that makes the agent fight the user.
|
// page URLs and old navigation state that makes the agent fight the user.
|
||||||
@@ -607,6 +618,7 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId
|
|||||||
sessionId: sidebarSession?.claudeSessionId || null,
|
sessionId: sidebarSession?.claudeSessionId || null,
|
||||||
pageUrl: pageUrl,
|
pageUrl: pageUrl,
|
||||||
tabId: agentTabId,
|
tabId: agentTabId,
|
||||||
|
canary, // sidebar-agent scans all outbound channels for this token
|
||||||
});
|
});
|
||||||
try {
|
try {
|
||||||
fs.mkdirSync(gstackDir, { recursive: true, mode: 0o700 });
|
fs.mkdirSync(gstackDir, { recursive: true, mode: 0o700 });
|
||||||
|
|||||||
Reference in New Issue
Block a user