mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-02 03:35:09 +02:00
feat(security): canary leak check across all outbound channels
The sidebar-agent now scans every Claude stream event for the session's canary token before relaying any data to the sidepanel. Channels covered (per CEO review cross-model tension #2): * Assistant text blocks * Assistant text_delta streaming * tool_use arguments (recursively, via checkCanaryInStructure — catches URLs, commands, file paths nested at any depth) * tool_use content_block_start * tool_input_delta partial JSON * Final result payload If the canary leaks on any channel, onCanaryLeaked() fires once per session: 1. logAttempt() writes the event to ~/.gstack/security/attempts.jsonl with the canary's salted hash (never the payload content). 2. sends a `security_event` to the sidepanel so it can render the approved canary-leak banner (variant A mockup — ceo-plan 2026-04-19). 3. sends an `agent_error` for backward-compat with existing error surfaces. 4. SIGTERM's the claude subprocess (SIGKILL after 2s if still alive). The leaked content itself is never relayed to the sidepanel — the event is dropped at the boundary. Canary detection is pure-string substring match, so this all runs safely in the sidebar-agent (non-compiled bun) context. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+130
-4
@@ -13,6 +13,7 @@ import { spawn } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { safeUnlink } from './error-handling';
|
||||
import { checkCanaryInStructure, logAttempt, hashPayload, extractDomain } from './security';
|
||||
|
||||
const QUEUE = process.env.SIDEBAR_QUEUE_PATH || path.join(process.env.HOME || '/tmp', '.gstack', 'sidebar-agent-queue.jsonl');
|
||||
const KILL_FILE = path.join(path.dirname(QUEUE), 'sidebar-agent-kill');
|
||||
@@ -36,6 +37,7 @@ interface QueueEntry {
|
||||
pageUrl?: string | null;
|
||||
sessionId?: string | null;
|
||||
ts?: string;
|
||||
canary?: string; // session-scoped token; leak = prompt injection evidence
|
||||
}
|
||||
|
||||
function isValidQueueEntry(e: unknown): e is QueueEntry {
|
||||
@@ -55,6 +57,7 @@ function isValidQueueEntry(e: unknown): e is QueueEntry {
|
||||
if (obj.message !== undefined && obj.message !== null && typeof obj.message !== 'string') return false;
|
||||
if (obj.pageUrl !== undefined && obj.pageUrl !== null && typeof obj.pageUrl !== 'string') return false;
|
||||
if (obj.sessionId !== undefined && obj.sessionId !== null && typeof obj.sessionId !== 'string') return false;
|
||||
if (obj.canary !== undefined && typeof obj.canary !== 'string') return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -228,7 +231,63 @@ function summarizeToolInput(tool: string, input: any): string {
|
||||
return describeToolCall(tool, input);
|
||||
}
|
||||
|
||||
async function handleStreamEvent(event: any, tabId?: number): Promise<void> {
|
||||
/**
|
||||
* Scan a Claude stream event for the session canary. Returns the channel where
|
||||
* it leaked, or null if clean. Covers every outbound channel: text blocks,
|
||||
* text deltas, tool_use arguments (including nested URL/path/command strings),
|
||||
* and result payloads.
|
||||
*/
|
||||
function detectCanaryLeak(event: any, canary: string): string | null {
|
||||
if (!canary) return null;
|
||||
|
||||
if (event.type === 'assistant' && event.message?.content) {
|
||||
for (const block of event.message.content) {
|
||||
if (block.type === 'text' && typeof block.text === 'string' && block.text.includes(canary)) {
|
||||
return 'assistant_text';
|
||||
}
|
||||
if (block.type === 'tool_use' && checkCanaryInStructure(block.input, canary)) {
|
||||
return `tool_use:${block.name}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (event.type === 'content_block_start' && event.content_block?.type === 'tool_use') {
|
||||
if (checkCanaryInStructure(event.content_block.input, canary)) {
|
||||
return `tool_use:${event.content_block.name}`;
|
||||
}
|
||||
}
|
||||
if (event.type === 'content_block_delta' && event.delta?.type === 'text_delta') {
|
||||
if (typeof event.delta.text === 'string' && event.delta.text.includes(canary)) {
|
||||
return 'text_delta';
|
||||
}
|
||||
}
|
||||
if (event.type === 'content_block_delta' && event.delta?.type === 'input_json_delta') {
|
||||
if (typeof event.delta.partial_json === 'string' && event.delta.partial_json.includes(canary)) {
|
||||
return 'tool_input_delta';
|
||||
}
|
||||
}
|
||||
if (event.type === 'result' && typeof event.result === 'string' && event.result.includes(canary)) {
|
||||
return 'result';
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
interface CanaryContext {
|
||||
canary: string;
|
||||
pageUrl: string;
|
||||
onLeak: (channel: string) => void;
|
||||
}
|
||||
|
||||
async function handleStreamEvent(event: any, tabId?: number, canaryCtx?: CanaryContext): Promise<void> {
|
||||
// Canary check runs BEFORE any outbound send — we never want to relay
|
||||
// a leaked token to the sidepanel UI.
|
||||
if (canaryCtx) {
|
||||
const channel = detectCanaryLeak(event, canaryCtx.canary);
|
||||
if (channel) {
|
||||
canaryCtx.onLeak(channel);
|
||||
return; // drop the event — never relay content that leaked the canary
|
||||
}
|
||||
}
|
||||
|
||||
if (event.type === 'system' && event.session_id) {
|
||||
// Relay claude session ID for --resume support
|
||||
await sendEvent({ type: 'system', claudeSessionId: event.session_id }, tabId);
|
||||
@@ -267,14 +326,62 @@ async function handleStreamEvent(event: any, tabId?: number): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fire the prompt-injection-detected event to the server. This terminates
|
||||
* the session from the sidepanel's perspective and renders the canary leak
|
||||
* banner. Also logs locally (salted hash + domain only) and fires telemetry
|
||||
* if configured.
|
||||
*/
|
||||
async function onCanaryLeaked(params: {
|
||||
tabId: number;
|
||||
channel: string;
|
||||
canary: string;
|
||||
pageUrl: string;
|
||||
}): Promise<void> {
|
||||
const { tabId, channel, canary, pageUrl } = params;
|
||||
const domain = extractDomain(pageUrl);
|
||||
console.warn(`[sidebar-agent] CANARY LEAK detected on ${channel} for tab ${tabId} (domain=${domain || 'unknown'})`);
|
||||
|
||||
// Local log — salted hash + domain only, never the payload
|
||||
logAttempt({
|
||||
ts: new Date().toISOString(),
|
||||
urlDomain: domain,
|
||||
payloadHash: hashPayload(canary), // hash the canary, not the payload (which might be leaked content)
|
||||
confidence: 1.0,
|
||||
layer: 'canary',
|
||||
verdict: 'block',
|
||||
});
|
||||
|
||||
// Broadcast to sidepanel so it can render the approved banner
|
||||
await sendEvent({
|
||||
type: 'security_event',
|
||||
verdict: 'block',
|
||||
reason: 'canary_leaked',
|
||||
layer: 'canary',
|
||||
channel,
|
||||
domain,
|
||||
}, tabId);
|
||||
|
||||
// Also emit agent_error so the sidepanel's existing error surface
|
||||
// reflects that the session terminated. Keeps old clients working.
|
||||
await sendEvent({
|
||||
type: 'agent_error',
|
||||
error: `Session terminated — prompt injection detected${domain ? ` from ${domain}` : ''}`,
|
||||
}, tabId);
|
||||
}
|
||||
|
||||
async function askClaude(queueEntry: QueueEntry): Promise<void> {
|
||||
const { prompt, args, stateFile, cwd, tabId } = queueEntry;
|
||||
const { prompt, args, stateFile, cwd, tabId, canary, pageUrl } = queueEntry;
|
||||
const tid = tabId ?? 0;
|
||||
|
||||
processingTabs.add(tid);
|
||||
await sendEvent({ type: 'agent_start' }, tid);
|
||||
|
||||
return new Promise((resolve) => {
|
||||
// Canary context is set after proc is spawned (needs proc reference for kill).
|
||||
let canaryCtx: CanaryContext | undefined;
|
||||
let canaryTriggered = false;
|
||||
|
||||
// Use args from queue entry (server sets --model, --allowedTools, prompt framing).
|
||||
// Fall back to defaults only if queue entry has no args (backward compat).
|
||||
// Write doesn't expand attack surface beyond what Bash already provides.
|
||||
@@ -317,6 +424,25 @@ async function askClaude(queueEntry: QueueEntry): Promise<void> {
|
||||
|
||||
proc.stdin.end();
|
||||
|
||||
// Now that proc exists, set up the canary-leak handler. It fires at most
|
||||
// once; on fire we kill the subprocess, emit security_event + agent_error,
|
||||
// and let the normal close handler resolve the promise.
|
||||
if (canary) {
|
||||
canaryCtx = {
|
||||
canary,
|
||||
pageUrl: pageUrl ?? '',
|
||||
onLeak: (channel: string) => {
|
||||
if (canaryTriggered) return;
|
||||
canaryTriggered = true;
|
||||
onCanaryLeaked({ tabId: tid, channel, canary, pageUrl: pageUrl ?? '' });
|
||||
try { proc.kill('SIGTERM'); } catch (err: any) { if (err?.code !== 'ESRCH') throw err; }
|
||||
setTimeout(() => {
|
||||
try { proc.kill('SIGKILL'); } catch (err: any) { if (err?.code !== 'ESRCH') throw err; }
|
||||
}, 2000);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Poll for per-tab cancel signal from server's killAgent()
|
||||
const cancelCheck = setInterval(() => {
|
||||
try {
|
||||
@@ -338,7 +464,7 @@ async function askClaude(queueEntry: QueueEntry): Promise<void> {
|
||||
buffer = lines.pop() || '';
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
try { handleStreamEvent(JSON.parse(line), tid); } catch (err: any) {
|
||||
try { handleStreamEvent(JSON.parse(line), tid, canaryCtx); } catch (err: any) {
|
||||
console.error(`[sidebar-agent] Tab ${tid}: Failed to parse stream line:`, line.slice(0, 100), err.message);
|
||||
}
|
||||
}
|
||||
@@ -354,7 +480,7 @@ async function askClaude(queueEntry: QueueEntry): Promise<void> {
|
||||
activeProc = null;
|
||||
activeProcs.delete(tid);
|
||||
if (buffer.trim()) {
|
||||
try { handleStreamEvent(JSON.parse(buffer), tid); } catch (err: any) {
|
||||
try { handleStreamEvent(JSON.parse(buffer), tid, canaryCtx); } catch (err: any) {
|
||||
console.error(`[sidebar-agent] Tab ${tid}: Failed to parse final buffer:`, buffer.slice(0, 100), err.message);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user