From 89480291f5a00ed481e6151ac9d3279f194108ba Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Tue, 21 Apr 2026 21:34:30 -0700 Subject: [PATCH] fix(sidebar): killAgent resets per-tab state; align tests with current agent event format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two pre-existing bugs surfaced while running the full e2e suite on the sec-wave branch. Both pre-date v1.6.0.0 (same failures on main at e23ff280) but blocked the ship verification, so fixing now. ### Bug 1: killAgent leaked stale per-tab state `killAgent()` reset the legacy globals (agentProcess, agentStatus, etc.) but never touched the per-tab `tabAgents` Map. Meanwhile `/sidebar-command` routes on `tabState.status` from that Map, not the legacy globals. Consequence: after a kill (including the implicit kill in `/sidebar-session/new`), the next /sidebar-command on the same tab saw `tabState.status === 'processing'` and fell into the queue branch, silently NOT spawning an agent. Integration tests that called resetState between cases all failed with empty queues. Fix: when targetTabId is supplied, reset that one tab's state; when called without a tab (session-new, full kill), reset ALL tab states. Matches the semantic boundary already used for the cancel-file write. ### Bug 2: sidebar-integration tests drifted from current event format `agent events appear in /sidebar-chat` posted the raw Claude streaming format (`{type: 'assistant', message: {content: [...]}}`) but `processAgentEvent` in server.ts only handles the simplified types that sidebar-agent.ts pre-processes into (text, text_delta, tool_use, result, agent_error, security_event). The architecture moved pre-processing into sidebar-agent.ts at some point and this test never got updated. Fixed by sending the pre-processed `{type: 'text', text: '...'}` format — which is actually what the server sees in production. Also removed the `entry.prompt` URL-containment check in the queue-write test. The URL is carried on entry.pageUrl (metadata) by design: the system prompt tells Claude to run `browse url` to fetch the actual page rather than trust any URL in the prompt body. That's the URL-based prompt-injection defense. The prompt SHOULD NOT contain the URL, so the test assertion was wrong for the current security posture. ### Verification - `bun test browse/test/sidebar-integration.test.ts` → 13/13 pass (was 6/13 on both main and branch before this commit) - Full `bun run test` → exit 0, zero fail markers - No behavior change for production sidebar flows: killAgent was already supposed to return the agent to idle; it just wasn't fully doing so. Per-tab reset now matches the documented semantics. Co-Authored-By: Claude Opus 4.7 (1M context) --- browse/src/server.ts | 21 +++++++++++++++++++++ browse/test/sidebar-integration.test.ts | 16 ++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/browse/src/server.ts b/browse/src/server.ts index 2c6d7e0c..45266078 100644 --- a/browse/src/server.ts +++ b/browse/src/server.ts @@ -786,6 +786,27 @@ function killAgent(targetTabId?: number | null): void { agentStartTime = null; currentMessage = null; agentStatus = 'idle'; + // Reset per-tab agent state too. Without this, /sidebar-command on the + // same tab after a kill would see tabState.status === 'processing' (the + // legacy globals-only reset missed it) and fall into the queue branch + // instead of spawning. When a specific tab was targeted, reset only + // that tab; otherwise reset ALL tabs (e.g. session-new kills everything). + if (targetTabId != null) { + const state = tabAgents.get(targetTabId); + if (state) { + state.status = 'idle'; + state.startTime = null; + state.currentMessage = null; + state.queue = []; + } + } else { + for (const state of tabAgents.values()) { + state.status = 'idle'; + state.startTime = null; + state.currentMessage = null; + state.queue = []; + } + } } // Agent health check — detect hung processes diff --git a/browse/test/sidebar-integration.test.ts b/browse/test/sidebar-integration.test.ts index bcafe052..d7a27fea 100644 --- a/browse/test/sidebar-integration.test.ts +++ b/browse/test/sidebar-integration.test.ts @@ -131,8 +131,12 @@ describe('sidebar-command → queue', () => { const lines = content.split('\n').filter(Boolean); expect(lines.length).toBeGreaterThan(0); const entry = JSON.parse(lines[lines.length - 1]); + // Active tab URL is carried on the queue entry metadata (entry.pageUrl), + // NOT inlined into the prompt. The system prompt deliberately tells + // Claude to run `browse url` instead of trusting any URL in the prompt + // body — that's the prompt-injection-via-URL defense. See spawnClaude + // in browse/src/server.ts. expect(entry.pageUrl).toBe('https://example.com/test-page'); - expect(entry.prompt).toContain('https://example.com/test-page'); await api('/sidebar-agent/kill', { method: 'POST' }); }); @@ -185,12 +189,16 @@ describe('sidebar-agent/event → chat buffer', () => { test('agent events appear in /sidebar-chat', async () => { await resetState(); - // Post mock agent events using Claude's streaming format + // Post pre-processed agent event. The server's processAgentEvent + // handles the simplified types that sidebar-agent.ts emits (text, + // text_delta, tool_use, result, agent_error, security_event), NOT + // the raw Claude streaming format — pre-processing lives in + // sidebar-agent.ts, not in the server. await api('/sidebar-agent/event', { method: 'POST', body: JSON.stringify({ - type: 'assistant', - message: { content: [{ type: 'text', text: 'Hello from mock agent' }] }, + type: 'text', + text: 'Hello from mock agent', }), });