fix: resolve merge conflicts — keep security defenses + per-tab isolation

Merged main's security improvements (XML escaping, prompt-injection defense, allowed-commands whitelist, --model opus, Write tool, stderr capture) with our branch's per-tab isolation (BROWSE_TAB env var, processingTabs set, no --resume).
Updated test expectations for the expanded system prompt.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-02 11:45:20 +02:00 · 2026-03-29 22:27:42 -07:00
parent fe4441b530
commit 91d2f73a67
3 changed files with 6 additions and 6 deletions
@@ -480,8 +480,8 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null, forTabId
  const prompt = `${systemPrompt}\n\n<user-message>\n${escapedMessage}\n</user-message>`;
  // Never resume — each message is a fresh context. Resuming carries stale
  // page URLs and old navigation state that makes the agent fight the user.
-  const args = ['-p', prompt, '--output-format', 'stream-json', '--verbose',
-    '--allowedTools', 'Bash,Read,Glob,Grep'];
+  const args = ['-p', prompt, '--model', 'opus', '--output-format', 'stream-json', '--verbose',
+    '--allowedTools', 'Bash,Read,Glob,Grep,Write'];

  addChatEntry({ ts: new Date().toISOString(), role: 'agent', type: 'agent_start' });

@@ -110,7 +110,7 @@ describe('Sidebar prompt injection defense', () => {
    // It should NOT rebuild args from scratch (the old bug)
    expect(AGENT_SRC).toContain('args || [');
    // Verify the destructured args come from queueEntry
-    expect(AGENT_SRC).toContain('const { prompt, args, stateFile, cwd } = queueEntry');
+    expect(AGENT_SRC).toContain('const { prompt, args, stateFile, cwd, tabId } = queueEntry');
  });

  test('sidebar-agent falls back to defaults if queue has no args', () => {
@@ -221,14 +221,14 @@ describe('sidebar agent queue poll (sidebar-agent.ts)', () => {
 describe('system prompt size', () => {
  const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');

-  test('system prompt is compact (under 20 lines)', () => {
+  test('system prompt is compact (under 30 lines)', () => {
    const start = serverSrc.indexOf('const systemPrompt = [');
    const end = serverSrc.indexOf("].join('\\n');", start);
    const promptBlock = serverSrc.slice(start, end);
    const lines = promptBlock.split('\n').length;
    // Compact prompt = fewer input tokens = faster first response
-    // Slightly higher limit because of per-tab instruction line
-    expect(lines).toBeLessThan(20);
+    // Higher limit accommodates security lines (prompt injection defense, allowed commands)
+    expect(lines).toBeLessThan(30);
  });

  test('system prompt does not contain verbose narration examples', () => {