feat(ui): reviewable security banner with suspected-text + Allow/Block

Banner previously always rendered "Session terminated" — one-way. Now when security_event.reviewable=true:
- Title switches to "Review suspected injection"
- Subtitle explains the decision ("allow to continue, block to end")
- Expandable details auto-open so the user sees context immediately
- Suspected text excerpt rendered in a mono pre block, scrollable, capped at 500 chars server-side
- Per-layer confidence scores (which layer fired, how confident)
- Action row with red [Block session] + neutral [Allow and continue]
- Click posts to /security-decision, banner hides, sidebar-agent sees the decision file and resumes or kills within one poll cycle

Existing hard-block banner (terminated session, canary leaks) unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 03:35:09 +02:00 · 2026-04-20 20:25:29 +08:00
parent 13b8db9e7a
commit b12a28654f
3 changed files with 142 additions and 6 deletions
@@ -257,6 +257,66 @@
  font-variant-numeric: tabular-nums;
 }

+.security-banner-suspect {
+  margin: 4px 0 0;
+  padding: 8px 10px;
+  background: var(--zinc-900, #18181B);
+  border: 1px solid var(--zinc-700, #3F3F46);
+  border-radius: var(--radius-sm, 4px);
+  font-family: var(--font-mono);
+  font-size: 11px;
+  line-height: 1.4;
+  color: var(--zinc-300, #D4D4D8);
+  white-space: pre-wrap;
+  word-break: break-word;
+  max-height: 160px;
+  overflow-y: auto;
+}
+
+.security-banner-actions {
+  display: flex;
+  gap: 8px;
+  justify-content: center;
+  margin-top: 14px;
+}
+
+.security-banner-btn {
+  flex: 1;
+  padding: 8px 14px;
+  border-radius: var(--radius-md, 6px);
+  font-size: 12px;
+  font-weight: 600;
+  cursor: pointer;
+  border: 1px solid transparent;
+  transition: background 0.15s, border-color 0.15s;
+}
+
+.security-banner-btn-block {
+  background: var(--red-600, #DC2626);
+  color: white;
+  border-color: var(--red-700, #B91C1C);
+}
+
+.security-banner-btn-block:hover {
+  background: var(--red-700, #B91C1C);
+}
+
+.security-banner-btn-allow {
+  background: transparent;
+  color: var(--zinc-200, #E4E4E7);
+  border-color: var(--zinc-600, #52525B);
+}
+
+.security-banner-btn-allow:hover {
+  background: var(--zinc-800, #27272A);
+  border-color: var(--zinc-500, #71717A);
+}
+
+/* Keyboard focus ring for both action buttons. The hex fallback keeps the
+   ring visible if --amber-400 is not defined: without it the whole outline
+   declaration would be invalid at computed-value time and be dropped. */
+.security-banner-btn:focus-visible {
+  outline: 2px solid var(--amber-400, #FBBF24);
+  outline-offset: 2px;
+}
+
 .conn-btn {
  font-size: 9px;
  font-family: var(--font-mono);
@@ -47,6 +47,12 @@
    <div class="security-banner-details" id="security-banner-details" hidden>
      <div class="security-banner-section-label">SECURITY LAYERS</div>
      <div class="security-banner-layers" id="security-banner-layers"></div>
+      <div class="security-banner-section-label" id="security-banner-suspect-label" hidden>SUSPECTED TEXT</div>
+      <pre class="security-banner-suspect" id="security-banner-suspect" hidden></pre>
+    </div>
+    <div class="security-banner-actions" id="security-banner-actions" hidden>
+      <button type="button" class="security-banner-btn security-banner-btn-block" id="security-banner-btn-block">Block session</button>
+      <button type="button" class="security-banner-btn security-banner-btn-allow" id="security-banner-btn-allow">Allow and continue</button>
    </div>
  </div>

@@ -127,12 +127,53 @@ function showSecurityBanner(event) {
  const expandBtn = document.getElementById('security-banner-expand');
  const details = document.getElementById('security-banner-details');
  const chevron = banner.querySelector('.security-banner-chevron');
+  const suspectLabel = document.getElementById('security-banner-suspect-label');
+  const suspectEl = document.getElementById('security-banner-suspect');
+  const actions = document.getElementById('security-banner-actions');
+  const btnAllow = document.getElementById('security-banner-btn-allow');
+  const btnBlock = document.getElementById('security-banner-btn-block');
+
+  // Reviewable path: the agent paused and is waiting for our decision.
+  // Title + subtitle change to framing-as-review, action buttons appear,
+  // suspected-text excerpt shows in the expandable details.
+  const reviewable = !!event.reviewable;
+  const tabId = Number(event.tabId);

  // Title + subtitle
-  if (title) title.textContent = 'Session terminated';
+  if (title) title.textContent = reviewable ? 'Review suspected injection' : 'Session terminated';
  if (subtitle) {
    const fromDomain = event.domain ? ` from ${event.domain}` : '';
-    subtitle.textContent = `— prompt injection detected${fromDomain}`;
+    const toolLabel = event.tool ? ` in ${event.tool} output` : '';
+    subtitle.textContent = reviewable
+      ? `possible prompt injection${toolLabel}${fromDomain} — allow to continue, block to end session`
+      : `— prompt injection detected${fromDomain}`;
+  }
+
+  // Suspected text excerpt (reviewable only)
+  if (suspectEl && suspectLabel) {
+    if (reviewable && typeof event.suspected_text === 'string' && event.suspected_text.length > 0) {
+      suspectEl.textContent = event.suspected_text;
+      suspectEl.hidden = false;
+      suspectLabel.hidden = false;
+    } else {
+      suspectEl.textContent = '';
+      suspectEl.hidden = true;
+      suspectLabel.hidden = true;
+    }
+  }
+
+  // Action buttons — wire fresh handlers each render so we capture the
+  // current tabId. Remove previous listeners by cloning the node.
+  if (actions && btnAllow && btnBlock) {
+    actions.hidden = !reviewable;
+    if (reviewable) {
+      const freshAllow = btnAllow.cloneNode(true);
+      const freshBlock = btnBlock.cloneNode(true);
+      btnAllow.parentNode.replaceChild(freshAllow, btnAllow);
+      btnBlock.parentNode.replaceChild(freshBlock, btnBlock);
+      freshAllow.addEventListener('click', () => postSecurityDecision(tabId, 'allow'));
+      freshBlock.addEventListener('click', () => postSecurityDecision(tabId, 'block'));
+    }
  }

  // Layer signals list (mono scores)
@@ -168,11 +209,13 @@ function showSecurityBanner(event) {
    }
  }

-  // Reset expand state on each render
+  // Reset expand state on each render. For reviewable banners, auto-expand
+  // so the user sees the suspected text without an extra click — they need
+  // that context to decide.
  if (expandBtn && details) {
-    expandBtn.setAttribute('aria-expanded', 'false');
-    details.hidden = true;
-    if (chevron) chevron.style.transform = 'rotate(0deg)';
+    expandBtn.setAttribute('aria-expanded', reviewable ? 'true' : 'false');
+    details.hidden = !reviewable;
+    if (chevron) chevron.style.transform = reviewable ? 'rotate(180deg)' : 'rotate(0deg)';
  }

  banner.style.display = 'block';
@@ -183,6 +226,33 @@ function hideSecurityBanner() {
  if (banner) banner.style.display = 'none';
 }

+/**
+ * Send the user's decision on a reviewable BLOCK event to the server.
+ * Server writes a per-tab decision file that sidebar-agent polls.
+ *
+ * @param {number} tabId - Tab the decision applies to. A non-finite value
+ *   (e.g. NaN from a missing event.tabId) skips the request entirely.
+ * @param {string} decision - Decision to send; the banner buttons pass
+ *   'allow' or 'block'.
+ * @returns {Promise<void>} Resolves once the request has completed or failed.
+ *   Request failures are logged to the console, not rethrown.
+ */
+async function postSecurityDecision(tabId, decision) {
+  // Nothing to send to (or no valid tab): just dismiss the banner.
+  if (!serverUrl || !Number.isFinite(tabId)) {
+    hideSecurityBanner();
+    return;
+  }
+  try {
+    const res = await fetch(`${serverUrl}/security-decision`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        ...(serverToken ? { Authorization: `Bearer ${serverToken}` } : {}),
+      },
+      body: JSON.stringify({ tabId, decision, reason: 'user' }),
+    });
+    // fetch() only rejects on network-level failure; an HTTP error status
+    // (401, 500, ...) still resolves. Surface it so a decision the server
+    // did not accept is not silently lost.
+    if (!res.ok) {
+      console.error(`[sidepanel] postSecurityDecision rejected: HTTP ${res.status}`);
+    }
+  } catch (err) {
+    console.error('[sidepanel] postSecurityDecision failed', err);
+  }
+  // Hide the banner optimistically. If the user chose "allow", the session
+  // continues. If "block", sidebar-agent will kill and emit agent_error,
+  // which shows up in chat regardless.
+  hideSecurityBanner();
+}
+
 // Shield icon state update — consumes /health.security.status.
 // status ∈ { 'protected', 'degraded', 'inactive' }.
 // 'protected' = all layers ok. 'degraded' = at least one ML layer off or failed