feat(ui): reviewable security banner with suspected-text + Allow/Block

Banner previously always rendered "Session terminated" — one-way. Now
when security_event.reviewable=true:

- Title switches to "Review suspected injection"
- Subtitle explains the decision ("allow to continue, block to end")
- Expandable details auto-open so the user sees context immediately
- Suspected text excerpt rendered in a mono pre block, scrollable,
  capped at 500 chars server-side
- Per-layer confidence scores (which layer fired, how confident)
- Action row with red [Block session] + neutral [Allow and continue]
- Click posts to /security-decision, banner hides, sidebar-agent
  sees the per-tab decision file and resumes or kills within one poll cycle

Existing hard-block banner (terminated session, canary leaks) unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-20 20:25:29 +08:00
parent 13b8db9e7a
commit b12a28654f
3 changed files with 142 additions and 6 deletions
+60
View File
@@ -257,6 +257,66 @@
font-variant-numeric: tabular-nums;
}
/* Suspected-text excerpt shown inside the banner details (the <pre> with
   id "security-banner-suspect"). Rendered as a small monospace panel. */
.security-banner-suspect {
margin: 4px 0 0;
padding: 8px 10px;
background: var(--zinc-900, #18181B);
border: 1px solid var(--zinc-700, #3F3F46);
border-radius: var(--radius-sm, 4px);
font-family: var(--font-mono);
font-size: 11px;
line-height: 1.4;
color: var(--zinc-300, #D4D4D8);
/* Keep the excerpt's own line breaks but wrap long lines and break
   over-long words instead of overflowing horizontally. */
white-space: pre-wrap;
word-break: break-word;
/* Cap the panel height; longer excerpts scroll vertically inside it. */
max-height: 160px;
overflow-y: auto;
}
/* Flex container for the banner's decision buttons, with an 8px gap
   between them and 14px of space above the row. */
.security-banner-actions {
display: flex;
gap: 8px;
justify-content: center;
margin-top: 14px;
}
/* Base style shared by both decision buttons. */
.security-banner-btn {
/* flex: 1 lets each button grow to share the action row's width. */
flex: 1;
padding: 8px 14px;
border-radius: var(--radius-md, 6px);
font-size: 12px;
font-weight: 600;
cursor: pointer;
/* Transparent 1px border by default; the -block / -allow variants only
   override border-color. */
border: 1px solid transparent;
/* Animate the hover changes made by the variant :hover rules. */
transition: background 0.15s, border-color 0.15s;
}
/* "Block session" variant: solid red button with white text. */
.security-banner-btn-block {
background: var(--red-600, #DC2626);
color: white;
border-color: var(--red-700, #B91C1C);
}
/* Hover darkens the fill to the same red used for the border. */
.security-banner-btn-block:hover {
background: var(--red-700, #B91C1C);
}
/* "Allow and continue" variant: neutral outlined button with no fill. */
.security-banner-btn-allow {
background: transparent;
color: var(--zinc-200, #E4E4E7);
border-color: var(--zinc-600, #52525B);
}
/* Hover adds a dark zinc fill and switches the border to zinc-500. */
.security-banner-btn-allow:hover {
background: var(--zinc-800, #27272A);
border-color: var(--zinc-500, #71717A);
}
/* Keyboard focus ring for both decision buttons. The literal fallback keeps
   the ring visible if --amber-400 is not defined: a var() with no fallback
   and an undefined property invalidates the whole outline declaration. */
.security-banner-btn:focus-visible {
outline: 2px solid var(--amber-400, #FBBF24);
outline-offset: 2px;
}
.conn-btn {
font-size: 9px;
font-family: var(--font-mono);
+6
View File
@@ -47,6 +47,12 @@
<!-- Expandable details panel. Hidden by default; showSecurityBanner() toggles
     the `hidden` attribute and opens it automatically for reviewable events. -->
<div class="security-banner-details" id="security-banner-details" hidden>
<div class="security-banner-section-label">SECURITY LAYERS</div>
<div class="security-banner-layers" id="security-banner-layers"></div>
<!-- Suspected-text label and excerpt. Both stay hidden unless a reviewable
     event carries a non-empty suspected_text string; the excerpt is filled in
     via textContent. -->
<div class="security-banner-section-label" id="security-banner-suspect-label" hidden>SUSPECTED TEXT</div>
<pre class="security-banner-suspect" id="security-banner-suspect" hidden></pre>
</div>
<!-- Decision buttons. Hidden by default; showSecurityBanner() reveals the row
     only for reviewable events and replaces both buttons with fresh clones on
     each render so the click handlers carry the current tabId. -->
<div class="security-banner-actions" id="security-banner-actions" hidden>
<button type="button" class="security-banner-btn security-banner-btn-block" id="security-banner-btn-block">Block session</button>
<button type="button" class="security-banner-btn security-banner-btn-allow" id="security-banner-btn-allow">Allow and continue</button>
</div>
</div>
+76 -6
View File
@@ -127,12 +127,53 @@ function showSecurityBanner(event) {
const expandBtn = document.getElementById('security-banner-expand');
const details = document.getElementById('security-banner-details');
const chevron = banner.querySelector('.security-banner-chevron');
const suspectLabel = document.getElementById('security-banner-suspect-label');
const suspectEl = document.getElementById('security-banner-suspect');
const actions = document.getElementById('security-banner-actions');
const btnAllow = document.getElementById('security-banner-btn-allow');
const btnBlock = document.getElementById('security-banner-btn-block');
// Reviewable path: the agent paused and is waiting for our decision.
// Title + subtitle change to framing-as-review, action buttons appear,
// suspected-text excerpt shows in the expandable details.
const reviewable = !!event.reviewable;
const tabId = Number(event.tabId);
// Title + subtitle
if (title) title.textContent = 'Session terminated';
if (title) title.textContent = reviewable ? 'Review suspected injection' : 'Session terminated';
if (subtitle) {
const fromDomain = event.domain ? ` from ${event.domain}` : '';
subtitle.textContent = `— prompt injection detected${fromDomain}`;
const toolLabel = event.tool ? ` in ${event.tool} output` : '';
subtitle.textContent = reviewable
? `possible prompt injection${toolLabel}${fromDomain} — allow to continue, block to end session`
: `— prompt injection detected${fromDomain}`;
}
// Suspected text excerpt (reviewable only)
if (suspectEl && suspectLabel) {
if (reviewable && typeof event.suspected_text === 'string' && event.suspected_text.length > 0) {
suspectEl.textContent = event.suspected_text;
suspectEl.hidden = false;
suspectLabel.hidden = false;
} else {
suspectEl.textContent = '';
suspectEl.hidden = true;
suspectLabel.hidden = true;
}
}
// Action buttons — wire fresh handlers each render so we capture the
// current tabId. Remove previous listeners by cloning the node.
if (actions && btnAllow && btnBlock) {
actions.hidden = !reviewable;
if (reviewable) {
const freshAllow = btnAllow.cloneNode(true);
const freshBlock = btnBlock.cloneNode(true);
btnAllow.parentNode.replaceChild(freshAllow, btnAllow);
btnBlock.parentNode.replaceChild(freshBlock, btnBlock);
freshAllow.addEventListener('click', () => postSecurityDecision(tabId, 'allow'));
freshBlock.addEventListener('click', () => postSecurityDecision(tabId, 'block'));
}
}
// Layer signals list (mono scores)
@@ -168,11 +209,13 @@ function showSecurityBanner(event) {
}
}
// Reset expand state on each render
// Reset expand state on each render. For reviewable banners, auto-expand
// so the user sees the suspected text without an extra click — they need
// that context to decide.
if (expandBtn && details) {
expandBtn.setAttribute('aria-expanded', 'false');
details.hidden = true;
if (chevron) chevron.style.transform = 'rotate(0deg)';
expandBtn.setAttribute('aria-expanded', reviewable ? 'true' : 'false');
details.hidden = !reviewable;
if (chevron) chevron.style.transform = reviewable ? 'rotate(180deg)' : 'rotate(0deg)';
}
banner.style.display = 'block';
@@ -183,6 +226,33 @@ function hideSecurityBanner() {
if (banner) banner.style.display = 'none';
}
/**
 * Send the user's decision on a reviewable BLOCK event to the server.
 * Server writes a per-tab decision file that sidebar-agent polls.
 *
 * The banner is hidden on every path (optimistic UI): if the user chose
 * "allow", the session continues; if "block", sidebar-agent will kill and
 * emit agent_error, which shows up in chat regardless.
 *
 * Request failures are logged, not thrown. That includes HTTP error
 * statuses, which fetch() reports by resolving with `ok === false`
 * rather than by rejecting.
 *
 * @param {number} tabId - Tab whose session is awaiting the decision. Must be
 *   a finite number; otherwise nothing is sent.
 * @param {string} decision - The user's choice; callers pass 'allow' or 'block'.
 * @returns {Promise<void>} Settles after the request finished (or was skipped)
 *   and the banner has been hidden.
 */
async function postSecurityDecision(tabId, decision) {
  // Without a server URL or a usable tab id there is nothing to post;
  // just dismiss the banner.
  if (!serverUrl || !Number.isFinite(tabId)) {
    hideSecurityBanner();
    return;
  }

  const headers = { 'Content-Type': 'application/json' };
  if (serverToken) {
    headers.Authorization = `Bearer ${serverToken}`;
  }

  try {
    const response = await fetch(`${serverUrl}/security-decision`, {
      method: 'POST',
      headers,
      body: JSON.stringify({ tabId, decision, reason: 'user' }),
    });
    // fetch() only rejects on network-level failure. A 4xx/5xx response
    // resolves normally, so surface it through the same log channel instead
    // of treating a rejected decision as delivered.
    if (!response.ok) {
      console.error(
        '[sidepanel] postSecurityDecision failed',
        new Error(`HTTP ${response.status}`),
      );
    }
  } catch (err) {
    console.error('[sidepanel] postSecurityDecision failed', err);
  } finally {
    hideSecurityBanner();
  }
}
// Shield icon state update — consumes /health.security.status.
// status ∈ { 'protected', 'degraded', 'inactive' }.
// 'protected' = all layers ok. 'degraded' = at least one ML layer off or failed