feat: hidden element stripping for scoped token text extraction

Detects CSS-hidden elements (opacity, font-size, off-screen, same-color,
clip-path) and ARIA label injection patterns. Marks elements with
data-gstack-hidden, extracts text from a clean clone (no content is
removed from the live DOM; only the temporary marker attribute is added),
then removes the markers. Only active for scoped tokens on the `text`
command.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-05 11:08:15 -07:00
parent 5ba1472b5e
commit ec7f281a40
2 changed files with 152 additions and 1 deletions
+18 -1
View File
@@ -23,6 +23,7 @@ import { COMMAND_DESCRIPTIONS, PAGE_CONTENT_COMMANDS, wrapUntrustedContent } fro
import {
wrapUntrustedPageContent, datamarkContent,
runContentFilters, type ContentFilterResult,
markHiddenElements, getCleanTextWithStripping, cleanupHiddenMarkers,
} from './content-security';
import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot';
import {
@@ -957,7 +958,23 @@ async function handleCommandInternal(
let result: string;
if (READ_COMMANDS.has(command)) {
result = await handleReadCommand(command, args, browserManager);
const isScoped = tokenInfo && tokenInfo.clientId !== 'root';
// Hidden element stripping for scoped tokens on text command
if (isScoped && command === 'text') {
const page = browserManager.getPage();
const strippedDescs = await markHiddenElements(page);
if (strippedDescs.length > 0) {
console.warn(`[browse] Content security: stripped ${strippedDescs.length} hidden elements for ${tokenInfo.clientId}`);
}
try {
const target = browserManager.getActiveFrameOrPage();
result = await getCleanTextWithStripping(target);
} finally {
await cleanupHiddenMarkers(page);
}
} else {
result = await handleReadCommand(command, args, browserManager);
}
} else if (WRITE_COMMANDS.has(command)) {
result = await handleWriteCommand(command, args, browserManager);
} else if (META_COMMANDS.has(command)) {