From 1a696f1475d16d29fb782025afee82bacf6ba0ab Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Tue, 14 Apr 2026 08:35:14 -0700 Subject: [PATCH] feat: $B ux-audit command + snapshot --heatmap flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New browse meta-command: ux-audit extracts page structure (site ID, navigation, headings, interactive elements, text blocks) as structured JSON for agent-side UX behavioral analysis. Pure data extraction — the agent applies the 6 usability tests and makes judgment calls. Element caps: 50 headings, 100 links, 200 interactive, 50 text blocks. New snapshot flag: -H/--heatmap accepts a JSON color map mapping ref IDs to colors (green/yellow/red/blue/orange/gray). Extends existing snapshot -a annotation system with per-ref colors instead of hardcoded red. Color whitelist validation prevents CSS injection. Composable — any skill can use it. Co-Authored-By: Claude Opus 4.6 (1M context) --- browse/SKILL.md | 2 + browse/src/commands.ts | 3 + browse/src/meta-commands.ts | 110 ++++++++++++++++++++++++++++++++ browse/src/snapshot.ts | 116 ++++++++++++++++++++++++++++++++++ test/skill-validation.test.ts | 1 + 5 files changed, 232 insertions(+) diff --git a/browse/SKILL.md b/browse/SKILL.md index 420e2b0b..5ac0377b 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -587,6 +587,7 @@ The snapshot is your primary tool for understanding and interacting with pages. -a --annotate Annotated screenshot with red overlay boxes and ref labels -o --output Output path for annotated screenshot (default: /browse-annotated.png) -C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick). Auto-enabled when -i is used. +-H --heatmap Color-coded overlay screenshot from JSON map: '{"@e1":"green","@e3":"red"}'. Valid colors: green, yellow, red, blue, orange, gray. ``` All flags can be combined freely. `-o` only applies when `-a` is also used. @@ -717,6 +718,7 @@ $B prettyscreenshot --cleanup --scroll-to ".pricing" --width 1440 ~/Desktop/hero | `network [--clear]` | Network requests | | `perf` | Page load timings | | `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set to write localStorage | +| `ux-audit` | Extract page structure for UX behavioral analysis — site ID, nav, headings, text blocks, interactive elements. Returns JSON for agent interpretation. | ### Visual | Command | Description | diff --git a/browse/src/commands.ts b/browse/src/commands.ts index eacdf0cd..09966d7c 100644 --- a/browse/src/commands.ts +++ b/browse/src/commands.ts @@ -40,6 +40,7 @@ export const META_COMMANDS = new Set([ 'watch', 'state', 'frame', + 'ux-audit', ]); export const ALL_COMMANDS = new Set([...READ_COMMANDS, ...WRITE_COMMANDS, ...META_COMMANDS]); @@ -146,6 +147,8 @@ export const COMMAND_DESCRIPTIONS: Record | style --undo [N]' }, 'cleanup': { category: 'Interaction', description: 'Remove page clutter (ads, cookie banners, sticky elements, social widgets)', usage: 'cleanup [--ads] [--cookies] [--sticky] [--social] [--all]' }, 'prettyscreenshot': { category: 'Visual', description: 'Clean screenshot with optional cleanup, scroll positioning, and element hiding', usage: 'prettyscreenshot [--scroll-to sel|text] [--cleanup] [--hide sel...] [--width px] [path]' }, + // UX Audit + 'ux-audit': { category: 'Inspection', description: 'Extract page structure for UX behavioral analysis — site ID, nav, headings, text blocks, interactive elements. Returns JSON for agent interpretation.', usage: 'ux-audit' }, }; // Load-time validation: descriptions must cover exactly the command sets diff --git a/browse/src/meta-commands.ts b/browse/src/meta-commands.ts index 1fa905e1..e42665df 100644 --- a/browse/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -653,6 +653,116 @@ export async function handleMetaCommand( return `Switched to frame: ${frame.url()}`; } + // ─── UX Audit ───────────────────────────────────── + case 'ux-audit': { + const page = bm.getPage(); + const url = page.url(); + + // Extract page structure for UX behavioral analysis + // Agent interprets the data and applies Krug's 6 usability tests + const data = await page.evaluate(() => { + const HEADING_CAP = 50; + const LINK_CAP = 100; + const INTERACTIVE_CAP = 200; + const TEXT_BLOCK_CAP = 50; + + // Site ID: logo or brand element + const logoEl = document.querySelector('[class*="logo"], [id*="logo"], header img, [aria-label*="home"], a[href="/"]'); + const siteId = logoEl ? { + found: true, + text: (logoEl.textContent || '').trim().slice(0, 100), + tag: logoEl.tagName, + alt: (logoEl as HTMLImageElement).alt || null, + } : { found: false, text: null, tag: null, alt: null }; + + // Page name: main heading + const h1 = document.querySelector('h1'); + const pageName = h1 ? { + found: true, + text: h1.textContent?.trim().slice(0, 200) || '', + } : { found: false, text: null }; + + // Navigation: primary nav elements + const navEls = document.querySelectorAll('nav, [role="navigation"]'); + const navItems: Array<{ text: string; links: number }> = []; + navEls.forEach((nav, i) => { + if (i >= 5) return; + const links = nav.querySelectorAll('a'); + navItems.push({ + text: (nav.getAttribute('aria-label') || `nav-${i}`).slice(0, 50), + links: links.length, + }); + }); + + // "You are here" indicator: current/active nav items + const activeNavItems = document.querySelectorAll('[aria-current], .active, .current, [class*="active"], [class*="current"]'); + const youAreHere = Array.from(activeNavItems).slice(0, 5).map(el => ({ + text: (el.textContent || '').trim().slice(0, 50), + tag: el.tagName, + })); + + // Search: search box presence + const searchEl = document.querySelector('input[type="search"], [role="search"], input[name*="search"], input[placeholder*="search" i], input[aria-label*="search" i]'); + const search = { found: !!searchEl }; + + // Breadcrumbs + const breadcrumbEl = document.querySelector('[aria-label*="breadcrumb" i], .breadcrumb, .breadcrumbs, [class*="breadcrumb"]'); + const breadcrumbs = breadcrumbEl ? { + found: true, + items: Array.from(breadcrumbEl.querySelectorAll('a, span, li')).slice(0, 10).map(el => (el.textContent || '').trim().slice(0, 30)), + } : { found: false, items: [] }; + + // Headings: heading hierarchy + const headings = Array.from(document.querySelectorAll('h1,h2,h3,h4,h5,h6')).slice(0, HEADING_CAP).map(h => ({ + tag: h.tagName, + text: (h.textContent || '').trim().slice(0, 80), + size: getComputedStyle(h).fontSize, + })); + + // Interactive elements: buttons, links, inputs + const interactiveEls = Array.from(document.querySelectorAll('a, button, input, select, textarea, [role="button"], [tabindex]')).slice(0, INTERACTIVE_CAP); + const interactive = interactiveEls.map(el => { + const rect = el.getBoundingClientRect(); + return { + tag: el.tagName, + text: (el.textContent || (el as HTMLInputElement).placeholder || (el as HTMLInputElement).value || '').trim().slice(0, 50), + type: (el as HTMLInputElement).type || null, + role: el.getAttribute('role'), + w: Math.round(rect.width), + h: Math.round(rect.height), + visible: rect.width > 0 && rect.height > 0, + }; + }).filter(el => el.visible); + + // Text blocks: paragraphs and large text areas + const textBlocks = Array.from(document.querySelectorAll('p, [class*="description"], [class*="intro"], [class*="welcome"], [class*="hero"] p, main p')).slice(0, TEXT_BLOCK_CAP).map(el => ({ + text: (el.textContent || '').trim().slice(0, 200), + wordCount: (el.textContent || '').trim().split(/\s+/).filter(Boolean).length, + })); + + // Total visible text word count + const bodyText = (document.body?.innerText || '').trim(); + const totalWords = bodyText.split(/\s+/).filter(Boolean).length; + + return { + url: window.location.href, + title: document.title, + siteId, + pageName, + navigation: navItems, + youAreHere, + search, + breadcrumbs, + headings, + interactive, + textBlocks, + totalWords, + }; + }); + + return JSON.stringify(data, null, 2); + } + default: throw new Error(`Unknown meta command: ${command}`); } diff --git a/browse/src/snapshot.ts b/browse/src/snapshot.ts index ac2761bb..78b793e3 100644 --- a/browse/src/snapshot.ts +++ b/browse/src/snapshot.ts @@ -39,6 +39,7 @@ interface SnapshotOptions { annotate?: boolean; // -a / --annotate: annotated screenshot outputPath?: string; // -o / --output: path for annotated screenshot cursorInteractive?: boolean; // -C / --cursor-interactive: scan cursor:pointer etc. + heatmap?: string; // -H / --heatmap: JSON color map for ref overlays } /** @@ -64,6 +65,7 @@ export const SNAPSHOT_FLAGS: Array<{ { short: '-a', long: '--annotate', description: 'Annotated screenshot with red overlay boxes and ref labels', optionKey: 'annotate' }, { short: '-o', long: '--output', description: 'Output path for annotated screenshot (default: /browse-annotated.png)', takesValue: true, valueHint: '', optionKey: 'outputPath' }, { short: '-C', long: '--cursor-interactive', description: 'Cursor-interactive elements (@c refs — divs with pointer, onclick). Auto-enabled when -i is used.', optionKey: 'cursorInteractive' }, + { short: '-H', long: '--heatmap', description: 'Color-coded overlay screenshot from JSON map: \'{"@e1":"green","@e3":"red"}\'. Valid colors: green, yellow, red, blue, orange, gray.', takesValue: true, valueHint: '', optionKey: 'heatmap' }, ]; interface ParsedNode { @@ -435,6 +437,120 @@ export async function handleSnapshot( } } + // ─── Heatmap mode (-H) ────────────────────────────────────── + if (opts.heatmap) { + const heatmapPath = opts.outputPath || `${TEMP_DIR}/browse-heatmap.png`; + // Validate output path + { + const nodePath = require('path') as typeof import('path'); + const nodeFs = require('fs') as typeof import('fs'); + const absolute = nodePath.resolve(heatmapPath); + const safeDirs = [TEMP_DIR, process.cwd()].map((d: string) => { + try { return nodeFs.realpathSync(d); } catch (err: any) { if (err?.code !== 'ENOENT') throw err; return d; } + }); + let realPath: string; + try { + realPath = nodeFs.realpathSync(absolute); + } catch (err: any) { + if (err.code === 'ENOENT') { + try { + const dir = nodeFs.realpathSync(nodePath.dirname(absolute)); + realPath = nodePath.join(dir, nodePath.basename(absolute)); + } catch (err2: any) { + if (err2?.code !== 'ENOENT') throw err2; + realPath = absolute; + } + } else { + throw new Error(`Cannot resolve real path: ${heatmapPath} (${err.code})`); + } + } + if (!safeDirs.some((dir: string) => isPathWithin(realPath, dir))) { + throw new Error(`Path must be within: ${safeDirs.join(', ')}`); + } + } + + // Parse and validate color map + const VALID_COLORS = new Set(['green', 'yellow', 'red', 'blue', 'orange', 'gray']); + const COLOR_MAP: Record = { + green: { border: '#00b400', bg: 'rgba(0,180,0,0.15)' }, + yellow: { border: '#ffb400', bg: 'rgba(255,180,0,0.15)' }, + red: { border: '#ff0000', bg: 'rgba(255,0,0,0.15)' }, + blue: { border: '#0066ff', bg: 'rgba(0,102,255,0.15)' }, + orange: { border: '#ff6600', bg: 'rgba(255,102,0,0.15)' }, + gray: { border: '#888888', bg: 'rgba(136,136,136,0.15)' }, + }; + + let colorAssignments: Record; + try { + colorAssignments = JSON.parse(opts.heatmap); + } catch { + throw new Error('Invalid heatmap JSON. Expected: \'{"@e1":"green","@e3":"red"}\''); + } + + // Validate colors + for (const [ref, color] of Object.entries(colorAssignments)) { + if (!VALID_COLORS.has(color)) { + throw new Error(`Invalid heatmap color "${color}" for ${ref}. Valid: ${[...VALID_COLORS].join(', ')}`); + } + } + + try { + const boxes: Array<{ ref: string; box: { x: number; y: number; width: number; height: number }; color: string }> = []; + for (const [refKey, color] of Object.entries(colorAssignments)) { + const cleanRef = refKey.startsWith('@') ? refKey.slice(1) : refKey; + const entry = refMap.get(cleanRef); + if (!entry) continue; // Skip refs not found on page + try { + const box = await entry.locator.boundingBox({ timeout: 1000 }); + if (box) { + const colors = COLOR_MAP[color] || COLOR_MAP.gray; + boxes.push({ ref: `@${cleanRef}`, box, color: JSON.stringify(colors) }); + } + } catch { + // Element may be offscreen or hidden — skip + } + } + + await page.evaluate((boxes) => { + for (const { ref, box, color } of boxes) { + const colors = JSON.parse(color); + const overlay = document.createElement('div'); + overlay.className = '__browse_heatmap__'; + overlay.style.cssText = ` + position: absolute; top: ${box.y}px; left: ${box.x}px; + width: ${box.width}px; height: ${box.height}px; + border: 2px solid ${colors.border}; background: ${colors.bg}; + pointer-events: none; z-index: 99999; + font-size: 10px; color: ${colors.border}; font-weight: bold; + `; + const label = document.createElement('span'); + label.textContent = ref; + label.style.cssText = `position: absolute; top: -14px; left: 0; background: ${colors.border}; color: white; padding: 0 3px; font-size: 10px;`; + overlay.appendChild(label); + document.body.appendChild(overlay); + } + }, boxes); + + await page.screenshot({ path: heatmapPath, fullPage: true }); + + // Remove heatmap overlays + await page.evaluate(() => { + document.querySelectorAll('.__browse_heatmap__').forEach(el => el.remove()); + }); + + output.push(''); + output.push(`[heatmap screenshot: ${heatmapPath}]`); + } catch (err: any) { + // Cleanup on failure + try { + await page.evaluate(() => { + document.querySelectorAll('.__browse_heatmap__').forEach(el => el.remove()); + }); + } catch {} + if (!err?.message?.includes('closed') && !err?.message?.includes('Target') && !err?.message?.includes('Execution context') && !err?.message?.includes('screenshot')) throw err; + } + } + // ─── Diff mode (-D) ─────────────────────────────────────── if (opts.diff) { const lastSnapshot = session.getLastSnapshot(); diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 1da5db6d..c78c1873 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -143,6 +143,7 @@ describe('Command registry consistency', () => { const validKeys = new Set([ 'interactive', 'compact', 'depth', 'selector', 'diff', 'annotate', 'outputPath', 'cursorInteractive', + 'heatmap', ]); for (const flag of SNAPSHOT_FLAGS) { expect(validKeys.has(flag.optionKey)).toBe(true);