From 0940d216ea31240b218842b4db2acb49ed8c6ffa Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 29 Mar 2026 23:44:37 -0700 Subject: [PATCH] feat: aggressive cleanup heuristics + preserve top nav bar Deterministic cleanup improvements (used as first pass before LLM analysis): - New 'clutter' category: audio players, podcast widgets, sidebar puzzles/games, recirculation widgets (taboola, outbrain, nativo), cross-promotion banners - Text-content detection: removes "ADVERTISEMENT", "Article continues below", "Sponsored", "Paid content" labels and their parent wrappers - Sticky fix: preserves the topmost full-width element near viewport top (site nav bar) instead of hiding all sticky/fixed elements. Sorts by vertical position, preserves the first one that spans >80% viewport width. Tests: clutter category, ad label removal, nav bar preservation logic. Co-Authored-By: Claude Opus 4.6 (1M context) --- browse/src/write-commands.ts | 90 +++++++++++++++++++++++++++++----- browse/test/sidebar-ux.test.ts | 52 +++++++++++++++----- 2 files changed, 116 insertions(+), 26 deletions(-) diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts index 02fba77c..19283fef 100644 --- a/browse/src/write-commands.ts +++ b/browse/src/write-commands.ts @@ -98,6 +98,26 @@ const CLEANUP_SELECTORS = { // App download banners '[class*="app-banner"]', '[class*="smart-banner"]', '[class*="app-download"]', '[id*="branch-banner"]', '.smartbanner', + // Cross-promotion / "follow us" / "preferred source" widgets + '[class*="promo-banner"]', '[class*="cross-promo"]', '[class*="partner-promo"]', + '[class*="preferred-source"]', '[class*="google-promo"]', + ], + clutter: [ + // Audio/podcast player widgets (not part of the article text) + '[class*="audio-player"]', '[class*="podcast-player"]', '[class*="listen-widget"]', + '[class*="everlit"]', '[class*="Everlit"]', + 'audio', // bare audio elements + // Sidebar games/puzzles widgets + '[class*="puzzle"]', '[class*="daily-game"]', '[class*="games-widget"]', + '[class*="crossword-promo"]', '[class*="mini-game"]', + // "Most Popular" / "Trending" sidebar recirculation (not the top nav trending bar) + 'aside [class*="most-popular"]', 'aside [class*="trending"]', + 'aside [class*="most-read"]', 'aside [class*="recommended"]', + // Related articles / recirculation at bottom + '[class*="related-articles"]', '[class*="more-stories"]', + '[class*="recirculation"]', '[class*="taboola"]', '[class*="outbrain"]', + // Hearst-specific (SF Chronicle, etc.) + '[class*="nativo"]', '[data-tb-region]', ], sticky: [ // Handled via JavaScript evaluation, not pure selectors @@ -486,7 +506,7 @@ export async function handleWriteCommand( case 'cleanup': { // Parse flags let doAds = false, doCookies = false, doSticky = false, doSocial = false; - let doOverlays = false; + let doOverlays = false, doClutter = false; let doAll = false; // Default to --all if no args (most common use case from sidebar button) @@ -501,14 +521,15 @@ export async function handleWriteCommand( case '--sticky': doSticky = true; break; case '--social': doSocial = true; break; case '--overlays': doOverlays = true; break; + case '--clutter': doClutter = true; break; case '--all': doAll = true; break; default: - throw new Error(`Unknown cleanup flag: ${arg}. Use: --ads, --cookies, --sticky, --social, --overlays, --all`); + throw new Error(`Unknown cleanup flag: ${arg}. Use: --ads, --cookies, --sticky, --social, --overlays, --clutter, --all`); } } if (doAll) { - doAds = doCookies = doSticky = doSocial = doOverlays = true; + doAds = doCookies = doSticky = doSocial = doOverlays = doClutter = true; } const removed: string[] = []; @@ -519,6 +540,7 @@ export async function handleWriteCommand( if (doCookies) selectors.push(...CLEANUP_SELECTORS.cookies); if (doSocial) selectors.push(...CLEANUP_SELECTORS.social); if (doOverlays) selectors.push(...CLEANUP_SELECTORS.overlays); + if (doClutter) selectors.push(...CLEANUP_SELECTORS.clutter); if (selectors.length > 0) { const count = await page.evaluate((sels: string[]) => { @@ -539,6 +561,7 @@ export async function handleWriteCommand( if (doCookies) removed.push('cookie banners'); if (doSocial) removed.push('social widgets'); if (doOverlays) removed.push('overlays/popups'); + if (doClutter) removed.push('clutter'); } } @@ -546,23 +569,36 @@ export async function handleWriteCommand( if (doSticky) { const stickyCount = await page.evaluate(() => { let removed = 0; + // Collect all sticky/fixed elements, sort by vertical position + const stickyEls: Array<{ el: Element; top: number; width: number; height: number }> = []; const allElements = document.querySelectorAll('*'); + const viewportWidth = window.innerWidth; for (const el of allElements) { const style = getComputedStyle(el); if (style.position === 'fixed' || style.position === 'sticky') { - const tag = el.tagName.toLowerCase(); - // Skip main nav/header elements at the top of the page - if (tag === 'nav' || tag === 'header') continue; - if (el.getAttribute('role') === 'navigation') continue; - // Skip elements at the very top that look like navbars const rect = el.getBoundingClientRect(); - if (rect.top <= 10 && rect.height < 100 && tag !== 'div') continue; - // Skip the gstack control indicator - if (el.id === 'gstack-ctrl') continue; - (el as HTMLElement).style.setProperty('display', 'none', 'important'); - removed++; + stickyEls.push({ el, top: rect.top, width: rect.width, height: rect.height }); } } + // Sort by vertical position (topmost first) + stickyEls.sort((a, b) => a.top - b.top); + let preservedTopNav = false; + for (const { el, top, width, height } of stickyEls) { + const tag = el.tagName.toLowerCase(); + // Always skip nav/header semantic elements + if (tag === 'nav' || tag === 'header') continue; + if (el.getAttribute('role') === 'navigation') continue; + // Skip the gstack control indicator + if ((el as HTMLElement).id === 'gstack-ctrl') continue; + // Preserve the FIRST full-width element near the top (site's main nav bar) + // This catches divs that act as navbars but aren't semantic