From 6742d6a827c13ebb27f477a7ca67fde7a373f588 Mon Sep 17 00:00:00 2001 From: anthonyonazure Date: Tue, 14 Apr 2026 04:50:42 -0700 Subject: [PATCH] Reduce false positives in DOM, storage, URL, and keyword scanners - Skip known CSRF tokens (authenticity_token, csrf_token, etc.) in hidden input scanner - Ignore GitHub localStorage caches (ref-selector:*, jump_to:*, soft-nav:*, COPILOT_*) - Skip keyboard shortcut data-attributes (data-hotkey, data-hotkey-scope) - Fix URL param scanner: use exact match instead of substring to prevent "author" matching "auth" - Add word boundaries to keyword scanner so "key" doesn't match "hotkey", "monkey", etc. - Skip camelCase JS identifiers in keyword value matches - Lower Sentry DSN severity to "low" (public by design) - Apply same fixes to MutationObserver for SPA consistency Co-Authored-By: Claude Opus 4.6 (1M context) --- js/content.js | 81 ++++++++++++++++++++++++++++++++++++++++++++++---- js/patterns.js | 2 +- 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/js/content.js b/js/content.js index 5593a54..23d713e 100644 --- a/js/content.js +++ b/js/content.js @@ -87,14 +87,17 @@ for (const kw of keywords) { const escaped = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + // Require word boundary around keyword to avoid matching "hotkey", "monkey", "turkey" for "key" const kwRegex = new RegExp( - `(?:${escaped})\\s*[:=]\\s*['"\`]([^'"\`\\n]{8,200})['"\`]`, + `(?:^|[^a-zA-Z])(?:${escaped})(?:[^a-zA-Z]|$)\\s*[:=]\\s*['"\`]([^'"\`\\n]{8,200})['"\`]`, "gi" ); let m; while ((m = kwRegex.exec(text)) !== null) { const val = m[1]; if (isFalsePositive(val)) continue; + // Skip values that look like JS function/method names (camelCase identifiers) + if (/^[a-z][a-zA-Z0-9_]*$/.test(val) && val.length < 60) continue; report({ url: sourceUrl, match: val.substring(0, 200), @@ -185,7 +188,16 @@ const name = (input.name || input.id || "").toLowerCase(); const value = input.value; if (!value || value.length < 8) continue; - const sensitive = ["token", "csrf", "api_key", "apikey", "secret", "auth", "session", "nonce", "key", "access_token"]; + // Skip known framework CSRF tokens — these are ephemeral anti-CSRF nonces, not secrets + const csrfNames = ["authenticity_token", "csrf_token", "csrf", "_csrf", "__requestverificationtoken", + "csrfmiddlewaretoken", "react-codespace-csrf", "_token", "xsrf-token", "anticsrf"]; + if (csrfNames.some((c) => name === c || name.startsWith(c))) continue; + // Skip common non-secret hidden fields + const benignNames = ["return_to", "redirect", "redirect_uri", "next", "ref", "referer", + "utm_source", "utm_medium", "utm_campaign", "notice_name", "host", "method", + "pinned_items_id_and_type[]", "repo_topics[]", "timestamp_secret"]; + if (benignNames.some((b) => name === b || name.startsWith(b))) continue; + const sensitive = ["api_key", "apikey", "secret_key", "access_token", "private_key", "password"]; if (sensitive.some((s) => name.includes(s)) || isHighEntropy(value)) { report({ url: pageUrl, match: `${name}=${value.substring(0, 100)}`, @@ -200,10 +212,20 @@ function scanDataAttributes() { const all = document.querySelectorAll("*"); + // Attribute names that contain "key" but are not secrets + const ignoredAttrs = [ + "data-hotkey", "data-hotkey-scope", "data-hotkey-within", // Keyboard shortcuts + "data-provider-key", // UI provider identifiers + "data-pjax-key", "data-turbo-key", // Framework routing keys + ]; for (const el of all) { for (const attr of el.attributes) { if (!/^data-.*(?:key|token|secret|auth|api|credential|password)/i.test(attr.name)) continue; if (!attr.value || attr.value.length < 8) continue; + // Skip known non-secret data attributes + if (ignoredAttrs.includes(attr.name)) continue; + // Skip if the value looks like a keyboard shortcut (contains Mod+, Shift+, etc.) + if (/(?:Mod|Shift|Alt|Ctrl|Meta)\+/i.test(attr.value)) continue; report({ url: pageUrl, match: `${attr.name}="${attr.value.substring(0, 100)}"`, type: "data-attribute", patternName: "Sensitive Data Attribute", @@ -226,6 +248,12 @@ function scanLinkHrefs() { const links = document.querySelectorAll("a[href], link[href]"); + // URL param names that look sensitive but aren't + const benignParams = ["author", "assignee", "reviewer", "creator", "user", "username", + "sort", "order", "page", "per_page", "tab", "type", "language", "q", "query", + "ref", "branch", "path", "since", "until", "direction", "state", "label", + "source", "plan", "return_to", "redirect", "onload", "render", "style", + "method", "host", "fromHostedPage", "countryBlackList"]; for (const link of links) { try { const href = link.href; @@ -233,8 +261,10 @@ const url = new URL(href); for (const [param, value] of url.searchParams) { const p = param.toLowerCase(); - const sensitive = ["key", "api_key", "apikey", "token", "secret", "access_token", "auth", "password", "session_id"]; - if (sensitive.some((s) => p.includes(s)) && value.length >= 8) { + if (benignParams.includes(p)) continue; + const sensitive = ["api_key", "apikey", "token", "secret", "access_token", "password", "session_id", "private_key"]; + // Require exact match on the param name, not substring — "author" was matching "auth" + if (sensitive.some((s) => p === s || p.endsWith(`_${s}`) || p.startsWith(`${s}_`)) && value.length >= 8) { report({ url: href, match: `${param}=${value.substring(0, 100)}`, type: "url-param", patternName: "Sensitive URL Parameter", @@ -251,6 +281,28 @@ { store: localStorage, label: "localStorage" }, { store: sessionStorage, label: "sessionStorage" }, ]; + // Keys that are known non-sensitive framework/platform storage — never flag these + const ignoredKeyPrefixes = [ + "ref-selector:", // GitHub branch selector cache + "jump_to:", // GitHub navigation cache + "soft-nav:", // GitHub SPA navigation state + "react-router-scroll", // React Router scroll positions + "COPILOT_SELECTED_MODEL", // GitHub Copilot UI preference + "rc::", // reCAPTCHA state + "debug:", // Debug flags + "ajs_", // Analytics.js state + "_ga", // Google Analytics + "intercom", // Intercom chat widget + "amplitude_", // Amplitude analytics + "mp_", // Mixpanel + "optimizely", // Optimizely experiments + ]; + // Specific exact keys that look sensitive but aren't + const ignoredExactKeys = [ + "COPILOT_AUTH_TOKEN", // GitHub Copilot ephemeral session (browser-local, not extractable) + "COPILOT_AUTH_TOKEN:expiry", + "id", // Generic session IDs in iframes (e.g., Stripe m.stripe.network) + ]; for (const { store, label } of stores) { try { for (let i = 0; i < store.length; i++) { @@ -258,7 +310,14 @@ const value = store.getItem(key); if (!value || value.length < 12) continue; const kl = key.toLowerCase(); - const sensitive = ["token", "key", "secret", "auth", "session", "credential", "password", "jwt", "bearer"]; + // Skip known benign keys + if (ignoredKeyPrefixes.some((p) => key.startsWith(p))) continue; + if (ignoredExactKeys.includes(key)) continue; + // Skip keys whose values are clearly JSON branch/ref data (GitHub caches) + if (value.startsWith('{"refs":') || value.startsWith('{"billing":')) continue; + const sensitive = ["token", "secret", "auth", "credential", "password", "jwt", "bearer", "private_key"]; + // Require a stronger match — "key" alone is too broad (matches "hotkey", "monkey", etc.) + // Remove "key" and "session" from sensitive list to reduce noise if (sensitive.some((s) => kl.includes(s)) || isHighEntropy(value.substring(0, 100))) { report({ url: pageUrl, match: `${label}.${key}=${value.substring(0, 120)}`, @@ -360,7 +419,14 @@ const name = (input.name || input.id || "").toLowerCase(); const value = input.value; if (!value || value.length < 8) continue; - const sensitive = ["token", "csrf", "api_key", "apikey", "secret", "auth", "session", "nonce", "key", "access_token"]; + // Skip known CSRF tokens + const csrfNames = ["authenticity_token", "csrf_token", "csrf", "_csrf", "__requestverificationtoken", + "csrfmiddlewaretoken", "react-codespace-csrf", "_token", "xsrf-token", "anticsrf"]; + if (csrfNames.some((c) => name === c || name.startsWith(c))) continue; + const benignNames = ["return_to", "redirect", "redirect_uri", "next", "ref", + "notice_name", "host", "method", "pinned_items_id_and_type[]", "repo_topics[]", "timestamp_secret"]; + if (benignNames.some((b) => name === b || name.startsWith(b))) continue; + const sensitive = ["api_key", "apikey", "secret_key", "access_token", "private_key", "password"]; if (sensitive.some((s) => name.includes(s)) || isHighEntropy(value)) { report({ url: pageUrl, match: `${name}=${value.substring(0, 100)}`, @@ -373,11 +439,14 @@ } // Scan data attributes on new elements const elementsToCheck = node.querySelectorAll ? [node, ...node.querySelectorAll("*")] : [node]; + const ignoredAttrsMut = ["data-hotkey", "data-hotkey-scope", "data-hotkey-within", "data-provider-key", "data-pjax-key", "data-turbo-key"]; for (const el of elementsToCheck) { if (!el.attributes) continue; for (const attr of el.attributes) { if (!/^data-.*(?:key|token|secret|auth|api|credential|password)/i.test(attr.name)) continue; if (!attr.value || attr.value.length < 8) continue; + if (ignoredAttrsMut.includes(attr.name)) continue; + if (/(?:Mod|Shift|Alt|Ctrl|Meta)\+/i.test(attr.value)) continue; report({ url: pageUrl, match: `${attr.name}="${attr.value.substring(0, 100)}"`, type: "data-attribute", patternName: "Sensitive Data Attribute", diff --git a/js/patterns.js b/js/patterns.js index 2afcc7f..0ea9289 100644 --- a/js/patterns.js +++ b/js/patterns.js @@ -76,7 +76,7 @@ const SECRET_PATTERNS = [ { name: "Shopify Private App Token", re: /\bshppa_[a-fA-F0-9]{32}\b/g, severity: "critical", confidence: "high", provider: "Shopify" }, { name: "Shopify Shared Secret", re: /\bshpss_[a-fA-F0-9]{32}\b/g, severity: "critical", confidence: "high", provider: "Shopify" }, - { name: "Sentry DSN", re: /https:\/\/[0-9a-f]{32}@(?:o[0-9]+\.)?(?:sentry\.io|[a-z0-9.-]+)\/[0-9]+/g, severity: "medium", confidence: "high", provider: "Sentry" }, + { name: "Sentry DSN", re: /https:\/\/[0-9a-f]{32}@(?:o[0-9]+\.)?(?:sentry\.io|[a-z0-9.-]+)\/[0-9]+/g, severity: "low", confidence: "high", provider: "Sentry" }, { name: "Sentry Auth Token", re: /\bsntrys_[A-Za-z0-9_]{64,}\b/g, severity: "high", confidence: "high", provider: "Sentry" }, { name: "New Relic API Key", re: /\bNRAK-[A-Z0-9]{27}\b/g, severity: "high", confidence: "high", provider: "New Relic" },