Reduce false positives in DOM, storage, URL, and keyword scanners

- Skip known CSRF tokens (authenticity_token, csrf_token, etc.) in hidden input scanner
- Ignore GitHub localStorage caches (ref-selector:*, jump_to:*, soft-nav:*, COPILOT_*)
- Skip keyboard shortcut data-attributes (data-hotkey, data-hotkey-scope)
- Fix URL param scanner: match sensitive names exactly (or as a "_"-delimited prefix/suffix) instead of by substring, so "author" no longer matches "auth"
- Add word boundaries to keyword scanner so "key" doesn't match "hotkey", "monkey", etc.
- Skip camelCase JS identifiers in keyword value matches
- Lower Sentry DSN severity to "low" (public by design)
- Apply same fixes to MutationObserver for SPA consistency

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
anthonyonazure
2026-04-14 04:50:42 -07:00
committed by moamen
parent 0d5ca42faa
commit 6742d6a827
2 changed files with 76 additions and 7 deletions
+75 -6
View File
@@ -87,14 +87,17 @@
for (const kw of keywords) {
const escaped = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
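// Require a non-letter (or string start) on both sides of the keyword so "key" does not match inside "hotkey", "monkey", "turkey".
// NOTE(review): the trailing (?:[^a-zA-Z]|$) consumes a character, so an unspaced `key="..."` or `key:"..."` no longer matches — a lookahead may have been intended; confirm.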
const kwRegex = new RegExp(
`(?:${escaped})\\s*[:=]\\s*['"\`]([^'"\`\\n]{8,200})['"\`]`,
`(?:^|[^a-zA-Z])(?:${escaped})(?:[^a-zA-Z]|$)\\s*[:=]\\s*['"\`]([^'"\`\\n]{8,200})['"\`]`,
"gi"
);
let m;
while ((m = kwRegex.exec(text)) !== null) {
const val = m[1];
if (isFalsePositive(val)) continue;
// Skip values that look like JS function/method names (camelCase identifiers)
if (/^[a-z][a-zA-Z0-9_]*$/.test(val) && val.length < 60) continue;
report({
url: sourceUrl,
match: val.substring(0, 200),
@@ -185,7 +188,16 @@
const name = (input.name || input.id || "").toLowerCase();
const value = input.value;
if (!value || value.length < 8) continue;
const sensitive = ["token", "csrf", "api_key", "apikey", "secret", "auth", "session", "nonce", "key", "access_token"];
// Skip known framework CSRF tokens — these are ephemeral anti-CSRF nonces, not secrets
const csrfNames = ["authenticity_token", "csrf_token", "csrf", "_csrf", "__requestverificationtoken",
"csrfmiddlewaretoken", "react-codespace-csrf", "_token", "xsrf-token", "anticsrf"];
if (csrfNames.some((c) => name === c || name.startsWith(c))) continue;
// Skip common non-secret hidden fields
const benignNames = ["return_to", "redirect", "redirect_uri", "next", "ref", "referer",
"utm_source", "utm_medium", "utm_campaign", "notice_name", "host", "method",
"pinned_items_id_and_type[]", "repo_topics[]", "timestamp_secret"];
if (benignNames.some((b) => name === b || name.startsWith(b))) continue;
const sensitive = ["api_key", "apikey", "secret_key", "access_token", "private_key", "password"];
if (sensitive.some((s) => name.includes(s)) || isHighEntropy(value)) {
report({
url: pageUrl, match: `${name}=${value.substring(0, 100)}`,
@@ -200,10 +212,20 @@
function scanDataAttributes() {
const all = document.querySelectorAll("*");
// Attribute names that contain "key" but are not secrets
const ignoredAttrs = [
"data-hotkey", "data-hotkey-scope", "data-hotkey-within", // Keyboard shortcuts
"data-provider-key", // UI provider identifiers
"data-pjax-key", "data-turbo-key", // Framework routing keys
];
for (const el of all) {
for (const attr of el.attributes) {
if (!/^data-.*(?:key|token|secret|auth|api|credential|password)/i.test(attr.name)) continue;
if (!attr.value || attr.value.length < 8) continue;
// Skip known non-secret data attributes
if (ignoredAttrs.includes(attr.name)) continue;
// Skip if the value looks like a keyboard shortcut (contains Mod+, Shift+, etc.)
if (/(?:Mod|Shift|Alt|Ctrl|Meta)\+/i.test(attr.value)) continue;
report({
url: pageUrl, match: `${attr.name}="${attr.value.substring(0, 100)}"`,
type: "data-attribute", patternName: "Sensitive Data Attribute",
@@ -226,6 +248,12 @@
function scanLinkHrefs() {
const links = document.querySelectorAll("a[href], link[href]");
// URL param names that look sensitive but aren't
const benignParams = ["author", "assignee", "reviewer", "creator", "user", "username",
"sort", "order", "page", "per_page", "tab", "type", "language", "q", "query",
"ref", "branch", "path", "since", "until", "direction", "state", "label",
"source", "plan", "return_to", "redirect", "onload", "render", "style",
"method", "host", "fromHostedPage", "countryBlackList"];
for (const link of links) {
try {
const href = link.href;
@@ -233,8 +261,10 @@
const url = new URL(href);
for (const [param, value] of url.searchParams) {
const p = param.toLowerCase();
const sensitive = ["key", "api_key", "apikey", "token", "secret", "access_token", "auth", "password", "session_id"];
if (sensitive.some((s) => p.includes(s)) && value.length >= 8) {
if (benignParams.includes(p)) continue;
const sensitive = ["api_key", "apikey", "token", "secret", "access_token", "password", "session_id", "private_key"];
// Match the whole param name, or the keyword as a "_"-delimited prefix/suffix, rather than any substring — "author" was matching "auth"
if (sensitive.some((s) => p === s || p.endsWith(`_${s}`) || p.startsWith(`${s}_`)) && value.length >= 8) {
report({
url: href, match: `${param}=${value.substring(0, 100)}`,
type: "url-param", patternName: "Sensitive URL Parameter",
@@ -251,6 +281,28 @@
{ store: localStorage, label: "localStorage" },
{ store: sessionStorage, label: "sessionStorage" },
];
// Keys that are known non-sensitive framework/platform storage — never flag these
const ignoredKeyPrefixes = [
"ref-selector:", // GitHub branch selector cache
"jump_to:", // GitHub navigation cache
"soft-nav:", // GitHub SPA navigation state
"react-router-scroll", // React Router scroll positions
"COPILOT_SELECTED_MODEL", // GitHub Copilot UI preference
"rc::", // reCAPTCHA state
"debug:", // Debug flags
"ajs_", // Analytics.js state
"_ga", // Google Analytics
"intercom", // Intercom chat widget
"amplitude_", // Amplitude analytics
"mp_", // Mixpanel
"optimizely", // Optimizely experiments
];
// Specific exact keys that look sensitive but aren't
const ignoredExactKeys = [
"COPILOT_AUTH_TOKEN", // GitHub Copilot ephemeral session (browser-local, not extractable)
"COPILOT_AUTH_TOKEN:expiry",
"id", // Generic session IDs in iframes (e.g., Stripe m.stripe.network)
];
for (const { store, label } of stores) {
try {
for (let i = 0; i < store.length; i++) {
@@ -258,7 +310,14 @@
const value = store.getItem(key);
if (!value || value.length < 12) continue;
const kl = key.toLowerCase();
const sensitive = ["token", "key", "secret", "auth", "session", "credential", "password", "jwt", "bearer"];
// Skip known benign keys
if (ignoredKeyPrefixes.some((p) => key.startsWith(p))) continue;
if (ignoredExactKeys.includes(key)) continue;
// Skip keys whose values are clearly JSON branch/ref data (GitHub caches)
if (value.startsWith('{"refs":') || value.startsWith('{"billing":')) continue;
const sensitive = ["token", "secret", "auth", "credential", "password", "jwt", "bearer", "private_key"];
// "key" alone is too broad (matches "hotkey", "monkey", etc.), so "key" and "session" are
// intentionally left out of the sensitive list to reduce noise
if (sensitive.some((s) => kl.includes(s)) || isHighEntropy(value.substring(0, 100))) {
report({
url: pageUrl, match: `${label}.${key}=${value.substring(0, 120)}`,
@@ -360,7 +419,14 @@
const name = (input.name || input.id || "").toLowerCase();
const value = input.value;
if (!value || value.length < 8) continue;
const sensitive = ["token", "csrf", "api_key", "apikey", "secret", "auth", "session", "nonce", "key", "access_token"];
// Skip known CSRF tokens
const csrfNames = ["authenticity_token", "csrf_token", "csrf", "_csrf", "__requestverificationtoken",
"csrfmiddlewaretoken", "react-codespace-csrf", "_token", "xsrf-token", "anticsrf"];
if (csrfNames.some((c) => name === c || name.startsWith(c))) continue;
const benignNames = ["return_to", "redirect", "redirect_uri", "next", "ref",
"notice_name", "host", "method", "pinned_items_id_and_type[]", "repo_topics[]", "timestamp_secret"];
if (benignNames.some((b) => name === b || name.startsWith(b))) continue;
const sensitive = ["api_key", "apikey", "secret_key", "access_token", "private_key", "password"];
if (sensitive.some((s) => name.includes(s)) || isHighEntropy(value)) {
report({
url: pageUrl, match: `${name}=${value.substring(0, 100)}`,
@@ -373,11 +439,14 @@
}
// Scan data attributes on new elements
const elementsToCheck = node.querySelectorAll ? [node, ...node.querySelectorAll("*")] : [node];
const ignoredAttrsMut = ["data-hotkey", "data-hotkey-scope", "data-hotkey-within", "data-provider-key", "data-pjax-key", "data-turbo-key"];
for (const el of elementsToCheck) {
if (!el.attributes) continue;
for (const attr of el.attributes) {
if (!/^data-.*(?:key|token|secret|auth|api|credential|password)/i.test(attr.name)) continue;
if (!attr.value || attr.value.length < 8) continue;
if (ignoredAttrsMut.includes(attr.name)) continue;
if (/(?:Mod|Shift|Alt|Ctrl|Meta)\+/i.test(attr.value)) continue;
report({
url: pageUrl, match: `${attr.name}="${attr.value.substring(0, 100)}"`,
type: "data-attribute", patternName: "Sensitive Data Attribute",
+1 -1
View File
@@ -76,7 +76,7 @@ const SECRET_PATTERNS = [
{ name: "Shopify Private App Token", re: /\bshppa_[a-fA-F0-9]{32}\b/g, severity: "critical", confidence: "high", provider: "Shopify" },
{ name: "Shopify Shared Secret", re: /\bshpss_[a-fA-F0-9]{32}\b/g, severity: "critical", confidence: "high", provider: "Shopify" },
{ name: "Sentry DSN", re: /https:\/\/[0-9a-f]{32}@(?:o[0-9]+\.)?(?:sentry\.io|[a-z0-9.-]+)\/[0-9]+/g, severity: "medium", confidence: "high", provider: "Sentry" },
{ name: "Sentry DSN", re: /https:\/\/[0-9a-f]{32}@(?:o[0-9]+\.)?(?:sentry\.io|[a-z0-9.-]+)\/[0-9]+/g, severity: "low", confidence: "high", provider: "Sentry" },
{ name: "Sentry Auth Token", re: /\bsntrys_[A-Za-z0-9_]{64,}\b/g, severity: "high", confidence: "high", provider: "Sentry" },
{ name: "New Relic API Key", re: /\bNRAK-[A-Z0-9]{27}\b/g, severity: "high", confidence: "high", provider: "New Relic" },