Files
NeuroSploit/backend/core/xss_context_analyzer.py
2026-02-11 10:47:33 -03:00

445 lines
17 KiB
Python

"""
NeuroSploit v3 - XSS Context Analyzer
Determines whether a payload reflected in HTML is in an executable position
(auto-executing, interactive, or non-executable text content).
Used by XSS testers and response verifier for context-aware validation.
"""
import re
from typing import Dict, Optional
# Event-handler attribute names, split by whether they need user interaction.
# Names are stored lowercase; callers lowercase candidates before lookup.

# Handlers treated as firing on their own (no user interaction).
AUTO_FIRE_EVENTS = {
    "onload",
    "onerror",
    "onabort",
    "onbegin",
    "onend",
    "onanimationend",
    "onanimationstart",
    "ontransitionend",
    "onhashchange",
    "onpageshow",
    "onpopstate",
    "onresize",
    "onscroll",
    "onstorage",
    "onunload",
    "ontoggle",  # when paired with <details open>
}

# Handlers that only run after the user does something.
INTERACTIVE_EVENTS = {
    "onclick",
    "ondblclick",
    "onmousedown",
    "onmouseup",
    "onmouseover",
    "onmousemove",
    "onmouseout",
    "onmouseenter",
    "onmouseleave",
    "onkeypress",
    "onkeydown",
    "onkeyup",
    "onfocus",
    "onblur",
    "onchange",
    "onsubmit",
    "onreset",
    "onselect",
    "oninput",
    "oncontextmenu",
    "oncopy",
    "oncut",
    "onpaste",
    "ondrag",
    "ondrop",
    "onpointerdown",
    "onpointerup",
    "onpointerover",
    "onpointermove",
    "ontouchstart",
    "ontouchend",
    "ontouchmove",
    "onfocusin",
    "onfocusout",
    "onauxclick",
    "onsearch",
}

# Every handler name known to this module, regardless of category.
ALL_EVENTS = AUTO_FIRE_EVENTS.union(INTERACTIVE_EVENTS)
# Per-tag table of handlers that fire automatically on that tag.
# A value of True marks a tag whose content itself executes; an empty set
# means the tag is listed but has no auto-firing handler recorded here.
AUTO_FIRE_TAGS = dict(
    script=True,  # auto-executes content
    img={"onerror"},
    video={"onerror"},
    audio={"onerror"},
    source={"onerror"},
    object={"onerror"},
    embed={"onerror"},
    body={"onload"},
    svg={"onload"},
    math=set(),
    input={"onfocus"},  # with autofocus
    select={"onfocus"},
    textarea={"onfocus"},
    details={"ontoggle"},  # with open attribute
)

# Elements whose content is treated as inert text (execution suppressed).
SAFE_CONTAINERS = {
    "textarea",
    "title",
    "noscript",
    "xmp",
    "plaintext",
    "listing",
}
# Precompiled patterns shared by the context checks below.

# Innermost enclosing tag: an opening tag with no further '<' after it.
_RE_BEFORE_TAG = re.compile(r'<(\w+)(?:\s[^>]*)?>(?=[^<]*$)', re.I)

# Opening / closing <script> markers (closing matches without the final '>').
_RE_OPEN_SCRIPT = re.compile(r'<script\b[^>]*>', re.I)
_RE_CLOSE_SCRIPT = re.compile(r'</script\b', re.I)

# A comment opener that is never followed by a terminator.
_RE_COMMENT_OPEN = re.compile(r'<!--(?!.*-->)', re.S)

# Opening / closing <style> markers.
_RE_STYLE_OPEN = re.compile(r'<style\b[^>]*>', re.I)
_RE_STYLE_CLOSE = re.compile(r'</style\b', re.I)

# "on<name> =" attribute assignment; group 1 is the handler name.
_RE_EVENT_ATTR = re.compile(r'(on\w+)\s*=\s*["\']?', re.I)

# URL-bearing attribute whose value starts with the javascript: scheme.
_RE_JS_URI = re.compile(r'(?:href|src|action|formaction)\s*=\s*["\']?\s*javascript:', re.I)
def analyze_xss_execution_context(
    html_body: str,
    payload: str,
    payload_lower: Optional[str] = None,
) -> Dict:
    """
    Determine whether a payload reflected in HTML is in an executable position.

    Only the first raw reflection of the payload is analysed, using a window
    of up to 300 characters before and 150 characters after it. The checks
    are heuristic string/regex tests, not a full HTML parse.

    Args:
        html_body: Response body to inspect.
        payload: The payload as it was sent.
        payload_lower: Optional pre-lowercased payload; computed when omitted.

    Returns:
        {
            "executable": bool,   # True if payload can auto-execute (no user action)
            "interactive": bool,  # True if payload executes WITH user interaction
            "context": str,       # Context identifier
            "confidence": float,  # 0.0 - 1.0
            "detail": str,        # Human-readable explanation
        }
    """
    result = {
        "executable": False,
        "interactive": False,
        "context": "not_found",
        "confidence": 0.0,
        "detail": "Payload not found in response",
    }
    if not html_body or not payload:
        return result

    if payload_lower is None:
        payload_lower = payload.lower()
    body_lower = html_body.lower()

    # Find payload position (try exact first, then case-insensitive)
    pos = html_body.find(payload)
    if pos == -1:
        pos = body_lower.find(payload_lower)
    if pos == -1:
        # The raw payload is absent. Report "encoded" only in this case: if a
        # raw copy exists, an additional encoded copy elsewhere on the page
        # must not hide it, so the encoded test lives on this branch only.
        encoded_payload = payload.replace("<", "&lt;").replace(">", "&gt;")
        if encoded_payload != payload and encoded_payload in html_body:
            result.update({
                "context": "encoded",
                "confidence": 0.1,
                "detail": "Payload appears HTML-encoded (&lt;/&gt;)",
            })
        return result

    # Extract surrounding context
    payload_end = pos + len(payload)
    before = html_body[max(0, pos - 300):pos]
    after = html_body[payload_end:min(len(html_body), payload_end + 150)]
    before_lower = before.lower()

    # --- Check 1: Inside HTML comment ---
    comment_start = before.rfind("<!--")
    if comment_start != -1 and "-->" not in before[comment_start:]:
        # A payload carrying its own "-->" terminates the comment, so it is
        # analysed further instead of being written off as inert.
        if "-->" not in payload:
            result.update({
                "context": "html_comment",
                "confidence": 0.1,
                "detail": "Payload inside HTML comment",
            })
            return result

    # --- Check 2: Inside <script> tag ---
    script_opens = list(_RE_OPEN_SCRIPT.finditer(before))
    if script_opens:
        script_closes = list(_RE_CLOSE_SCRIPT.finditer(before))
        last_open = script_opens[-1].end()
        last_close = script_closes[-1].start() if script_closes else -1
        if last_open > last_close:
            # We're inside a <script> block
            # Check if payload breaks out of a JS string
            if _payload_breaks_js_string(before[last_open:], payload):
                result.update({
                    "executable": True,
                    "context": "script_breakout",
                    "confidence": 0.95,
                    "detail": "Payload breaks out of JS string inside <script> tag",
                })
                return result
            # Check if payload introduces new code (not just a data value)
            js_markers = (
                "alert(", "confirm(", "prompt(", "eval(",
                "function(", "document.", "window.",
            )
            if any(kw in payload_lower for kw in js_markers):
                result.update({
                    "executable": True,
                    "context": "script_body",
                    "confidence": 0.90,
                    "detail": "Payload with JS execution inside <script> tag",
                })
                return result
            result.update({
                "executable": True,
                "context": "script_body",
                "confidence": 0.85,
                "detail": "Payload inside <script> tag",
            })
            return result

    # --- Check 3: Inside <style> tag (safe) ---
    style_opens = list(_RE_STYLE_OPEN.finditer(before_lower))
    if style_opens:
        style_closes = list(_RE_STYLE_CLOSE.finditer(before_lower))
        last_open = style_opens[-1].end()
        last_close = style_closes[-1].start() if style_closes else -1
        # A payload containing "</style" ends the element itself; fall through.
        if last_open > last_close and "</style" not in payload_lower:
            result.update({
                "context": "safe_container",
                "confidence": 0.1,
                "detail": "Payload inside <style> tag",
            })
            return result

    # --- Check 4: Inside a safe container ---
    for container in SAFE_CONTAINERS:
        close_pat = f"</{container}"
        last_open = before_lower.rfind(f"<{container}")
        if last_open == -1:
            continue
        if last_open > before_lower.rfind(close_pat):
            # The container only neutralises the payload while it stays open.
            # A payload with the matching end tag closes it, so keep
            # analysing. <plaintext> is exempt: it has no end tag in HTML.
            if container != "plaintext" and close_pat in payload_lower:
                continue
            result.update({
                "context": "safe_container",
                "confidence": 0.1,
                "detail": f"Payload inside <{container}> (safe container)",
            })
            return result

    # --- Check 5: Payload itself introduces a new HTML tag ---
    if "<" in payload:
        return _analyze_injected_tag(payload, payload_lower, result)

    # --- Check 6: Determine if we're inside an HTML tag (attributes) or text content ---
    # Find the last `<` in `before`; if no `>` follows it, the payload sits in
    # that tag's attribute region.
    tag_name = ""
    tag_region_before = ""
    last_lt = before.rfind("<")
    if last_lt >= 0:
        tag_region_before = before[last_lt:]
        if ">" not in tag_region_before:
            tm = re.match(r'<(\w+)', tag_region_before)
            if tm:
                tag_name = tm.group(1).lower()

    if tag_name:
        # Remainder of the tag after the payload, up to (not including) `>`.
        first_gt = after.find(">")
        after_to_close = after[:first_gt] if first_gt >= 0 else after

        # Check if payload is the VALUE of an event handler attribute
        # NOTE(review): any on*= earlier in the same tag counts, even when the
        # payload actually lands in a later, unrelated attribute.
        before_in_tag = tag_region_before.lower()
        for m in _RE_EVENT_ATTR.finditer(before_in_tag):
            event_name = m.group(1).lower()
            if event_name in AUTO_FIRE_EVENTS:
                result.update({
                    "executable": True,
                    "interactive": False,
                    "context": "event_handler_auto",
                    "confidence": 0.95,
                    "detail": f"Payload is value of auto-firing event '{event_name}' on <{tag_name}>",
                })
                return result
            elif event_name in INTERACTIVE_EVENTS:
                result.update({
                    "executable": False,
                    "interactive": True,
                    "context": "event_handler",
                    "confidence": 0.90,
                    "detail": f"Payload is value of interactive event '{event_name}' on <{tag_name}> (requires user action)",
                })
                return result

        # Check if we're inside a javascript: URI attribute
        if _RE_JS_URI.search(before_in_tag):
            result.update({
                "executable": False,
                "interactive": True,
                "context": "javascript_uri",
                "confidence": 0.90,
                "detail": f"Payload inside javascript: URI on <{tag_name}>",
            })
            return result

        # Check if payload creates an event handler via attribute breakout
        if _payload_creates_event(payload_lower):
            # Collect whole attribute names only. The look-behind stops a
            # short name matching the tail of a longer one (e.g. "onend"
            # inside "ontouchend"), which would flip the classification.
            created = {
                m.group(1)
                for m in re.finditer(r'(?<![\w-])(on[a-z]+)\s*=', payload_lower)
            } & ALL_EVENTS
            # autofocus (in the payload or the rest of the tag) makes onfocus auto-fire
            has_autofocus = "autofocus" in (payload_lower + after_to_close.lower())

            # Fixed precedence (autofocus+onfocus, then any auto-firing event,
            # then interactive) keeps the verdict independent of set order.
            if has_autofocus and "onfocus" in created:
                evt = "onfocus"
                result.update({
                    "executable": True,
                    "interactive": False,
                    "context": "attribute_breakout_auto",
                    "confidence": 0.95,
                    "detail": f"Payload breaks attribute to create {evt}+autofocus on <{tag_name}> (auto-fires)",
                })
                return result
            auto_created = sorted(created & AUTO_FIRE_EVENTS)
            if auto_created:
                evt = auto_created[0]
                result.update({
                    "executable": True,
                    "interactive": False,
                    "context": "attribute_breakout_auto",
                    "confidence": 0.90,
                    "detail": f"Payload breaks attribute to create auto-firing {evt} on <{tag_name}>",
                })
                return result
            if created:
                evt = sorted(created)[0]
                result.update({
                    "executable": False,
                    "interactive": True,
                    "context": "attribute_breakout_event",
                    "confidence": 0.90,
                    "detail": f"Payload breaks attribute to create {evt} on <{tag_name}> (requires interaction)",
                })
                return result

        # Inside a regular attribute value (not event handler, not JS URI)
        result.update({
            "context": "attribute_value",
            "confidence": 0.3,
            "detail": f"Payload inside non-event attribute of <{tag_name}>",
        })
        return result

    # --- Check 7: Payload contains event handler patterns but is in text content ---
    # (e.g., "onclick=alert(1)" as literal text, NOT inside a tag)
    # This is NOT executable — it's just text

    # --- Check 8: Plain text content ---
    result.update({
        "context": "text_content",
        "confidence": 0.2,
        "detail": "Payload reflected as plain text content in HTML body",
    })
    return result
def _payload_breaks_js_string(js_before: str, payload: str) -> bool:
    """Check if payload breaks out of a JS string context.

    Args:
        js_before: Script text between the opening <script> tag and the
            payload.
        payload: The reflected payload.

    Returns:
        True when the payload contains a closing script tag, or when there
        is non-blank script text before it and the payload starts with a
        string delimiter (', " or `). This is a heuristic: it does not verify
        that a string literal is actually open at that point.
    """
    # A closing script tag ends the element no matter what precedes the
    # payload, so test it before the empty-prefix guard. The tag name may be
    # followed by whitespace or '/' before '>', hence the character class.
    if re.search(r'</script[\s/>]', payload, re.IGNORECASE):
        return True

    # With nothing but whitespace before the payload there is no string
    # literal to terminate.
    if not js_before.rstrip():
        return False

    # Payload starts with string terminator + code
    head = payload.lstrip()
    return bool(head) and head[0] in ("'", '"', '`')
def _payload_creates_event(payload_lower: str) -> bool:
    """Tell whether the (lowercased) payload assigns a known event handler.

    Returns True when any name from ALL_EVENTS appears in the payload
    followed by optional whitespace and '=', as in
    '" onfocus=alert(1) autofocus x="'.
    """
    # Without '=' there can be no attribute assignment at all.
    if "=" not in payload_lower:
        return False
    return any(
        name in payload_lower and re.search(rf'{name}\s*=', payload_lower)
        for name in ALL_EVENTS
    )
def _analyze_injected_tag(payload: str, payload_lower: str, result: Dict) -> Dict:
    """Analyze a payload that introduces new HTML tags.

    Args:
        payload: The reflected payload (unused here; kept for the interface).
        payload_lower: Lowercased payload; all matching is done on this.
        result: Result dict to fill in. It is updated in place and returned.

    Returns:
        The same ``result`` dict. Precedence of the verdicts: injected
        <script>, then auto-firing event handlers, then a ``javascript:``
        URI, then interactive handlers, then a tag with no handler.
    """
    # Extract tags from payload
    tags = re.findall(r'<(\w+)', payload_lower)
    if not tags:
        result.update({
            "context": "text_content",
            "confidence": 0.3,
            "detail": "Payload contains < but no recognizable tags",
        })
        return result

    primary_tag = tags[0]

    # <script> tag = auto-execute
    if "script" in tags:
        result.update({
            "executable": True,
            "context": "injected_script_tag",
            "confidence": 0.95,
            "detail": "Payload injects <script> tag",
        })
        return result

    # Check for event handlers in the payload
    events_in_payload = {
        m.group(1).lower() for m in _RE_EVENT_ATTR.finditer(payload_lower)
    }
    auto_events = events_in_payload & AUTO_FIRE_EVENTS
    interactive_events = events_in_payload & INTERACTIVE_EVENTS

    # Check for autofocus (makes onfocus auto-fire)
    if "autofocus" in payload_lower and "onfocus" in events_in_payload:
        auto_events.add("onfocus")
        interactive_events.discard("onfocus")

    # Check for <details open ontoggle>
    if "details" in tags and "open" in payload_lower and "ontoggle" in events_in_payload:
        auto_events.add("ontoggle")
        interactive_events.discard("ontoggle")

    # img/video/audio with src=x onerror → auto-fires
    if primary_tag in ("img", "video", "audio", "source", "object", "embed", "input"):
        if "onerror" in events_in_payload and ("src=" in payload_lower or "src =" in payload_lower):
            auto_events.add("onerror")
            interactive_events.discard("onerror")

    # svg/body onload → auto-fires
    if primary_tag in ("svg", "body", "math") and "onload" in events_in_payload:
        auto_events.add("onload")
        interactive_events.discard("onload")

    # SVG animate/set onbegin → auto-fires
    if primary_tag in ("animate", "animatetransform", "set", "discard") and "onbegin" in events_in_payload:
        auto_events.add("onbegin")
        interactive_events.discard("onbegin")

    # Auto-firing handlers are judged first. The "javascript:" test below is
    # a plain substring match, so it also hits handler bodies such as
    # onerror=javascript:alert(1); checking it first would downgrade an
    # auto-executing payload to "interactive".
    if auto_events:
        # sorted() keeps the detail text stable: set order varies between runs.
        result.update({
            "executable": True,
            "interactive": False,
            "context": "injected_tag_auto",
            "confidence": 0.95,
            "detail": f"Payload injects <{primary_tag}> with auto-firing event(s): {', '.join(sorted(auto_events))}",
        })
        return result

    # javascript: URI
    if "javascript:" in payload_lower:
        result.update({
            "executable": False,
            "interactive": True,
            "context": "injected_js_uri",
            "confidence": 0.90,
            "detail": f"Payload injects <{primary_tag}> with javascript: URI",
        })
        return result

    if interactive_events:
        result.update({
            "executable": False,
            "interactive": True,
            "context": "injected_tag_interactive",
            "confidence": 0.85,
            "detail": f"Payload injects <{primary_tag}> with interactive event(s): {', '.join(sorted(interactive_events))}",
        })
        return result

    # Tag injected but no events
    result.update({
        "context": "injected_tag_no_event",
        "confidence": 0.4,
        "detail": f"Payload injects <{primary_tag}> but without executable event handlers",
    })
    return result