mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-26 17:17:51 +02:00
fix(security): use defusedxml for untrusted XML parsing (#259)
Detected by Aeon + Semgrep (5x use-defused-xml ERROR).

Severity: medium. CWE-776 (billion laughs) / CWE-611 (XML external entity).

Five XML parse sites pass response bodies into the Python stdlib xml.etree.ElementTree without protection against entity-expansion attacks. Python's ElementTree still permits internal entity references by default (per the vulnerabilities table in the docs), so a malicious or compromised upstream can ship a "billion laughs"-style payload that expands to gigabytes in memory.

The user-controllable site is sb_monitor._parse_rss: the OpenClaw skill exposes add_custom_feed(name, url, ...) to the agent, then poll_custom_feeds fetches feed.url and passes the body to xml.etree.ElementTree.fromstring with no host allowlist or entity-bomb defence.

The other four sites (psk_reporter_fetcher, aircraft_database, cctv_pipeline x2) parse XML from hard-coded upstreams (pskreporter.info, s3.opensky-network.org, datos.madrid.es); switching them is defence-in-depth against upstream compromise or MITM.

Switch all five call sites to defusedxml.ElementTree. It offers the same fromstring/find/findall/iter/findtext API, but rejects entity references by default (raising defusedxml.EntitiesForbidden). Confirmed locally that a 4-deep billion-laughs payload that expands to 3000 chars under stdlib ET is rejected by defusedxml.

Added defusedxml>=0.7.1 to the backend/pyproject.toml dependencies.

Co-authored-by: aeonframework <aeon-bot@aaronjmars.com>
This commit is contained in:
@@ -15,6 +15,7 @@ dependencies = [
|
||||
"cachetools==5.5.2",
|
||||
"cloudscraper==1.2.71",
|
||||
"cryptography>=41.0.0",
|
||||
"defusedxml>=0.7.1",
|
||||
"fastapi==0.115.12",
|
||||
"feedparser==6.0.10",
|
||||
"httpx==0.28.1",
|
||||
|
||||
@@ -987,7 +987,7 @@ _KML_NS = {"kml": "http://www.opengis.net/kml/2.2"}
|
||||
|
||||
def _find_kml_element(element, tag):
|
||||
"""Find first descendant matching tag, ignoring XML namespace prefix."""
|
||||
import xml.etree.ElementTree as ET
|
||||
import defusedxml.ElementTree as ET
|
||||
el = element.find(f".//{tag}")
|
||||
if el is not None:
|
||||
return el
|
||||
@@ -1015,7 +1015,7 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
KML_URL = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
import xml.etree.ElementTree as ET
|
||||
import defusedxml.ElementTree as ET
|
||||
|
||||
try:
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
|
||||
@@ -16,9 +16,9 @@ import csv
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Any
|
||||
|
||||
import defusedxml.ElementTree as ET
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -6,8 +6,8 @@ Docs: https://pskreporter.info/pskdev.html
|
||||
"""
|
||||
|
||||
import logging
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
import defusedxml.ElementTree as ET
|
||||
import requests
|
||||
from cachetools import TTLCache, cached
|
||||
|
||||
|
||||
@@ -701,7 +701,7 @@ async def _fetch_feed(feed: CustomFeed) -> list[dict]:
|
||||
|
||||
def _parse_rss(xml_text: str, feed: CustomFeed) -> list[dict]:
|
||||
"""Parse an RSS/Atom feed into normalized items."""
|
||||
import xml.etree.ElementTree as ET
|
||||
import defusedxml.ElementTree as ET
|
||||
|
||||
items = []
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user