From 8e276581570e78e6eb5bb7a19ba65757e3b40617 Mon Sep 17 00:00:00 2001 From: "@aaronjmars" <61592645+aaronjmars@users.noreply.github.com> Date: Wed, 20 May 2026 22:01:25 -0400 Subject: [PATCH] fix(security): use defusedxml for untrusted XML parsing (#259) Detected by Aeon + Semgrep (5x use-defused-xml ERROR). Severity: medium CWE-776 (billion laughs) / CWE-611 (XML external entity) Five XML parse sites pass response bodies into the Python stdlib xml.etree.ElementTree without protection against entity expansion attacks. Python's ElementTree still permits internal entity references by default (per the docs vulnerabilities table), so a malicious or compromised upstream can ship a "billion laughs"-style payload that expands to gigabytes in memory (the same table notes that Expat 2.4.1+ mitigates this, but the system-provided libexpat is not guaranteed to be that new). The user-controllable site is sb_monitor._parse_rss: the OpenClaw skill exposes add_custom_feed(name, url, ...) to the agent, then poll_custom_feeds fetches feed.url and passes the body to xml.etree.ElementTree.fromstring with no host allowlist or entity-bomb defence. The other four sites (psk_reporter_fetcher, aircraft_database, cctv_pipeline x2) parse XML from hard-coded upstreams (pskreporter.info, s3.opensky-network.org, datos.madrid.es); those changes are defence-in-depth against upstream compromise or MITM. Switch all five call sites to defusedxml.ElementTree. Same fromstring entry point, and the returned stdlib Element objects keep find/findall/iter/findtext, but it rejects entity declarations by default (raises defusedxml.EntitiesForbidden). Confirmed locally that a 4-deep billion-laughs payload that expands to 3000 chars under stdlib ET is rejected by defusedxml. Added defusedxml>=0.7.1 to backend/pyproject.toml dependencies. 
Co-authored-by: aeonframework --- backend/pyproject.toml | 1 + backend/services/cctv_pipeline.py | 4 ++-- backend/services/fetchers/aircraft_database.py | 2 +- backend/services/psk_reporter_fetcher.py | 2 +- openclaw-skills/shadowbroker/sb_monitor.py | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 8358aec..e0dbe40 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "cachetools==5.5.2", "cloudscraper==1.2.71", "cryptography>=41.0.0", + "defusedxml>=0.7.1", "fastapi==0.115.12", "feedparser==6.0.10", "httpx==0.28.1", diff --git a/backend/services/cctv_pipeline.py b/backend/services/cctv_pipeline.py index 243e1be..a489396 100644 --- a/backend/services/cctv_pipeline.py +++ b/backend/services/cctv_pipeline.py @@ -987,7 +987,7 @@ _KML_NS = {"kml": "http://www.opengis.net/kml/2.2"} def _find_kml_element(element, tag): """Find first descendant matching tag, ignoring XML namespace prefix.""" - import xml.etree.ElementTree as ET + import defusedxml.ElementTree as ET el = element.find(f".//{tag}") if el is not None: return el @@ -1015,7 +1015,7 @@ class MadridCityIngestor(BaseCCTVIngestor): KML_URL = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml" def fetch_data(self) -> List[Dict[str, Any]]: - import xml.etree.ElementTree as ET + import defusedxml.ElementTree as ET try: response = fetch_with_curl(self.KML_URL, timeout=20) diff --git a/backend/services/fetchers/aircraft_database.py b/backend/services/fetchers/aircraft_database.py index bf548ef..1e2d1ea 100644 --- a/backend/services/fetchers/aircraft_database.py +++ b/backend/services/fetchers/aircraft_database.py @@ -16,9 +16,9 @@ import csv import logging import threading import time -import xml.etree.ElementTree as ET from typing import Any +import defusedxml.ElementTree as ET import requests logger = logging.getLogger(__name__) diff --git a/backend/services/psk_reporter_fetcher.py 
b/backend/services/psk_reporter_fetcher.py index 41f5124..91b41d6 100644 --- a/backend/services/psk_reporter_fetcher.py +++ b/backend/services/psk_reporter_fetcher.py @@ -6,8 +6,8 @@ Docs: https://pskreporter.info/pskdev.html """ import logging -import xml.etree.ElementTree as ET +import defusedxml.ElementTree as ET import requests from cachetools import TTLCache, cached diff --git a/openclaw-skills/shadowbroker/sb_monitor.py b/openclaw-skills/shadowbroker/sb_monitor.py index 37b21cb..bbb3c20 100644 --- a/openclaw-skills/shadowbroker/sb_monitor.py +++ b/openclaw-skills/shadowbroker/sb_monitor.py @@ -701,7 +701,7 @@ async def _fetch_feed(feed: CustomFeed) -> list[dict]: def _parse_rss(xml_text: str, feed: CustomFeed) -> list[dict]: """Parse an RSS/Atom feed into normalized items.""" - import xml.etree.ElementTree as ET + import defusedxml.ElementTree as ET items = [] try: