mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-05 13:58:15 +02:00
Pin DeepState mirror, prefer HTTPS for Madrid/KiwiSDR, document outbound data (#362–#364).
Operators can set DEEPSTATE_MIRROR_COMMIT for immutable frontline ingest; Madrid KML tries HTTPS then HTTP without changing camera image URLs or proxy Referers. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -198,6 +198,7 @@ graphify-out/
|
||||
# Internal docs & brainstorming (never commit)
|
||||
# ========================
|
||||
docs/*
|
||||
!docs/OUTBOUND_DATA.md
|
||||
!docs/mesh/
|
||||
docs/mesh/*
|
||||
!docs/mesh/threat-model.md
|
||||
|
||||
@@ -87,6 +87,12 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Free MAP_KEY from https://firms.modaps.eosdis.nasa.gov/map/#d:24hrs;@0.0,0.0,3.0z
|
||||
# FIRMS_MAP_KEY=
|
||||
|
||||
# Ukraine frontline mirror (GitHub). Default follows cyterat/deepstate-map-data@main.
|
||||
# Pin an immutable commit SHA so ingest cannot silently change if main is force-pushed (#362).
|
||||
# Example (verify on GitHub before use): main @ b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_COMMIT=b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_REPO=cyterat/deepstate-map-data
|
||||
|
||||
# Ukraine air raid alerts from alerts.in.ua — free token from https://alerts.in.ua/
|
||||
# ALERTS_IN_UA_TOKEN=
|
||||
|
||||
|
||||
@@ -1012,14 +1012,33 @@ def _extract_img_src(html_fragment: str):
|
||||
class MadridCityIngestor(BaseCCTVIngestor):
|
||||
"""Madrid City Hall traffic cameras from datos.madrid.es KML feed."""
|
||||
|
||||
KML_URL = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
KML_URL_HTTPS = "https://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
KML_URL_HTTP = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def _fetch_kml(self):
    """Download the camera KML, trying the HTTPS URL before the HTTP one (#363).

    Returns the first response for which ``raise_for_status()`` does not
    raise.  If every candidate URL fails, the exception from the last
    attempt is re-raised to the caller.
    """
    failure: Exception | None = None
    candidates = (self.KML_URL_HTTPS, self.KML_URL_HTTP)
    for candidate in candidates:
        try:
            kml_response = fetch_with_curl(candidate, timeout=20)
            kml_response.raise_for_status()
            # Reaching the HTTP URL means the HTTPS attempt already failed.
            used_plain_http = candidate == self.KML_URL_HTTP
            if used_plain_http:
                logger.warning(
                    "MadridCityIngestor: HTTPS KML unavailable, using HTTP catalog feed"
                )
            return kml_response
        except Exception as exc:
            # Remember the failure and move on to the next candidate URL.
            failure = exc
            logger.debug("MadridCityIngestor: KML fetch failed for %s: %s", candidate, exc)
    if failure is None:
        raise RuntimeError("Madrid KML fetch failed")
    raise failure
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
import defusedxml.ElementTree as ET
|
||||
|
||||
try:
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
response.raise_for_status()
|
||||
response = self._fetch_kml()
|
||||
except Exception as e:
|
||||
logger.error(f"MadridCityIngestor: failed to fetch KML: {e}")
|
||||
return []
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
import zipfile
|
||||
@@ -20,6 +21,50 @@ logger = logging.getLogger(__name__)
|
||||
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
||||
frontline_cache = TTLCache(maxsize=1, ttl=1800)
|
||||
|
||||
_DEFAULT_DEEPSTATE_MIRROR_REPO = "cyterat/deepstate-map-data"


def _deepstate_mirror_ref() -> tuple[str, str]:
    """Return ``(github_repo_slug, git_ref)`` for the DeepState mirror.

    ``DEEPSTATE_MIRROR_REPO`` overrides the repository and must look like
    ``owner/name``: exactly two non-empty segments, no whitespace, and no
    ``.`` / ``..`` segments.  Anything else falls back to the default mirror
    and logs a warning, because the slug is interpolated directly into
    GitHub URLs.

    When ``DEEPSTATE_MIRROR_COMMIT`` is set, ingest is pinned to that immutable
    SHA instead of following the mutable ``main`` branch (#362).  The value is
    stripped of surrounding whitespace and otherwise used verbatim.
    """
    configured = (os.environ.get("DEEPSTATE_MIRROR_REPO") or "").strip()
    repo = configured or _DEFAULT_DEEPSTATE_MIRROR_REPO

    segments = repo.split("/")
    slug_ok = (
        len(segments) == 2
        and all(segment not in ("", ".", "..") for segment in segments)
        and not any(ch.isspace() for ch in repo)
    )
    if not slug_ok:
        # Same logger name as the module-level logger; looked up here so the
        # helper does not depend on definition order within the module.
        logging.getLogger(__name__).warning(
            "Ignoring invalid DEEPSTATE_MIRROR_REPO %r; using %s",
            configured,
            _DEFAULT_DEEPSTATE_MIRROR_REPO,
        )
        repo = _DEFAULT_DEEPSTATE_MIRROR_REPO

    commit = (os.environ.get("DEEPSTATE_MIRROR_COMMIT") or "").strip()
    return repo, commit or "main"
|
||||
|
||||
|
||||
def _latest_deepstate_geo_path(tree_items: list) -> str | None:
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_items
|
||||
if isinstance(item, dict)
|
||||
and str(item.get("path", "")).startswith("data/deepstatemap_data_")
|
||||
and str(item.get("path", "")).endswith(".geojson")
|
||||
]
|
||||
return sorted(geo_files)[-1] if geo_files else None
|
||||
|
||||
|
||||
def _annotate_deepstate_geojson(data: dict) -> dict:
    """Stamp each feature with a zone ``name`` and ``zone_id`` based on its index.

    The input is modified in place and returned.  A missing or ``None``
    ``properties`` entry is replaced with an empty dict before the two keys
    are written.  Indices beyond the known zones get the
    "Russian-occupied areas" label.  Input without a ``features`` key is
    returned untouched.
    """
    if "features" not in data:
        return data

    fallback_label = "Russian-occupied areas"
    # Position in this tuple is the feature's index in the mirror's array.
    labels_by_index = (
        "Russian-occupied areas",
        "Russian advance",
        "Liberated area",
        "Russian-occupied areas",  # Crimea / LPR / DPR
        "Directions of UA attacks",
    )

    for position, feat in enumerate(data["features"]):
        if "properties" not in feat or feat["properties"] is None:
            feat["properties"] = {}
        if position < len(labels_by_index):
            label = labels_by_index[position]
        else:
            label = fallback_label
        feat["properties"]["name"] = label
        feat["properties"]["zone_id"] = position
    return data
|
||||
|
||||
|
||||
@cached(frontline_cache)
|
||||
def fetch_ukraine_frontlines():
|
||||
@@ -27,67 +72,34 @@ def fetch_ukraine_frontlines():
|
||||
Fetches the latest GeoJSON data representing the Ukraine frontline.
|
||||
We use the cyterat/deepstate-map-data github mirror since the public API is locked.
|
||||
"""
|
||||
repo, ref = _deepstate_mirror_ref()
|
||||
try:
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror...")
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror (%s @ %s)...", repo, ref)
|
||||
|
||||
# First, query the repo tree to find the latest file name
|
||||
tree_url = (
|
||||
"https://api.github.com/repos/cyterat/deepstate-map-data/git/trees/main?recursive=1"
|
||||
)
|
||||
tree_url = f"https://api.github.com/repos/{repo}/git/trees/{ref}?recursive=1"
|
||||
res_tree = requests.get(tree_url, timeout=10)
|
||||
|
||||
if res_tree.status_code == 200:
|
||||
tree_data = res_tree.json().get("tree", [])
|
||||
# Filter for geojson files in data folder
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_data
|
||||
if item["path"].startswith("data/deepstatemap_data_")
|
||||
and item["path"].endswith(".geojson")
|
||||
]
|
||||
|
||||
if geo_files:
|
||||
# Get the alphabetically latest file (since it's named with YYYYMMDD)
|
||||
latest_file = sorted(geo_files)[-1]
|
||||
|
||||
raw_url = f"https://raw.githubusercontent.com/cyterat/deepstate-map-data/main/{latest_file}"
|
||||
logger.info(f"Downloading latest DeepStateMap: {raw_url}")
|
||||
latest_file = _latest_deepstate_geo_path(res_tree.json().get("tree", []))
|
||||
if latest_file:
|
||||
raw_url = f"https://raw.githubusercontent.com/{repo}/{ref}/{latest_file}"
|
||||
logger.info("Downloading DeepStateMap: %s", raw_url)
|
||||
|
||||
res_geo = requests.get(raw_url, timeout=20)
|
||||
if res_geo.status_code == 200:
|
||||
data = res_geo.json()
|
||||
|
||||
# The Cyterat GitHub mirror strips all properties and just provides a raw array of Feature polygons.
|
||||
# Based on DeepStateMap's frontend mapping, the array index corresponds to the zone type:
|
||||
# 0: Russian-occupied areas
|
||||
# 1: Russian advance
|
||||
# 2: Liberated area
|
||||
# 3: Uncontested/Crimea (often folded into occupied)
|
||||
name_map = {
|
||||
0: "Russian-occupied areas",
|
||||
1: "Russian advance",
|
||||
2: "Liberated area",
|
||||
3: "Russian-occupied areas", # Crimea / LPR / DPR
|
||||
4: "Directions of UA attacks",
|
||||
}
|
||||
|
||||
if "features" in data:
|
||||
for idx, feature in enumerate(data["features"]):
|
||||
if "properties" not in feature or feature["properties"] is None:
|
||||
feature["properties"] = {}
|
||||
|
||||
feature["properties"]["name"] = name_map.get(
|
||||
idx, "Russian-occupied areas"
|
||||
)
|
||||
feature["properties"]["zone_id"] = idx
|
||||
|
||||
return data
|
||||
else:
|
||||
logger.error(
|
||||
f"Failed to fetch parsed Github Raw GeoJSON: {res_geo.status_code}"
|
||||
)
|
||||
return _annotate_deepstate_geojson(res_geo.json())
|
||||
logger.error(
|
||||
"Failed to fetch parsed Github Raw GeoJSON: %s", res_geo.status_code
|
||||
)
|
||||
else:
|
||||
logger.error("No deepstatemap_data_*.geojson files in mirror tree at %s", ref)
|
||||
else:
|
||||
logger.error(f"Failed to fetch Github Tree for Deepstatemap: {res_tree.status_code}")
|
||||
logger.error(
|
||||
"Failed to fetch Github tree for Deepstatemap (%s @ %s): %s",
|
||||
repo,
|
||||
ref,
|
||||
res_tree.status_code,
|
||||
)
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"Error fetching DeepStateMap: {e}")
|
||||
return None
|
||||
|
||||
@@ -32,7 +32,8 @@ logger = logging.getLogger(__name__)
|
||||
_REFRESH_SECONDS = 24 * 3600
|
||||
kiwisdr_cache: TTLCache = TTLCache(maxsize=1, ttl=_REFRESH_SECONDS)
|
||||
|
||||
_SOURCE_URL = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL_HTTP = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL_HTTPS = "https://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_CACHE_FILE = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_cache.json"
|
||||
# Bundled fallback — shipped with the codebase so the KiwiSDR layer always
|
||||
# has something to render even when the upstream is unreachable, returns
|
||||
@@ -184,6 +185,29 @@ def _validate_fetched_nodes(nodes: list[dict]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _fetch_mirror_payload_text() -> str | None:
    """Fetch the KiwiSDR mirror body, trying HTTPS first and then HTTP (#364).

    Returns the response text of the first attempt that answers with status
    200, or ``None`` when neither URL does.  This helper only returns the raw
    body and performs no validation itself.
    """
    from services.network_utils import fetch_with_curl

    failure: Exception | None = None
    for candidate in (_SOURCE_URL_HTTPS, _SOURCE_URL_HTTP):
        try:
            reply = fetch_with_curl(candidate, timeout=20)
            if not reply or reply.status_code != 200:
                # Missing response or non-200 status: record it and try the next URL.
                failure = RuntimeError(f"HTTP {getattr(reply, 'status_code', 'unknown')}")
                continue
            if candidate == _SOURCE_URL_HTTP:
                logger.info(
                    "KiwiSDR: HTTPS mirror unavailable; using HTTP with shape validation"
                )
            return reply.text
        except Exception as exc:
            failure = exc
            logger.debug("KiwiSDR mirror fetch failed for %s: %s", candidate, exc)

    if failure is not None:
        # Only the most recent failure is reported at warning level.
        logger.warning("KiwiSDR mirror fetch failed: %s", failure)
    return None
|
||||
|
||||
|
||||
def _load_bundled_fallback() -> list[dict]:
|
||||
"""Last-resort directory shipped with the codebase. Always returns a
|
||||
list (may be empty if the bundle is missing in older deployments)."""
|
||||
@@ -216,8 +240,6 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
tampered upstream returning garbage is caught by _validate_fetched_nodes()
|
||||
and falls through to whatever previously-trusted snapshot we have.
|
||||
"""
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
# 1. Trust on-disk cache if fresh.
|
||||
cached_nodes = _load_disk_cache()
|
||||
if cached_nodes is not None:
|
||||
@@ -230,14 +252,12 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
fresh_nodes: list[dict] = []
|
||||
fetch_succeeded = False
|
||||
try:
|
||||
res = fetch_with_curl(_SOURCE_URL, timeout=20)
|
||||
if res and res.status_code == 200:
|
||||
fresh_nodes = _parse_mirror_payload(res.text)
|
||||
body = _fetch_mirror_payload_text()
|
||||
if body:
|
||||
fresh_nodes = _parse_mirror_payload(body)
|
||||
fetch_succeeded = True
|
||||
else:
|
||||
logger.warning(
|
||||
f"KiwiSDR fetch returned HTTP {res.status_code if res else 'no response'}"
|
||||
)
|
||||
logger.warning("KiwiSDR fetch returned no usable mirror payload")
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.warning(f"KiwiSDR fetch exception: {e}")
|
||||
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
"""DeepState GitHub mirror pinning (#362)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import services.geopolitics as gp
|
||||
|
||||
|
||||
def test_deepstate_mirror_ref_defaults(monkeypatch):
    """With neither override variable set, the default repo on ``main`` is used."""
    for var in ("DEEPSTATE_MIRROR_COMMIT", "DEEPSTATE_MIRROR_REPO"):
        monkeypatch.delenv(var, raising=False)

    resolved = gp._deepstate_mirror_ref()

    assert resolved[0] == "cyterat/deepstate-map-data"
    assert resolved[1] == "main"
|
||||
|
||||
|
||||
def test_deepstate_mirror_ref_pinned_commit(monkeypatch):
    """A configured commit replaces ``main`` as the git ref."""
    overrides = {
        "DEEPSTATE_MIRROR_COMMIT": "abc123def456",
        "DEEPSTATE_MIRROR_REPO": "cyterat/deepstate-map-data",
    }
    for var, value in overrides.items():
        monkeypatch.setenv(var, value)

    resolved = gp._deepstate_mirror_ref()

    assert resolved[0] == "cyterat/deepstate-map-data"
    assert resolved[1] == "abc123def456"
|
||||
|
||||
|
||||
def test_fetch_ukraine_frontlines_uses_pinned_tree_url(monkeypatch):
    """Both the tree lookup and the raw download must use the pinned commit."""
    monkeypatch.setenv("DEEPSTATE_MIRROR_COMMIT", "deadbeef")
    # The raw-URL assertion below hard-codes the default repo, so an ambient
    # DEEPSTATE_MIRROR_REPO must not be allowed to redirect the fetch.
    monkeypatch.delenv("DEEPSTATE_MIRROR_REPO", raising=False)
    gp.frontline_cache.clear()

    tree_resp = MagicMock(status_code=200)
    tree_resp.json.return_value = {
        "tree": [{"path": "data/deepstatemap_data_20260101.geojson"}]
    }
    geo_resp = MagicMock(status_code=200)
    geo_resp.json.return_value = {"features": []}

    try:
        with patch(
            "services.geopolitics.requests.get", side_effect=[tree_resp, geo_resp]
        ) as get:
            result = gp.fetch_ukraine_frontlines()

        assert result == {"features": []}
        tree_call = get.call_args_list[0][0][0]
        raw_call = get.call_args_list[1][0][0]
        assert "/git/trees/deadbeef" in tree_call
        assert "raw.githubusercontent.com/cyterat/deepstate-map-data/deadbeef/" in raw_call
    finally:
        # Always drop the mocked result, even when an assertion fails, so it
        # cannot leak into other tests through the shared cache.
        gp.frontline_cache.clear()
|
||||
@@ -0,0 +1,29 @@
|
||||
"""KiwiSDR mirror prefers HTTPS (#364)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from services.kiwisdr_fetcher import (
|
||||
_SOURCE_URL_HTTP,
|
||||
_SOURCE_URL_HTTPS,
|
||||
_fetch_mirror_payload_text,
|
||||
)
|
||||
|
||||
|
||||
def test_fetch_mirror_tries_https_before_http():
    """A failing HTTPS attempt is followed by an HTTP attempt whose body is returned."""
    attempted: list[str] = []
    payload = "var kiwisdr_com = [];"

    def fake_fetch(url, **kwargs):
        attempted.append(url)
        if url != _SOURCE_URL_HTTPS:
            ok = MagicMock()
            ok.status_code = 200
            ok.text = payload
            return ok
        raise ConnectionError("tls not available")

    with patch("services.network_utils.fetch_with_curl", side_effect=fake_fetch):
        body = _fetch_mirror_payload_text()

    assert attempted == [_SOURCE_URL_HTTPS, _SOURCE_URL_HTTP]
    assert body == payload
|
||||
@@ -0,0 +1,43 @@
|
||||
# Outbound data and third-party exposure
|
||||
|
||||
Shadowbroker is **self-hosted**: each install uses its own backend egress IP (and optional `OPERATOR_HANDLE` in `User-Agent`). This documents intentional third-party contact for audit issues #348–#366.
|
||||
|
||||
## Architecture
|
||||
|
||||
| Path | Who calls third parties |
|
||||
|------|-------------------------|
|
||||
| UI → `/api/*` → fetchers | **Backend** |
|
||||
| Map basemap tiles/fonts | **Browser** (CARTO, demotiles.maplibre.org) |
|
||||
| CCTV proxy | **Backend** (with upstream-required `Referer` / `Origin`) |
|
||||
|
||||
## Ukraine frontline mirror (#362)
|
||||
|
||||
- **Layer:** `ukraine_frontline` → `frontlines` on the map (DeepStateMap polygons). **Not** UAP (`uap_sightings` / NUFORC).
|
||||
- **Code:** `backend/services/geopolitics.py`
|
||||
- **Default:** `cyterat/deepstate-map-data` @ `main`, latest `data/deepstatemap_data_*.geojson`
|
||||
- **Pin:** `DEEPSTATE_MIRROR_COMMIT=<sha>` — pins ingest to an immutable Git snapshot; bump the SHA when you want newer frontline data
|
||||
- **Optional:** `DEEPSTATE_MIRROR_REPO=owner/repo`
|
||||
|
||||
## Madrid CCTV (#363)
|
||||
|
||||
- **Ingest:** HTTPS-first KML on `datos.madrid.es` (catalog only); HTTP fallback if needed
|
||||
- **Feeds:** Still images from URLs inside the KML (`informo.madrid.es`, etc.), proxied with `Referer: https://informo.madrid.es/` — unchanged by KML transport
|
||||
|
||||
## KiwiSDR (#364)
|
||||
|
||||
- **Ingest:** the mirror is fetched over HTTPS first, then HTTP; responses are shape-validated, and the bundled `backend/data/kiwisdr_directory.json` serves as a fallback
|
||||
|
||||
## Other documented exposures
|
||||
|
||||
- **#354 Basemap:** browser → `*.basemaps.cartocdn.com`, `demotiles.maplibre.org`
|
||||
- **#349 CCTV Referer:** required for many DOT/city streams; backend proxy only
|
||||
- **#361 Operator UA:** `OPERATOR_HANDLE` / `outbound_user_agent()` per install
|
||||
- **#366 Broadcastify:** backend scrape with honest UA
|
||||
- **#348 LiveUAMap:** `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER` (default on for Linux, off for Windows)
|
||||
|
||||
## Operator checklist
|
||||
|
||||
1. Set `OPERATOR_HANDLE` if you want a recognizable contact on upstream logs.
|
||||
2. Pin `DEEPSTATE_MIRROR_COMMIT` after reviewing a mirror commit (see `backend/.env.example`).
|
||||
3. Set `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false` to disable LiveUAMap contact.
|
||||
4. Self-host map tiles if basemap CDN exposure matters.
|
||||
Reference in New Issue
Block a user