From 363b5a49c853537728fd75f27c6691024de2298d Mon Sep 17 00:00:00 2001 From: BigBodyCobain <43977454+BigBodyCobain@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:01:32 -0600 Subject: [PATCH] Close tg12 outbound audit (#348-#366): operator UA, opt-ins, docs - User-Agent is per-install handle only (no Shadowbroker product token) - LiveUAMap: Windows UI consent when enabling Global Incidents; env override - Meshtastic callsign upstream header off by default (opt-in true) - Expanded docs/OUTBOUND_DATA.md and README link for CCTV, basemap, Broadcastify Co-authored-by: Cursor --- .env.example | 4 +- README.md | 2 + backend/.env.example | 24 ++-- backend/routers/data.py | 36 ++++++ backend/scripts/convert_power_plants.py | 2 +- .../services/fetchers/aircraft_database.py | 2 - backend/services/fetchers/geo.py | 27 ++-- backend/services/fetchers/meshtastic_map.py | 4 +- backend/services/fetchers/route_database.py | 2 - backend/services/liveuamap_scraper.py | 9 +- backend/services/liveuamap_settings.py | 73 +++++++++++ backend/services/mesh/mesh_wormhole_prekey.py | 4 +- backend/services/network_utils.py | 63 +++------ .../tests/test_liveuamap_scraper_opt_in.py | 45 +++++++ .../tests/test_meshtastic_callsign_optout.py | 55 ++------ .../test_per_operator_outbound_attribution.py | 26 ++-- docs/OUTBOUND_DATA.md | 122 ++++++++++++++---- .../src/components/WorldviewLeftPanel.tsx | 98 +++++++++++--- .../src/hooks/useLiveUamapScraperOptIn.ts | 61 +++++++++ 19 files changed, 475 insertions(+), 184 deletions(-) create mode 100644 backend/services/liveuamap_settings.py create mode 100644 backend/tests/test_liveuamap_scraper_opt_in.py create mode 100644 frontend/src/hooks/useLiveUamapScraperOptIn.ts diff --git a/.env.example b/.env.example index f5c3b8e..0457580 100644 --- a/.env.example +++ b/.env.example @@ -39,8 +39,8 @@ ADMIN_KEY= # NUFORC_MAPBOX_TOKEN= # Optional startup-risk controls. 
-# On Windows, external curl fallback and the Playwright LiveUAMap scraper are -# disabled by default so blocked upstream feeds cannot interrupt start.bat. +# On Windows, external curl fallback is off by default. LiveUAMap uses UI consent +# when you enable Global Incidents (or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true). # SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=false # SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false # AIS starts by default when AIS_API_KEY is set. Set to 0/false to force-disable. diff --git a/README.md b/README.md index 4c4441d..5ef2af9 100644 --- a/README.md +++ b/README.md @@ -577,6 +577,8 @@ ShadowBroker v0.9.7 is composed of three vertically-stacked planes β€” the **Ope | [OSM Nominatim](https://nominatim.openstreetmap.org) | Place name geocoding (LOCATE bar) | On-demand | No | | [CARTO Basemaps](https://carto.com) | Dark map tiles | Continuous | No | +**Outbound privacy & audit (#348–#366):** Each self-hosted install uses its own backend IP and per-install User-Agent handle. See [docs/OUTBOUND_DATA.md](docs/OUTBOUND_DATA.md) for what contacts third parties, opt-in/env controls, and accepted tradeoffs (CCTV Referer, basemap CDN, LiveUAMap, etc.). + --- ## πŸš€ Getting Started diff --git a/backend/.env.example b/backend/.env.example index 93e9677..578c1b8 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -31,11 +31,9 @@ AIS_API_KEY= # https://aisstream.io/ β€” free tier WebSocket key # Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use. # ALLOW_INSECURE_ADMIN=false -# Per-install operator handle. Round 7a: every outbound third-party API -# call (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz, Broadcastify, -# weather.gov, NUFORC, etc.) includes this handle in the User-Agent so -# upstreams can rate-limit / contact the specific install instead of -# treating every Shadowbroker user as one entity. +# Per-install operator handle. Round 7a: outbound third-party API calls send +# this handle as the User-Agent (e.g. 
operator-7f3a92), not a shared app name, +# so upstreams rate-limit one install instead of blocking every user. # # Default empty -> a stable pseudonymous handle (e.g. "operator-7f3a92") is # auto-generated on first run and persisted to backend/data/operator_handle.json. @@ -43,10 +41,8 @@ AIS_API_KEY= # https://aisstream.io/ β€” free tier WebSocket key # set it here. Special characters are sanitized to dashes. # OPERATOR_HANDLE= -# Default outbound User-Agent for all third-party HTTP fetchers. Operators -# who run a public relay and want a completely custom UA can set this; it -# bypasses the per-operator helper entirely. Most installs should leave it -# unset and use OPERATOR_HANDLE instead. +# Full User-Agent override (replaces the operator handle entirely). Rare; +# most installs should use OPERATOR_HANDLE only. # SHADOWBROKER_USER_AGENT= # Nominatim-specific User-Agent override (OSM usage policy). Leave unset to @@ -122,12 +118,16 @@ AIS_API_KEY= # https://aisstream.io/ β€” free tier WebSocket key # can identify per-install traffic instead of aggregated "ShadowBroker" hits. # Leave blank to send a generic UA. If you set MESHTASTIC_OPERATOR_CALLSIGN, # it is included in outbound headers to meshtastic.org by default so they -# can rate-limit per-operator. Set MESHTASTIC_SEND_CALLSIGN_HEADER=false to -# suppress the callsign while still using it locally (e.g. for APRS). +# can rate-limit per-operator. Callsign is NOT sent upstream unless you opt in. # MESHTASTIC_OPERATOR_CALLSIGN= -# MESHTASTIC_SEND_CALLSIGN_HEADER=true +# MESHTASTIC_SEND_CALLSIGN_HEADER=false # MESH_MQTT_PSK= # hex-encoded, empty = default LongFast key +# LiveUAMap Playwright scraper (#348). Linux/macOS: on by default when Global +# Incidents layer is active. Windows: off until the operator enables Global +# Incidents in the UI (consent dialog) or sets SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true. +# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false forces off on all platforms. 
+ # ── Mesh / Reticulum (RNS) ───────────────────────────────────── # Full-node / participant-node posture for public Infonet sync. # MESH_NODE_MODE=participant # participant | relay | perimeter diff --git a/backend/routers/data.py b/backend/routers/data.py index be883c4..223ba1d 100644 --- a/backend/routers/data.py +++ b/backend/routers/data.py @@ -30,6 +30,10 @@ class LayerUpdate(BaseModel): layers: dict[str, bool] +class LiveUamapOptInUpdate(BaseModel): + opted_in: bool + + _LAST_VIEWPORT_UPDATE: tuple | None = None _LAST_VIEWPORT_UPDATE_TS = 0.0 _VIEWPORT_UPDATE_LOCK = threading.Lock() @@ -386,6 +390,38 @@ async def update_viewport(vp: ViewportUpdate, request: Request): # noqa: ARG001 return {"status": "ok"} +@router.get("/api/liveuamap/scraper-status", dependencies=[Depends(require_local_operator)]) +async def api_liveuamap_scraper_status(): + """Whether LiveUAMap Playwright may run (Windows needs UI opt-in unless env forces).""" + from services.liveuamap_settings import liveuamap_scraper_status + + return liveuamap_scraper_status() + + +@router.post("/api/liveuamap/scraper-opt-in", dependencies=[Depends(require_local_operator)]) +@limiter.limit("10/minute") +async def api_liveuamap_scraper_opt_in(body: LiveUamapOptInUpdate, request: Request): + """Persist operator consent for LiveUAMap scraper (#348).""" + from services.liveuamap_settings import liveuamap_scraper_status, set_liveuamap_ui_opt_in + + set_liveuamap_ui_opt_in(body.opted_in) + if body.opted_in: + from services.fetchers._store import is_any_active + + if is_any_active("global_incidents"): + threading.Thread(target=_run_liveuamap_refresh, daemon=True).start() + return liveuamap_scraper_status() + + +def _run_liveuamap_refresh() -> None: + try: + from services.fetchers.geo import update_liveuamap + + update_liveuamap() + except Exception as e: + logger.warning("LiveUAMap refresh after opt-in failed: %s", e) + + @router.post("/api/layers", dependencies=[Depends(require_local_operator)]) 
@limiter.limit("30/minute") async def update_layers(update: LayerUpdate, request: Request): diff --git a/backend/scripts/convert_power_plants.py b/backend/scripts/convert_power_plants.py index 18dc46c..b492545 100644 --- a/backend/scripts/convert_power_plants.py +++ b/backend/scripts/convert_power_plants.py @@ -29,7 +29,7 @@ def main() -> None: from services.network_utils import outbound_user_agent ua = outbound_user_agent("release-script-power-plants") except Exception: - ua = "Shadowbroker/0.9 (release-script-power-plants; +https://github.com/BigBodyCobain/Shadowbroker/issues)" + ua = "operator-release-script (purpose: power-plants)" req = urllib.request.Request(CSV_URL, headers={"User-Agent": ua}) with urllib.request.urlopen(req, timeout=60) as resp: raw = resp.read().decode("utf-8") diff --git a/backend/services/fetchers/aircraft_database.py b/backend/services/fetchers/aircraft_database.py index 7d51954..910fac4 100644 --- a/backend/services/fetchers/aircraft_database.py +++ b/backend/services/fetchers/aircraft_database.py @@ -38,8 +38,6 @@ _S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}" _REFRESH_INTERVAL_S = 5 * 24 * 3600 _LIST_TIMEOUT_S = 30 _DOWNLOAD_TIMEOUT_S = 600 -from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT - _lock = threading.RLock() _aircraft_by_hex: dict[str, dict[str, str]] = {} _last_refresh = 0.0 diff --git a/backend/services/fetchers/geo.py b/backend/services/fetchers/geo.py index 74ab9b5..57078db 100644 --- a/backend/services/fetchers/geo.py +++ b/backend/services/fetchers/geo.py @@ -20,17 +20,9 @@ def _env_flag(name: str) -> str: def liveuamap_scraper_enabled() -> bool: - """Return whether the Playwright-based LiveUAMap scraper should run. + from services.liveuamap_settings import liveuamap_scraper_enabled as _enabled - It is useful enrichment, but it starts a browser/Node driver and must not be - allowed to destabilize Windows local startup. 
- """ - setting = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER") - if setting in {"1", "true", "yes", "on"}: - return True - if setting in {"0", "false", "no", "off"}: - return False - return os.name != "nt" + return _enabled() # --------------------------------------------------------------------------- @@ -210,10 +202,17 @@ def update_liveuamap(): if not is_any_active("global_incidents"): return if not liveuamap_scraper_enabled(): - logger.info( - "Liveuamap scraper disabled for this runtime; set " - "SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in." - ) + from services.liveuamap_settings import liveuamap_requires_ui_opt_in + + if liveuamap_requires_ui_opt_in(): + logger.info( + "Liveuamap scraper disabled: enable Global Incidents in the UI to " + "consent, or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1." + ) + else: + logger.info( + "Liveuamap scraper disabled; set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in." + ) return logger.info("Running scheduled Liveuamap scraper...") try: diff --git a/backend/services/fetchers/meshtastic_map.py b/backend/services/fetchers/meshtastic_map.py index 21b212f..4614bd9 100644 --- a/backend/services/fetchers/meshtastic_map.py +++ b/backend/services/fetchers/meshtastic_map.py @@ -188,8 +188,8 @@ def fetch_meshtastic_nodes(): callsign = "" send_callsign_header = str( - _os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true") - ).strip().lower() not in {"0", "false", "no", "off", ""} + _os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "false") + ).strip().lower() in {"1", "true", "yes", "on"} # Round 7a: outbound_user_agent already includes the per-install handle. 
# The optional Meshtastic callsign is appended as additional context so diff --git a/backend/services/fetchers/route_database.py b/backend/services/fetchers/route_database.py index 52b3b61..addbbe4 100644 --- a/backend/services/fetchers/route_database.py +++ b/backend/services/fetchers/route_database.py @@ -30,8 +30,6 @@ _AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz" _REFRESH_INTERVAL_S = 5 * 24 * 3600 _HTTP_TIMEOUT_S = 60 -from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT - _lock = threading.RLock() _routes_by_callsign: dict[str, dict[str, Any]] = {} _airports_by_icao: dict[str, dict[str, Any]] = {} diff --git a/backend/services/liveuamap_scraper.py b/backend/services/liveuamap_scraper.py index d9891fd..541061b 100644 --- a/backend/services/liveuamap_scraper.py +++ b/backend/services/liveuamap_scraper.py @@ -27,8 +27,15 @@ def fetch_liveuamap(): browser = p.chromium.launch( headless=True, args=["--disable-blink-features=AutomationControlled"] ) + from services.network_utils import outbound_user_agent + + # Per-install handle (no shared Shadowbroker product token). Stealth remains + # for Turnstile; see docs/OUTBOUND_DATA.md #348. 
+ playwright_ua = ( + f"Mozilla/5.0 (compatible; {outbound_user_agent('liveuamap')})" + ) context = browser.new_context( - user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + user_agent=playwright_ua, viewport={"width": 1920, "height": 1080}, color_scheme="dark", ) diff --git a/backend/services/liveuamap_settings.py b/backend/services/liveuamap_settings.py new file mode 100644 index 0000000..5b3d9ec --- /dev/null +++ b/backend/services/liveuamap_settings.py @@ -0,0 +1,73 @@ +"""LiveUAMap Playwright scraper opt-in (#348) β€” UI consent on Windows.""" + +from __future__ import annotations + +import json +import logging +import os +import threading +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +_OPT_IN_FILE = Path(__file__).resolve().parent.parent / "data" / "liveuamap_scraper_opt_in.json" +_OPT_IN_LOCK = threading.Lock() + + +def _env_flag(name: str) -> str: + return str(os.getenv(name, "")).strip().lower() + + +def liveuamap_requires_ui_opt_in() -> bool: + """Windows local installs need explicit consent before Playwright contacts LiveUAMap.""" + return os.name == "nt" + + +def get_liveuamap_ui_opt_in() -> bool: + if not _OPT_IN_FILE.exists(): + return False + try: + payload = json.loads(_OPT_IN_FILE.read_text(encoding="utf-8")) + return bool(payload.get("opted_in")) + except (OSError, json.JSONDecodeError, TypeError) as e: + logger.warning("LiveUAMap opt-in file unreadable: %s", e) + return False + + +def set_liveuamap_ui_opt_in(opted_in: bool) -> None: + _OPT_IN_FILE.parent.mkdir(parents=True, exist_ok=True) + with _OPT_IN_LOCK: + _OPT_IN_FILE.write_text( + json.dumps({"opted_in": bool(opted_in)}, indent=2), + encoding="utf-8", + ) + + +def liveuamap_scraper_enabled() -> bool: + """Whether the Playwright LiveUAMap scraper may run on this backend.""" + setting = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER") + if setting in {"1", "true", 
"yes", "on"}: + return True + if setting in {"0", "false", "no", "off"}: + return False + if not liveuamap_requires_ui_opt_in(): + return True + return get_liveuamap_ui_opt_in() + + +def liveuamap_scraper_status() -> dict[str, Any]: + setting = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER") + env_override = None + if setting in {"1", "true", "yes", "on"}: + env_override = "on" + elif setting in {"0", "false", "no", "off"}: + env_override = "off" + ui_opted_in = get_liveuamap_ui_opt_in() + requires = liveuamap_requires_ui_opt_in() + return { + "platform_requires_opt_in": requires, + "ui_opted_in": ui_opted_in, + "scraper_enabled": liveuamap_scraper_enabled(), + "env_override": env_override, + } diff --git a/backend/services/mesh/mesh_wormhole_prekey.py b/backend/services/mesh/mesh_wormhole_prekey.py index 887a2fe..ad63358 100644 --- a/backend/services/mesh/mesh_wormhole_prekey.py +++ b/backend/services/mesh/mesh_wormhole_prekey.py @@ -234,12 +234,12 @@ def _fetch_dm_prekey_bundle_from_public_lookup(lookup_token: str) -> dict[str, A # Generic UA: any peer-facing crypto request should not carry a # fork-specific identifier β€” that turns prekey lookups into a # software-fingerprinting beacon. - from services.network_utils import DEFAULT_USER_AGENT + from services.network_utils import default_user_agent request = urllib.request.Request( f"{normalized_peer_url}/api/mesh/dm/prekey-bundle?{encoded}", headers={ "Accept": "application/json", - "User-Agent": DEFAULT_USER_AGENT, + "User-Agent": default_user_agent(), }, method="GET", ) diff --git a/backend/services/network_utils.py b/backend/services/network_utils.py index e587221..97c7ec1 100644 --- a/backend/services/network_utils.py +++ b/backend/services/network_utils.py @@ -34,9 +34,9 @@ _session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10)) # upstream's only recourse was to block "Shadowbroker" as a whole β€” which # would take out every other install too. 
# -# Fix: give each install a stable pseudonymous handle and include it in -# the User-Agent. Now an upstream can rate-limit or block the offending -# operator without affecting anyone else. +# Fix: give each install a stable pseudonymous handle used as the entire +# User-Agent product token (no shared "Shadowbroker" label). Upstreams see +# ``operator-7f3a92`` (or ``OPERATOR_HANDLE``), not one monolithic app name. # # The handle: # @@ -51,7 +51,6 @@ _session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10)) # - Is NEVER mixed into mesh / Wormhole / Infonet identity. This layer is # strictly for public third-party API attribution. -_SHADOWBROKER_VERSION = "0.9" _OPERATOR_HANDLE_FILE = ( Path(__file__).parent.parent / "data" / "operator_handle.json" ) @@ -175,41 +174,21 @@ def _normalize_handle(raw: str) -> str: return safe[:48] if safe else "anonymous" -_CONTACT_URL = "https://github.com/BigBodyCobain/Shadowbroker/issues" - - def outbound_user_agent(purpose: str = "") -> str: """Build a User-Agent for an outbound third-party HTTP request. - Returns something like:: + Returns the per-install handle only, e.g. ``operator-7f3a92`` or + ``operator-7f3a92 (purpose: wikipedia)``. No shared project name β€” so + upstream abuse teams cannot block every install with one ``Shadowbroker`` + rule. - Shadowbroker/0.9 (operator: operator-7f3a92; purpose: wikipedia; - +https://github.com/BigBodyCobain/Shadowbroker/issues) - - The ``purpose`` is optional but recommended β€” it tells the upstream - what feature of ours is making the call (``wikipedia``, ``openmhz``, - ``nominatim``, etc.), which makes their logs and our complaints - actionable. - - Every outbound call in the backend that previously sent a custom - User-Agent should call this helper instead. 
Centralizing here means: - - one place to change the contact URL, - - one place to bump the version on release, - - one place a Wikimedia / OpenMHz operator can reach to ask for - the project to back off, with a per-install handle so they can - target the specific install instead of the project as a whole. + Set ``SHADOWBROKER_USER_AGENT`` to override the entire string if needed. """ handle = get_operator_handle() if purpose: purpose_clean = _normalize_handle(purpose) - return ( - f"Shadowbroker/{_SHADOWBROKER_VERSION} " - f"(operator: {handle}; purpose: {purpose_clean}; +{_CONTACT_URL})" - ) - return ( - f"Shadowbroker/{_SHADOWBROKER_VERSION} " - f"(operator: {handle}; +{_CONTACT_URL})" - ) + return f"{handle} (purpose: {purpose_clean})" + return handle def _reset_operator_handle_cache_for_tests() -> None: @@ -220,19 +199,13 @@ def _reset_operator_handle_cache_for_tests() -> None: _OPERATOR_HANDLE_CACHE = "" -# Default outbound User-Agent. Retained for backwards compatibility with -# call sites that haven't been migrated to ``outbound_user_agent()`` yet. -# Operators who want full per-install attribution should set the -# ``OPERATOR_HANDLE`` setting and migrate call sites incrementally. -# -# Operators who run a public-facing relay can also override the whole UA -# string via the ``SHADOWBROKER_USER_AGENT`` env var. That override -# completely bypasses the per-operator helper; only use it if you know -# what you're doing. 
-DEFAULT_USER_AGENT = os.environ.get( - "SHADOWBROKER_USER_AGENT", - f"Shadowbroker/{_SHADOWBROKER_VERSION}", -) +def default_user_agent() -> str: + """Default User-Agent for ``fetch_with_curl`` and legacy call sites.""" + custom = (os.environ.get("SHADOWBROKER_USER_AGENT") or "").strip() + if custom: + return custom + return outbound_user_agent() + # Find bash for curl fallback β€” Git bash's curl has the TLS features # needed to pass CDN fingerprint checks (brotli, zstd, libpsl) @@ -288,7 +261,7 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None, both Python requests and the barebones Windows system curl. """ default_headers = { - "User-Agent": DEFAULT_USER_AGENT, + "User-Agent": default_user_agent(), } if headers: default_headers.update(headers) diff --git a/backend/tests/test_liveuamap_scraper_opt_in.py b/backend/tests/test_liveuamap_scraper_opt_in.py new file mode 100644 index 0000000..762a346 --- /dev/null +++ b/backend/tests/test_liveuamap_scraper_opt_in.py @@ -0,0 +1,45 @@ +"""LiveUAMap scraper UI opt-in on Windows (#348).""" +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from services import liveuamap_settings as settings + + +@pytest.fixture +def opt_in_file(tmp_path, monkeypatch): + path = tmp_path / "liveuamap_scraper_opt_in.json" + monkeypatch.setattr(settings, "_OPT_IN_FILE", path) + return path + + +def test_windows_defaults_off_without_opt_in(monkeypatch, opt_in_file): + monkeypatch.setattr(settings.os, "name", "nt") + monkeypatch.delenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", raising=False) + assert settings.liveuamap_scraper_enabled() is False + assert settings.liveuamap_requires_ui_opt_in() is True + + +def test_windows_opt_in_enables_scraper(monkeypatch, opt_in_file): + monkeypatch.setattr(settings.os, "name", "nt") + monkeypatch.delenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", raising=False) + settings.set_liveuamap_ui_opt_in(True) + assert 
settings.liveuamap_scraper_enabled() is True + assert json.loads(opt_in_file.read_text())["opted_in"] is True + + +def test_linux_enabled_without_opt_in(monkeypatch, opt_in_file): + monkeypatch.setattr(settings.os, "name", "posix") + monkeypatch.delenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", raising=False) + assert settings.liveuamap_requires_ui_opt_in() is False + assert settings.liveuamap_scraper_enabled() is True + + +def test_env_force_off_overrides_ui_opt_in(monkeypatch, opt_in_file): + monkeypatch.setattr(settings.os, "name", "nt") + monkeypatch.setenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", "false") + settings.set_liveuamap_ui_opt_in(True) + assert settings.liveuamap_scraper_enabled() is False diff --git a/backend/tests/test_meshtastic_callsign_optout.py b/backend/tests/test_meshtastic_callsign_optout.py index 2215cb0..e57770d 100644 --- a/backend/tests/test_meshtastic_callsign_optout.py +++ b/backend/tests/test_meshtastic_callsign_optout.py @@ -1,56 +1,27 @@ -"""Issue #203 (tg12): meshtastic_map.py was unconditionally including -``MESHTASTIC_OPERATOR_CALLSIGN`` in the outbound User-Agent header, -which contradicted the README's "no user data transmitted" claim. - -The fix preserves the existing default behavior (callsign sent β€” that's -what operators who configured the variable expected) but adds an -opt-out env var ``MESHTASTIC_SEND_CALLSIGN_HEADER=false`` for -privacy-conscious operators. 
-""" -import importlib -import sys +"""Issue #350: Meshtastic callsign in outbound UA is opt-in, not default.""" +import os import pytest -def _reload_meshtastic_module(): - """Reload meshtastic_map so settings are re-read on demand.""" - if "services.fetchers.meshtastic_map" in sys.modules: - del sys.modules["services.fetchers.meshtastic_map"] - return importlib.import_module("services.fetchers.meshtastic_map") +def _send_callsign_header_from_env() -> bool: + raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "false")).strip().lower() + return raw in {"1", "true", "yes", "on"} -def test_default_behavior_includes_callsign(monkeypatch): - """Operators who set the callsign and don't change anything else - keep their existing behavior (callsign sent in UA).""" - # We test the UA construction logic by exercising the same branches - # the fetcher uses. Direct fetch isn't run because it makes a real - # network call β€” we just verify the env-var-driven decision. - import os +def test_default_does_not_send_callsign(monkeypatch): monkeypatch.setenv("MESHTASTIC_OPERATOR_CALLSIGN", "N0CALL") monkeypatch.delenv("MESHTASTIC_SEND_CALLSIGN_HEADER", raising=False) - - raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")).strip().lower() - send_callsign_header = raw not in {"0", "false", "no", "off", ""} - assert send_callsign_header is True + assert _send_callsign_header_from_env() is False -def test_opt_out_suppresses_callsign(monkeypatch): - """Setting MESHTASTIC_SEND_CALLSIGN_HEADER=false suppresses the header.""" - import os +def test_opt_in_sends_callsign(monkeypatch): monkeypatch.setenv("MESHTASTIC_OPERATOR_CALLSIGN", "N0CALL") - monkeypatch.setenv("MESHTASTIC_SEND_CALLSIGN_HEADER", "false") - - raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")).strip().lower() - send_callsign_header = raw not in {"0", "false", "no", "off", ""} - assert send_callsign_header is False + monkeypatch.setenv("MESHTASTIC_SEND_CALLSIGN_HEADER", "true") + 
assert _send_callsign_header_from_env() is True -def test_various_falsy_values_all_opt_out(monkeypatch): - """Common falsy strings should all suppress the callsign header.""" - import os - for falsy in ("0", "false", "FALSE", "no", "off"): +def test_various_falsy_values_do_not_opt_in(monkeypatch): + for falsy in ("0", "false", "FALSE", "no", "off", ""): monkeypatch.setenv("MESHTASTIC_SEND_CALLSIGN_HEADER", falsy) - raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")).strip().lower() - send_callsign_header = raw not in {"0", "false", "no", "off", ""} - assert send_callsign_header is False, f"value {falsy!r} did not opt out" + assert _send_callsign_header_from_env() is False, f"value {falsy!r} should not opt in" diff --git a/backend/tests/test_per_operator_outbound_attribution.py b/backend/tests/test_per_operator_outbound_attribution.py index 90cd4d5..d7285b3 100644 --- a/backend/tests/test_per_operator_outbound_attribution.py +++ b/backend/tests/test_per_operator_outbound_attribution.py @@ -133,23 +133,19 @@ class TestOperatorHandleGeneration: class TestOutboundUserAgentString: - def test_includes_operator_handle(self, isolated_handle): + def test_ua_is_operator_handle(self, isolated_handle): ua = isolated_handle.outbound_user_agent() handle = isolated_handle.get_operator_handle() - assert f"operator: {handle}" in ua + assert ua == handle def test_includes_purpose_when_provided(self, isolated_handle): ua = isolated_handle.outbound_user_agent("wikipedia") - assert "purpose: wikipedia" in ua + handle = isolated_handle.get_operator_handle() + assert ua == f"{handle} (purpose: wikipedia)" - def test_includes_contact_path(self, isolated_handle): - ua = isolated_handle.outbound_user_agent() - assert "github.com" in ua.lower() - assert "shadowbroker" in ua.lower() - - def test_version_prefix(self, isolated_handle): - ua = isolated_handle.outbound_user_agent() - assert ua.startswith("Shadowbroker/") + def test_no_shadowbroker_product_token(self, 
isolated_handle): + ua = isolated_handle.outbound_user_agent("nominatim") + assert "shadowbroker" not in ua.lower() # --------------------------------------------------------------------------- @@ -181,8 +177,8 @@ class TestWikimediaCallsAreNowPerOperator: assert "Api-User-Agent" in headers handle = isolated_handle.get_operator_handle() for header_value in (headers["User-Agent"], headers["Api-User-Agent"]): - assert f"operator: {handle}" in header_value, ( - f"Wikimedia UA must include the per-operator handle; got {header_value!r}" + assert header_value.startswith(handle), ( + f"Wikimedia UA must be the per-operator handle; got {header_value!r}" ) def test_wikipedia_summary_uses_per_operator_ua(self, isolated_handle, monkeypatch): @@ -211,7 +207,8 @@ class TestWikimediaCallsAreNowPerOperator: assert wikipedia_hits, "Wikipedia summary fetch was not called" for _url, headers in wikipedia_hits: handle = isolated_handle.get_operator_handle() - assert f"operator: {handle}" in headers.get("User-Agent", "") + ua = headers.get("User-Agent", "") + assert ua.startswith(handle), f"Wikipedia UA must be the operator handle; got {ua!r}" # --------------------------------------------------------------------------- @@ -233,6 +230,7 @@ class TestNoMonsterUserAgentRemains: """ BANNED_LITERALS = ( + "Shadowbroker/", "ShadowBroker-OSINT/1.0", "ShadowBroker-OSINT/0.9", "ShadowBroker-FeedIngester/1.0", diff --git a/docs/OUTBOUND_DATA.md b/docs/OUTBOUND_DATA.md index df78fee..10309d7 100644 --- a/docs/OUTBOUND_DATA.md +++ b/docs/OUTBOUND_DATA.md @@ -1,43 +1,111 @@ # Outbound data and third-party exposure -Shadowbroker is **self-hosted**: each install uses its own backend egress IP (and optional `OPERATOR_HANDLE` in `User-Agent`). This documents intentional third-party contact for audit issues #348–#366. +Shadowbroker is **self-hosted**: each install uses its own backend egress IP. 
This document is the operator-facing record for GitHub audit issues **#348–#366** (tg12): what contacts third parties, why, and how to opt out without losing unrelated features. ## Architecture | Path | Who calls third parties | |------|-------------------------| -| UI β†’ `/api/*` β†’ fetchers | **Backend** | -| Map basemap tiles/fonts | **Browser** (CARTO, demotiles.maplibre.org) | -| CCTV proxy | **Backend** (with upstream-required `Referer` / `Origin`) | +| Map UI → `/api/*` → fetchers | **This install’s backend** | +| Basemap tiles / fonts | **Operator’s browser** (CARTO, demotiles.maplibre.org) | +| CCTV still/video proxy | **Backend** (Referer/Origin set per agency — see #349) | + +--- + +## Issue disposition summary + +| Issue | Status | Approach | +|-------|--------|----------| +| **#351** | Fixed | Region dossier via backend proxy | +| **#352** | Fixed | Geocode via `/api/geocode` only | +| **#360** | Fixed | Wikipedia/Wikidata via backend | +| **#362** | Fixed | `DEEPSTATE_MIRROR_COMMIT` optional pin | +| **#363** | Fixed | Madrid KML HTTPS-first | +| **#364** | Fixed | KiwiSDR HTTPS-first + validation | +| **#348** | Accepted + gated | Windows UI opt-in; env override; stealth documented | +| **#349** | Accepted + documented | Agency-required Referer on backend proxy only | +| **#350** | Mitigated | Callsign in UA **off by default**; opt-in `MESHTASTIC_SEND_CALLSIGN_HEADER=true` | +| **#354** | Accepted + documented | Default basemap CDN; optional self-hosted tiles | +| **#361** | Mitigated | UA is **install handle only** (`operator-…`), not shared `Shadowbroker/` token | +| **#366** | Accepted + documented | Honest per-install scrape; feature degrades if blocked | + +--- + +## Per-install User-Agent (#361) + +- **Code:** `backend/services/network_utils.py` — `outbound_user_agent()`, `OPERATOR_HANDLE` +- **Sent:** `operator-7f3a92` or `your-handle (purpose: nominatim)` — **no** shared app product name +- **Why:** Upstreams can rate-limit **one 
install**; a block on `operator-abc123` does not require blocking every Shadowbroker user +- **Override:** `SHADOWBROKER_USER_AGENT` replaces the entire string +- **Note:** The same handle across Wikipedia, Broadcastify, etc. still correlates **your** traffic across those sites — that is intentional per-install attribution, not anonymity + +--- + +## LiveUAMap scraper (#348) + +- **Layer:** `global_incidents` (LiveUAMap map pins; **GDELT** text still loads without LiveUAMap) +- **Code:** `backend/services/liveuamap_scraper.py` (Playwright + stealth for Turnstile) +- **Windows:** Scraper **off** until you enable **Global Incidents** and confirm the UI dialog → `backend/data/liveuamap_scraper_opt_in.json` +- **Linux/macOS:** Scraper runs when the layer is on (unless env forces off) +- **API:** `GET /api/liveuamap/scraper-status`, `POST /api/liveuamap/scraper-opt-in` +- **Env:** `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true|false` overrides UI on all platforms +- **Honesty:** Backend-only; no browser-direct LiveUAMap from end users. Stealth remains a functional tradeoff for Turnstile; disable the layer or set `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false` if that is unacceptable + +--- + +## CCTV proxy Referer / Origin (#349) + +- **Code:** `backend/routers/cctv.py`, `backend/main.py` +- **Behavior:** Backend proxies streams and sets `Referer` / `Origin` each agency expects (e.g. 
`https://511ga.org/cctv`, `https://informo.madrid.es/`) +- **Exposure:** Agency sees **backend IP**, not each viewer’s browser +- **Not removed:** Without these headers, most public DOT/city feeds return 403 — this is not end-user browser impersonation, it is the same headers a normal browser session would send to play the feed + +--- + +## Meshtastic map callsign (#350) + +- **Layer:** `sigint_meshtastic` must be active for `fetch_meshtastic_nodes()` +- **Default:** `MESHTASTIC_SEND_CALLSIGN_HEADER=false` — callsign **not** sent to `meshtastic.liamcottle.net` unless you set `true` +- **Optional:** `MESHTASTIC_OPERATOR_CALLSIGN` for local display; header only when explicitly enabled + +--- + +## Basemap CDN (#354) + +- **Code:** `frontend/src/components/map/styles/mapStyles.ts`, `frontend/public/map-style.json` +- **Hosts:** `*.basemaps.cartocdn.com`, `demotiles.maplibre.org` +- **Exposure:** **Browser** loads tiles (client IP + pan/zoom), not the backend +- **Mitigation:** Self-host raster tiles and point MapLibre `sources` at your tile server (operator choice; not required for core features) + +--- + +## Broadcastify top feeds (#366) + +- **Code:** `backend/services/radio_intercept.py` +- **Behavior:** Backend fetches `https://www.broadcastify.com/listen/top` with per-install handle UA; parses public HTML for feed metadata and CDN stream URLs +- **Exposure:** Your backend IP; 5-minute cache +- **If blocked:** Panel shows empty list — feature not removed from the app +- **Not:** Fake Chrome UA or cloudscraper bypass (removed in Round 7a) + +--- ## Ukraine frontline mirror (#362) -- **Layer:** `ukraine_frontline` → `frontlines` on the map (DeepStateMap polygons). **Not** UAP (`uap_sightings` / NUFORC). 
-- **Code:** `backend/services/geopolitics.py` -- **Default:** `cyterat/deepstate-map-data` @ `main`, latest `data/deepstatemap_data_*.geojson` -- **Pin:** `DEEPSTATE_MIRROR_COMMIT=` — immutable Git snapshot; bump SHA when you want newer lines -- **Optional:** `DEEPSTATE_MIRROR_REPO=owner/repo` +- **Layer:** `ukraine_frontline` / `frontlines` +- **Pin:** `DEEPSTATE_MIRROR_COMMIT`, optional `DEEPSTATE_MIRROR_REPO` -## Madrid CCTV (#363) +## Madrid CCTV (#363) / KiwiSDR (#364) -- **Ingest:** HTTPS-first KML on `datos.madrid.es` (catalog only); HTTP fallback if needed -- **Feeds:** Still images from URLs inside the KML (`informo.madrid.es`, etc.), proxied with `Referer: https://informo.madrid.es/` — unchanged by KML transport +- Madrid: HTTPS-first KML catalog; image URLs unchanged +- KiwiSDR: HTTPS-first directory fetch; shape validation + bundled fallback -## KiwiSDR (#364) - -- HTTPS first, then HTTP; shape validation + bundled `backend/data/kiwisdr_directory.json` - -## Other documented exposures - -- **#354 Basemap:** browser → `*.basemaps.cartocdn.com`, `demotiles.maplibre.org` -- **#349 CCTV Referer:** required for many DOT/city streams; backend proxy only -- **#361 Operator UA:** `OPERATOR_HANDLE` / `outbound_user_agent()` per install -- **#366 Broadcastify:** backend scrape with honest UA -- **#348 LiveUAMap:** `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER` (default on Linux, off Windows) +--- ## Operator checklist -1. Set `OPERATOR_HANDLE` if you want a recognizable contact on upstream logs. -2. Pin `DEEPSTATE_MIRROR_COMMIT` after reviewing a mirror commit (see `backend/.env.example`). -3. Set `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false` to disable LiveUAMap contact. -4. Self-host map tiles if basemap CDN exposure matters. +1. Set `OPERATOR_HANDLE` if you want a recognizable name on upstream logs. +2. Pin `DEEPSTATE_MIRROR_COMMIT` for reproducible frontlines (optional). +3. Windows: enable Global Incidents in UI only if you accept LiveUAMap server contact. 
+4. Set `SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false` to forbid LiveUAMap entirely. +5. Set `MESHTASTIC_SEND_CALLSIGN_HEADER=true` only if you want callsign sent upstream. +6. Self-host map tiles if basemap CDN exposure matters. diff --git a/frontend/src/components/WorldviewLeftPanel.tsx b/frontend/src/components/WorldviewLeftPanel.tsx index 0a9d050..2aa8e4b 100644 --- a/frontend/src/components/WorldviewLeftPanel.tsx +++ b/frontend/src/components/WorldviewLeftPanel.tsx @@ -45,6 +45,8 @@ import { MapPin, } from 'lucide-react'; import { API_BASE } from '@/lib/api'; +import { useLiveUamapScraperOptIn } from '@/hooks/useLiveUamapScraperOptIn'; +import ConfirmDialog from '@/components/ui/ConfirmDialog'; import { onTileLoadingChange, resetTileLoading } from '@/lib/sentinelHub'; import packageJson from '../../package.json'; import { useTheme } from '@/lib/ThemeContext'; @@ -702,6 +704,22 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ const [sarModalOpen, setSarModalOpen] = useState(false); const [sarPendingEnable, setSarPendingEnable] = useState(false); + const [liveuamapModalOpen, setLiveuamapModalOpen] = useState(false); + const [liveuamapPendingEnable, setLiveuamapPendingEnable] = useState<(() => void) | null>(null); + const { needsConsentBeforeEnable, confirmOptIn } = useLiveUamapScraperOptIn(); + + const withGlobalIncidentsConsent = useCallback( + (layerId: string, turningOn: boolean, apply: () => void) => { + if (needsConsentBeforeEnable(layerId, turningOn)) { + setLiveuamapPendingEnable(() => apply); + setLiveuamapModalOpen(true); + return; + } + apply(); + }, + [needsConsentBeforeEnable], + ); + // Auto-detect: if the backend already has Mode B creds configured // (via env or a previous runtime save), promote the stored choice to // 'b_active' without prompting. 
If it flips back to off, reset so the @@ -1401,13 +1419,20 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ const allOn = Object.entries(activeLayers) .filter(([k]) => !excluded.has(k)) .every(([, v]) => v); - setActiveLayers((prev: ActiveLayers) => { - const next = { ...prev } as ActiveLayers; - for (const k of Object.keys(prev) as Array) { - next[k] = excluded.has(k) ? prev[k] : !allOn; - } - return next; - }); + const enableAll = () => { + setActiveLayers((prev: ActiveLayers) => { + const next = { ...prev } as ActiveLayers; + for (const k of Object.keys(prev) as Array) { + next[k] = excluded.has(k) ? prev[k] : !allOn; + } + return next; + }); + }; + if (!allOn) { + withGlobalIncidentsConsent('global_incidents', true, enableAll); + } else { + enableAll(); + } }} > {Object.entries(activeLayers) @@ -1595,13 +1620,23 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ : 'rgb(100 116 139 / 0.3)', }} onClick={() => { - setActiveLayers((prev: ActiveLayers) => { - const next = { ...prev } as ActiveLayers; - for (const id of sectionLayerIds as Array) { - next[id] = !allOn; - } - return next; - }); + const toggleSection = () => { + setActiveLayers((prev: ActiveLayers) => { + const next = { ...prev } as ActiveLayers; + for (const id of sectionLayerIds as Array) { + next[id] = !allOn; + } + return next; + }); + }; + if ( + !allOn && + (sectionLayerIds as string[]).includes('global_incidents') + ) { + withGlobalIncidentsConsent('global_incidents', true, toggleSection); + } else { + toggleSection(); + } }} title={ allOn ? 
`Disable all ${section.label}` : `Enable all ${section.label}` @@ -1647,10 +1682,12 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ setSarModalOpen(true); return; } - setActiveLayers((prev: ActiveLayers) => ({ - ...prev, - [layer.id]: !active, - })); + withGlobalIncidentsConsent(layer.id, !active, () => { + setActiveLayers((prev: ActiveLayers) => ({ + ...prev, + [layer.id]: !active, + })); + }); }} >
@@ -1953,6 +1990,31 @@ const WorldviewLeftPanel = React.memo(function WorldviewLeftPanel({ }} /> )} + { + setLiveuamapModalOpen(false); + setLiveuamapPendingEnable(null); + }} + onConfirm={() => { + void (async () => { + try { + await confirmOptIn(); + liveuamapPendingEnable?.(); + } catch (e) { + console.warn('LiveUAMap opt-in failed:', e); + } finally { + setLiveuamapModalOpen(false); + setLiveuamapPendingEnable(null); + } + })(); + }} + /> ); }); diff --git a/frontend/src/hooks/useLiveUamapScraperOptIn.ts b/frontend/src/hooks/useLiveUamapScraperOptIn.ts new file mode 100644 index 0000000..257f582 --- /dev/null +++ b/frontend/src/hooks/useLiveUamapScraperOptIn.ts @@ -0,0 +1,61 @@ +'use client'; + +import { useCallback, useEffect, useState } from 'react'; +import { API_BASE } from '@/lib/api'; + +export type LiveUamapScraperStatus = { + platform_requires_opt_in: boolean; + ui_opted_in: boolean; + scraper_enabled: boolean; + env_override: 'on' | 'off' | null; +}; + +export function useLiveUamapScraperOptIn(enabled = true) { + const [status, setStatus] = useState(null); + + const refreshStatus = useCallback(async () => { + try { + const res = await fetch(`${API_BASE}/api/liveuamap/scraper-status`); + if (!res.ok) return; + const body = (await res.json()) as LiveUamapScraperStatus; + setStatus(body); + } catch { + // Backend may still be starting. 
+ } + }, []); + + useEffect(() => { + if (!enabled) return; + void refreshStatus(); + }, [enabled, refreshStatus]); + + const needsConsentBeforeEnable = useCallback( + (layerId: string, turningOn: boolean) => + layerId === 'global_incidents' && + turningOn && + Boolean(status?.platform_requires_opt_in) && + !status?.ui_opted_in, + [status], + ); + + const confirmOptIn = useCallback(async () => { + const res = await fetch(`${API_BASE}/api/liveuamap/scraper-opt-in`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ opted_in: true }), + }); + if (!res.ok) { + throw new Error(`LiveUAMap opt-in failed (${res.status})`); + } + const body = (await res.json()) as LiveUamapScraperStatus; + setStatus(body); + return body; + }, []); + + return { + status, + refreshStatus, + needsConsentBeforeEnable, + confirmOptIn, + }; +}