mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-27 09:32:28 +02:00
71a9d9e144
PR #227 hardened most Wormhole/Infonet control surfaces behind require_local_operator and made the CrowdThreat fetcher opt-in. An audit of the codebase against that PR's stated goals turned up four classes of gap that the original change missed: 1. Two operator-only endpoints were left unprotected: - POST /api/wormhole/join: calls bootstrap_wormhole_identity() and flips the node into Tor mode, exactly the surface #227 hardened on /api/wormhole/identity/bootstrap. - POST /api/sigint/transmit: relays APRS-IS packets over radio using operator-supplied credentials. Anything that reached the API could transmit on the operator's authority. Both now require_local_operator. test_control_surface_auth.py extended with regression coverage for both. 2. Five third-party fetchers were still default-on, phoning home to politically/commercially sensitive upstreams on every poll cycle: - fimi.py -> euvsdisinfo.eu -> FIMI_ENABLED - prediction_markets -> Polymarket + Kalshi -> PREDICTION_MARKETS_ENABLED - financial.py -> Finnhub / yfinance -> FINANCIAL_ENABLED or FINNHUB_API_KEY - nuforc_enrichment -> huggingface.co -> NUFORC_ENABLED - news.py -> configured RSS feeds -> NEWS_ENABLED (default on, kill switch) Same CrowdThreat-style pattern: explicit env-var opt-in, empty the data slot and mark_fresh when disabled. New regression test file test_third_party_fetchers_opt_in.py asserts each fetcher's network entry point is not called when its gate is off. 3. The outbound User-Agent leaked both the operator's personal email and a fork-specific GitHub URL on every fetcher request. Consolidated to a single DEFAULT_USER_AGENT in network_utils.py, project-generic by default (no contact info), overridable via SHADOWBROKER_USER_AGENT for operators who want to identify themselves (e.g. for Nominatim or weather.gov usage-policy compliance). Six call sites updated; the Nominatim-specific override is preserved. 4. The same generic UA now also flows through the peer prekey lookup in mesh_wormhole_prekey.py, so DM first-contact requests no longer identify the caller as a Shadowbroker fork to the peer being queried. .env.example updated to document all new opt-in env vars. Tests: backend/tests/test_control_surface_auth.py (extended), backend/tests/test_crowdthreat_opt_in.py (unchanged, still passes), backend/tests/test_third_party_fetchers_opt_in.py (new, 7 tests). All 31 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
163 lines
5.3 KiB
Python
163 lines
5.3 KiB
Python
"""Static route + airport database loaded from vrs-standing-data.adsb.lol.
|
|
|
|
Replaces the per-batch /api/0/routeset POST with a single daily bulk download.
|
|
Routes change ~weekly when airlines update schedules, so a 24h refresh cadence
|
|
is far more than sufficient and removes ~all live-API pressure on adsb.lol.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import gzip
|
|
import io
|
|
import logging
|
|
import threading
|
|
import time
|
|
from typing import Any
|
|
|
|
import requests
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_ROUTES_URL = "https://vrs-standing-data.adsb.lol/routes.csv.gz"
|
|
_AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz"
|
|
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
|
_HTTP_TIMEOUT_S = 60
|
|
|
|
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
|
|
|
_lock = threading.RLock()
|
|
_routes_by_callsign: dict[str, dict[str, Any]] = {}
|
|
_airports_by_icao: dict[str, dict[str, Any]] = {}
|
|
_last_refresh = 0.0
|
|
_refresh_in_progress = False
|
|
|
|
|
|
def _fetch_csv_gz(url: str) -> list[dict[str, str]]:
|
|
response = requests.get(
|
|
url,
|
|
timeout=_HTTP_TIMEOUT_S,
|
|
headers={"User-Agent": _USER_AGENT, "Accept-Encoding": "gzip"},
|
|
)
|
|
response.raise_for_status()
|
|
text = gzip.decompress(response.content).decode("utf-8-sig")
|
|
return list(csv.DictReader(io.StringIO(text)))
|
|
|
|
|
|
def _build_route_index(rows: list[dict[str, str]]) -> dict[str, dict[str, Any]]:
|
|
index: dict[str, dict[str, Any]] = {}
|
|
for row in rows:
|
|
callsign = (row.get("Callsign") or "").strip().upper()
|
|
airport_codes = (row.get("AirportCodes") or "").strip()
|
|
if not callsign or not airport_codes:
|
|
continue
|
|
icaos = [c.strip() for c in airport_codes.split("-") if c.strip()]
|
|
if len(icaos) < 2:
|
|
continue
|
|
index[callsign] = {
|
|
"airline_code": (row.get("AirlineCode") or "").strip(),
|
|
"airport_codes": airport_codes,
|
|
"airport_icaos": icaos,
|
|
}
|
|
return index
|
|
|
|
|
|
def _build_airport_index(rows: list[dict[str, str]]) -> dict[str, dict[str, Any]]:
|
|
index: dict[str, dict[str, Any]] = {}
|
|
for row in rows:
|
|
icao = (row.get("ICAO") or "").strip().upper()
|
|
if not icao:
|
|
continue
|
|
try:
|
|
lat = float(row.get("Latitude") or 0)
|
|
lon = float(row.get("Longitude") or 0)
|
|
except (TypeError, ValueError):
|
|
continue
|
|
index[icao] = {
|
|
"name": (row.get("Name") or "").strip(),
|
|
"iata": (row.get("IATA") or "").strip(),
|
|
"country": (row.get("CountryISO2") or "").strip(),
|
|
"lat": lat,
|
|
"lon": lon,
|
|
}
|
|
return index
|
|
|
|
|
|
def refresh_route_database(force: bool = False) -> bool:
|
|
"""Pull routes.csv.gz + airports.csv.gz and rebuild the in-memory indexes.
|
|
|
|
Returns True if a refresh was performed (success or attempted), False if
|
|
skipped because the cache is still fresh or another refresh is in flight.
|
|
"""
|
|
global _last_refresh, _refresh_in_progress
|
|
|
|
now = time.time()
|
|
with _lock:
|
|
if _refresh_in_progress:
|
|
return False
|
|
if not force and (now - _last_refresh) < _REFRESH_INTERVAL_S and _routes_by_callsign:
|
|
return False
|
|
_refresh_in_progress = True
|
|
|
|
try:
|
|
started = time.time()
|
|
airport_rows = _fetch_csv_gz(_AIRPORTS_URL)
|
|
route_rows = _fetch_csv_gz(_ROUTES_URL)
|
|
airports = _build_airport_index(airport_rows)
|
|
routes = _build_route_index(route_rows)
|
|
with _lock:
|
|
_airports_by_icao.clear()
|
|
_airports_by_icao.update(airports)
|
|
_routes_by_callsign.clear()
|
|
_routes_by_callsign.update(routes)
|
|
_last_refresh = time.time()
|
|
logger.info(
|
|
"route database refreshed in %.1fs: %d routes, %d airports",
|
|
time.time() - started,
|
|
len(routes),
|
|
len(airports),
|
|
)
|
|
return True
|
|
except (requests.RequestException, OSError, ValueError) as exc:
|
|
logger.warning("route database refresh failed: %s", exc)
|
|
return True
|
|
finally:
|
|
with _lock:
|
|
_refresh_in_progress = False
|
|
|
|
|
|
def lookup_route(callsign: str) -> dict[str, Any] | None:
|
|
"""Resolve a callsign to {orig_name, dest_name, orig_loc, dest_loc} or None.
|
|
|
|
Matches the shape produced by the legacy fetch_routes_background cache so
|
|
the caller in flights.py can be a drop-in replacement.
|
|
"""
|
|
key = (callsign or "").strip().upper()
|
|
if not key:
|
|
return None
|
|
with _lock:
|
|
route = _routes_by_callsign.get(key)
|
|
if not route:
|
|
return None
|
|
icaos = route["airport_icaos"]
|
|
orig = _airports_by_icao.get(icaos[0].upper())
|
|
dest = _airports_by_icao.get(icaos[-1].upper())
|
|
if not orig or not dest:
|
|
return None
|
|
return {
|
|
"orig_name": f"{orig['iata']}: {orig['name']}" if orig["iata"] else orig["name"],
|
|
"dest_name": f"{dest['iata']}: {dest['name']}" if dest["iata"] else dest["name"],
|
|
"orig_loc": [orig["lon"], orig["lat"]],
|
|
"dest_loc": [dest["lon"], dest["lat"]],
|
|
}
|
|
|
|
|
|
def route_database_status() -> dict[str, Any]:
|
|
with _lock:
|
|
return {
|
|
"last_refresh": _last_refresh,
|
|
"routes": len(_routes_by_callsign),
|
|
"airports": len(_airports_by_icao),
|
|
"in_progress": _refresh_in_progress,
|
|
}
|