From 35cd4e4c715880c0a97f4352888d0fe136aa1643 Mon Sep 17 00:00:00 2001 From: Shadowbroker <43977454+BigBodyCobain@users.noreply.github.com> Date: Fri, 22 May 2026 00:51:54 -0600 Subject: [PATCH 1/2] Fix #287: proxy-aware rate-limit key (#295) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by @tg12 in the external security/correctness audit. Before this change, backend/limiter.py was: from slowapi.util import get_remote_address limiter = Limiter(key_func=get_remote_address) get_remote_address only ever returns request.client.host — it does not look at X-Forwarded-For. Behind the bundled Next.js proxy (or any other reverse proxy), every connected operator's client.host is the frontend container's bridge IP, so @limiter.limit("120/minute") collapses into one shared bucket for everybody on the same backend. One heavy tab can starve every other operator on that node. This change swaps in shadowbroker_rate_limit_key, which: * Reads X-Forwarded-For ONLY when the immediate peer matches the SAME hostname-bound allowlist we use for Docker-bridge local-operator trust (auth._resolve_trusted_bridge_ips — fix #250). Default is `frontend,shadowbroker-frontend`, override via SHADOWBROKER_TRUSTED_FRONTEND_HOSTS. * Picks the FIRST entry in the XFF chain — that's the operator end, not the proxy end. * Falls back to request.client.host for any peer not on the allowlist. Direct hits, unrelated containers, and unknown hosts are bucketed exactly like before. * Falls back to request.client.host when the resolver itself raises (e.g. DNS down). XFF is never accepted on a peer we can't confirm is the trusted frontend — there is no way to spoof another operator's bucket from outside. No new env vars. No new operator config. Single-operator nodes are unaffected — same behaviour as before. The 120/minute and 60/minute windows on the existing endpoints are unchanged; only the KEY they bucket on changes. Tests cover: * Direct loopback → keys on peer (regression check vs. get_remote_address default). * Untrusted peer sending XFF → XFF ignored, keys on peer. * Trusted frontend peer with XFF → keys on first XFF entry. * First XFF entry picked from a multi-hop chain. * Trusted peer without XFF → falls back to peer IP. * Empty/whitespace XFF entries skipped. * Header lookup is case-insensitive. * Two operators behind same proxy → different keys (the whole point of the fix). * Spoof attempt from internet-facing untrusted IP can't steal the victim's bucket. * Resolver raising is treated as untrusted (fail-closed). * No-client request shape doesn't raise. Co-authored-by: BigBodyCobain --- backend/limiter.py | 106 ++++++++++- backend/tests/test_rate_limit_proxy_aware.py | 186 +++++++++++++++++++ 2 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 backend/tests/test_rate_limit_proxy_aware.py diff --git a/backend/limiter.py b/backend/limiter.py index 38404a8..a35e4c5 100644 --- a/backend/limiter.py +++ b/backend/limiter.py @@ -1,4 +1,108 @@ +"""Rate-limit key function for slowapi. + +Issue #287 (tg12): the previous implementation used +``slowapi.util.get_remote_address`` which only ever returns +``request.client.host``. Behind the bundled Next.js proxy (or any other +reverse proxy), every connected operator's ``client.host`` is the +frontend container's bridge IP. ``@limiter.limit("120/minute")`` then +collapses into one shared bucket for everybody on the same backend — +one heavy tab can starve every other operator on the node. + +This module replaces that key function with one that: + + * Reads ``X-Forwarded-For`` ONLY when the immediate peer is a trusted + frontend container (same allowlist used by the Docker bridge + local-operator trust path — see ``backend/auth.py`` ``#250``). + * Picks the FIRST entry in the XFF chain. That's the client end of + the proxy chain, which is the operator we want to bucket on. + * Falls back to ``request.client.host`` for any peer that isn't on + the trusted-frontend allowlist. Direct hits, unrelated containers, + and unknown hosts are bucketed exactly like before — there is no + way for an untrusted caller to spoof XFF and steal another + operator's rate-limit bucket. + +Single-operator nodes are unaffected: the frontend resolves to one IP, +that IP is on the trust list, the XFF header is read, and you get one +bucket per operator (i.e. you). +""" + +from __future__ import annotations + +from typing import Any + from slowapi import Limiter from slowapi.util import get_remote_address -limiter = Limiter(key_func=get_remote_address) + +def _client_host(request: Any) -> str: + """Return the immediate peer's IP, normalised to a lowercase string.""" + client = getattr(request, "client", None) + if client is None: + return "" + host = getattr(client, "host", "") or "" + return host.lower() + + +def _first_forwarded_for(value: str) -> str: + """Return the first non-empty entry from an ``X-Forwarded-For`` header. + + RFC 7239 / de-facto XFF format is ``client, proxy1, proxy2, …``. The + client end is what we want to bucket on. Empty parts (which appear + in some malformed headers) are skipped so we don't end up keying on + an empty string. + """ + for raw in value.split(","): + candidate = raw.strip() + if candidate: + return candidate.lower() + return "" + + +def _is_trusted_frontend_peer(host: str) -> bool: + """True iff ``host`` is one of the resolved trusted-frontend IPs. + + Imported lazily so this module stays usable in unit tests that + don't want to pull the whole auth module into scope. + """ + if not host: + return False + try: + from auth import _resolve_trusted_bridge_ips + except Exception: # pragma: no cover - defensive + return False + try: + trusted_ips = _resolve_trusted_bridge_ips() + except Exception: # pragma: no cover - defensive + return False + return host in trusted_ips + + +def shadowbroker_rate_limit_key(request: Any) -> str: + """slowapi key_func that is proxy-aware on trusted frontend peers only. + + Behaviour matrix: + + * Direct loopback / unknown peer → ``request.client.host`` + (identical to slowapi's default ``get_remote_address``). + * Peer is a trusted frontend container AND ``X-Forwarded-For`` is + present → first XFF entry (the actual operator). + * Peer is a trusted frontend container but no XFF → fall back to + ``request.client.host`` (the bridge IP). One shared bucket for + everyone in that case, same as before — but you only get there + if the trusted frontend forgot to forward XFF, which it won't. + """ + peer = _client_host(request) + if _is_trusted_frontend_peer(peer): + headers = getattr(request, "headers", None) + if headers is not None: + xff = headers.get("x-forwarded-for") or headers.get("X-Forwarded-For") + if xff: + first = _first_forwarded_for(xff) + if first: + return first + # Untrusted peer (or trusted peer without XFF): match the original + # get_remote_address behaviour byte-for-byte. + return get_remote_address(request) + + +limiter = Limiter(key_func=shadowbroker_rate_limit_key) diff --git a/backend/tests/test_rate_limit_proxy_aware.py b/backend/tests/test_rate_limit_proxy_aware.py new file mode 100644 index 0000000..a603316 --- /dev/null +++ b/backend/tests/test_rate_limit_proxy_aware.py @@ -0,0 +1,186 @@ +"""Tests for issue #287: proxy-aware slowapi key function. + +Contract: + * Untrusted peer → key is the peer IP (matches old get_remote_address). + * Trusted frontend peer with X-Forwarded-For → key is first XFF entry. + * Trusted frontend peer without X-Forwarded-For → key is the peer IP + (fail-soft: no behaviour change vs. before #287). + * XFF from an untrusted peer is IGNORED — there must be no way to + spoof another operator's bucket by sending XFF directly. + * The first XFF entry is used (not the last — that's the trusted + proxy talking to the backend, not the actual operator). +""" + +import pytest + + +class _FakeClient: + def __init__(self, host: str): + self.host = host + + +class _FakeRequest: + """Minimal slowapi-compatible request shim — has ``client`` and + ``headers`` attributes, which is all the key_func touches.""" + + def __init__(self, client_host: str, headers: dict | None = None): + self.client = _FakeClient(client_host) if client_host is not None else None + self.headers = dict(headers or {}) + # slowapi's get_remote_address also tries request.client; we + # exercise both branches via the same shim. + + +# ───────────────────────── untrusted peers ────────────────────────────── + + +class TestUntrustedPeer: + def test_direct_loopback_uses_client_host(self, monkeypatch): + """Direct hit from 127.0.0.1 — no XFF — keys on the peer IP.""" + from limiter import shadowbroker_rate_limit_key + # Make sure the trusted-frontend cache resolves to nothing relevant. + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset()) + req = _FakeRequest("127.0.0.1") + assert shadowbroker_rate_limit_key(req) == "127.0.0.1" + + def test_xff_from_untrusted_peer_is_ignored(self, monkeypatch): + """A random caller sending X-Forwarded-For must NOT steal another + operator's bucket. The XFF is dropped on the floor.""" + from limiter import shadowbroker_rate_limit_key + # Trusted set deliberately does NOT include 1.2.3.4. + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + req = _FakeRequest("1.2.3.4", {"X-Forwarded-For": "9.9.9.9"}) + # Falls back to the peer IP, not 9.9.9.9. + assert shadowbroker_rate_limit_key(req) == "1.2.3.4" + + def test_unknown_host_with_xff_uses_peer_host(self, monkeypatch): + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset()) + req = _FakeRequest("10.0.0.5", {"X-Forwarded-For": "1.1.1.1"}) + assert shadowbroker_rate_limit_key(req) == "10.0.0.5" + + +# ───────────────────────── trusted frontend peers ─────────────────────── + + +class TestTrustedFrontendPeer: + def test_trusted_peer_with_xff_uses_first_xff_entry(self, monkeypatch): + """When the immediate peer is the trusted frontend container and + XFF carries the operator's chain, we key on the operator.""" + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "203.0.113.7"}) + assert shadowbroker_rate_limit_key(req) == "203.0.113.7" + + def test_first_xff_entry_picked_in_chain(self, monkeypatch): + """`client, proxy1, proxy2` → we pick the client, not the proxies. + Picking the last entry would mean every operator behind the same + upstream gets bucketed together, which is the bug we're fixing.""" + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + req = _FakeRequest( + "172.20.0.5", + {"X-Forwarded-For": "203.0.113.7, 198.51.100.1, 10.0.0.1"}, + ) + assert shadowbroker_rate_limit_key(req) == "203.0.113.7" + + def test_trusted_peer_without_xff_falls_back_to_peer(self, monkeypatch): + """If the trusted frontend forgot to forward XFF (legacy clients, + broken deploys), don't crash — bucket on the bridge IP exactly + like the pre-#287 behaviour.""" + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + req = _FakeRequest("172.20.0.5", headers={}) + assert shadowbroker_rate_limit_key(req) == "172.20.0.5" + + def test_trusted_peer_with_empty_xff_falls_back(self, monkeypatch): + """``X-Forwarded-For: , ,`` → no usable entries → falls back.""" + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": " , , "}) + assert shadowbroker_rate_limit_key(req) == "172.20.0.5" + + def test_xff_header_case_insensitive(self, monkeypatch): + """HTTP header names are case-insensitive — slowapi normalises + but our shim doesn't, so we explicitly check both forms.""" + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + req = _FakeRequest("172.20.0.5", {"x-forwarded-for": "203.0.113.7"}) + assert shadowbroker_rate_limit_key(req) == "203.0.113.7" + + +# ───────────────────────── isolation guarantees ───────────────────────── + + +class TestIsolation: + def test_two_operators_behind_same_proxy_get_different_keys(self, monkeypatch): + """The whole reason this fix exists — two operators behind the + SAME proxy must end up in DIFFERENT buckets.""" + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + op_a = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.1"}) + op_b = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.2"}) + key_a = shadowbroker_rate_limit_key(op_a) + key_b = shadowbroker_rate_limit_key(op_b) + assert key_a != key_b + assert key_a == "10.1.1.1" + assert key_b == "10.1.1.2" + + def test_no_xff_spoof_from_outside(self, monkeypatch): + """If we ever expose the backend port directly to the internet, + an attacker MUST NOT be able to steal another operator's bucket + by sending their own XFF header.""" + from limiter import shadowbroker_rate_limit_key + # Trusted set is the frontend container IP; the attacker is on a + # different (untrusted) IP and tries to spoof a victim's IP. + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"})) + attacker = _FakeRequest("203.0.113.66", {"X-Forwarded-For": "10.1.1.1"}) + victim_via_proxy = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.1"}) + assert shadowbroker_rate_limit_key(attacker) == "203.0.113.66" + assert shadowbroker_rate_limit_key(victim_via_proxy) == "10.1.1.1" + # The attacker burning their own bucket doesn't touch the victim's. + assert shadowbroker_rate_limit_key(attacker) != shadowbroker_rate_limit_key( + victim_via_proxy + ) + + def test_limiter_object_uses_proxy_aware_key(self): + """Smoke check that the module-level Limiter exports the new key + function rather than slowapi's default.""" + from limiter import limiter, shadowbroker_rate_limit_key + # slowapi stores it as ._key_func; we don't want to depend on + # that internal name, so just check the function is reachable. + assert callable(shadowbroker_rate_limit_key) + assert limiter is not None + + +# ───────────────────────── defensive corners ──────────────────────────── + + +class TestDefensive: + def test_no_client_object(self, monkeypatch): + """Some upstream middleware paths (websocket, ASGI lifespan) + produce requests with no ``client`` attribute — must not raise.""" + from limiter import shadowbroker_rate_limit_key + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset()) + + class _NoClient: + def __init__(self): + self.client = None + self.headers = {} + + # slowapi's get_remote_address returns "127.0.0.1" as a default + # in this case, so we just ensure no exception escapes. + result = shadowbroker_rate_limit_key(_NoClient()) + assert isinstance(result, str) + + def test_resolver_raises_is_treated_as_untrusted(self, monkeypatch): + """If DNS blows up inside the trusted-bridge resolver, we MUST + fall back to peer IP — never accept XFF blindly.""" + from limiter import shadowbroker_rate_limit_key + + def _explode(): + raise RuntimeError("DNS down") + + monkeypatch.setattr("auth._resolve_trusted_bridge_ips", _explode) + req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "9.9.9.9"}) + # XFF must be ignored when we can't confirm peer is trusted. + assert shadowbroker_rate_limit_key(req) == "172.20.0.5" From 19fb7f0b1ea831a195250c08a74229e8b46611c6 Mon Sep 17 00:00:00 2001 From: Shadowbroker <43977454+BigBodyCobain@users.noreply.github.com> Date: Fri, 22 May 2026 00:56:29 -0600 Subject: [PATCH 2/2] Fix #288: viewport-scoped live-data for heavy layers only (#294) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by @tg12 in the external security/correctness audit. Before this change, /api/live-data/{fast,slow} accepted s/w/n/e query params but their Query() descriptions explicitly said "(ignored)". The endpoints shipped the full in-memory world dataset on every poll: /api/live-data/fast → 16.88 MB /api/live-data/slow → 10.12 MB ── 27 MB per poll cycle, regardless of zoom For a node with N operators each polling at the steady 15s/120s cadence, this is hundreds of MB/minute of outbound traffic that never gets used — the GPU just culls everything outside the viewport client-side. On a Tor-bridged or LTE-backed node, that bandwidth bill is the actual cost. This change makes the existing s/w/n/e params honored — when all four bounds are supplied, the backend bbox-filters a curated set of heavy, density-driven, time-sensitive collections to that viewport (with the existing 20% padding from _bbox_filter): /fast: commercial_flights, military_flights, private_flights, private_jets, tracked_flights, ships, cctv, uavs, liveuamap, gps_jamming, sigint, trains /slow: gdelt, firms_fires, kiwisdr, scanners, psk_reporter Static reference layers (satellites, datacenters, military_bases, power_plants, satnogs, weather, news, stocks, etc.) deliberately STAY world-scale so panning never reveals an "empty world" of infrastructure. That preserves the no-hostile-UX feel of the existing dashboard. Behavior contract: * Without bbox params (or with a partial bbox), the response is byte-for-byte identical to the pre-#288 implementation. No behavior change for any existing caller that hasn't opted in. * World-scale bbox (lng_span >= 300 or lat_span >= 120) short-circuits filtering and shares the global ETag — zoomed-out operators all hit the same 304 cache exactly like before. * ETag now mixes a 1°-quantized bbox suffix when filtering engages, so two viewports never poison each other's 304 cache. Sub-degree pans land in the same ETag bucket (i.e. don't bust the cache on every mouse drag). Polling cadence, rate-limit windows, and the 304 short-circuit are all unchanged. Only the SIZE of the responses changes, and only when the caller opts in via bounds. Frontend wiring: useViewportBounds reuses the same coarsened/ expanded bounds it already computes for the AIS /api/viewport POST and pushes them into a new module-level liveDataViewport store. useDataPolling reads from that store via appendLiveDataBoundsParams when building each live-data URL. Tests cover: no-bbox → world data; bbox → heavy layers filtered; bbox → reference layers untouched; world-scale bbox → no filter; partial bbox → treated as no bbox; ETag changes with bbox; sub-degree pan → same ETag; 304 path works; antimeridian-crossing bbox handled. Co-authored-by: BigBodyCobain --- backend/routers/data.py | 115 +++++++- backend/tests/test_live_data_viewport_bbox.py | 273 ++++++++++++++++++ .../components/map/hooks/useViewportBounds.ts | 12 + frontend/src/hooks/useDataPolling.ts | 30 +- frontend/src/lib/liveDataViewport.ts | 84 ++++++ 5 files changed, 494 insertions(+), 20 deletions(-) create mode 100644 backend/tests/test_live_data_viewport_bbox.py create mode 100644 frontend/src/lib/liveDataViewport.ts diff --git a/backend/routers/data.py b/backend/routers/data.py index 8cfaff4..be883c4 100644 --- a/backend/routers/data.py +++ b/backend/routers/data.py @@ -98,6 +98,88 @@ def _current_etag(prefix: str = "") -> str: return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}" +# ── Issue #288: viewport-aware payloads ───────────────────────────────────── +# Heavy, density-driven, time-sensitive layers that benefit from bbox +# filtering. Light reference layers (datacenters, military_bases, +# power_plants, satellites, weather, news, etc.) are intentionally NOT +# in these sets — they ship world-scale even when bounds are supplied so +# panning never reveals an "empty world" of static infrastructure. +# +# When the caller does NOT pass s/w/n/e, none of this runs and the response +# is byte-for-byte identical to the pre-#288 behavior. +_FAST_BBOX_HEAVY_KEYS: tuple[str, ...] = ( + "commercial_flights", + "military_flights", + "private_flights", + "private_jets", + "tracked_flights", + "ships", + "cctv", + "uavs", + "liveuamap", + "gps_jamming", + "sigint", + "trains", +) +_SLOW_BBOX_HEAVY_KEYS: tuple[str, ...] = ( + "gdelt", + "firms_fires", + "kiwisdr", + "scanners", + "psk_reporter", +) + + +def _has_full_bbox(s, w, n, e) -> bool: + return None not in (s, w, n, e) + + +def _bbox_etag_suffix(s, w, n, e) -> str: + """Quantize bbox to 1° before mixing into the ETag. + + The 20% padding inside _bbox_filter already absorbs sub-degree pans; + quantizing here means small mouse drags don't blow the ETag cache + on the client. Full-world bounds collapse to a single suffix. + """ + if not _has_full_bbox(s, w, n, e): + return "" + try: + ss = math.floor(float(s)) + ww = math.floor(float(w)) + nn = math.ceil(float(n)) + ee = math.ceil(float(e)) + except (TypeError, ValueError): + return "" + # If the requested window covers basically the whole world, treat it as + # "no bbox" for caching purposes so world-zoomed clients all hit the + # same ETag and benefit from the existing 304 path. + lat_span, lng_span = _bbox_spans(s, w, n, e) + if lng_span >= 300 or lat_span >= 120: + return "" + return f"|bbox={ss},{ww},{nn},{ee}" + + +def _apply_bbox_to_payload(payload: dict, heavy_keys: tuple[str, ...], + s: float, w: float, n: float, e: float) -> dict: + """In-place filter the heavy-key collections in *payload* to a viewport. + + Items without lat/lng are passed through (so e.g. summary blobs aren't + accidentally dropped). The existing _bbox_filter helper applies a 20% + pad and handles antimeridian crossings. + """ + lat_span, lng_span = _bbox_spans(s, w, n, e) + # World-scale request → skip filtering entirely. Spares the CPU and + # guarantees the response matches the no-params shape. + if lng_span >= 300 or lat_span >= 120: + return payload + for key in heavy_keys: + items = payload.get(key) + if not isinstance(items, list) or not items: + continue + payload[key] = _bbox_filter(items, s, w, n, e) + return payload + + def _json_safe(value): if isinstance(value, float): return value if math.isfinite(value) else None @@ -479,13 +561,14 @@ async def bootstrap_critical(request: Request): @limiter.limit("120/minute") async def live_data_fast( request: Request, - s: float = Query(None, description="South bound (ignored)", ge=-90, le=90), - w: float = Query(None, description="West bound (ignored)", ge=-180, le=180), - n: float = Query(None, description="North bound (ignored)", ge=-90, le=90), - e: float = Query(None, description="East bound (ignored)", ge=-180, le=180), + s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (vessels, aircraft, sigint, CCTV, …) are filtered to this viewport with 20% padding. Static reference layers (satellites, etc.) always ship world-scale.", ge=-90, le=90), + w: float = Query(None, description="West bound (see s)", ge=-180, le=180), + n: float = Query(None, description="North bound (see s)", ge=-90, le=90), + e: float = Query(None, description="East bound (see s)", ge=-180, le=180), initial: bool = Query(False, description="Return a capped startup payload for first paint"), ): - etag = _current_etag(prefix="fast|initial|" if initial else "fast|full|") + bbox_suffix = _bbox_etag_suffix(s, w, n, e) + etag = _current_etag(prefix=("fast|initial|" if initial else "fast|full|") + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else "")) if request.headers.get("if-none-match") == etag: return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"}) from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot) @@ -525,6 +608,11 @@ async def live_data_fast( payload = _cap_fast_startup_payload(payload) else: payload = _cap_fast_dashboard_payload(payload) + # Issue #288: bbox filter heavy/dense layers only when all four bounds + # are supplied. Without bounds, behaviour is byte-for-byte identical + # to the pre-#288 implementation. + if _has_full_bbox(s, w, n, e): + payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e) return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json", headers={"ETag": etag, "Cache-Control": "no-cache"}) @@ -533,12 +621,13 @@ async def live_data_fast( @limiter.limit("60/minute") async def live_data_slow( request: Request, - s: float = Query(None, description="South bound (ignored)", ge=-90, le=90), - w: float = Query(None, description="West bound (ignored)", ge=-180, le=180), - n: float = Query(None, description="North bound (ignored)", ge=-90, le=90), - e: float = Query(None, description="East bound (ignored)", ge=-180, le=180), + s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (gdelt, firms_fires, kiwisdr, scanners, psk_reporter) are filtered to this viewport with 20% padding. Static reference layers (datacenters, military bases, power plants, weather, news, …) always ship world-scale.", ge=-90, le=90), + w: float = Query(None, description="West bound (see s)", ge=-180, le=180), + n: float = Query(None, description="North bound (see s)", ge=-90, le=90), + e: float = Query(None, description="East bound (see s)", ge=-180, le=180), ): - etag = _current_etag(prefix="slow|full|") + bbox_suffix = _bbox_etag_suffix(s, w, n, e) + etag = _current_etag(prefix="slow|full|" + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else "")) if request.headers.get("if-none-match") == etag: return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"}) from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot) @@ -592,6 +681,12 @@ async def live_data_slow( "crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [], "freshness": freshness, } + # Issue #288: bbox filter heavy/dense layers only when all four bounds + # are supplied. Static reference layers (datacenters, military bases, + # power_plants, etc.) deliberately stay world-scale so panning never + # hides the infrastructure overlay the operator already has on screen. + if _has_full_bbox(s, w, n, e): + payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e) return Response( content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS), media_type="application/json", diff --git a/backend/tests/test_live_data_viewport_bbox.py b/backend/tests/test_live_data_viewport_bbox.py new file mode 100644 index 0000000..2cf6ff8 --- /dev/null +++ b/backend/tests/test_live_data_viewport_bbox.py @@ -0,0 +1,273 @@ +"""Tests for issue #288: viewport bbox filtering on /api/live-data/{fast,slow}. + +Behaviour contract: + * Without s/w/n/e params, the response is byte-for-byte identical to the + pre-#288 implementation. (No filtering, no extra fields, no ETag change.) + * With s/w/n/e supplied, heavy/dense layers are filtered to that viewport + with a 20% padding box. + * Light reference layers (datacenters, military_bases, power_plants, + satellites, news, weather, …) are NEVER filtered, even when bounds are + supplied — panning must never reveal an "empty world" of infrastructure. + * World-scale bounds (lng_span >= 300 OR lat_span >= 120) short-circuit + filtering and share the global ETag. + * The ETag includes a 1°-quantized bbox so two viewports never poison each + other's 304 cache. +""" + +import pytest + + +# ───────────────────────── /api/live-data/fast ───────────────────────────── + + +class TestFastBboxFiltering: + def _seed_fast(self, monkeypatch): + """Plant deterministic heavy + light fixtures across the globe.""" + from services.fetchers import _store + + # Heavy collections: dense across the world. + commercial = [ + {"lat": -60.0, "lng": -120.0, "id": "f-sw"}, # south Pacific + {"lat": 35.0, "lng": -75.0, "id": "f-ne"}, # eastern US + {"lat": 35.0, "lng": 100.0, "id": "f-asia"}, # Asia + ] + ships = [ + {"lat": -60.0, "lng": -120.0, "id": "s-sw"}, + {"lat": 35.0, "lng": -75.0, "id": "s-ne"}, + ] + cctv = [{"lat": 35.0, "lng": -75.0, "id": "c-1"}] + + # Sigint heavy collection. + sigint = [ + {"source": "meshtastic", "lat": 35.0, "lng": -75.0, "id": "sig-east"}, + {"source": "meshtastic", "lat": 35.0, "lng": 100.0, "id": "sig-asia"}, + ] + + # Light/reference layer — must NEVER be filtered. + satellites = [ + {"lat": -60.0, "lng": -120.0, "id": "sat-sw"}, + {"lat": 35.0, "lng": -75.0, "id": "sat-ne"}, + {"lat": 35.0, "lng": 100.0, "id": "sat-asia"}, + ] + + monkeypatch.setitem(_store.latest_data, "commercial_flights", commercial) + monkeypatch.setitem(_store.latest_data, "ships", ships) + monkeypatch.setitem(_store.latest_data, "cctv", cctv) + monkeypatch.setitem(_store.latest_data, "sigint", sigint) + monkeypatch.setitem(_store.latest_data, "satellites", satellites) + # Ensure all layers are on so the response includes them. + for layer in ( + "flights", "ships_military", "ships_cargo", "ships_civilian", + "ships_passenger", "ships_tracked_yachts", "cctv", + "sigint_meshtastic", "sigint_aprs", "satellites", + ): + monkeypatch.setitem(_store.active_layers, layer, True) + + def test_no_bbox_returns_world_data(self, client, monkeypatch): + self._seed_fast(monkeypatch) + r = client.get("/api/live-data/fast") + assert r.status_code == 200 + data = r.json() + # All heavy fixtures pass through unchanged. + assert len(data["commercial_flights"]) == 3 + assert len(data["ships"]) == 2 + assert len(data["sigint"]) == 2 + # Light layer also full. + assert len(data["satellites"]) == 3 + + def test_bbox_filters_heavy_layers(self, client, monkeypatch): + self._seed_fast(monkeypatch) + # Box tightly around the eastern-US fixture (lat 35, lng -75). + # ±5° → after 20% padding inside _bbox_filter, ~±6° window. + r = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70") + assert r.status_code == 200 + data = r.json() + # Heavy layers: only the eastern-US fixture survives. + assert {f["id"] for f in data["commercial_flights"]} == {"f-ne"} + assert {s["id"] for s in data["ships"]} == {"s-ne"} + assert {c["id"] for c in data["cctv"]} == {"c-1"} + assert {s["id"] for s in data["sigint"]} == {"sig-east"} + + def test_bbox_does_not_filter_light_layers(self, client, monkeypatch): + self._seed_fast(monkeypatch) + r = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70") + assert r.status_code == 200 + data = r.json() + # Satellites are a reference layer — must NOT be bbox-filtered. + assert len(data["satellites"]) == 3 + + def test_world_scale_bbox_skips_filtering(self, client, monkeypatch): + self._seed_fast(monkeypatch) + # lng_span = 360 → treated as world-scale; same as no bbox. + r = client.get("/api/live-data/fast?s=-90&w=-180&n=90&e=180") + assert r.status_code == 200 + data = r.json() + assert len(data["commercial_flights"]) == 3 + assert len(data["ships"]) == 2 + + def test_partial_bbox_is_treated_as_no_bbox(self, client, monkeypatch): + self._seed_fast(monkeypatch) + # Only three of four bounds → filtering must NOT engage. + r = client.get("/api/live-data/fast?s=30&w=-80&n=40") + assert r.status_code == 200 + data = r.json() + assert len(data["commercial_flights"]) == 3 + + def test_etag_changes_with_bbox(self, client, monkeypatch): + self._seed_fast(monkeypatch) + r_world = client.get("/api/live-data/fast") + r_local = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70") + assert r_world.status_code == 200 + assert r_local.status_code == 200 + etag_world = r_world.headers.get("etag") + etag_local = r_local.headers.get("etag") + assert etag_world and etag_local + assert etag_world != etag_local, ( + "ETag must differ between world and regional bbox to prevent " + "304 cache poisoning across viewports" + ) + + def test_etag_stable_for_subdegree_pan(self, client, monkeypatch): + self._seed_fast(monkeypatch) + # Sub-degree pan should land in the same 1°-quantized bucket. + r_a = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70") + r_b = client.get("/api/live-data/fast?s=30.3&w=-79.8&n=39.7&e=-70.4") + assert r_a.headers.get("etag") == r_b.headers.get("etag") + + def test_if_none_match_returns_304_for_same_bbox(self, client, monkeypatch): + self._seed_fast(monkeypatch) + r1 = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70") + etag = r1.headers.get("etag") + r2 = client.get( + "/api/live-data/fast?s=30&w=-80&n=40&e=-70", + headers={"If-None-Match": etag}, + ) + assert r2.status_code == 304 + + +# ───────────────────────── /api/live-data/slow ───────────────────────────── + + +class TestSlowBboxFiltering: + def _seed_slow(self, monkeypatch): + from services.fetchers import _store + + # Heavy collections. + gdelt = [ + {"lat": 35.0, "lng": -75.0, "id": "g-east"}, + {"lat": 35.0, "lng": 100.0, "id": "g-asia"}, + ] + firms_fires = [ + {"lat": 35.0, "lng": -75.0, "id": "fire-east"}, + {"lat": -10.0, "lng": 120.0, "id": "fire-ido"}, + ] + # Light/reference layers — must always ship in full. + datacenters = [ + {"lat": 35.0, "lng": -75.0, "id": "dc-east"}, + {"lat": 35.0, "lng": 100.0, "id": "dc-asia"}, + {"lat": -10.0, "lng": 120.0, "id": "dc-ido"}, + ] + military_bases = [ + {"lat": 35.0, "lng": -75.0, "id": "mb-east"}, + {"lat": -10.0, "lng": 120.0, "id": "mb-ido"}, + ] + power_plants = [ + {"lat": 35.0, "lng": -75.0, "id": "pp-east"}, + {"lat": 35.0, "lng": 100.0, "id": "pp-asia"}, + ] + + monkeypatch.setitem(_store.latest_data, "gdelt", gdelt) + monkeypatch.setitem(_store.latest_data, "firms_fires", firms_fires) + monkeypatch.setitem(_store.latest_data, "datacenters", datacenters) + monkeypatch.setitem(_store.latest_data, "military_bases", military_bases) + monkeypatch.setitem(_store.latest_data, "power_plants", power_plants) + for layer in ( + "global_incidents", "firms", "datacenters", "military_bases", "power_plants", + ): + monkeypatch.setitem(_store.active_layers, layer, True) + + def test_no_bbox_returns_world_data(self, client, monkeypatch): + self._seed_slow(monkeypatch) + r = client.get("/api/live-data/slow") + assert r.status_code == 200 + data = r.json() + assert len(data["gdelt"]) == 2 + assert len(data["firms_fires"]) == 2 + assert len(data["datacenters"]) == 3 + + def test_bbox_filters_heavy_layers(self, client, monkeypatch): + self._seed_slow(monkeypatch) + r = client.get("/api/live-data/slow?s=30&w=-80&n=40&e=-70") + assert r.status_code == 200 + data = r.json() + assert {g["id"] for g in data["gdelt"]} == {"g-east"} + assert {f["id"] for f in data["firms_fires"]} == {"fire-east"} + + def test_bbox_leaves_reference_layers_untouched(self, client, monkeypatch): + """Datacenters, bases, and power plants are infrastructure overlays — + they must remain world-scale so panning never hides them.""" + self._seed_slow(monkeypatch) + r = client.get("/api/live-data/slow?s=30&w=-80&n=40&e=-70") + assert r.status_code == 200 + data = r.json() + assert len(data["datacenters"]) == 3 + assert len(data["military_bases"]) == 2 + assert len(data["power_plants"]) == 2 + + def test_antimeridian_bbox(self, client, monkeypatch): + from services.fetchers import _store + # Box that straddles the antimeridian (Pacific): w=170, e=-170. + gdelt = [ + {"lat": 0.0, "lng": 175.0, "id": "in-west"}, + {"lat": 0.0, "lng": -175.0, "id": "in-east"}, + {"lat": 0.0, "lng": 0.0, "id": "out-mid"}, + ] + monkeypatch.setitem(_store.latest_data, "gdelt", gdelt) + monkeypatch.setitem(_store.active_layers, "global_incidents", True) + r = client.get("/api/live-data/slow?s=-10&w=170&n=10&e=-170") + assert r.status_code == 200 + data = r.json() + ids = {g["id"] for g in data["gdelt"]} + assert "in-west" in ids + assert "in-east" in ids + assert "out-mid" not in ids + + +# ─────────────────── Direct helper coverage (defensive) ───────────────────── + + +class TestHelpers: + def test_has_full_bbox(self): + from routers.data import _has_full_bbox + assert _has_full_bbox(1, 2, 3, 4) + assert not _has_full_bbox(None, 2, 3, 4) + assert not _has_full_bbox(1, None, 3, 4) + assert not _has_full_bbox(1, 2, None, 4) + assert not _has_full_bbox(1, 2, 3, None) + + def test_bbox_etag_suffix_quantizes(self): + from routers.data import _bbox_etag_suffix + a = _bbox_etag_suffix(30.1, -79.6, 39.9, -70.1) + b = _bbox_etag_suffix(30.4, -79.2, 39.4, -70.8) + assert a == b, "Sub-degree pan must collapse to the same ETag suffix" + assert a.startswith("|bbox=") + + def test_bbox_etag_suffix_world_collapses(self): + from routers.data import _bbox_etag_suffix + # World-scale → empty suffix (shares the global ETag). + assert _bbox_etag_suffix(-90, -180, 90, 180) == "" + + def test_bbox_etag_suffix_partial_is_empty(self): + from routers.data import _bbox_etag_suffix + assert _bbox_etag_suffix(None, -180, 90, 180) == "" + + def test_apply_bbox_preserves_non_list_values(self): + from routers.data import _apply_bbox_to_payload, _FAST_BBOX_HEAVY_KEYS + payload = { + "commercial_flights": [{"lat": 35, "lng": -75, "id": "x"}], + "satellite_source": "tle", # not a list, must pass through + "sigint_totals": {"total": 1}, # dict — must pass through + } + out = _apply_bbox_to_payload(dict(payload), _FAST_BBOX_HEAVY_KEYS, 30, -80, 40, -70) + assert out["satellite_source"] == "tle" + assert out["sigint_totals"] == {"total": 1} diff --git a/frontend/src/components/map/hooks/useViewportBounds.ts b/frontend/src/components/map/hooks/useViewportBounds.ts index 3b8d699..2175507 100644 --- a/frontend/src/components/map/hooks/useViewportBounds.ts +++ b/frontend/src/components/map/hooks/useViewportBounds.ts @@ -8,6 +8,7 @@ import { normalizeViewBounds, type ViewBounds, } from '@/lib/viewportPrivacy'; +import { setLiveDataBounds } from '@/lib/liveDataViewport'; const VIEWPORT_POST_DEBOUNCE_MS = 2500; const VIEWPORT_POST_MIN_INTERVAL_MS = 12000; @@ -70,6 +71,17 @@ export function useViewportBounds( window.dispatchEvent(new CustomEvent(VIEWPORT_COMMITTED_EVENT)); } + // Issue #288: hand the same coarsened/expanded bounds to the live-data + // poller so heavy collections in /api/live-data/{fast,slow} can be + // scoped to the visible region. Static reference layers are unaffected + // — see backend _FAST_BBOX_HEAVY_KEYS / _SLOW_BBOX_HEAVY_KEYS. + setLiveDataBounds({ + south: preloadBounds.south, + west: preloadBounds.west, + north: preloadBounds.north, + east: preloadBounds.east, + }); + // Debounce POSTing viewport bounds to backend for dynamic AIS stream filtering if (debounceTimerRef.current) clearTimeout(debounceTimerRef.current); debounceTimerRef.current = setTimeout(() => { diff --git a/frontend/src/hooks/useDataPolling.ts b/frontend/src/hooks/useDataPolling.ts index 329d1db..a887508 100644 --- a/frontend/src/hooks/useDataPolling.ts +++ b/frontend/src/hooks/useDataPolling.ts @@ -1,6 +1,7 @@ import { useEffect, useRef } from "react"; import { API_BASE } from "@/lib/api"; import { mergeData, setBackendStatus as setStoreBackendStatus } from "./useDataStore"; +import { appendLiveDataBoundsParams } from "@/lib/liveDataViewport"; export type BackendStatus = 'connecting' | 'connected' | 'disconnected'; @@ -32,8 +33,8 @@ export async function forceRefreshLiveData(): Promise { try { const [fastRes, slowRes] = await Promise.all([ - fetch(`${API_BASE}/api/live-data/fast`), - fetch(`${API_BASE}/api/live-data/slow`), + fetch(appendLiveDataBoundsParams(`${API_BASE}/api/live-data/fast`)), + fetch(appendLiveDataBoundsParams(`${API_BASE}/api/live-data/slow`)), ]); if (fastRes.ok) { @@ -85,9 +86,13 @@ export const LAYER_TOGGLE_EVENT = 'sb:layer-toggle'; /** * Polls the backend for fast and slow data tiers. * - * All data is fetched globally (no bbox filtering) — the backend returns its - * full in-memory cache and MapLibre culls off-screen entities on the GPU. - * This eliminates the "empty map when zooming out" lag. + * Issue #288: heavy, density-driven layers (vessels, aircraft, gdelt + * events, fires, sigint, …) are bbox-scoped to the visible map area via + * `appendLiveDataBoundsParams`. Static reference layers (datacenters, + * military bases, power plants, satellites, weather, news, …) are NOT + * filtered backend-side, so panning never reveals an "empty world" of + * infrastructure. World-zoomed views skip bbox params entirely and hit + * the shared ETag cache exactly like the pre-#288 behaviour. * * The AIS stream viewport POST (/api/viewport) is still handled separately * by useViewportBounds to limit upstream AIS ingestion. @@ -147,7 +152,9 @@ export function useDataPolling() { const useStartupPayload = !fetchedStartupFastPayload && !fastEtag.current; const headers: Record = {}; if (!useStartupPayload && fastEtag.current) headers['If-None-Match'] = fastEtag.current; - const url = `${API_BASE}/api/live-data/fast${useStartupPayload ? '?initial=1' : ''}`; + const url = appendLiveDataBoundsParams( + `${API_BASE}/api/live-data/fast${useStartupPayload ? '?initial=1' : ''}`, + ); const res = await fetch(url, { headers, signal: controller.signal, @@ -193,10 +200,13 @@ export function useDataPolling() { try { const headers: Record = {}; if (slowEtag.current) headers['If-None-Match'] = slowEtag.current; - const res = await fetch(`${API_BASE}/api/live-data/slow`, { - headers, - signal: controller.signal, - }); + const res = await fetch( + appendLiveDataBoundsParams(`${API_BASE}/api/live-data/slow`), + { + headers, + signal: controller.signal, + }, + ); if (res.status === 304) { scheduleNext('slow'); return; } if (res.ok) { slowEtag.current = res.headers.get('etag') || null; diff --git a/frontend/src/lib/liveDataViewport.ts b/frontend/src/lib/liveDataViewport.ts new file mode 100644 index 0000000..b3e71e6 --- /dev/null +++ b/frontend/src/lib/liveDataViewport.ts @@ -0,0 +1,84 @@ +/** + * Shared module-level state for the current map viewport bounds, used by + * `useDataPolling` to scope `/api/live-data/{fast,slow}` to the visible + * area when the user has zoomed in. + * + * Issue #288: the backend now bbox-filters dense layers (vessels, aircraft, + * gdelt events, fires, sigint, …) when all four bounds are supplied. Light + * reference layers stay world-scale. Heavy collections aren't sent over the + * wire for parts of the planet the operator isn't looking at, which cuts + * the steady-state poll from ~27 MB to ~5 MB for a typical regional view. + * + * No bounds set → callers omit the params entirely → backend ships full + * world data (byte-identical to pre-#288 behaviour). This keeps the cold + * boot path (where no map is mounted yet) and the world-zoomed view + * unchanged. + */ + +export interface LiveDataBounds { + south: number; + west: number; + north: number; + east: number; +} + +let _current: LiveDataBounds | null = null; + +/** True when lng_span ≥ 300 OR lat_span ≥ 120. Backend treats these as + * world-scale and skips filtering — so the frontend doesn't bother sending + * bounds at all, which keeps the ETag cache shared across operators in the + * zoomed-out case. */ +function isEffectivelyWorld(bounds: LiveDataBounds): boolean { + const latSpan = Math.max(0, bounds.north - bounds.south); + let lngSpan = bounds.east - bounds.west; + if (lngSpan < 0) lngSpan += 360; + return lngSpan >= 300 || latSpan >= 120; +} + +/** Push the latest committed bounds. Called from `useViewportBounds` + * whenever the map's bounds change enough to matter. Pass `null` to + * fall back to world-scale fetching (e.g. on unmount). */ +export function setLiveDataBounds(bounds: LiveDataBounds | null): void { + if (bounds === null) { + _current = null; + return; + } + if ( + !Number.isFinite(bounds.south) || + !Number.isFinite(bounds.west) || + !Number.isFinite(bounds.north) || + !Number.isFinite(bounds.east) + ) { + _current = null; + return; + } + if (isEffectivelyWorld(bounds)) { + // World-zoomed → fetch globally, share the ETag cache across operators. + _current = null; + return; + } + _current = bounds; +} + +/** Read the current bounds, or `null` if the caller should fetch the full + * world payload. Reader contract: must tolerate `null` and call without + * bbox params in that case. */ +export function getLiveDataBounds(): LiveDataBounds | null { + return _current; +} + +/** Append `s/w/n/e` query params to a URL when bounds are set, otherwise + * return the URL unchanged. Centralised so all live-data callers stay in + * sync about quantization and the world-scale skip rule. */ +export function appendLiveDataBoundsParams(url: string): string { + const b = _current; + if (!b) return url; + const sep = url.includes('?') ? '&' : '?'; + // Match backend ETag quantization (1° floor/ceil) so the client and + // server agree on which bounds round to the same cache key. + const s = Math.floor(b.south); + const w = Math.floor(b.west); + const n = Math.ceil(b.north); + const e = Math.ceil(b.east); + return `${url}${sep}s=${s}&w=${w}&n=${n}&e=${e}`; +}