From f14d4feb6d60dfd19e129afceef8a8a9dd1c9bb4 Mon Sep 17 00:00:00 2001 From: BigBodyCobain <43977454+BigBodyCobain@users.noreply.github.com> Date: Sat, 23 May 2026 06:14:39 -0600 Subject: [PATCH] feat(flights): stamp source attribution on every flight record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-fix, adsb.lol records (the primary source for most flights) carried no source marker. OpenSky records got is_opensky: True and supplementals got supplemental_source, so any UI inspecting source labels saw OpenSky/airplanes.live records as explicitly tagged and adsb.lol records as "unlabeled" — making it look like adsb.lol wasn't being used at all even though it's the primary source. Changes: * _fetch_adsb_lol_regions stamps source="adsb.lol" on each aircraft before returning, so the tag survives the OpenSky dedupe-by-hex merge. * OpenSky records get source="OpenSky" (alongside is_opensky=True for back-compat). * military fetcher tags source on both adsb.lol and airplanes.live records before they're merged, and propagates source into the military_flights and uavs output dicts. * _classify_and_publish promotes the explicit source field into the published flight dict. Falls back to legacy supplemental_source if source is absent. Final fallback "adsb.lol" preserves prior behavior for any caller synthesizing records without going through a fetcher. 8 new tests cover the published-dict propagation, OpenSky tagging, supplemental fallback, explicit-wins precedence, default behavior, the adsb.lol regional fetcher tagging, and the military output dict. 
Co-Authored-By: Claude Opus 4.7 --- backend/services/fetchers/flights.py | 24 +- backend/services/fetchers/military.py | 6 +- .../tests/test_flight_source_attribution.py | 354 ++++++++++++++++++ 3 files changed, 382 insertions(+), 2 deletions(-) create mode 100644 backend/tests/test_flight_source_attribution.py diff --git a/backend/services/fetchers/flights.py b/backend/services/fetchers/flights.py index c55c61d..946926d 100644 --- a/backend/services/fetchers/flights.py +++ b/backend/services/fetchers/flights.py @@ -459,6 +459,18 @@ def _classify_and_publish(all_adsb_flights): ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane" + # Source attribution: prefer the explicit ``source`` tag stamped + # at fetch time (adsb.lol, OpenSky). If absent, fall back to the + # legacy ``supplemental_source`` (airplanes.live, adsb.fi) so + # supplementals are still attributed without changing their + # tagger. Final fallback "adsb.lol" preserves prior behavior for + # any caller that synthesizes records without going through one + # of our fetchers (e.g. tests). + source = ( + f.get("source") + or f.get("supplemental_source") + or "adsb.lol" + ) flights.append( { "callsign": flight_str, @@ -480,6 +492,7 @@ def _classify_and_publish(all_adsb_flights): "airline_code": airline_code, "aircraft_category": ac_category, "nac_p": f.get("nac_p"), + "source": source, } ) except (ValueError, TypeError, KeyError, AttributeError) as loop_e: @@ -849,7 +862,15 @@ def _fetch_adsb_lol_regions(): res = fetch_with_curl(url, timeout=10) if res.status_code == 200: data = res.json() - return data.get("ac", []) + aircraft = data.get("ac", []) + # Stamp the source at the fetch site so attribution survives + # the OpenSky/supplemental dedupe-by-hex merge downstream. + # Previously adsb.lol records carried no marker while OpenSky + # records got ``is_opensky: True`` — which made flight tooltips + # look like everything came from OpenSky. 
+ for a in aircraft: + a["source"] = "adsb.lol" + return aircraft except ( requests.RequestException, ConnectionError, @@ -932,6 +953,7 @@ def _enrich_with_opensky_and_supplemental(adsb_flights): "gs": (s[9] * 1.94384) if s[9] else 0, "t": "Unknown", "is_opensky": True, + "source": "OpenSky", } ) elif os_res.status_code == 429: diff --git a/backend/services/fetchers/military.py b/backend/services/fetchers/military.py index b9816d3..6ec8b68 100644 --- a/backend/services/fetchers/military.py +++ b/backend/services/fetchers/military.py @@ -171,6 +171,7 @@ def fetch_military_flights(): h = a.get("hex", "").lower() if h and h not in seen_hex: seen_hex.add(h) + a["source"] = "adsb.lol" all_mil_ac.append(a) except Exception as e: logger.warning(f"adsb.lol mil fetch failed: {e}") @@ -182,6 +183,7 @@ def fetch_military_flights(): h = a.get("hex", "").lower() if h and h not in seen_hex: seen_hex.add(h) + a["source"] = "airplanes.live" all_mil_ac.append(a) logger.info(f"airplanes.live mil: +{len(resp2.json().get('ac', []))} raw, {len(all_mil_ac)} total unique") except Exception as e: @@ -234,6 +236,7 @@ def fetch_military_flights(): "registration": f.get("r", "N/A"), "icao24": icao_hex, "squawk": f.get("squawk", ""), + "source": f.get("source") or "adsb.lol", }) continue @@ -258,7 +261,8 @@ def fetch_military_flights(): "model": f.get("t", "Unknown"), "icao24": icao_hex, "speed_knots": speed_knots, - "squawk": f.get("squawk", "") + "squawk": f.get("squawk", ""), + "source": f.get("source") or "adsb.lol", }) except Exception as loop_e: logger.error(f"Mil flight interpolation error: {loop_e}") diff --git a/backend/tests/test_flight_source_attribution.py b/backend/tests/test_flight_source_attribution.py new file mode 100644 index 0000000..81a733a --- /dev/null +++ b/backend/tests/test_flight_source_attribution.py @@ -0,0 +1,354 @@ +"""Per-flight source attribution. 
+ +Background +---------- +Pre-fix, adsb.lol records (the primary source for most flights) carried +no source marker. OpenSky records got ``is_opensky: True`` and +supplementals got ``supplemental_source``, so any UI that wanted to show +which provider a flight came from saw OpenSky/airplanes.live records as +explicitly tagged and adsb.lol records as "unlabeled" — making it look +like adsb.lol wasn't even being used. + +This caused user confusion ("only military planes have adsb.lol +telemetry") that was diagnostic noise, not a real bug. The actual fix: +stamp ``source`` at every fetch site so the downstream consumer can +attribute the provider with no guesswork. + +These tests pin: + + * adsb.lol regional records get ``source: "adsb.lol"`` at fetch time + (synthesized via the published flight dict). + * OpenSky records get ``source: "OpenSky"`` (alongside the existing + ``is_opensky: True`` for backwards compat). + * Supplementals (airplanes.live, adsb.fi) flow through with their + ``supplemental_source`` honored. + * The military fetcher tags ``source`` on military_flights (uavs untested). + * The published flight dict carries ``source`` so downstream code + can render attribution. 
+""" + +from __future__ import annotations + +import pytest + + +# --------------------------------------------------------------------------- +# _classify_and_publish — source field flows into published flight dict +# --------------------------------------------------------------------------- + + +class TestClassifyAndPublishSource: + def _reset_store(self): + """Clear store before each test so we get deterministic state.""" + from services.fetchers._store import latest_data, _data_lock + with _data_lock: + for key in ( + "flights", "commercial_flights", "private_flights", + "private_jets", "military_flights", "tracked_flights", + ): + latest_data[key] = [] + return latest_data + + def test_adsb_lol_record_tagged_in_published_flight(self, monkeypatch): + """A raw adsb.lol record (carrying ``source: 'adsb.lol'`` from the + fetch site) flows through ``_classify_and_publish`` and the + published flight dict carries the same ``source`` field.""" + from services.fetchers import flights as flights_module + from services.fetchers._store import latest_data, _data_lock + + self._reset_store() + + # Patch route + type lookups so they don't try to hit the network. + monkeypatch.setattr(flights_module, "lookup_route", lambda _: None) + monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "") + + flights_module._classify_and_publish( + [ + { + "hex": "ad7701", + "flight": "JBU711", + "r": "N967JT", + "t": "A321", + "lat": 40.0, + "lon": -100.0, + "alt_baro": 36000, + "gs": 401.6, + "nac_p": 9, + "source": "adsb.lol", # stamped at fetch site + } + ] + ) + + with _data_lock: + published = list(latest_data.get("flights", [])) + assert len(published) == 1 + assert published[0]["source"] == "adsb.lol" + # nac_p still flows through too — sanity check that adding source + # didn't break the existing GPS jamming signal. 
+ assert published[0]["nac_p"] == 9 + + def test_opensky_record_tagged_in_published_flight(self, monkeypatch): + """OpenSky-sourced records carry ``source: 'OpenSky'`` (plus the + existing ``is_opensky: True`` for back-compat).""" + from services.fetchers import flights as flights_module + from services.fetchers._store import latest_data, _data_lock + + self._reset_store() + monkeypatch.setattr(flights_module, "lookup_route", lambda _: None) + monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "") + + flights_module._classify_and_publish( + [ + { + "hex": "a12345", + "flight": "UAL100", + "r": "N100UA", + "t": "Unknown", + "lat": 41.0, + "lon": -87.0, + "alt_baro": 35000, + "gs": 450, + # No nac_p — OpenSky doesn't carry it. + "is_opensky": True, + "source": "OpenSky", + } + ] + ) + + with _data_lock: + published = list(latest_data.get("flights", [])) + assert len(published) == 1 + assert published[0]["source"] == "OpenSky" + + def test_supplemental_source_propagates(self, monkeypatch): + """Supplemental records (airplanes.live, adsb.fi) have their + legacy ``supplemental_source`` field promoted to the unified + ``source`` field in the published dict — so consumers don't have + to inspect two different keys.""" + from services.fetchers import flights as flights_module + from services.fetchers._store import latest_data, _data_lock + + self._reset_store() + monkeypatch.setattr(flights_module, "lookup_route", lambda _: None) + monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "") + + flights_module._classify_and_publish( + [ + { + "hex": "b22222", + "flight": "DAL200", + "r": "N200DL", + "t": "B738", + "lat": 42.0, + "lon": -90.0, + "alt_baro": 32000, + "gs": 420, + "supplemental_source": "airplanes.live", + # No explicit "source" — should fall through to + # supplemental_source. 
+ } + ] + ) + + with _data_lock: + published = list(latest_data.get("flights", [])) + assert len(published) == 1 + assert published[0]["source"] == "airplanes.live" + + def test_explicit_source_wins_over_supplemental_source(self, monkeypatch): + """If both fields are present, explicit ``source`` wins (it's the + newer canonical tag).""" + from services.fetchers import flights as flights_module + from services.fetchers._store import latest_data, _data_lock + + self._reset_store() + monkeypatch.setattr(flights_module, "lookup_route", lambda _: None) + monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "") + + flights_module._classify_and_publish( + [ + { + "hex": "c33333", + "flight": "AAL300", + "r": "N300AA", + "t": "A321", + "lat": 33.0, + "lon": -97.0, + "alt_baro": 34000, + "gs": 430, + "source": "adsb.lol", + "supplemental_source": "adsb.fi", + } + ] + ) + + with _data_lock: + published = list(latest_data.get("flights", [])) + assert published[0]["source"] == "adsb.lol" + + def test_untagged_record_defaults_to_adsb_lol(self, monkeypatch): + """A record with neither ``source`` nor ``supplemental_source`` + (e.g. synthesized by a test, or a fetcher that hasn't been + migrated yet) defaults to ``"adsb.lol"`` since that's been the + primary source historically. 
Defensive default — better than + empty string.""" + from services.fetchers import flights as flights_module + from services.fetchers._store import latest_data, _data_lock + + self._reset_store() + monkeypatch.setattr(flights_module, "lookup_route", lambda _: None) + monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "") + + flights_module._classify_and_publish( + [ + { + "hex": "d44444", + "flight": "SWA400", + "r": "N400SW", + "t": "B737", + "lat": 32.0, + "lon": -110.0, + "alt_baro": 30000, + "gs": 410, + } + ] + ) + + with _data_lock: + published = list(latest_data.get("flights", [])) + assert published[0]["source"] == "adsb.lol" + + +# --------------------------------------------------------------------------- +# adsb.lol regional fetcher tags at fetch time +# --------------------------------------------------------------------------- + + +class TestAdsbLolRegionalTagging: + def test_fetch_region_stamps_source_on_each_aircraft(self, monkeypatch): + """The wrapper around the adsb.lol regional endpoint stamps + ``source: 'adsb.lol'`` on every record before returning, so the + downstream merge step sees attribution survive even when the + record gets reshuffled (e.g. dedupe-by-hex during OpenSky merge).""" + from services.fetchers import flights as flights_module + + # Fake response — 3 aircraft, none have a source field originally. + class FakeResp: + status_code = 200 + + def json(self): + return { + "ac": [ + {"hex": "a1", "lat": 40.0, "lon": -100.0, "nac_p": 8}, + {"hex": "a2", "lat": 40.1, "lon": -100.1, "nac_p": 9}, + {"hex": "a3", "lat": 40.2, "lon": -100.2, "nac_p": 10}, + ] + } + + monkeypatch.setattr( + flights_module, "fetch_with_curl", lambda *a, **kw: FakeResp() + ) + + results = flights_module._fetch_adsb_lol_regions() + + assert len(results) >= 3 + # Every aircraft we got back must be tagged. 
+ sources = {a.get("source") for a in results} + assert sources == {"adsb.lol"}, ( + f"adsb.lol regional fetcher must stamp source on every record; " + f"got: {sources}" + ) + + def test_fetch_region_failure_returns_empty_without_crashing(self, monkeypatch): + """If adsb.lol returns non-200, the fetcher returns [] gracefully — + downstream code already handles this. Sanity check that the source + tagging doesn't introduce a new failure mode.""" + from services.fetchers import flights as flights_module + + class FakeResp: + status_code = 500 + def json(self): return {} + + monkeypatch.setattr( + flights_module, "fetch_with_curl", lambda *a, **kw: FakeResp() + ) + + results = flights_module._fetch_adsb_lol_regions() + + assert results == [] + + +# --------------------------------------------------------------------------- +# Military fetcher tags source on output dicts +# --------------------------------------------------------------------------- + + +class TestMilitarySourceTagging: + def test_military_output_carries_source_field(self, monkeypatch): + """Each entry in ``military_flights`` should carry a ``source`` + field. Pre-fix the only military attribution was inferring from + which endpoint we hit; now it's explicit.""" + from services.fetchers import military as mil_module + from services.fetchers._store import latest_data, _data_lock + + # Reset relevant store state. + with _data_lock: + latest_data["military_flights"] = [] + latest_data["uavs"] = [] + latest_data["tracked_flights"] = [] + + # Stub _store.is_any_active so the fetch doesn't early-return. + # The military module imports the function inline at call time, + # so we have to patch it on the _store module itself rather than + # on the military module. + from services.fetchers import _store as store_module + monkeypatch.setattr(store_module, "is_any_active", lambda *_: True) + + # Stub fetch_with_curl to return one synthetic military aircraft + # from adsb.lol, none from airplanes.live. 
+ class _RespMil: + status_code = 200 + def json(self): + return { + "ac": [ + { + "hex": "ae6c1d", + "flight": "CRUSH52", + "r": "170281", + "t": "C30J", + "lat": 47.594, + "lon": -124.879, + "alt_baro": 9025, + "gs": 162.8, + "track": 334.5, + "nac_p": 10, + } + ] + } + + class _RespEmpty: + status_code = 200 + def json(self): + return {"ac": []} + + def _fake_fetch(url, *a, **kw): + if "adsb.lol" in url: + return _RespMil() + return _RespEmpty() + + monkeypatch.setattr(mil_module, "fetch_with_curl", _fake_fetch) + # Stubs for downstream enrichments that try to hit external state. + monkeypatch.setattr(mil_module, "enrich_with_plane_alert", lambda mf: None) + monkeypatch.setattr(mil_module, "_enrich_country", lambda hex_, flag: ("US", "USAF")) + monkeypatch.setattr(mil_module, "_classify_military_type", lambda t: "transport") + monkeypatch.setattr(mil_module, "_classify_uav", lambda m, c: (False, "", "")) + monkeypatch.setattr(mil_module, "get_emissions_info", lambda model: None) + monkeypatch.setattr(mil_module, "_mark_fresh", lambda *keys: None) + + mil_module.fetch_military_flights() + + with _data_lock: + mil_published = list(latest_data.get("military_flights", [])) + + assert len(mil_published) == 1 + assert mil_published[0]["source"] == "adsb.lol"