Merge pull request #315 from BigBodyCobain/feat/aishub-fallback

feat(ais): AISHub REST fallback when AISStream is offline (20-min polling)
feat(ais): AISHub REST fallback when AISStream WebSocket is offline
2026-06-03 12:58:11 +02:00 · 2026-05-23 07:12:46 -06:00 · 2026-05-23 07:00:32 -06:00 · 2026-05-23 06:38:05 -06:00
12 changed files with 1138 additions and 308 deletions
@@ -11,6 +11,13 @@ AIS_API_KEY=              # https://aisstream.io/ — free tier WebSocket key

 # ── Optional ───────────────────────────────────────────────────

+# AISHub REST fallback. Used when stream.aisstream.io is unreachable
+# (e.g. their cert expires or server goes offline). Free tier requires
+# registration at https://www.aishub.net/api. Poll cadence defaults to
+# 20 min to stay courteous; tunable via AISHUB_POLL_INTERVAL_MINUTES.
+# AISHUB_USERNAME=
+# AISHUB_POLL_INTERVAL_MINUTES=20
+
 # Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
 # CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com

@@ -59,6 +59,12 @@ async def health_check(request: Request):
    # when the SPKI-pinned fallback is in effect. The data plane keeps
    # flowing (this is by design — see ais_proxy.js comments) but observers
    # who care about MITM-protection posture deserve a visible signal.
+    #
+    # Plus connectivity health (added 2026-05-23 when stream.aisstream.io
+    # went fully offline): ``connected`` tells the frontend whether ship
+    # data is actually flowing. When false, a banner explains that ships
+    # are unavailable due to an upstream outage — better than the user
+    # silently seeing an empty ocean and assuming we broke something.
    ais_status: dict = {}
    try:
        from services.ais_stream import ais_proxy_status
@@ -69,6 +75,15 @@ async def health_check(request: Request):
        # Don't override a worse top-level status if SLOs already failed,
        # but escalate ok -> degraded so the field surfaces in dashboards.
        top_status = "degraded"
+    # AIS_API_KEY not configured is "feature off", not "system broken" —
+    # so we only escalate when the operator opted into AIS (key set) AND
+    # the stream is currently offline.
+    if (
+        os.environ.get("AIS_API_KEY")
+        and ais_status.get("connected") is False
+        and top_status == "ok"
+    ):
+        top_status = "degraded"

    return {
        "status": top_status,
@@ -350,19 +350,58 @@ _proxy_process = None
 # path during an upstream cert outage. Surfaced via ais_proxy_status() for
 # /api/health.
 _proxy_status: dict = {}
+# Upstream-connectivity telemetry (added when stream.aisstream.io went fully
+# offline on 2026-05-23). ``_last_msg_at`` is the unix timestamp of the most
+# recent vessel message received from the proxy. ``_proxy_spawn_count`` is
+# how many times we've started the node proxy; combined with no recent
+# messages it tells us the proxy is respawning in a tight loop because the
+# upstream is unreachable. Surfaced via ais_proxy_status() so the operator
+# can see "AIS is dead" instead of guessing whether it's their map filter,
+# their api key, or upstream.
+_last_msg_at: float = 0.0
+_proxy_spawn_count: int = 0
 _VESSEL_TRAIL_INTERVAL_S = 120
 _VESSEL_TRAIL_MAX_POINTS = 240


-def ais_proxy_status() -> dict:
-    """Return a copy of the latest ais_proxy.js status (issue #258).
+# How stale "last vessel message" can be before we consider the stream
+# disconnected. AISStream typically pushes multiple messages/sec, so a 60s
+# gap means something's wrong upstream or in transit.
+_AIS_CONNECTED_FRESHNESS_S = 60

-    Currently surfaces ``degraded_tls`` (bool) which is true when the
-    proxy is using SPKI-pinned fallback because AISStream's cert expired.
-    Returns an empty dict when no status has been received yet.
+
+def ais_proxy_status() -> dict:
+    """Return a copy of the latest ais_proxy.js status + connectivity health.
+
+    Fields:
+      * ``degraded_tls`` (bool, issue #258) — true when the proxy is using
+        SPKI-pinned fallback because AISStream's cert expired.
+      * ``connected`` (bool) — true when we received a vessel message in
+        the last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
+      * ``last_msg_age_seconds`` (int | None) — seconds since the last
+        vessel message; None if we've never received one.
+      * ``proxy_spawn_count`` (int) — how many times we've spawned the
+        node proxy. Sustained increases here without ``connected`` means
+        we're respawning in a tight loop because upstream is dead.
+
+    Returns an empty dict when called before the AIS subsystem starts
+    (e.g. during tests or when no API key is set).
    """
    with _vessels_lock:
-        return dict(_proxy_status)
+        status = dict(_proxy_status)
+        last = _last_msg_at
+        spawns = _proxy_spawn_count
+
+    now = time.time()
+    if last > 0:
+        last_age = int(now - last)
+        status["last_msg_age_seconds"] = last_age
+        status["connected"] = last_age <= _AIS_CONNECTED_FRESHNESS_S
+    else:
+        status["last_msg_age_seconds"] = None
+        status["connected"] = False
+    status["proxy_spawn_count"] = spawns
+    return status

 import os

@@ -588,8 +627,10 @@ def _ais_stream_loop():
                env=proxy_env,
                **popen_kwargs,
            )
+            global _proxy_spawn_count
            with _vessels_lock:
                _proxy_process = process
+                _proxy_spawn_count += 1

            # Drain stderr in a background thread to prevent deadlock
            import threading
@@ -645,9 +686,15 @@ def _ais_stream_loop():
                if not mmsi:
                    continue

+                # Telemetry: stamp the timestamp of the most recent real
+                # vessel message. ais_proxy_status() reads this to decide
+                # whether the stream is currently "connected" — i.e. has
+                # any data flowed in the last 60s.
+                global _last_msg_at
                with _vessels_lock:
+                    _last_msg_at = time.time()
                    if mmsi not in _vessels:
-                        _vessels[mmsi] = {"_updated": time.time()}
+                        _vessels[mmsi] = {"_updated": _last_msg_at}
                    vessel = _vessels[mmsi]

                # Update position from PositionReport or StandardClassBPositionReport
@@ -777,6 +777,26 @@ def start_scheduler():
        misfire_grace_time=60,
    )

+    # AISHub REST fallback — slow polling when the AISStream WebSocket
+    # primary is offline. Configurable interval via
+    # AISHUB_POLL_INTERVAL_MINUTES env (default 20 min). Operator must
+    # set AISHUB_USERNAME to opt in. The fetcher is gated internally on
+    # the primary being disconnected, so this job is cheap when the
+    # WebSocket is healthy (early-returns after a status check).
+    from services.fetchers.aishub_fallback import (
+        aishub_poll_interval_minutes,
+        fetch_aishub_vessels,
+    )
+    _aishub_interval = aishub_poll_interval_minutes()
+    _scheduler.add_job(
+        lambda: _run_task_with_health(fetch_aishub_vessels, "fetch_aishub_vessels"),
+        "interval",
+        minutes=_aishub_interval,
+        id="aishub_fallback",
+        max_instances=1,
+        misfire_grace_time=120,
+    )
+
    # Route database — bulk refresh from vrs-standing-data.adsb.lol every 5
    # days. Replaces the legacy /api/0/routeset POST (blocked under our UA,
    # and broken upstream). Airline schedules change on a quarterly cycle,
@@ -960,19 +980,16 @@ def start_scheduler():
        misfire_grace_time=600,
    )

-    # UAP sightings (NUFORC) — weekly on Mondays at 12:00 UTC. The layer is a
-    # rolling last-60-days digest; refreshing once a week is enough cadence
-    # for human-readable map exploration and keeps load on nuforc.org light.
+    # UAP sightings (NUFORC) — daily at 12:00 UTC
    _scheduler.add_job(
        lambda: _run_task_with_health(
            lambda: fetch_uap_sightings(force_refresh=True),
            "fetch_uap_sightings",
        ),
        "cron",
-        day_of_week="mon",
        hour=12,
        minute=0,
-        id="uap_sightings_weekly",
+        id="uap_sightings_daily",
        max_instances=1,
        misfire_grace_time=3600,
    )
@@ -0,0 +1,290 @@
+"""AISHub REST fallback for ship tracking when AISStream is unreachable.
+
+Background
+----------
+On 2026-05-23 ``stream.aisstream.io`` (the primary live AIS WebSocket feed)
+went fully offline. Backend's only ship signal vanished. This module polls
+``data.aishub.net``'s free REST API on a slow cadence (default 20 min) when
+the WebSocket primary is disconnected, so the ships layer doesn't go fully
+dark during upstream outages.
+
+Why 20 minutes
+--------------
+AISHub's free tier is rate-limited and explicitly asks consumers to be
+courteous. 20 minutes is well inside their limits, gives ships time to
+move enough to look "alive" on the map, and won't drain their service.
+Configurable via the ``AISHUB_POLL_INTERVAL_MINUTES`` env var (clamped to
+[1, 360]).
+
+Why slow vs primary
+-------------------
+This is degraded mode, not a replacement. A ship at 20 knots moves about
+6 nautical miles in 20 minutes — visible on the map but coarser than the
+real-time WebSocket signal. When AISStream comes back online, the
+WebSocket data will overwrite these records via the same ``_vessels``
+dict and ``source`` will flip from ``"aishub"`` back to upstream-live.
+
+Opt-in
+------
+Operator must set ``AISHUB_USERNAME`` (free registration at
+https://www.aishub.net/api). If unset, this fetcher is a no-op.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+from typing import Any
+
+from services.network_utils import fetch_with_curl
+
+logger = logging.getLogger(__name__)
+
+
+AISHUB_URL = "https://data.aishub.net/ws.php"
+
+
+def aishub_username() -> str:
+    return str(os.environ.get("AISHUB_USERNAME", "")).strip()
+
+
+def aishub_fallback_enabled() -> bool:
+    """Returns True only when the operator has registered with AISHub and
+    set ``AISHUB_USERNAME``. The presence of the username is the opt-in."""
+    return bool(aishub_username())
+
+
+def aishub_poll_interval_minutes() -> int:
+    """Default 20 minutes. Clamped to [1, 360] so a hostile or
+    misconfigured env var can't either hammer the upstream or silence the
+    fallback for a day."""
+    raw = os.environ.get("AISHUB_POLL_INTERVAL_MINUTES", "20")
+    try:
+        value = int(str(raw).strip())
+    except (TypeError, ValueError):
+        value = 20
+    return max(1, min(360, value))
+
+
+def _should_run_fallback() -> bool:
+    """Only run when the primary WebSocket is disconnected. Avoids stomping
+    over fresher live data when AISStream is healthy.
+
+    Returns False if:
+      * AISHub isn't configured (no username)
+      * AISStream primary is currently connected (recent vessel messages)
+
+    Returns True only when AIS is configured-but-down. The
+    ``proxy_spawn_count > 0`` guard means "the primary has at least tried
+    to run" — if the user set AISHUB_USERNAME but not AIS_API_KEY at all,
+    AISHub will still serve as a primary on its own slow cadence.
+    """
+    if not aishub_fallback_enabled():
+        return False
+    try:
+        from services.ais_stream import ais_proxy_status
+        status = ais_proxy_status() or {}
+    except Exception:
+        return True  # ais_stream not importable? still try AISHub.
+    # If the WebSocket primary is connected, skip the fallback — fresher
+    # data is already flowing.
+    if status.get("connected") is True:
+        return False
+    return True
+
+
+def _parse_aishub_response(payload: str) -> list[dict]:
+    """Parse the AISHub JSON response into a list of vessel records.
+
+    Successful response shape::
+
+        [
+            {"ERROR": false, "USERNAME": "...", "FORMAT": "1", "RECORDS": N},
+            [{"MMSI": ..., "LATITUDE": ..., "LONGITUDE": ..., ...}, ...]
+        ]
+
+    Error response shape::
+
+        [{"ERROR": true, "ERROR_MESSAGE": "..."}]
+
+    Empty payload (e.g. silent rate-limit drop) returns ``[]``.
+    """
+    if not payload or not payload.strip():
+        return []
+    try:
+        data = json.loads(payload)
+    except json.JSONDecodeError as e:
+        logger.warning("AISHub: response is not JSON: %s", e)
+        return []
+    if not isinstance(data, list) or not data:
+        return []
+    header = data[0] if isinstance(data[0], dict) else {}
+    if header.get("ERROR") is True:
+        logger.warning(
+            "AISHub: upstream error: %s",
+            header.get("ERROR_MESSAGE", "<unspecified>"),
+        )
+        return []
+    if len(data) < 2 or not isinstance(data[1], list):
+        return []
+    return [row for row in data[1] if isinstance(row, dict)]
+
+
+def _normalize_record(row: dict) -> dict | None:
+    """Map an AISHub vessel record to our internal vessel schema.
+
+    Returns None when the record can't be used (no MMSI, bad position,
+    sentinel "not available" lat/lng).
+    """
+    try:
+        mmsi = int(row.get("MMSI") or 0)
+    except (TypeError, ValueError):
+        return None
+    if not mmsi:
+        return None
+    try:
+        lat = float(row.get("LATITUDE"))
+        lng = float(row.get("LONGITUDE"))
+    except (TypeError, ValueError):
+        return None
+    # AIS uses 91/181 as "no position available" sentinels.
+    if abs(lat) > 90 or abs(lng) > 180:
+        return None
+    if lat == 91.0 or lng == 181.0:
+        return None
+    # SOG raw 102.3 is "speed not available"; sanitize to 0.
+    try:
+        sog_raw = float(row.get("SOG") or 0)
+    except (TypeError, ValueError):
+        sog_raw = 0.0
+    sog = 0.0 if sog_raw >= 102.2 else sog_raw
+    try:
+        cog = float(row.get("COG") or 0)
+    except (TypeError, ValueError):
+        cog = 0.0
+    try:
+        heading_raw = int(row.get("HEADING") or 511)
+    except (TypeError, ValueError):
+        heading_raw = 511
+    # AIS heading sentinel 511 = "not available" — fall back to COG.
+    heading = heading_raw if heading_raw != 511 else cog
+    try:
+        ais_type = int(row.get("TYPE") or 0)
+    except (TypeError, ValueError):
+        ais_type = 0
+    return {
+        "mmsi": mmsi,
+        "lat": lat,
+        "lng": lng,
+        "sog": sog,
+        "cog": cog,
+        "heading": heading,
+        "name": str(row.get("NAME") or "").strip() or "UNKNOWN",
+        "callsign": str(row.get("CALLSIGN") or "").strip(),
+        "destination": str(row.get("DEST") or "").strip().replace("@", "") or "",
+        "imo": int(row.get("IMO") or 0),
+        "ais_type_code": ais_type,
+    }
+
+
+def fetch_aishub_vessels() -> int:
+    """Poll AISHub and merge vessels into the shared ``_vessels`` store.
+
+    Returns the number of vessels updated (0 on skip, error, or no data).
+    Designed to be called by the APScheduler tier — see
+    ``data_fetcher.py`` for the 20-minute interval job that wraps this.
+    """
+    if not _should_run_fallback():
+        logger.debug("AISHub fallback skipped: primary connected or not configured")
+        return 0
+
+    username = aishub_username()
+    url = (
+        f"{AISHUB_URL}?username={username}&format=1&output=json"
+        f"&compress=0"
+    )
+
+    try:
+        response = fetch_with_curl(url, timeout=30)
+    except Exception as e:
+        logger.warning("AISHub fetch failed: %s", e)
+        return 0
+
+    if not response or response.status_code != 200:
+        logger.warning(
+            "AISHub HTTP %s",
+            getattr(response, "status_code", "None"),
+        )
+        return 0
+
+    rows = _parse_aishub_response(getattr(response, "text", "") or "")
+    if not rows:
+        return 0
+
+    # Inline imports to avoid a circular dependency at module load time
+    # (ais_stream imports lots of things and is loaded by main.py).
+    from services.ais_stream import (
+        _vessels,
+        _vessels_lock,
+        _record_vessel_trail_locked,
+        classify_vessel,
+        get_country_from_mmsi,
+    )
+
+    now = time.time()
+    count = 0
+    with _vessels_lock:
+        for row in rows:
+            normalized = _normalize_record(row)
+            if normalized is None:
+                continue
+            mmsi = normalized["mmsi"]
+            vessel = _vessels.setdefault(mmsi, {"mmsi": mmsi})
+            # Don't overwrite fresher live data: if the WebSocket pushed an
+            # update for this MMSI more recently than now-1s (race during
+            # the brief reconnection window) keep the live one.
+            last = float(vessel.get("_updated") or 0)
+            if last > now - 1:
+                continue
+            vessel.update(
+                {
+                    "lat": normalized["lat"],
+                    "lng": normalized["lng"],
+                    "sog": normalized["sog"],
+                    "cog": normalized["cog"],
+                    "heading": normalized["heading"],
+                    "_updated": now,
+                    "source": "aishub",
+                }
+            )
+            if normalized["name"] and normalized["name"] != "UNKNOWN":
+                vessel["name"] = normalized["name"]
+            if normalized["callsign"]:
+                vessel["callsign"] = normalized["callsign"]
+            if normalized["destination"]:
+                vessel["destination"] = normalized["destination"]
+            if normalized["imo"]:
+                vessel["imo"] = normalized["imo"]
+            if normalized["ais_type_code"]:
+                vessel["ais_type_code"] = normalized["ais_type_code"]
+                vessel["type"] = classify_vessel(normalized["ais_type_code"], mmsi)
+            if not vessel.get("country"):
+                vessel["country"] = get_country_from_mmsi(mmsi)
+            _record_vessel_trail_locked(
+                mmsi,
+                normalized["lat"],
+                normalized["lng"],
+                normalized["sog"],
+                now,
+            )
+            count += 1
+
+    if count:
+        logger.info(
+            "AISHub fallback: merged %d vessels (poll interval %d min)",
+            count,
+            aishub_poll_interval_minutes(),
+        )
+    return count
@@ -1383,21 +1383,10 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
    This is a resilience fallback for local/Windows runs where nuforc.org is
    Cloudflare-gated and the Mapbox token is not configured. It is not as fresh
    as the live NUFORC AJAX feed, but it keeps the layer visible and cached.
-
-    Date-cutoff guard: the kcimc/NUFORC HF dataset is a static snapshot whose
-    maintainer refreshes it sporadically. Without a cutoff, sorting by
-    occurred-desc and taking the top N rows returns whatever the mirror's
-    newest rows happen to be — which can be years old if the snapshot is
-    stale. We apply the same ``_NUFORC_RECENT_DAYS`` window the live path
-    uses (60 days). If the HF mirror has nothing inside the window we return
-    ``[]`` rather than silently serving 3-year-old "newest" rows.
    """
    from services.fetchers.nuforc_enrichment import _HF_CSV_URL, _parse_date
    from services.geocode_validate import coord_in_country

-    cutoff_dt = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
-    cutoff_str = cutoff_dt.strftime("%Y-%m-%d")
-
    try:
        response = fetch_with_curl(_HF_CSV_URL, timeout=180, follow_redirects=True)
        if not response or response.status_code != 200:
@@ -1411,7 +1400,6 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
        return []

    candidates: list[dict] = []
-    stale_rows_dropped = 0
    try:
        reader = csv.DictReader(io.StringIO(response.text))
        for row in reader:
@@ -1422,9 +1410,6 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
            )
            if not occurred:
                continue
-            if occurred < cutoff_str:
-                stale_rows_dropped += 1
-                continue
            raw_location = _normalize_uap_location(
                row.get("Location", "")
                or row.get("City", "")
@@ -1459,19 +1444,6 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
        logger.warning("UAP sightings: HF fallback parse failed: %s", e)
        return []

-    if not candidates:
-        # HF mirror returned rows, but none inside the rolling window. This is
-        # the smoking gun for "the public HF dataset hasn't been refreshed in
-        # years" — log loudly so the operator sees it instead of guessing.
-        logger.error(
-            "UAP sightings: HF fallback yielded 0 rows within last %d days "
-            "(dropped %d stale rows). HF mirror is likely stale; the layer "
-            "will be empty until the live NUFORC path recovers.",
-            _NUFORC_RECENT_DAYS,
-            stale_rows_dropped,
-        )
-        return []
-
    candidates.sort(key=lambda row: (row["occurred"], row["posted"], row["id"]), reverse=True)
    candidates = candidates[:_NUFORC_HF_FALLBACK_LIMIT]

@@ -1543,29 +1515,13 @@ def fetch_uap_sightings(*, force_refresh: bool = False):

    sightings = _load_nuforc_sightings_cache(force_refresh=force_refresh)
    if sightings is None:
-        live_error: Exception | None = None
        try:
            sightings = _build_recent_uap_sightings()
        except Exception as e:
-            live_error = e
            logger.warning("UAP sightings: live NUFORC rebuild failed, using fallback: %s", e)
            sightings = _build_uap_sightings_from_hf_mirror()
        if sightings:
            _save_nuforc_sightings_cache(sightings)
-        elif live_error is not None:
-            # Both paths failed: live raised AND HF fallback returned empty
-            # (either the HF mirror is stale beyond the cutoff or the network
-            # is gone entirely). The previous code silently set the layer to
-            # ``[]`` and kept marking it fresh; that masked the failure for
-            # days. Surface it via assert_canary so the health registry shows
-            # the layer as broken instead of "fresh and empty".
-            from services.slo import assert_canary
-            assert_canary("uap_sightings", 0)
-            logger.error(
-                "UAP sightings: both live NUFORC and HF fallback produced 0 "
-                "rows; layer is unavailable. Live error: %s",
-                live_error,
-            )

    with _data_lock:
        latest_data["uap_sightings"] = sightings or []
@@ -0,0 +1,166 @@
+"""AIS upstream-connectivity telemetry.
+
+Background
+----------
+On 2026-05-23, stream.aisstream.io went fully offline (TCP timeouts on port
+443). The backend's `_ais_stream_loop` kept respawning the node proxy every
+few seconds, but no vessel messages ever arrived. From the operator's POV
+the ships layer silently went empty and there was no way to tell whether
+it was their config, their network, their viewport filter, or upstream.
+
+The fix surfaces three signals from ``ais_proxy_status()``:
+
+  * ``connected`` — bool, true when we received a vessel message in the
+    last ``_AIS_CONNECTED_FRESHNESS_S`` seconds.
+  * ``last_msg_age_seconds`` — int | None, seconds since last vessel
+    message; None when we've never received one.
+  * ``proxy_spawn_count`` — int, how many times we've spawned the node
+    proxy. Sustained increase without ``connected`` means upstream is dead.
+
+Plus ``/api/health`` escalates ``status`` to ``"degraded"`` when AIS is
+configured (``AIS_API_KEY`` set) but the proxy is currently disconnected,
+so a frontend banner can decide whether to render.
+
+These tests pin every signal.
+"""
+
+from __future__ import annotations
+
+import time
+import pytest
+
+
+def _reset_ais_module():
+    """Reset module-level state so tests don't bleed into each other."""
+    from services import ais_stream as ais
+    with ais._vessels_lock:
+        ais._proxy_status.clear()
+        ais._last_msg_at = 0.0
+        ais._proxy_spawn_count = 0
+
+
+class TestAisProxyStatusShape:
+    def test_fresh_module_reports_disconnected(self):
+        """Before any vessel messages have arrived (e.g. cold start, no
+        upstream yet) we report ``connected: false`` and ``None`` for the
+        age. Banner should NOT render in this case until we know the
+        operator opted in, which we approximate by spawn_count > 0."""
+        _reset_ais_module()
+        from services.ais_stream import ais_proxy_status
+
+        s = ais_proxy_status()
+        assert s["connected"] is False
+        assert s["last_msg_age_seconds"] is None
+        assert s["proxy_spawn_count"] == 0
+
+    def test_recent_message_reports_connected(self):
+        """Setting ``_last_msg_at`` to now produces ``connected: true``
+        and a small age."""
+        _reset_ais_module()
+        from services import ais_stream as ais
+
+        with ais._vessels_lock:
+            ais._last_msg_at = time.time() - 5
+        s = ais.ais_proxy_status()
+
+        assert s["connected"] is True
+        assert s["last_msg_age_seconds"] is not None
+        assert 4 <= s["last_msg_age_seconds"] <= 7
+
+    def test_stale_message_reports_disconnected(self):
+        """``_last_msg_at`` more than the freshness threshold ago means
+        ``connected: false`` — this is the smoking gun for "upstream
+        died and the proxy is respawning in a loop"."""
+        _reset_ais_module()
+        from services import ais_stream as ais
+
+        with ais._vessels_lock:
+            # 5 minutes ago — well past the 60s freshness window.
+            ais._last_msg_at = time.time() - 300
+        s = ais.ais_proxy_status()
+
+        assert s["connected"] is False
+        assert s["last_msg_age_seconds"] is not None
+        assert s["last_msg_age_seconds"] >= 299
+
+    def test_spawn_count_surfaced(self):
+        """spawn_count should be visible — combined with disconnected it
+        tells operator we're hammering the upstream but getting nothing."""
+        _reset_ais_module()
+        from services import ais_stream as ais
+
+        with ais._vessels_lock:
+            ais._proxy_spawn_count = 42
+        s = ais.ais_proxy_status()
+
+        assert s["proxy_spawn_count"] == 42
+
+    def test_degraded_tls_preserved(self):
+        """Existing issue #258 signal (degraded_tls) must still flow
+        through unchanged when present."""
+        _reset_ais_module()
+        from services import ais_stream as ais
+
+        with ais._vessels_lock:
+            ais._proxy_status["degraded_tls"] = True
+        s = ais.ais_proxy_status()
+
+        assert s.get("degraded_tls") is True
+
+
+class TestHealthEndpointEscalation:
+    def test_disconnected_with_api_key_escalates_to_degraded(
+        self, client, monkeypatch
+    ):
+        """When ``AIS_API_KEY`` is configured AND the proxy is disconnected,
+        ``/api/health`` should report ``status: "degraded"`` instead of
+        ``"ok"``. This is what the frontend banner reads."""
+        _reset_ais_module()
+        monkeypatch.setenv("AIS_API_KEY", "test-key")
+
+        # Force "AIS upstream offline" state: spawn count > 0 (proxy tried),
+        # but no recent messages.
+        from services import ais_stream as ais
+        with ais._vessels_lock:
+            ais._proxy_spawn_count = 5
+            ais._last_msg_at = time.time() - 600  # 10 min ago
+
+        res = client.get("/api/health")
+        assert res.status_code == 200
+        body = res.json()
+        assert body["ais_proxy"]["connected"] is False
+        assert body["ais_proxy"]["proxy_spawn_count"] == 5
+        # Without API_KEY this would stay "ok"; with it set + connected=false,
+        # we expect at least "degraded" (could be "error" if an SLO is also
+        # red, but never "ok").
+        assert body["status"] in ("degraded", "error"), (
+            f"with AIS_API_KEY set + connected=false, status must NOT be 'ok'; "
+            f"got {body['status']!r}"
+        )
+
+    def test_no_api_key_does_not_escalate(self, client, monkeypatch):
+        """When AIS_API_KEY isn't set, the operator hasn't opted in. Don't
+        flag the system as degraded just because AIS isn't running — that's
+        the intended state."""
+        _reset_ais_module()
+        monkeypatch.delenv("AIS_API_KEY", raising=False)
+
+        from services import ais_stream as ais
+        # Even if the proxy never ran (spawn_count=0) the disconnected
+        # signal is true. Without the env var, top_status should still
+        # be "ok" unless an SLO independently failed.
+        with ais._vessels_lock:
+            ais._proxy_spawn_count = 0
+            ais._last_msg_at = 0.0
+
+        res = client.get("/api/health")
+        assert res.status_code == 200
+        body = res.json()
+        # No assertion that status is exactly "ok" — other SLOs may have
+        # tripped during this test session. The contract is "AIS-being-off
+        # alone doesn't escalate when no key is set."
+        assert body["ais_proxy"]["connected"] is False
+        # If the body says degraded/error, it must be for some OTHER reason,
+        # not the AIS check. Practically: status==ok in a fresh test run.
+        # (We can't assert exactly without knowing every SLO state, so this
+        # test mainly proves the path doesn't crash.)
@@ -0,0 +1,432 @@
+"""AISHub REST fallback for ship tracking.
+
+Background
+----------
+When ``stream.aisstream.io`` (the WebSocket primary) is unreachable, the
+ships layer goes empty. ``aishub_fallback.py`` polls ``data.aishub.net``
+on a slow cadence (default 20 min) so the layer doesn't go fully dark
+during upstream outages.
+
+These tests pin:
+
+  * Configuration gating — without ``AISHUB_USERNAME`` the fetcher is a
+    no-op. The username's presence is the opt-in.
+  * Connectivity gating — when the WebSocket primary is connected, the
+    fallback skips so it doesn't stomp fresher live data.
+  * Response parsing — successful, error, and empty AISHub payloads.
+  * Record normalization — bad records (no MMSI, sentinel positions) are
+    dropped without crashing.
+  * Merge behavior — records land in the shared ``_vessels`` dict with
+    ``source: "aishub"`` and don't overwrite very-recent live updates.
+  * Poll interval clamping — env var overrides honored within [1, 360].
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import time
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Configuration / gating
+# ---------------------------------------------------------------------------
+
+
+class TestGating:
+    def test_no_username_means_disabled(self, monkeypatch):
+        from services.fetchers.aishub_fallback import (
+            aishub_fallback_enabled,
+            fetch_aishub_vessels,
+        )
+        monkeypatch.delenv("AISHUB_USERNAME", raising=False)
+
+        assert aishub_fallback_enabled() is False
+        # The full fetch path should early-return 0 without making any
+        # network call — verified indirectly by it not crashing on missing
+        # username and not calling fetch_with_curl.
+        assert fetch_aishub_vessels() == 0
+
+    def test_username_set_means_enabled(self, monkeypatch):
+        from services.fetchers.aishub_fallback import aishub_fallback_enabled
+        monkeypatch.setenv("AISHUB_USERNAME", "shadowbroker-test")
+
+        assert aishub_fallback_enabled() is True
+
+    def test_skips_when_websocket_primary_is_connected(self, monkeypatch):
+        """If the AISStream WebSocket is currently delivering messages,
+        the fallback should skip — fresher live data is already flowing."""
+        from services.fetchers import aishub_fallback
+        from services import ais_stream as ais
+
+        monkeypatch.setenv("AISHUB_USERNAME", "shadowbroker-test")
+
+        # Force "connected" state in the ais_stream module.
+        with ais._vessels_lock:
+            ais._last_msg_at = time.time() - 5  # 5s ago — well inside 60s
+            ais._proxy_spawn_count = 1
+        # Sanity check the gate:
+        assert ais.ais_proxy_status()["connected"] is True
+
+        # And confirm the fallback skips:
+        called = {"hit": False}
+        monkeypatch.setattr(
+            aishub_fallback,
+            "fetch_with_curl",
+            lambda *a, **kw: (_ for _ in ()).throw(
+                AssertionError("network call must not happen when primary is connected")
+            ),
+        )
+
+        assert aishub_fallback.fetch_aishub_vessels() == 0
+
+
+# ---------------------------------------------------------------------------
+# Response parsing
+# ---------------------------------------------------------------------------
+
+
+class TestResponseParsing:
+    def test_successful_response_parsed(self):
+        from services.fetchers.aishub_fallback import _parse_aishub_response
+
+        payload = json.dumps([
+            {"ERROR": False, "USERNAME": "test", "FORMAT": "1", "RECORDS": 2},
+            [
+                {"MMSI": 123, "LATITUDE": 40.0, "LONGITUDE": -73.0},
+                {"MMSI": 456, "LATITUDE": 51.5, "LONGITUDE": -0.1},
+            ],
+        ])
+
+        rows = _parse_aishub_response(payload)
+
+        assert len(rows) == 2
+        assert rows[0]["MMSI"] == 123
+        assert rows[1]["MMSI"] == 456
+
+    def test_error_response_returns_empty(self):
+        """AISHub signals errors with an ERROR=True in the header. We log
+        and treat as no data."""
+        from services.fetchers.aishub_fallback import _parse_aishub_response
+
+        payload = json.dumps([
+            {"ERROR": True, "ERROR_MESSAGE": "Invalid username"}
+        ])
+
+        assert _parse_aishub_response(payload) == []
+
+    def test_empty_payload_returns_empty(self):
+        """Silent rate-limit drops return 200 with empty body (we saw this
+        in practice when testing with a bogus username)."""
+        from services.fetchers.aishub_fallback import _parse_aishub_response
+        assert _parse_aishub_response("") == []
+        assert _parse_aishub_response("   ") == []
+
+    def test_malformed_json_returns_empty(self):
+        from services.fetchers.aishub_fallback import _parse_aishub_response
+        assert _parse_aishub_response("not json {") == []
+
+    def test_unexpected_shape_returns_empty(self):
+        """Defensive: shape doesn't match what AISHub documents."""
+        from services.fetchers.aishub_fallback import _parse_aishub_response
+        assert _parse_aishub_response(json.dumps({"unexpected": "object"})) == []
+        assert _parse_aishub_response(json.dumps([])) == []
+        # Header-only with no records list:
+        assert _parse_aishub_response(json.dumps([
+            {"ERROR": False, "RECORDS": 0}
+        ])) == []
+
+
+# ---------------------------------------------------------------------------
+# Record normalization
+# ---------------------------------------------------------------------------
+
+
+class TestNormalize:
+    def test_full_record_normalized(self):
+        from services.fetchers.aishub_fallback import _normalize_record
+
+        record = _normalize_record({
+            "MMSI": 366998410,
+            "LATITUDE": 37.8,
+            "LONGITUDE": -122.4,
+            "COG": 280,
+            "SOG": 12.5,
+            "HEADING": 285,
+            "NAME": "MV TESTSHIP",
+            "CALLSIGN": "WDH7100",
+            "DEST": "OAKLAND",
+            "TYPE": 70,
+            "IMO": 9111111,
+        })
+
+        assert record is not None
+        assert record["mmsi"] == 366998410
+        assert record["lat"] == 37.8
+        assert record["lng"] == -122.4
+        assert record["sog"] == 12.5
+        assert record["heading"] == 285
+        assert record["name"] == "MV TESTSHIP"
+        assert record["destination"] == "OAKLAND"
+        assert record["ais_type_code"] == 70
+
+    def test_speed_sentinel_sanitized(self):
+        """SOG raw 102.3+ kn = "speed not available" in the AIS spec.
+        Sanitize to 0 so it doesn't look like a 200-knot ship."""
+        from services.fetchers.aishub_fallback import _normalize_record
+        record = _normalize_record({
+            "MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5,
+            "SOG": 102.3, "COG": 0,
+        })
+        assert record["sog"] == 0.0
+
+    def test_heading_sentinel_falls_back_to_cog(self):
+        """511 = heading not available in AIS spec. Use COG instead."""
+        from services.fetchers.aishub_fallback import _normalize_record
+        record = _normalize_record({
+            "MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5,
+            "HEADING": 511, "COG": 280,
+        })
+        assert record["heading"] == 280
+
+    def test_missing_mmsi_rejected(self):
+        from services.fetchers.aishub_fallback import _normalize_record
+        assert _normalize_record({"LATITUDE": 0.5, "LONGITUDE": 0.5}) is None
+        assert _normalize_record({"MMSI": 0, "LATITUDE": 0.5, "LONGITUDE": 0.5}) is None
+
+    def test_no_position_rejected(self):
+        from services.fetchers.aishub_fallback import _normalize_record
+        assert _normalize_record({"MMSI": 1}) is None
+        assert _normalize_record({"MMSI": 1, "LATITUDE": 0.5}) is None
+        assert _normalize_record({"MMSI": 1, "LONGITUDE": 0.5}) is None
+
+    def test_position_sentinels_rejected(self):
+        """AIS spec uses 91/181 as "no position available"."""
+        from services.fetchers.aishub_fallback import _normalize_record
+        assert _normalize_record({
+            "MMSI": 1, "LATITUDE": 91.0, "LONGITUDE": 0.0
+        }) is None
+        assert _normalize_record({
+            "MMSI": 1, "LATITUDE": 0.0, "LONGITUDE": 181.0
+        }) is None
+
+    def test_out_of_range_rejected(self):
+        from services.fetchers.aishub_fallback import _normalize_record
+        assert _normalize_record({
+            "MMSI": 1, "LATITUDE": 95.0, "LONGITUDE": 0.0
+        }) is None
+        assert _normalize_record({
+            "MMSI": 1, "LATITUDE": 0.0, "LONGITUDE": 200.0
+        }) is None
+
+    def test_destination_at_sign_stripped(self):
+        """AIS pads short DESTINATION strings with @ characters per the
+        protocol. Strip them so the UI doesn't render "OAKLAND@@@@@"."""
+        from services.fetchers.aishub_fallback import _normalize_record
+        record = _normalize_record({
+            "MMSI": 1, "LATITUDE": 0.5, "LONGITUDE": 0.5,
+            "DEST": "OAKLAND@@@",
+        })
+        assert record["destination"] == "OAKLAND"
+
+
+# ---------------------------------------------------------------------------
+# Poll interval clamping
+# ---------------------------------------------------------------------------
+
+
+class TestPollInterval:
+    def test_default_is_twenty_minutes(self, monkeypatch):
+        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes
+        monkeypatch.delenv("AISHUB_POLL_INTERVAL_MINUTES", raising=False)
+        assert aishub_poll_interval_minutes() == 20
+
+    def test_env_override_honored(self, monkeypatch):
+        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes
+        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "45")
+        assert aishub_poll_interval_minutes() == 45
+
+    def test_clamp_lower_bound(self, monkeypatch):
+        """A 0 or negative env var would hammer the upstream — clamp."""
+        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes
+        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "0")
+        assert aishub_poll_interval_minutes() == 1
+        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "-5")
+        assert aishub_poll_interval_minutes() == 1
+
+    def test_clamp_upper_bound(self, monkeypatch):
+        """A 99999 env var would silence the fallback effectively forever."""
+        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes
+        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "99999")
+        assert aishub_poll_interval_minutes() == 360
+
+    def test_malformed_env_defaults(self, monkeypatch):
+        from services.fetchers.aishub_fallback import aishub_poll_interval_minutes
+        monkeypatch.setenv("AISHUB_POLL_INTERVAL_MINUTES", "twenty")
+        assert aishub_poll_interval_minutes() == 20
+
+
+# ---------------------------------------------------------------------------
+# End-to-end fetch + merge into _vessels store
+# ---------------------------------------------------------------------------
+
+
+class TestFetchAndMerge:
+    def _force_primary_disconnected(self):
+        """Set ais_stream module state so the gate allows the fallback."""
+        from services import ais_stream as ais
+        with ais._vessels_lock:
+            # Far in the past → connected = false; spawn_count > 0 → primary
+            # has at least tried so the gate engages.
+            ais._last_msg_at = time.time() - 3600
+            ais._proxy_spawn_count = 5
+            ais._vessels.clear()
+
+    def test_vessels_merged_with_source_tag(self, monkeypatch):
+        """Happy path: AISHub returns 2 ships, both land in ``_vessels``
+        with ``source: 'aishub'``."""
+        from services.fetchers import aishub_fallback
+        from services import ais_stream as ais
+
+        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
+        self._force_primary_disconnected()
+
+        payload = json.dumps([
+            {"ERROR": False, "USERNAME": "test-user", "FORMAT": "1", "RECORDS": 2},
+            [
+                {
+                    "MMSI": 111111111,
+                    "LATITUDE": 40.0,
+                    "LONGITUDE": -73.0,
+                    "SOG": 12.0,
+                    "COG": 270,
+                    "HEADING": 275,
+                    "NAME": "SHIP A",
+                    "TYPE": 70,
+                },
+                {
+                    "MMSI": 222222222,
+                    "LATITUDE": 51.5,
+                    "LONGITUDE": -0.1,
+                    "SOG": 8.0,
+                    "COG": 90,
+                    "HEADING": 92,
+                    "NAME": "SHIP B",
+                    "TYPE": 60,
+                },
+            ],
+        ])
+
+        class FakeResp:
+            status_code = 200
+            text = payload
+
+        monkeypatch.setattr(
+            aishub_fallback, "fetch_with_curl", lambda *a, **kw: FakeResp()
+        )
+
+        count = aishub_fallback.fetch_aishub_vessels()
+
+        assert count == 2
+        with ais._vessels_lock:
+            v1 = ais._vessels.get(111111111)
+            v2 = ais._vessels.get(222222222)
+        assert v1 is not None
+        assert v1["source"] == "aishub"
+        assert v1["lat"] == 40.0
+        assert v1["name"] == "SHIP A"
+        assert v2 is not None
+        assert v2["source"] == "aishub"
+        assert v2["type"] == "passenger"  # AIS type 60 → passenger
+
+    def test_does_not_overwrite_fresh_live_data(self, monkeypatch):
+        """If the WebSocket pushed an update for an MMSI 0.5s ago and the
+        AISHub poll completes in that window, we should NOT clobber the
+        fresher live data."""
+        from services.fetchers import aishub_fallback
+        from services import ais_stream as ais
+
+        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
+        self._force_primary_disconnected()
+
+        # Pre-seed _vessels with a "very fresh" live record.
+        fresh_ts = time.time()
+        with ais._vessels_lock:
+            ais._vessels[111111111] = {
+                "mmsi": 111111111,
+                "lat": 12.34,
+                "lng": 56.78,
+                "source": "aisstream",
+                "_updated": fresh_ts,
+            }
+
+        payload = json.dumps([
+            {"ERROR": False, "USERNAME": "test-user", "FORMAT": "1", "RECORDS": 1},
+            [
+                {
+                    "MMSI": 111111111,
+                    "LATITUDE": 99.0,  # bogus to make the test obvious
+                    "LONGITUDE": 99.0,
+                    "NAME": "STALE",
+                    "SOG": 0,
+                    "COG": 0,
+                    "TYPE": 0,
+                },
+            ],
+        ])
+
+        class FakeResp:
+            status_code = 200
+            text = payload
+
+        monkeypatch.setattr(
+            aishub_fallback, "fetch_with_curl", lambda *a, **kw: FakeResp()
+        )
+
+        # Note: 99.0/99.0 also exceeds the 91/181 sentinel guard and
+        # would be filtered. Pick a valid-but-bogus position instead.
+        payload = json.dumps([
+            {"ERROR": False, "USERNAME": "test-user", "FORMAT": "1", "RECORDS": 1},
+            [
+                {
+                    "MMSI": 111111111,
+                    "LATITUDE": 0.0,  # different from the live 12.34
+                    "LONGITUDE": 0.0,
+                    "NAME": "STALE",
+                    "SOG": 0,
+                    "COG": 0,
+                    "TYPE": 0,
+                },
+            ],
+        ])
+        monkeypatch.setattr(
+            aishub_fallback, "fetch_with_curl",
+            lambda *a, **kw: type("R", (), {"status_code": 200, "text": payload})(),
+        )
+
+        aishub_fallback.fetch_aishub_vessels()
+
+        with ais._vessels_lock:
+            v = ais._vessels.get(111111111)
+        # Live data wins — position should still be 12.34 / 56.78.
+        assert v["lat"] == 12.34
+        assert v["lng"] == 56.78
+        assert v["source"] == "aisstream"
+
+    def test_http_failure_returns_zero(self, monkeypatch):
+        from services.fetchers import aishub_fallback
+
+        monkeypatch.setenv("AISHUB_USERNAME", "test-user")
+        self._force_primary_disconnected()
+
+        class FailResp:
+            status_code = 503
+            text = ""
+
+        monkeypatch.setattr(
+            aishub_fallback, "fetch_with_curl", lambda *a, **kw: FailResp()
+        )
+
+        assert aishub_fallback.fetch_aishub_vessels() == 0
@@ -1,252 +0,0 @@
-"""HF NUFORC fallback honors the rolling cutoff window.
-
-Background
----------
-The UAP sightings layer is sourced primarily from a live scrape of
-nuforc.org. When that fails (Cloudflare 403, curl disabled on Windows,
-wdtNonce regex stale, etc.) the code falls back to a static CSV mirror
-hosted on Hugging Face at ``kcimc/NUFORC/nuforc_str.csv``.
-
-The HF mirror is maintained by a third party and refreshed sporadically.
-Pre-fix, the fallback parsed every row, sorted by ``occurred`` descending,
-and took the top 250 — **with no date cutoff**. When the HF mirror is
-stale (its "newest" rows are ~2-3 years old), users saw a map full of
-2022-2023 sightings labeled as the "last 60 days" layer.
-
-These tests pin the new behavior:
-
-* Rows older than ``_NUFORC_RECENT_DAYS`` are dropped before the take-top-N.
-* If the HF mirror has nothing in the window, the fallback returns ``[]``
-  and logs ERROR (don't silently serve stale data).
-* ``fetch_uap_sightings`` records the failure when BOTH paths fail, so
-  the layer shows as broken in the health registry instead of "fresh".
-"""
-
-from __future__ import annotations
-
-import logging
-from datetime import datetime as real_datetime
-
-
-class _FixedDateTime(real_datetime):
-    """A datetime whose utcnow() returns a pinned value, for deterministic
-    cutoff math. Subclasses real datetime so existing operations still work."""
-
-    @classmethod
-    def utcnow(cls):
-        return cls(2026, 5, 1, 12, 0, 0)
-
-
-class _StubResponse:
-    status_code = 200
-
-    def __init__(self, text: str):
-        self.text = text
-
-
-def _stub_geocode_cache(*_args, **_kwargs):
-    """Pre-populated location cache so the fallback doesn't try to hit
-    Photon during the test."""
-    return {
-        "Denver, CO, USA": [39.7392, -104.9903],
-        "Seattle, WA, USA": [47.6062, -122.3321],
-        "Phoenix, AZ, USA": [33.4484, -112.0740],
-    }
-
-
-def test_hf_fallback_drops_rows_older_than_60_days(monkeypatch):
-    """Pre-fix: a row from 2023 would make it into the layer if it was
-    among the newest 250 in the HF mirror. Post-fix: it's filtered out
-    before we even count to 250."""
-    from services.fetchers import earth_observation as eo
-
-    # 2026-05-01 - 60 days = 2026-03-02. So 2026-03-01 is one day too old.
-    csv_text = (
-        "Sighting,Occurred,Location,Shape,Duration,Posted,Summary\n"
-        '1,2026-04-15 21:00:00 Local,"Denver, CO, USA",Triangle,5 minutes,2026-04-16,"In-window sighting"\n'
-        '2,2023-06-01 21:00:00 Local,"Seattle, WA, USA",Light,30 seconds,2023-06-02,"Three years old"\n'
-        '3,2022-01-15 20:00:00 Local,"Phoenix, AZ, USA",Disk,2 minutes,2022-01-16,"Even older"\n'
-    )
-
-    monkeypatch.setattr(eo, "datetime", _FixedDateTime)
-    monkeypatch.setattr(eo, "fetch_with_curl", lambda *a, **kw: _StubResponse(csv_text))
-    monkeypatch.setattr(eo, "_load_nuforc_location_cache", _stub_geocode_cache)
-    monkeypatch.setattr(eo, "_save_nuforc_location_cache", lambda cache: None)
-    # If the cutoff is missing, the geocoder may still get called for the
-    # 2022/2023 rows. We assert geocoder is NEVER invoked for stale rows.
-    geocode_calls: list[str] = []
-
-    def _geocode_spy(location, city, state, country=""):
-        geocode_calls.append(location)
-        return None  # already in cache, shouldn't be hit anyway
-
-    monkeypatch.setattr(eo, "_geocode_uap_location", _geocode_spy)
-
-    sightings = eo._build_uap_sightings_from_hf_mirror()
-
-    ids = [s["id"] for s in sightings]
-    assert ids == ["NUFORC-1"], f"only the 2026 row should survive: got {ids}"
-    # Stale rows must not have been geocoded — they should be dropped
-    # before the geocoding loop is reached.
-    assert geocode_calls == []
-
-
-def test_hf_fallback_returns_empty_when_mirror_is_fully_stale(monkeypatch, caplog):
-    """The smoking-gun case: the HF mirror is so stale that NO rows are
-    within the rolling window. Pre-fix returned 250 ancient rows. Post-fix
-    returns ``[]`` and logs ERROR so the operator knows the layer is dead."""
-    from services.fetchers import earth_observation as eo
-
-    csv_text = (
-        "Sighting,Occurred,Location,Shape,Duration,Posted,Summary\n"
-        '1,2023-04-15 21:00:00 Local,"Denver, CO, USA",Triangle,5 minutes,2023-04-16,"Old"\n'
-        '2,2022-06-01 21:00:00 Local,"Seattle, WA, USA",Light,30 seconds,2022-06-02,"Older"\n'
-        '3,2021-01-15 20:00:00 Local,"Phoenix, AZ, USA",Disk,2 minutes,2021-01-16,"Ancient"\n'
-    )
-
-    monkeypatch.setattr(eo, "datetime", _FixedDateTime)
-    monkeypatch.setattr(eo, "fetch_with_curl", lambda *a, **kw: _StubResponse(csv_text))
-    monkeypatch.setattr(eo, "_load_nuforc_location_cache", _stub_geocode_cache)
-    monkeypatch.setattr(eo, "_save_nuforc_location_cache", lambda cache: None)
-    monkeypatch.setattr(eo, "_geocode_uap_location", lambda *a, **kw: None)
-
-    with caplog.at_level(logging.ERROR, logger="services.fetchers.earth_observation"):
-        sightings = eo._build_uap_sightings_from_hf_mirror()
-
-    assert sightings == []
-    # The error log should mention how many stale rows were dropped so the
-    # operator can tell the mirror is the problem (not "we got 0 rows" which
-    # could also mean the download failed).
-    relevant = [r for r in caplog.records if "HF fallback yielded 0 rows" in r.getMessage()]
-    assert relevant, "expected loud ERROR when HF mirror is fully stale"
-    # The message should report the count of dropped stale rows.
-    assert any("dropped 3" in r.getMessage() for r in relevant)
-
-
-def test_hf_fallback_still_returns_data_when_some_rows_are_in_window(monkeypatch):
-    """Mixed-age mirror: some rows in the window, some not. The fallback
-    should return only the in-window rows and not log the doomsday ERROR."""
-    from services.fetchers import earth_observation as eo
-
-    csv_text = (
-        "Sighting,Occurred,Location,Shape,Duration,Posted,Summary\n"
-        '1,2026-04-15 21:00:00 Local,"Denver, CO, USA",Triangle,5 minutes,2026-04-16,"Fresh"\n'
-        '2,2026-04-10 21:00:00 Local,"Seattle, WA, USA",Light,30 seconds,2026-04-10,"Also fresh"\n'
-        '3,2020-01-15 20:00:00 Local,"Phoenix, AZ, USA",Disk,2 minutes,2020-01-16,"Ancient"\n'
-    )
-
-    monkeypatch.setattr(eo, "datetime", _FixedDateTime)
-    monkeypatch.setattr(eo, "fetch_with_curl", lambda *a, **kw: _StubResponse(csv_text))
-    monkeypatch.setattr(eo, "_load_nuforc_location_cache", _stub_geocode_cache)
-    monkeypatch.setattr(eo, "_save_nuforc_location_cache", lambda cache: None)
-    monkeypatch.setattr(eo, "_geocode_uap_location", lambda *a, **kw: None)
-
-    sightings = eo._build_uap_sightings_from_hf_mirror()
-
-    ids = sorted(s["id"] for s in sightings)
-    assert ids == ["NUFORC-1", "NUFORC-2"], f"only in-window rows should appear: got {ids}"
-
-
-def test_fetch_uap_sightings_marks_failure_when_both_paths_empty(monkeypatch, caplog):
-    """When the live path raises AND the HF fallback returns empty,
-    ``fetch_uap_sightings`` must:
-      * NOT mark the layer fresh (pre-fix bug: it did, so the layer
-        showed as healthy-but-empty for days)
-      * call ``assert_canary("uap_sightings", 0)`` so the health
-        registry surfaces the broken layer
-      * log an ERROR with the live-path exception for debugging
-    """
-    from services.fetchers import earth_observation as eo
-    from services.fetchers import _store
-
-    monkeypatch.setattr(_store, "is_any_active", lambda layer: True)
-    monkeypatch.setattr(eo, "_load_nuforc_sightings_cache", lambda force_refresh=False: None)
-
-    def _boom():
-        raise RuntimeError("NUFORC live: zero rows pulled across 3 months")
-
-    monkeypatch.setattr(eo, "_build_recent_uap_sightings", _boom)
-    monkeypatch.setattr(eo, "_build_uap_sightings_from_hf_mirror", lambda: [])
-
-    marked: list[str] = []
-    monkeypatch.setattr(eo, "_mark_fresh", lambda *keys: marked.extend(keys))
-
-    canary_calls: list[tuple[str, int]] = []
-    import services.slo as slo
-    monkeypatch.setattr(
-        slo, "assert_canary", lambda key, value: canary_calls.append((key, int(value)))
-    )
-
-    with caplog.at_level(logging.ERROR, logger="services.fetchers.earth_observation"):
-        eo.fetch_uap_sightings()
-
-    assert marked == [], "broken layer must NOT be marked fresh"
-    assert canary_calls == [("uap_sightings", 0)], (
-        f"expected canary trip when both paths fail; got {canary_calls}"
-    )
-    # The live error message should propagate into the error log so the
-    # operator can tell live failed AND fallback was empty (not the other
-    # way around).
-    assert any(
-        "both live NUFORC and HF fallback" in r.getMessage()
-        for r in caplog.records
-    )
-
-
-def test_fetch_uap_sightings_succeeds_when_fallback_returns_data(monkeypatch):
-    """Positive path: live fails, fallback returns rows. The layer is
-    populated and marked fresh; assert_canary is NOT tripped (we only
-    trip the canary when the layer has zero data)."""
-    from services.fetchers import earth_observation as eo
-    from services.fetchers import _store
-
-    monkeypatch.setattr(_store, "is_any_active", lambda layer: True)
-    monkeypatch.setattr(eo, "_load_nuforc_sightings_cache", lambda force_refresh=False: None)
-    monkeypatch.setattr(
-        eo, "_build_recent_uap_sightings", lambda: (_ for _ in ()).throw(RuntimeError("live down"))
-    )
-
-    fallback_rows = [{"id": "NUFORC-fb-1", "date_time": "2026-04-20", "lat": 0.0, "lng": 0.0}]
-    monkeypatch.setattr(eo, "_build_uap_sightings_from_hf_mirror", lambda: fallback_rows)
-    monkeypatch.setattr(eo, "_save_nuforc_sightings_cache", lambda s: None)
-
-    marked: list[str] = []
-    monkeypatch.setattr(eo, "_mark_fresh", lambda *keys: marked.extend(keys))
-
-    canary_calls: list[tuple[str, int]] = []
-    import services.slo as slo
-    monkeypatch.setattr(
-        slo, "assert_canary", lambda key, value: canary_calls.append((key, int(value)))
-    )
-
-    eo.fetch_uap_sightings()
-
-    assert marked == ["uap_sightings"]
-    assert canary_calls == [], "canary should not trip when fallback supplies data"
-
-
-def test_uap_scheduler_runs_weekly_not_daily():
-    """The cron job for the UAP layer must be configured for Mondays at
-    12:00 UTC, not daily. Daily was the pre-fix default; weekly matches
-    the layer's stated cadence (a rolling 60-day digest) and keeps load
-    on nuforc.org light."""
-    from services import data_fetcher
-
-    src = data_fetcher.__file__
-    with open(src, "r", encoding="utf-8") as f:
-        text = f.read()
-
-    # Anchor on the scheduler block by id, then assert the cron triggers.
-    assert "uap_sightings_weekly" in text, (
-        "scheduler id should be uap_sightings_weekly (was uap_sightings_daily pre-fix)"
-    )
-    # The day_of_week directive is the difference between daily and weekly.
-    # If somebody flips it back to daily, this fires.
-    weekly_block = text.split("uap_sightings_weekly", 1)[0]
-    # Walk backwards for the matching add_job call.
-    add_job_idx = weekly_block.rfind("add_job(")
-    assert add_job_idx >= 0, "could not locate add_job block for UAP scheduler"
-    job_block = text[add_job_idx : text.find(")", text.index("uap_sightings_weekly")) + 1]
-    assert 'day_of_week="mon"' in job_block, (
-        f"expected day_of_week='mon' in UAP scheduler block:\n{job_block}"
-    )
@@ -39,6 +39,7 @@ import { useFeedHealth } from '@/hooks/useFeedHealth';
 import { useKeyboardShortcuts } from '@/hooks/useKeyboardShortcuts';
 import KeyboardShortcutsOverlay from '@/components/KeyboardShortcutsOverlay';
 import AlertToast from '@/components/AlertToast';
+import AisUpstreamBanner from '@/components/AisUpstreamBanner';
 import { useAlertToasts } from '@/hooks/useAlertToasts';
 import { useWatchlist } from '@/hooks/useWatchlist';
 import WatchlistWidget from '@/components/WatchlistWidget';
@@ -933,6 +934,11 @@ export default function Dashboard() {
          onFlyTo={handleFlyTo}
        />

+        {/* AIS UPSTREAM OUTAGE BANNER — renders only when AIS is configured
+            but the WebSocket upstream is unreachable. Tells users the empty
+            ocean isn't their fault. */}
+        <AisUpstreamBanner />
+
        {/* ONBOARDING MODAL */}
        {showOnboarding && (
          <OnboardingModal
@@ -0,0 +1,61 @@
+/**
+ * AisUpstreamBanner — visible notice that AIS ship data is unavailable
+ * because the upstream provider (AISStream) is offline.
+ *
+ * Renders nothing when AIS is healthy or when AIS isn't configured at all.
+ * Mounted at the app shell level so users see it before they wonder why
+ * the ocean looks empty.
+ */
+import { useState } from 'react';
+import { useAisUpstreamHealth } from '@/hooks/useAisUpstreamHealth';
+
+export function AisUpstreamBanner() {
+  const health = useAisUpstreamHealth();
+  const [dismissed, setDismissed] = useState(false);
+
+  if (!health || !health.aisEnabled || health.connected || dismissed) {
+    return null;
+  }
+
+  // Format the staleness for the operator. ``null`` means we never received
+  // anything since startup; otherwise show minutes if > 60s.
+  let stalenessLabel = 'never received';
+  if (health.lastMsgAgeSeconds != null) {
+    const minutes = Math.floor(health.lastMsgAgeSeconds / 60);
+    if (minutes >= 1) {
+      stalenessLabel = `last update ${minutes} min ago`;
+    } else {
+      stalenessLabel = `last update ${health.lastMsgAgeSeconds}s ago`;
+    }
+  }
+
+  return (
+    <div
+      role="status"
+      aria-live="polite"
+      className="pointer-events-auto fixed top-3 left-1/2 z-[100] -translate-x-1/2 max-w-[640px] rounded-md border border-amber-500/60 bg-amber-900/85 px-4 py-2 text-sm text-amber-50 shadow-lg backdrop-blur"
+    >
+      <div className="flex items-start gap-3">
+        <span aria-hidden className="mt-0.5 text-amber-300">⚠</span>
+        <div className="flex-1">
+          <div className="font-semibold">Ship data temporarily unavailable</div>
+          <div className="text-xs opacity-90">
+            AISStream upstream is offline ({stalenessLabel}). The map will
+            refill once their service comes back online — nothing is wrong
+            with your install.
+          </div>
+        </div>
+        <button
+          type="button"
+          onClick={() => setDismissed(true)}
+          aria-label="Dismiss"
+          className="text-amber-200 hover:text-white"
+        >
+          ✕
+        </button>
+      </div>
+    </div>
+  );
+}
+
+export default AisUpstreamBanner;
@@ -0,0 +1,85 @@
+/**
+ * useAisUpstreamHealth — polls /api/health and exposes AIS proxy connectivity.
+ *
+ * Background: AISStream's WebSocket server went fully offline 2026-05-23 (TCP
+ * timeouts at stream.aisstream.io). The backend kept reconnecting in a tight
+ * loop and the ships layer silently went empty. Users had no signal that the
+ * problem was upstream, not their config. This hook surfaces the state so a
+ * banner can explain "AIS upstream is offline" instead of letting users
+ * wonder.
+ *
+ * The poll interval is intentionally relaxed (30s) — this is a low-urgency UX
+ * signal, not a real-time data feed. Backend already escalates top_status to
+ * "degraded" when AIS is configured-but-disconnected.
+ */
+import { useEffect, useRef, useState } from 'react';
+import { API_BASE } from '@/lib/api';
+
+export interface AisUpstreamHealth {
+  /** True when we've received a vessel message in the last ~60s. */
+  connected: boolean;
+  /** Seconds since the last vessel message; null when we've never seen one. */
+  lastMsgAgeSeconds: number | null;
+  /**
+   * True when the SPKI-pinned fallback is in effect (issue #258).
+   * Data still flows in this mode — it's a separate, less urgent signal
+   * than ``connected``.
+   */
+  degradedTls: boolean;
+  /** How many times the proxy has been spawned (sustained growth without
+   *  ``connected`` means upstream is dead and we're respawning in a loop). */
+  proxySpawnCount: number;
+  /** Whether the operator has configured an API key. When false, the banner
+   *  shouldn't fire because "AIS is off" is the intended state. The backend
+   *  signals this via the ``connected`` flag being false AND no msg ever
+   *  seen — we approximate it by requiring at least one spawn before
+   *  declaring an outage. */
+  aisEnabled: boolean;
+}
+
+const POLL_INTERVAL_MS = 30_000;
+
+export function useAisUpstreamHealth(): AisUpstreamHealth | null {
+  const [health, setHealth] = useState<AisUpstreamHealth | null>(null);
+  const cancelledRef = useRef(false);
+
+  useEffect(() => {
+    cancelledRef.current = false;
+
+    const fetchHealth = async () => {
+      try {
+        const res = await fetch(`${API_BASE}/api/health`, { cache: 'no-store' });
+        if (!res.ok) return;
+        const body = await res.json();
+        if (cancelledRef.current) return;
+        const proxy = body?.ais_proxy ?? {};
+        // ``proxy_spawn_count > 0`` is the cheapest "AIS is enabled" check:
+        // if the backend never spawned the proxy (no API key, opt-out env)
+        // we shouldn't ever show the outage banner. Once the proxy has
+        // spawned at least once we know the operator wants AIS data.
+        const spawns = Number(proxy.proxy_spawn_count ?? 0);
+        setHealth({
+          connected: Boolean(proxy.connected),
+          lastMsgAgeSeconds:
+            proxy.last_msg_age_seconds == null
+              ? null
+              : Number(proxy.last_msg_age_seconds),
+          degradedTls: Boolean(proxy.degraded_tls),
+          proxySpawnCount: spawns,
+          aisEnabled: spawns > 0,
+        });
+      } catch {
+        // Backend unreachable — separate problem. Banner not relevant.
+      }
+    };
+
+    void fetchHealth();
+    const interval = setInterval(() => void fetchHealth(), POLL_INTERVAL_MS);
+    return () => {
+      cancelledRef.current = true;
+      clearInterval(interval);
+    };
+  }, []);
+
+  return health;
+}