mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-03 21:08:13 +02:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ebbf42fb3c |
-29
@@ -261,32 +261,3 @@ backend/data/wormhole_stdout.log
|
||||
|
||||
# Compressed snapshot archives (can be 100 MB+)
|
||||
*.json.gz
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# AI assistant / coding-agent scratch
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Per-tool config + scratch directories. These are private to whichever
|
||||
# coding agent the operator happens to be using and have no business in
|
||||
# the repo. If a tool's instructions need to be canonical for the project,
|
||||
# we'll put them in docs/ explicitly — not let the agent dump them at the
|
||||
# repo root.
|
||||
|
||||
# OpenAI Codex CLI
|
||||
.codex/
|
||||
.codex-app-schema/
|
||||
.codex-app-ts/
|
||||
|
||||
# Per-agent instruction files dropped at repo root by various tools.
|
||||
# These are operator-side preferences, not part of the project contract.
|
||||
AGENTS.md
|
||||
GEMINI.md
|
||||
CLAUDE.md
|
||||
.github/copilot-instructions.md
|
||||
|
||||
# Stale AI-generated test file that referenced fields that don't exist in
|
||||
# the current `_parse_carrier_positions_from_news` implementation. Kept
|
||||
# ignored so it doesn't accidentally get committed if it shows up again
|
||||
# from a tool that's working off an out-of-date understanding of the
|
||||
# module. If a real test for that function is needed, write it under a
|
||||
# meaningful name in tests/test_carrier_tracker_quality.py.
|
||||
backend/tests/test_carrier_tracker_region_centers.py
|
||||
|
||||
+10
-105
@@ -98,88 +98,6 @@ def _current_etag(prefix: str = "") -> str:
|
||||
return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}"
|
||||
|
||||
|
||||
# ── Issue #288: viewport-aware payloads ─────────────────────────────────────
|
||||
# Heavy, density-driven, time-sensitive layers that benefit from bbox
|
||||
# filtering. Light reference layers (datacenters, military_bases,
|
||||
# power_plants, satellites, weather, news, etc.) are intentionally NOT
|
||||
# in these sets — they ship world-scale even when bounds are supplied so
|
||||
# panning never reveals an "empty world" of static infrastructure.
|
||||
#
|
||||
# When the caller does NOT pass s/w/n/e, none of this runs and the response
|
||||
# is byte-for-byte identical to the pre-#288 behavior.
|
||||
_FAST_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"commercial_flights",
|
||||
"military_flights",
|
||||
"private_flights",
|
||||
"private_jets",
|
||||
"tracked_flights",
|
||||
"ships",
|
||||
"cctv",
|
||||
"uavs",
|
||||
"liveuamap",
|
||||
"gps_jamming",
|
||||
"sigint",
|
||||
"trains",
|
||||
)
|
||||
_SLOW_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"gdelt",
|
||||
"firms_fires",
|
||||
"kiwisdr",
|
||||
"scanners",
|
||||
"psk_reporter",
|
||||
)
|
||||
|
||||
|
||||
def _has_full_bbox(s, w, n, e) -> bool:
|
||||
return None not in (s, w, n, e)
|
||||
|
||||
|
||||
def _bbox_etag_suffix(s, w, n, e) -> str:
    """Build the ETag fragment for a viewport, quantized outward to whole degrees.

    South/west are floored and north/east are ceiled, so every viewport that
    falls inside the same 1-degree envelope shares one ETag and small pans do
    not invalidate the client's cached response.

    Returns ``""`` (i.e. "no bbox" for caching purposes) when:
      * any bound is missing,
      * any bound cannot be converted to a finite number, or
      * the window is world-scale (longitude span >= 300 or latitude span
        >= 120 as reported by ``_bbox_spans``), so zoomed-out clients share
        the global ETag.

    Otherwise returns ``"|bbox=<s>,<w>,<n>,<e>"`` with integer bounds.
    """
    if not _has_full_bbox(s, w, n, e):
        return ""
    try:
        ss = math.floor(float(s))
        ww = math.floor(float(w))
        nn = math.ceil(float(n))
        ee = math.ceil(float(e))
    except (TypeError, ValueError, OverflowError):
        # ValueError covers NaN and unparsable strings; OverflowError is what
        # floor/ceil raise for +/-infinity. Both mean "not a usable bound".
        return ""
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    if lng_span >= 300 or lat_span >= 120:
        return ""
    return f"|bbox={ss},{ww},{nn},{ee}"
|
||||
|
||||
|
||||
def _apply_bbox_to_payload(payload: dict, heavy_keys: tuple[str, ...],
                           s: float, w: float, n: float, e: float) -> dict:
    """Restrict the ``heavy_keys`` collections of ``payload`` to a viewport.

    ``payload`` is mutated in place and also returned. Only keys whose value
    is a non-empty ``list`` are touched; each such list is replaced by
    ``_bbox_filter(items, s, w, n, e)``. World-scale windows (longitude span
    >= 300 or latitude span >= 120, per ``_bbox_spans``) skip filtering
    entirely, so the payload keeps its unfiltered shape.

    NOTE(review): per the original notes, ``_bbox_filter`` pads the box by 20%,
    handles antimeridian crossings and passes through items lacking lat/lng.
    That helper is defined elsewhere and was not verified here.
    """
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    is_world_scale = lng_span >= 300 or lat_span >= 120
    if not is_world_scale:
        for layer in heavy_keys:
            rows = payload.get(layer)
            if isinstance(rows, list) and rows:
                payload[layer] = _bbox_filter(rows, s, w, n, e)
    return payload
|
||||
|
||||
|
||||
def _json_safe(value):
|
||||
if isinstance(value, float):
|
||||
return value if math.isfinite(value) else None
|
||||
@@ -561,14 +479,13 @@ async def bootstrap_critical(request: Request):
|
||||
@limiter.limit("120/minute")
|
||||
async def live_data_fast(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (vessels, aircraft, sigint, CCTV, …) are filtered to this viewport with 20% padding. Static reference layers (satellites, etc.) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
initial: bool = Query(False, description="Return a capped startup payload for first paint"),
|
||||
):
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix=("fast|initial|" if initial else "fast|full|") + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
etag = _current_etag(prefix="fast|initial|" if initial else "fast|full|")
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -608,11 +525,6 @@ async def live_data_fast(
|
||||
payload = _cap_fast_startup_payload(payload)
|
||||
else:
|
||||
payload = _cap_fast_dashboard_payload(payload)
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Without bounds, behaviour is byte-for-byte identical
|
||||
# to the pre-#288 implementation.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
|
||||
@@ -621,13 +533,12 @@ async def live_data_fast(
|
||||
@limiter.limit("60/minute")
|
||||
async def live_data_slow(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (gdelt, firms_fires, kiwisdr, scanners, psk_reporter) are filtered to this viewport with 20% padding. Static reference layers (datacenters, military bases, power plants, weather, news, …) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
):
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix="slow|full|" + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
etag = _current_etag(prefix="slow|full|")
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -681,12 +592,6 @@ async def live_data_slow(
|
||||
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
||||
"freshness": freshness,
|
||||
}
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Static reference layers (datacenters, military bases,
|
||||
# power_plants, etc.) deliberately stay world-scale so panning never
|
||||
# hides the infrastructure overlay the operator already has on screen.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
media_type="application/json",
|
||||
|
||||
@@ -627,56 +627,20 @@ def update_carrier_positions() -> None:
|
||||
_carrier_positions.update(positions)
|
||||
_last_update = datetime.now(timezone.utc)
|
||||
logger.info(
|
||||
"Carrier tracker: %d carriers loaded from cache (USNI + GDELT enrichment starting...)",
|
||||
"Carrier tracker: %d carriers loaded from cache (GDELT enrichment starting...)",
|
||||
len(positions),
|
||||
)
|
||||
|
||||
# --- Phase 2: USNI Fleet & Marine Tracker (PRIMARY source) ---
|
||||
#
|
||||
# USNI publishes a weekly editorial tracker with each carrier's
|
||||
# actual operating area, parsed from explicit prose like
|
||||
# "The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
# These positions are tagged ``position_confidence: "recent"`` because
|
||||
# they reflect actual reporting, not headline-keyword centroids.
|
||||
# USNI updates are preferred over GDELT — they're authoritative on
|
||||
# US Navy positions where GDELT is just article-title text mining.
|
||||
try:
|
||||
from services.fetchers.usni_fleet_tracker import (
|
||||
fetch_latest_fleet_tracker_positions,
|
||||
)
|
||||
usni_positions = fetch_latest_fleet_tracker_positions()
|
||||
for hull, pos in usni_positions.items():
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier USNI update: %s → %s",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
pos.get("desc", ""),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("USNI fleet-tracker fetch failed: %s", e)
|
||||
|
||||
# --- Phase 3: GDELT enrichment (SECONDARY — fills gaps) ---
|
||||
#
|
||||
# Used only to backfill carriers USNI didn't mention this week. The
|
||||
# position is stamped ``approximate`` so the UI knows it's a
|
||||
# headline-centroid match (Issue #245).
|
||||
# --- Phase 2: GDELT enrichment ---
|
||||
try:
|
||||
articles = _fetch_gdelt_carrier_news()
|
||||
news_positions = _parse_carrier_positions_from_news(articles)
|
||||
for hull, pos in news_positions.items():
|
||||
# Only overwrite if the existing entry is NOT a recent USNI
|
||||
# observation. A "recent" USNI position is higher-confidence
|
||||
# than a GDELT headline-centroid match — don't let GDELT
|
||||
# demote a real position to an approximate one.
|
||||
existing = positions.get(hull, {})
|
||||
existing_conf = _compute_position_confidence(existing)
|
||||
if existing_conf == "recent":
|
||||
continue
|
||||
# Always overwrite — newest GDELT mention wins. The previous
|
||||
# entry's position is preserved in git history and the next
|
||||
# cycle either confirms or replaces it.
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier OSINT: updated %s from GDELT news",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
)
|
||||
logger.info("Carrier OSINT: updated %s from news", CARRIER_REGISTRY[hull]["name"])
|
||||
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
||||
logger.warning("GDELT carrier fetch failed: %s", e)
|
||||
|
||||
|
||||
@@ -1,457 +0,0 @@
|
||||
"""USNI News Fleet & Marine Tracker — authoritative weekly carrier
|
||||
position publication.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
The previous carrier_tracker pipeline relied on GDELT headline matching
|
||||
(``api.gdeltproject.org``) to derive positions from text like "USS Ford
|
||||
in the Mediterranean" → centroid of "Mediterranean Sea". That was
|
||||
- low-precision (audit issue #245 — false precision from text mentions),
|
||||
- unreliable (``api.gdeltproject.org`` is sometimes unreachable from
|
||||
certain network paths, including Docker Desktop on some Windows hosts).
|
||||
|
||||
USNI publishes a weekly tracker that explicitly lists where every U.S.
|
||||
carrier is operating. The article body uses extremely consistent phrasing:
|
||||
|
||||
"The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
"Aircraft carrier USS George Washington (CVN-73) is in port in
|
||||
Yokosuka, Japan."
|
||||
"USS Dwight D. Eisenhower (CVN-69) sails down the Elizabeth River"
|
||||
|
||||
Those are deterministic to parse. This module:
|
||||
|
||||
1. Pulls the WordPress RSS feeds (both site-wide and category) — the
|
||||
site-wide feed often has fresher posts before the category feed
|
||||
catches up, so we union them.
|
||||
2. Picks the most recent post by parsed ``pubDate``.
|
||||
3. For each carrier in the registry, scans the article body for a
|
||||
"is operating in / is in port in / departed from" pattern near
|
||||
the carrier's name.
|
||||
4. Maps the extracted region phrase to coordinates via the carrier
|
||||
tracker's existing REGION_COORDS.
|
||||
|
||||
The result is a ``{hull: position_entry}`` dict that the carrier tracker
|
||||
consumes as a high-confidence source — ``position_confidence: "recent"``
|
||||
with ``position_source_at`` set to the article's actual publication
|
||||
timestamp (not ``now()``).
|
||||
|
||||
Politeness
|
||||
----------
|
||||
We send the per-install operator handle via ``outbound_user_agent``
|
||||
(Round 7a) so USNI can rate-limit / contact the specific install if
|
||||
needed. Article-body pages return 403 to non-browser UAs (Cloudflare),
|
||||
but WordPress RSS feeds are open and serve the full article in
|
||||
``<content:encoded>`` — that's the supported path for aggregators and
|
||||
the one we use. We do not spoof browser headers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import Iterable
|
||||
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_RSS_URLS: tuple[str, ...] = (
|
||||
# Site-wide feed often has the freshest posts before the category
|
||||
# feed catches up. We try this first.
|
||||
"https://news.usni.org/feed",
|
||||
# Category feed has older fleet trackers for backfill.
|
||||
"https://news.usni.org/category/fleet-tracker/feed",
|
||||
)
|
||||
|
||||
_RSS_NS = {"content": "http://purl.org/rss/1.0/modules/content/"}
|
||||
|
||||
_FLEET_TRACKER_TITLE_RE = re.compile(
|
||||
r"fleet\s+and\s+marine\s+tracker", re.IGNORECASE
|
||||
)
|
||||
|
||||
_TAG_STRIP_RE = re.compile(r"<[^>]+>")
|
||||
_WHITESPACE_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
|
||||
text = _TAG_STRIP_RE.sub(" ", html or "")
|
||||
return _WHITESPACE_RE.sub(" ", text).strip()
|
||||
|
||||
|
||||
def _request_headers() -> dict[str, str]:
    """Build the HTTP headers sent with every USNI feed request.

    The User-Agent comes from ``outbound_user_agent("usni-fleet-tracker")``
    rather than a spoofed browser string. ``Accept`` prefers RSS/XML with a
    low-priority wildcard fallback, and ``Referer`` points at the
    fleet-tracker category index page.
    """
    headers: dict[str, str] = {}
    headers["User-Agent"] = outbound_user_agent("usni-fleet-tracker")
    headers["Accept"] = "application/rss+xml, application/xml;q=0.9, */*;q=0.1"
    headers["Accept-Language"] = "en-US,en;q=0.5"
    headers["Referer"] = "https://news.usni.org/category/fleet-tracker"
    return headers
|
||||
|
||||
|
||||
def _parse_pubdate(raw: str) -> datetime | None:
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
dt = parsedate_to_datetime(raw)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _iter_fleet_tracker_items(rss_urls: Iterable[str]) -> list[dict]:
    """Collect fleet-tracker posts from every feed in ``rss_urls``.

    Each feed is fetched with ``fetch_with_curl`` and parsed as XML. A feed
    that raises, answers with a non-200 status or empty body, or fails to
    parse is logged and skipped; the remaining feeds are still processed.

    Only ``<item>`` elements whose title matches ``_FLEET_TRACKER_TITLE_RE``
    are kept. Items are de-duplicated by link: the first occurrence wins and
    later duplicates (from the same or another feed) are ignored.

    Returns a list of dicts with keys ``title``, ``link``, ``pub_date``
    (a ``datetime``, or ``None`` when the date could not be parsed) and
    ``body`` (tag-stripped text taken from ``content:encoded``, falling back
    to ``description``).
    """
    collected: dict[str, dict] = {}
    for feed_url in rss_urls:
        try:
            resp = fetch_with_curl(feed_url, timeout=15, headers=_request_headers())
        except Exception as exc:
            logger.debug("USNI RSS %s exception: %s", feed_url, exc)
            continue

        if not (resp and resp.status_code == 200 and resp.text):
            logger.debug(
                "USNI RSS %s returned status=%s body=%d",
                feed_url,
                getattr(resp, "status_code", "?"),
                len(getattr(resp, "text", "") or ""),
            )
            continue

        try:
            feed_root = ET.fromstring(resp.text)
        except ET.ParseError as exc:
            logger.warning("USNI RSS parse error from %s: %s", feed_url, exc)
            continue

        for entry in feed_root.findall(".//item"):
            headline = (entry.findtext("title") or "").strip()
            if not _FLEET_TRACKER_TITLE_RE.search(headline):
                continue

            permalink = (entry.findtext("link") or "").strip()
            if not permalink or permalink in collected:
                continue

            published = _parse_pubdate(entry.findtext("pubDate") or "")

            # Prefer the full article body; fall back to the summary.
            raw_body = entry.findtext("content:encoded", default="", namespaces=_RSS_NS)
            if not raw_body:
                raw_body = entry.findtext("description", default="") or ""

            collected[permalink] = {
                "title": headline,
                "link": permalink,
                "pub_date": published,
                "body": _strip_html(raw_body),
            }
    return list(collected.values())
|
||||
|
||||
|
||||
# Map USNI region phrases to keys in carrier_tracker.REGION_COORDS.
|
||||
# The carrier_tracker table already covers most named bodies of water and
|
||||
# major ports — we just need to teach this module to RECOGNIZE the
|
||||
# specific phrases USNI's editorial style uses, which sometimes spell
|
||||
# the same body of water differently.
|
||||
_USNI_REGION_ALIASES: tuple[tuple[str, str], ...] = (
|
||||
# USNI phrase (lowercase) -> REGION_COORDS key
|
||||
("eastern mediterranean", "eastern mediterranean"),
|
||||
("western mediterranean", "western mediterranean"),
|
||||
("mediterranean sea", "mediterranean"),
|
||||
("the mediterranean", "mediterranean"),
|
||||
("red sea", "red sea"),
|
||||
("arabian sea area of responsibility", "arabian sea"),
|
||||
("north arabian sea", "north arabian sea"),
|
||||
("arabian sea", "arabian sea"),
|
||||
("persian gulf", "persian gulf"),
|
||||
("gulf of oman", "gulf of oman"),
|
||||
("strait of hormuz", "strait of hormuz"),
|
||||
("south china sea", "south china sea"),
|
||||
("east china sea", "east china sea"),
|
||||
("philippine sea", "philippine sea"),
|
||||
("sea of japan", "sea of japan"),
|
||||
("taiwan strait", "taiwan strait"),
|
||||
("western pacific", "western pacific"),
|
||||
("pacific ocean", "pacific"),
|
||||
("indian ocean", "indian ocean"),
|
||||
("north atlantic", "north atlantic"),
|
||||
("western atlantic", "atlantic"),
|
||||
("eastern atlantic", "atlantic"),
|
||||
("atlantic ocean", "atlantic"),
|
||||
("gulf of aden", "gulf of aden"),
|
||||
("horn of africa", "horn of africa"),
|
||||
("bab el-mandeb", "bab el-mandeb"),
|
||||
("suez canal", "suez canal"),
|
||||
("baltic sea", "baltic sea"),
|
||||
("north sea", "north sea"),
|
||||
("black sea", "black sea"),
|
||||
("south atlantic", "south atlantic"),
|
||||
("coral sea", "coral sea"),
|
||||
("gulf of mexico", "gulf of mexico"),
|
||||
("caribbean sea", "caribbean"),
|
||||
("caribbean", "caribbean"),
|
||||
# Specific ports
|
||||
("naval station norfolk", "norfolk"),
|
||||
("norfolk naval shipyard", "newport news"),
|
||||
("newport news shipbuilding", "newport news"),
|
||||
("newport news", "newport news"),
|
||||
# USNI tags Norfolk mentions with state suffix; match both.
|
||||
("norfolk, va", "norfolk"),
|
||||
("norfolk", "norfolk"),
|
||||
("naval station everett", "puget sound"),
|
||||
("naval base kitsap", "bremerton"),
|
||||
("bremerton", "bremerton"),
|
||||
("puget sound", "puget sound"),
|
||||
("naval base san diego", "san diego"),
|
||||
("san diego, calif", "san diego"),
|
||||
("san diego", "san diego"),
|
||||
("yokosuka, japan", "yokosuka"),
|
||||
("yokosuka", "yokosuka"),
|
||||
("pearl harbor", "pearl harbor"),
|
||||
("apra harbor, guam", "guam"),
|
||||
("guam", "guam"),
|
||||
("bahrain", "bahrain"),
|
||||
("naval station rota", "rota"),
|
||||
("rota, spain", "rota"),
|
||||
("naples, italy", "naples"),
|
||||
# Fleets / AORs
|
||||
("5th fleet", "5th fleet"),
|
||||
("6th fleet", "6th fleet"),
|
||||
("7th fleet", "7th fleet"),
|
||||
("3rd fleet", "3rd fleet"),
|
||||
("2nd fleet", "2nd fleet"),
|
||||
("centcom", "centcom"),
|
||||
("indo-pacific command", "indopacom"),
|
||||
("eucom", "eucom"),
|
||||
("southcom", "southcom"),
|
||||
)
|
||||
|
||||
|
||||
def _resolve_region_phrase(phrase: str) -> tuple[str, str] | None:
|
||||
"""Map a USNI region phrase to a ``(canonical_key, display)`` tuple,
|
||||
or ``None`` if we don't recognize it.
|
||||
|
||||
``canonical_key`` is what ``carrier_tracker.REGION_COORDS`` keys on.
|
||||
``display`` is the phrase we'll show in the dossier description.
|
||||
"""
|
||||
p = (phrase or "").lower().strip()
|
||||
if not p:
|
||||
return None
|
||||
for usni_phrase, canonical in _USNI_REGION_ALIASES:
|
||||
if usni_phrase in p:
|
||||
return canonical, usni_phrase
|
||||
return None
|
||||
|
||||
|
||||
# Operating-verb phrases USNI uses, with a capture group for the region
|
||||
# phrase that immediately follows. Each pattern is designed to swallow
|
||||
# the optional editorial filler that often appears between verb and
|
||||
# location (e.g. "returned Friday to Norfolk" — "Friday" goes in the
|
||||
# filler; "Norfolk" is the location).
|
||||
#
|
||||
# Order matters: most-specific patterns first, so e.g. "is in port in"
|
||||
# wins over the generic "is".
|
||||
_DAY_FILLER = r"(?:[A-Z][a-z]+(?:day)?,?\s+)?" # optional "Friday" / "Monday" / etc.
|
||||
_LOC_CAPTURE = r"([A-Za-z][A-Za-z0-9\s,\.\-']{2,80})"
|
||||
|
||||
_OPERATING_PATTERNS: tuple[re.Pattern, ...] = (
|
||||
# "is operating in [the] {REGION}" / "is also operating in [the] {REGION}"
|
||||
re.compile(r"\bis\s+(?:also\s+|now\s+)?operating\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is conducting <stuff> in [the] {REGION}"
|
||||
re.compile(r"\bis\s+conducting\s+[A-Za-z0-9\-\s]{2,40}\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port in {LOCATION}"
|
||||
re.compile(r"\bis\s+in\s+port\s+in\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port" (no location — degenerate, use carrier's homeport via separate path)
|
||||
# → not captured here; falls through to homeport
|
||||
# "is underway in [the] {REGION}"
|
||||
re.compile(r"\bis\s+underway\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is deployed to [the] {REGION}" / "deployed in"
|
||||
re.compile(r"\bis\s+deployed\s+(?:to|in)\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "returned [Day] to {LOCATION}" / "returned [Day] from {REGION}"
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"to\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"from\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "arrived [Day] in/at {LOCATION}"
|
||||
re.compile(r"\barrived\s+" + _DAY_FILLER + r"(?:in|at)\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "departed [Day] from {LOCATION}"
|
||||
re.compile(r"\bdeparted\s+" + _DAY_FILLER + r"(?:from\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "transiting [the] {REGION}" / "sailing through [the] {REGION}"
|
||||
re.compile(r"\btransiting\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\bsailing\s+through\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is homeported at {LOCATION}"
|
||||
re.compile(r"\bis\s+homeported\s+at\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
)
|
||||
|
||||
|
||||
def _extract_region_for_carrier(
|
||||
body: str,
|
||||
carrier_names: list[str],
|
||||
hull_code: str,
|
||||
) -> str | None:
|
||||
"""Return the best-guess region phrase for one carrier from the
|
||||
article body, or None if no confident match.
|
||||
|
||||
Algorithm:
|
||||
1. Find every mention of the carrier (any name variant or the hull
|
||||
code) in the body.
|
||||
2. For each mention, look in the ~300-char window AFTER it for any
|
||||
of the operating-verb patterns.
|
||||
3. Return the first hit. If a more-confident match later turns up
|
||||
(e.g. "is operating in the X" beats "is homeported at Y"), the
|
||||
first one in document order still wins — USNI's structure puts
|
||||
the position-update sentence near the top of each carrier's
|
||||
section, and the homeport mention later.
|
||||
"""
|
||||
# Build a master mention regex covering every name variant + the hull.
|
||||
candidates: list[str] = []
|
||||
for name in carrier_names:
|
||||
if name and len(name) >= 4:
|
||||
candidates.append(re.escape(name))
|
||||
if hull_code:
|
||||
candidates.append(re.escape(hull_code))
|
||||
if not candidates:
|
||||
return None
|
||||
mention_re = re.compile(r"\b(?:" + "|".join(candidates) + r")\b", re.IGNORECASE)
|
||||
|
||||
window_chars = 320
|
||||
seen_phrases: list[str] = []
|
||||
for mention in mention_re.finditer(body):
|
||||
end = mention.end()
|
||||
window = body[end : end + window_chars]
|
||||
# Cut window at the next sentence break for tighter context.
|
||||
# (We use the LAST period within the window so "Norfolk, Va." isn't
|
||||
# confused for a sentence end — USNI uses ", Va." prolifically.)
|
||||
# Sentence break candidates: ". " followed by uppercase OR newline.
|
||||
sent_break = re.search(r"[\.!?]\s+[A-Z]", window)
|
||||
if sent_break:
|
||||
window = window[: sent_break.start() + 1]
|
||||
# Try patterns in priority order.
|
||||
for pat in _OPERATING_PATTERNS:
|
||||
m = pat.search(window)
|
||||
if not m:
|
||||
continue
|
||||
phrase = m.group(1).strip().rstrip(",.;: ")
|
||||
if not phrase:
|
||||
continue
|
||||
# Strip trailing editorial filler — USNI often writes
|
||||
# "Norfolk, Va., according to ship spotters" or
|
||||
# "Yokosuka, Japan, according to..."
|
||||
phrase = re.split(
|
||||
r",\s+(?:according|as of|for|while|where|in support|in the)",
|
||||
phrase,
|
||||
maxsplit=1,
|
||||
)[0].strip()
|
||||
seen_phrases.append(phrase)
|
||||
return phrase
|
||||
return seen_phrases[0] if seen_phrases else None
|
||||
|
||||
|
||||
def fetch_latest_fleet_tracker_positions(
    carrier_registry: dict | None = None,
    region_coords: dict | None = None,
) -> dict[str, dict]:
    """Return ``{hull: position_entry}`` parsed from the newest fleet-tracker post.

    Entries look like::

        {
            "lat": 18.0, "lng": 39.5, "heading": 0,
            "desc": "Red Sea (USNI May 18, 2026)",
            "source": "USNI News Fleet & Marine Tracker (May 18, 2026)",
            "source_url": "https://news.usni.org/2026/05/18/...",
            "position_source_at": "2026-05-18T18:58:44+00:00",
            "position_confidence": "recent",
        }

    Args:
        carrier_registry: ``{hull: {"name": ...}}`` table. ``None`` means
            "use ``services.carrier_tracker.CARRIER_REGISTRY``". An explicitly
            passed table is always honoured, even when empty.
        region_coords: ``{canonical_region_key: (lat, lng)}`` table. ``None``
            means "use ``services.carrier_tracker.REGION_COORDS``".

    Returns:
        A dict keyed by hull. A carrier is omitted when no location phrase is
        found for it, the phrase maps to no known region, or the region has
        no coordinates. Returns ``{}`` when no feed item is available or the
        newest item has an empty body.

    Only the single most recent item (by parsed publication date; undated
    items sort last) is examined.
    """
    if carrier_registry is None or region_coords is None:
        # Imported lazily, and only when a default is actually needed.
        from services.carrier_tracker import CARRIER_REGISTRY, REGION_COORDS

        # ``is None`` rather than ``or``: a caller-supplied empty table must
        # not be silently swapped for the production one.
        if carrier_registry is None:
            carrier_registry = CARRIER_REGISTRY
        if region_coords is None:
            region_coords = REGION_COORDS

    items = _iter_fleet_tracker_items(_RSS_URLS)
    if not items:
        logger.warning("USNI fleet-tracker: no parseable RSS items")
        return {}

    # Newest by publication date; items with no parseable date rank as the
    # epoch. On ties the earliest-collected item wins.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    latest = max(items, key=lambda it: it["pub_date"] or epoch)

    pub_dt: datetime | None = latest["pub_date"]
    pub_iso = pub_dt.isoformat() if pub_dt else ""
    pub_human = pub_dt.strftime("%b %d, %Y") if pub_dt else "unknown date"

    body = latest["body"]
    if not body:
        logger.warning("USNI fleet-tracker: latest item has empty body")
        return {}

    positions: dict[str, dict] = {}
    for hull, info in carrier_registry.items():
        # Name variants to look for, most specific first.
        full_name = info["name"]                         # "USS Gerald R. Ford (CVN-78)"
        without_hull = full_name.split("(")[0].strip()   # "USS Gerald R. Ford"
        # Drop the "USS " prefix only when it is really there, instead of
        # blindly slicing off the first four characters.
        if without_hull.startswith("USS "):
            ship_only = without_hull[4:]                 # "Gerald R. Ford"
        else:
            ship_only = without_hull
        name_words = without_hull.split()
        last_word = name_words[-1] if name_words else ""  # "Ford"

        variants: list[str] = []
        for candidate in (without_hull, ship_only, last_word):
            if candidate and candidate not in variants and len(candidate) >= 4:
                variants.append(candidate)

        phrase = _extract_region_for_carrier(body, variants, hull)
        if not phrase:
            continue
        resolved = _resolve_region_phrase(phrase)
        if not resolved:
            logger.debug(
                "USNI: %s region phrase %r did not match any known region",
                hull, phrase,
            )
            continue
        canonical_key, display_phrase = resolved
        coords = region_coords.get(canonical_key)
        if not coords:
            continue

        positions[hull] = {
            "lat": coords[0],
            "lng": coords[1],
            "heading": 0,
            "desc": f"{display_phrase.title()} (USNI {pub_human})",
            "source": f"USNI News Fleet & Marine Tracker ({pub_human})",
            "source_url": latest["link"],
            "position_source_at": pub_iso,
            "position_confidence": "recent",
        }

    if positions:
        logger.info(
            "USNI fleet-tracker: parsed %d/%d carrier positions from %s",
            len(positions), len(carrier_registry), latest["link"],
        )
    else:
        logger.warning(
            "USNI fleet-tracker: latest article %s yielded zero parseable carriers",
            latest["link"],
        )
    return positions
|
||||
@@ -1,273 +0,0 @@
|
||||
"""Tests for issue #288: viewport bbox filtering on /api/live-data/{fast,slow}.
|
||||
|
||||
Behaviour contract:
|
||||
* Without s/w/n/e params, the response is byte-for-byte identical to the
|
||||
pre-#288 implementation. (No filtering, no extra fields, no ETag change.)
|
||||
* With s/w/n/e supplied, heavy/dense layers are filtered to that viewport
|
||||
with a 20% padding box.
|
||||
* Light reference layers (datacenters, military_bases, power_plants,
|
||||
satellites, news, weather, …) are NEVER filtered, even when bounds are
|
||||
supplied — panning must never reveal an "empty world" of infrastructure.
|
||||
* World-scale bounds (lng_span >= 300 OR lat_span >= 120) short-circuit
|
||||
filtering and share the global ETag.
|
||||
* The ETag includes a 1°-quantized bbox so two viewports never poison each
|
||||
other's 304 cache.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ───────────────────────── /api/live-data/fast ─────────────────────────────
|
||||
|
||||
|
||||
class TestFastBboxFiltering:
    """Viewport-bbox behaviour of ``GET /api/live-data/fast``.

    Every test seeds the in-memory store with the same fixtures and then
    inspects the JSON payload and/or the ``ETag`` header of the response.
    """

    def _seed_fast(self, monkeypatch):
        """Install deterministic fixtures and switch the related layers on.

        The collections treated as "heavy" vs. "light" follow the contract
        spelled out in this test module's docstring; the values are patched
        through ``monkeypatch`` so they are restored after each test.
        """
        from services.fetchers import _store

        fixtures = {
            # Heavy collections: points spread across the globe.
            "commercial_flights": [
                {"lat": -60.0, "lng": -120.0, "id": "f-sw"},  # south Pacific
                {"lat": 35.0, "lng": -75.0, "id": "f-ne"},  # eastern US
                {"lat": 35.0, "lng": 100.0, "id": "f-asia"},  # Asia
            ],
            "ships": [
                {"lat": -60.0, "lng": -120.0, "id": "s-sw"},
                {"lat": 35.0, "lng": -75.0, "id": "s-ne"},
            ],
            "cctv": [{"lat": 35.0, "lng": -75.0, "id": "c-1"}],
            # Sigint is also a heavy collection.
            "sigint": [
                {"source": "meshtastic", "lat": 35.0, "lng": -75.0, "id": "sig-east"},
                {"source": "meshtastic", "lat": 35.0, "lng": 100.0, "id": "sig-asia"},
            ],
            # Light/reference layer: expected to ship unfiltered.
            "satellites": [
                {"lat": -60.0, "lng": -120.0, "id": "sat-sw"},
                {"lat": 35.0, "lng": -75.0, "id": "sat-ne"},
                {"lat": 35.0, "lng": 100.0, "id": "sat-asia"},
            ],
        }
        for store_key, rows in fixtures.items():
            monkeypatch.setitem(_store.latest_data, store_key, rows)

        # Turn every relevant layer on so the response actually contains it.
        enabled_layers = (
            "flights", "ships_military", "ships_cargo", "ships_civilian",
            "ships_passenger", "ships_tracked_yachts", "cctv",
            "sigint_meshtastic", "sigint_aprs", "satellites",
        )
        for layer_name in enabled_layers:
            monkeypatch.setitem(_store.active_layers, layer_name, True)

    def test_no_bbox_returns_world_data(self, client, monkeypatch):
        """Without bounds, heavy and light layers both come back complete."""
        self._seed_fast(monkeypatch)
        resp = client.get("/api/live-data/fast")
        assert resp.status_code == 200
        payload = resp.json()
        # Heavy fixtures are untouched.
        assert len(payload["commercial_flights"]) == 3
        assert len(payload["ships"]) == 2
        assert len(payload["sigint"]) == 2
        # So is the light layer.
        assert len(payload["satellites"]) == 3

    def test_bbox_filters_heavy_layers(self, client, monkeypatch):
        """A regional box keeps only the heavy fixtures located inside it."""
        self._seed_fast(monkeypatch)
        # The box is centred on the eastern-US fixture (lat 35, lng -75),
        # ±5° on each axis before the server applies its own padding.
        resp = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        assert resp.status_code == 200
        payload = resp.json()

        def ids_of(key):
            return {row["id"] for row in payload[key]}

        # Only the eastern-US entries should remain in each heavy layer.
        assert ids_of("commercial_flights") == {"f-ne"}
        assert ids_of("ships") == {"s-ne"}
        assert ids_of("cctv") == {"c-1"}
        assert ids_of("sigint") == {"sig-east"}

    def test_bbox_does_not_filter_light_layers(self, client, monkeypatch):
        """Satellites (a reference layer) ignore the supplied bounds."""
        self._seed_fast(monkeypatch)
        resp = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        assert resp.status_code == 200
        payload = resp.json()
        assert len(payload["satellites"]) == 3

    def test_world_scale_bbox_skips_filtering(self, client, monkeypatch):
        """A 360°-wide box behaves exactly like a request without bounds."""
        self._seed_fast(monkeypatch)
        resp = client.get("/api/live-data/fast?s=-90&w=-180&n=90&e=180")
        assert resp.status_code == 200
        payload = resp.json()
        assert len(payload["commercial_flights"]) == 3
        assert len(payload["ships"]) == 2

    def test_partial_bbox_is_treated_as_no_bbox(self, client, monkeypatch):
        """Three of the four bounds are not enough to enable filtering."""
        self._seed_fast(monkeypatch)
        resp = client.get("/api/live-data/fast?s=30&w=-80&n=40")
        assert resp.status_code == 200
        payload = resp.json()
        assert len(payload["commercial_flights"]) == 3

    def test_etag_changes_with_bbox(self, client, monkeypatch):
        """World and regional responses must carry different ETags."""
        self._seed_fast(monkeypatch)
        world_resp = client.get("/api/live-data/fast")
        regional_resp = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        assert world_resp.status_code == 200
        assert regional_resp.status_code == 200
        world_tag = world_resp.headers.get("etag")
        regional_tag = regional_resp.headers.get("etag")
        assert world_tag and regional_tag
        assert world_tag != regional_tag, (
            "ETag must differ between world and regional bbox to prevent "
            "304 cache poisoning across viewports"
        )

    def test_etag_stable_for_subdegree_pan(self, client, monkeypatch):
        """Bounds that differ by less than 1° share one quantized ETag."""
        self._seed_fast(monkeypatch)
        first = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        nudged = client.get("/api/live-data/fast?s=30.3&w=-79.8&n=39.7&e=-70.4")
        assert first.headers.get("etag") == nudged.headers.get("etag")

    def test_if_none_match_returns_304_for_same_bbox(self, client, monkeypatch):
        """Replaying the ETag for the same box yields ``304 Not Modified``."""
        self._seed_fast(monkeypatch)
        initial = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        tag = initial.headers.get("etag")
        revalidated = client.get(
            "/api/live-data/fast?s=30&w=-80&n=40&e=-70",
            headers={"If-None-Match": tag},
        )
        assert revalidated.status_code == 304
|
||||
|
||||
|
||||
# ───────────────────────── /api/live-data/slow ─────────────────────────────
|
||||
|
||||
|
||||
class TestSlowBboxFiltering:
    """Viewport-bbox behaviour of ``GET /api/live-data/slow``."""

    def _seed_slow(self, monkeypatch):
        """Install deterministic slow-tier fixtures and enable their layers."""
        from services.fetchers import _store

        fixtures = {
            # Heavy collections.
            "gdelt": [
                {"lat": 35.0, "lng": -75.0, "id": "g-east"},
                {"lat": 35.0, "lng": 100.0, "id": "g-asia"},
            ],
            "firms_fires": [
                {"lat": 35.0, "lng": -75.0, "id": "fire-east"},
                {"lat": -10.0, "lng": 120.0, "id": "fire-ido"},
            ],
            # Light/reference layers: expected to be returned in full.
            "datacenters": [
                {"lat": 35.0, "lng": -75.0, "id": "dc-east"},
                {"lat": 35.0, "lng": 100.0, "id": "dc-asia"},
                {"lat": -10.0, "lng": 120.0, "id": "dc-ido"},
            ],
            "military_bases": [
                {"lat": 35.0, "lng": -75.0, "id": "mb-east"},
                {"lat": -10.0, "lng": 120.0, "id": "mb-ido"},
            ],
            "power_plants": [
                {"lat": 35.0, "lng": -75.0, "id": "pp-east"},
                {"lat": 35.0, "lng": 100.0, "id": "pp-asia"},
            ],
        }
        for store_key, rows in fixtures.items():
            monkeypatch.setitem(_store.latest_data, store_key, rows)

        enabled_layers = (
            "global_incidents", "firms", "datacenters", "military_bases", "power_plants",
        )
        for layer_name in enabled_layers:
            monkeypatch.setitem(_store.active_layers, layer_name, True)

    def test_no_bbox_returns_world_data(self, client, monkeypatch):
        """Without bounds, every seeded collection is returned complete."""
        self._seed_slow(monkeypatch)
        resp = client.get("/api/live-data/slow")
        assert resp.status_code == 200
        payload = resp.json()
        assert len(payload["gdelt"]) == 2
        assert len(payload["firms_fires"]) == 2
        assert len(payload["datacenters"]) == 3

    def test_bbox_filters_heavy_layers(self, client, monkeypatch):
        """A regional box keeps only the heavy fixtures located inside it."""
        self._seed_slow(monkeypatch)
        resp = client.get("/api/live-data/slow?s=30&w=-80&n=40&e=-70")
        assert resp.status_code == 200
        payload = resp.json()
        gdelt_ids = {row["id"] for row in payload["gdelt"]}
        fire_ids = {row["id"] for row in payload["firms_fires"]}
        assert gdelt_ids == {"g-east"}
        assert fire_ids == {"fire-east"}

    def test_bbox_leaves_reference_layers_untouched(self, client, monkeypatch):
        """Infrastructure overlays (datacenters, bases, power plants) stay
        world-scale even when bounds are supplied, so panning never hides them."""
        self._seed_slow(monkeypatch)
        resp = client.get("/api/live-data/slow?s=30&w=-80&n=40&e=-70")
        assert resp.status_code == 200
        payload = resp.json()
        assert len(payload["datacenters"]) == 3
        assert len(payload["military_bases"]) == 2
        assert len(payload["power_plants"]) == 2

    def test_antimeridian_bbox(self, client, monkeypatch):
        """A box with w=170 / e=-170 includes points on both sides of ±180°."""
        from services.fetchers import _store

        # Two points just either side of the antimeridian, one far away.
        events = [
            {"lat": 0.0, "lng": 175.0, "id": "in-west"},
            {"lat": 0.0, "lng": -175.0, "id": "in-east"},
            {"lat": 0.0, "lng": 0.0, "id": "out-mid"},
        ]
        monkeypatch.setitem(_store.latest_data, "gdelt", events)
        monkeypatch.setitem(_store.active_layers, "global_incidents", True)
        resp = client.get("/api/live-data/slow?s=-10&w=170&n=10&e=-170")
        assert resp.status_code == 200
        payload = resp.json()
        returned_ids = {row["id"] for row in payload["gdelt"]}
        assert "in-west" in returned_ids
        assert "in-east" in returned_ids
        assert "out-mid" not in returned_ids
|
||||
|
||||
|
||||
# ─────────────────── Direct helper coverage (defensive) ─────────────────────
|
||||
|
||||
|
||||
class TestHelpers:
    """Direct checks of the bbox helper functions exposed by ``routers.data``."""

    def test_has_full_bbox(self):
        """All four bounds are required; a single ``None`` makes it falsy."""
        from routers.data import _has_full_bbox

        complete = (1, 2, 3, 4)
        assert _has_full_bbox(*complete)
        # Blank out each position in turn (s, w, n, e).
        for missing_index in range(len(complete)):
            args = list(complete)
            args[missing_index] = None
            assert not _has_full_bbox(*args)

    def test_bbox_etag_suffix_quantizes(self):
        """Two boxes that differ by less than a degree share one suffix."""
        from routers.data import _bbox_etag_suffix

        first = _bbox_etag_suffix(30.1, -79.6, 39.9, -70.1)
        second = _bbox_etag_suffix(30.4, -79.2, 39.4, -70.8)
        assert first == second, "Sub-degree pan must collapse to the same ETag suffix"
        assert first.startswith("|bbox=")

    def test_bbox_etag_suffix_world_collapses(self):
        """World-scale bounds yield an empty suffix (i.e. the global ETag)."""
        from routers.data import _bbox_etag_suffix

        suffix = _bbox_etag_suffix(-90, -180, 90, 180)
        assert suffix == ""

    def test_bbox_etag_suffix_partial_is_empty(self):
        """An incomplete set of bounds yields an empty suffix."""
        from routers.data import _bbox_etag_suffix

        suffix = _bbox_etag_suffix(None, -180, 90, 180)
        assert suffix == ""

    def test_apply_bbox_preserves_non_list_values(self):
        """String and dict entries pass through the bbox filter unchanged."""
        from routers.data import _apply_bbox_to_payload, _FAST_BBOX_HEAVY_KEYS

        original = {
            "commercial_flights": [{"lat": 35, "lng": -75, "id": "x"}],
            "satellite_source": "tle",  # plain string, not a list
            "sigint_totals": {"total": 1},  # dict, not a list
        }
        # A shallow copy is handed in, as in the original test.
        filtered = _apply_bbox_to_payload(
            dict(original), _FAST_BBOX_HEAVY_KEYS, 30, -80, 40, -70
        )
        assert filtered["satellite_source"] == "tle"
        assert filtered["sigint_totals"] == {"total": 1}
|
||||
@@ -57,18 +57,6 @@ services:
|
||||
# name). If you rename the frontend service or run with a different
|
||||
# container_name, list the hostnames here (comma-separated, no spaces).
|
||||
- SHADOWBROKER_TRUSTED_FRONTEND_HOSTS=${SHADOWBROKER_TRUSTED_FRONTEND_HOSTS:-frontend,shadowbroker-frontend}
|
||||
# Third-party fetcher opt-ins. Default OFF — these phone home to
|
||||
# politically/commercially sensitive upstreams (Polymarket, Kalshi,
|
||||
# Yahoo Finance, EU disinfo trackers, NUFORC dataset host, etc.).
|
||||
# Set to "true" in your .env only if you want the node's IP to
|
||||
# contact each of these services. The dashboard panel for each
|
||||
# feature reads as "no data" until the corresponding flag is on.
|
||||
- PREDICTION_MARKETS_ENABLED=${PREDICTION_MARKETS_ENABLED:-false}
|
||||
- FINANCIAL_ENABLED=${FINANCIAL_ENABLED:-false}
|
||||
- CROWDTHREAT_ENABLED=${CROWDTHREAT_ENABLED:-false}
|
||||
- FIMI_ENABLED=${FIMI_ENABLED:-false}
|
||||
- NUFORC_ENABLED=${NUFORC_ENABLED:-false}
|
||||
- NEWS_ENABLED=${NEWS_ENABLED:-true}
|
||||
volumes:
|
||||
- backend_data:/app/data
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -859,7 +859,7 @@ export default function TopRightControls({
|
||||
}>
|
||||
{activatingPhase === 'done'
|
||||
? (syncOutcomeRaw === 'solo'
|
||||
? `${t('node.soloNodeReady')} — ${nodeStatus?.total_events ?? 0} ${t('node.events')}`
|
||||
? `${t('node.soloReady')} — ${nodeStatus?.total_events ?? 0} ${t('node.events')}`
|
||||
: `${t('node.synced')} — ${nodeStatus?.total_events ?? 0} ${t('node.events')}`)
|
||||
: activatingPhase === 'sync'
|
||||
? `${t('node.syncingChain')}${(nodeStatus?.total_events ?? 0) > 0 ? ` ${nodeStatus?.total_events} ${t('node.events')}` : ''}`
|
||||
@@ -1013,8 +1013,8 @@ export default function TopRightControls({
|
||||
: t('terminal.terminalDetail')}
|
||||
<div className="mt-2 text-[12px] text-cyan-200/70 normal-case tracking-normal">
|
||||
{terminalPrivateReady
|
||||
? t('terminal.identityReady')
|
||||
: t('terminal.identityNotReady')}
|
||||
? t('terminal.enterTerminalDetail')
|
||||
: t('terminal.terminalDetailMore')}
|
||||
</div>
|
||||
</div>
|
||||
{terminalLaunchError && (
|
||||
@@ -1025,15 +1025,15 @@ export default function TopRightControls({
|
||||
<div className="border border-cyan-500/20 bg-black/30 px-4 py-4 text-[12px] font-mono text-slate-200 leading-[1.85]">
|
||||
<div className="text-cyan-300 tracking-[0.18em]">{t('terminal.beforeYouEnter')}</div>
|
||||
<ul className="mt-3 space-y-2 list-disc pl-5">
|
||||
<li>{t('terminal.termTerminal1')}</li>
|
||||
<li>{t('terminal.termTerminal2')}</li>
|
||||
<li>{t('terminal.termTerminal3')}</li>
|
||||
<li>{t('terminal.term1')}</li>
|
||||
<li>{t('terminal.term2')}</li>
|
||||
<li>{t('terminal.term3')}</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div className="border border-amber-500/20 bg-amber-950/10 px-4 py-3 text-[12px] font-mono text-amber-200/80 leading-[1.85]">
|
||||
<div className="text-amber-300 tracking-[0.18em]">{t('terminal.wormholeCleanup')}</div>
|
||||
<div className="mt-2">
|
||||
{t('terminal.cleanupDetail')}
|
||||
{t('terminal.wormholeCleanupDetail')}
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid grid-cols-1 gap-3 sm:grid-cols-3">
|
||||
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
normalizeViewBounds,
|
||||
type ViewBounds,
|
||||
} from '@/lib/viewportPrivacy';
|
||||
import { setLiveDataBounds } from '@/lib/liveDataViewport';
|
||||
|
||||
const VIEWPORT_POST_DEBOUNCE_MS = 2500;
|
||||
const VIEWPORT_POST_MIN_INTERVAL_MS = 12000;
|
||||
@@ -71,17 +70,6 @@ export function useViewportBounds(
|
||||
window.dispatchEvent(new CustomEvent(VIEWPORT_COMMITTED_EVENT));
|
||||
}
|
||||
|
||||
// Issue #288: hand the same coarsened/expanded bounds to the live-data
|
||||
// poller so heavy collections in /api/live-data/{fast,slow} can be
|
||||
// scoped to the visible region. Static reference layers are unaffected
|
||||
// — see backend _FAST_BBOX_HEAVY_KEYS / _SLOW_BBOX_HEAVY_KEYS.
|
||||
setLiveDataBounds({
|
||||
south: preloadBounds.south,
|
||||
west: preloadBounds.west,
|
||||
north: preloadBounds.north,
|
||||
east: preloadBounds.east,
|
||||
});
|
||||
|
||||
// Debounce POSTing viewport bounds to backend for dynamic AIS stream filtering
|
||||
if (debounceTimerRef.current) clearTimeout(debounceTimerRef.current);
|
||||
debounceTimerRef.current = setTimeout(() => {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { useEffect, useRef } from "react";
|
||||
import { API_BASE } from "@/lib/api";
|
||||
import { mergeData, setBackendStatus as setStoreBackendStatus } from "./useDataStore";
|
||||
import { appendLiveDataBoundsParams } from "@/lib/liveDataViewport";
|
||||
|
||||
export type BackendStatus = 'connecting' | 'connected' | 'disconnected';
|
||||
|
||||
@@ -33,8 +32,8 @@ export async function forceRefreshLiveData(): Promise<void> {
|
||||
|
||||
try {
|
||||
const [fastRes, slowRes] = await Promise.all([
|
||||
fetch(appendLiveDataBoundsParams(`${API_BASE}/api/live-data/fast`)),
|
||||
fetch(appendLiveDataBoundsParams(`${API_BASE}/api/live-data/slow`)),
|
||||
fetch(`${API_BASE}/api/live-data/fast`),
|
||||
fetch(`${API_BASE}/api/live-data/slow`),
|
||||
]);
|
||||
|
||||
if (fastRes.ok) {
|
||||
@@ -86,13 +85,9 @@ export const LAYER_TOGGLE_EVENT = 'sb:layer-toggle';
|
||||
/**
|
||||
* Polls the backend for fast and slow data tiers.
|
||||
*
|
||||
* Issue #288: heavy, density-driven layers (vessels, aircraft, gdelt
|
||||
* events, fires, sigint, …) are bbox-scoped to the visible map area via
|
||||
* `appendLiveDataBoundsParams`. Static reference layers (datacenters,
|
||||
* military bases, power plants, satellites, weather, news, …) are NOT
|
||||
* filtered backend-side, so panning never reveals an "empty world" of
|
||||
* infrastructure. World-zoomed views skip bbox params entirely and hit
|
||||
* the shared ETag cache exactly like the pre-#288 behaviour.
|
||||
* All data is fetched globally (no bbox filtering) — the backend returns its
|
||||
* full in-memory cache and MapLibre culls off-screen entities on the GPU.
|
||||
* This eliminates the "empty map when zooming out" lag.
|
||||
*
|
||||
* The AIS stream viewport POST (/api/viewport) is still handled separately
|
||||
* by useViewportBounds to limit upstream AIS ingestion.
|
||||
@@ -152,9 +147,7 @@ export function useDataPolling() {
|
||||
const useStartupPayload = !fetchedStartupFastPayload && !fastEtag.current;
|
||||
const headers: Record<string, string> = {};
|
||||
if (!useStartupPayload && fastEtag.current) headers['If-None-Match'] = fastEtag.current;
|
||||
const url = appendLiveDataBoundsParams(
|
||||
`${API_BASE}/api/live-data/fast${useStartupPayload ? '?initial=1' : ''}`,
|
||||
);
|
||||
const url = `${API_BASE}/api/live-data/fast${useStartupPayload ? '?initial=1' : ''}`;
|
||||
const res = await fetch(url, {
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
@@ -200,13 +193,10 @@ export function useDataPolling() {
|
||||
try {
|
||||
const headers: Record<string, string> = {};
|
||||
if (slowEtag.current) headers['If-None-Match'] = slowEtag.current;
|
||||
const res = await fetch(
|
||||
appendLiveDataBoundsParams(`${API_BASE}/api/live-data/slow`),
|
||||
{
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
},
|
||||
);
|
||||
const res = await fetch(`${API_BASE}/api/live-data/slow`, {
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (res.status === 304) { scheduleNext('slow'); return; }
|
||||
if (res.ok) {
|
||||
slowEtag.current = res.headers.get('etag') || null;
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
/**
 * Module-level holder for the map's current viewport bounds, read by the
 * live-data poller to scope `/api/live-data/{fast,slow}` requests to the
 * visible area once the user has zoomed in.
 *
 * Issue #288: when all four bounds are supplied, the backend bbox-filters
 * dense layers (vessels, aircraft, gdelt events, fires, sigint, …) while
 * light reference layers stay world-scale. The original change notes report
 * that this cuts the steady-state poll from ~27 MB to ~5 MB for a typical
 * regional view.
 *
 * When no bounds are stored, callers omit the params entirely and the
 * backend ships the full world payload. That keeps the cold-boot path
 * (no map mounted yet) and the world-zoomed view unchanged.
 */

/** Geographic bounding box, in degrees. */
export interface LiveDataBounds {
  /** Southern latitude edge. */
  south: number;
  /** Western longitude edge. */
  west: number;
  /** Northern latitude edge. */
  north: number;
  /** Eastern longitude edge; may be less than `west` when the box crosses ±180°. */
  east: number;
}

/** Last accepted regional bounds; `null` means "fetch the whole world". */
let _current: LiveDataBounds | null = null;

/**
 * True when the box spans at least 300° of longitude OR 120° of latitude.
 *
 * Such boxes are treated as world-scale: no bounds are sent at all, so
 * zoomed-out views keep sharing the global ETag cache.
 */
function isEffectivelyWorld(bounds: LiveDataBounds): boolean {
  const latSpan = Math.max(0, bounds.north - bounds.south);
  // east < west means the box wraps across the antimeridian; unwrap it.
  const rawLngSpan = bounds.east - bounds.west;
  const lngSpan = rawLngSpan < 0 ? rawLngSpan + 360 : rawLngSpan;
  return lngSpan >= 300 || latSpan >= 120;
}

/**
 * Store the latest committed viewport bounds.
 *
 * The stored value is reset to `null` (world-scale fetching) when `bounds`
 * is `null`, when any edge is not a finite number, or when the box is
 * effectively world-sized. Otherwise a private copy of the four edges is
 * kept, so later mutation of the caller's object cannot change the stored
 * bounds without going through this validation again.
 *
 * @param bounds - New bounds, or `null` to fall back to world-scale fetching
 *   (e.g. on unmount).
 */
export function setLiveDataBounds(bounds: LiveDataBounds | null): void {
  if (bounds === null) {
    _current = null;
    return;
  }

  const { south, west, north, east } = bounds;
  const allFinite = [south, west, north, east].every((edge) => Number.isFinite(edge));

  // Invalid or world-sized boxes both collapse to "no bounds".
  if (!allFinite || isEffectivelyWorld(bounds)) {
    _current = null;
    return;
  }

  _current = { south, west, north, east };
}

/**
 * Read the stored bounds.
 *
 * @returns The current regional bounds, or `null` when the caller should
 *   request the full world payload without bbox params.
 */
export function getLiveDataBounds(): LiveDataBounds | null {
  return _current;
}

/**
 * Append `s/w/n/e` query params to `url` when regional bounds are stored;
 * otherwise return `url` unchanged.
 *
 * South/west are floored and north/east are ceiled to whole degrees, which
 * the original comments describe as matching the backend's 1° ETag
 * quantization, so client and server agree on the cache key.
 *
 * @param url - Request URL, with or without an existing query string.
 * @returns The URL with bounds params appended, or the input URL as-is.
 */
export function appendLiveDataBoundsParams(url: string): string {
  const bounds = _current;
  if (bounds === null) return url;

  const separator = url.includes('?') ? '&' : '?';
  const s = Math.floor(bounds.south);
  const w = Math.floor(bounds.west);
  const n = Math.ceil(bounds.north);
  const e = Math.ceil(bounds.east);
  return `${url}${separator}s=${s}&w=${w}&n=${n}&e=${e}`;
}
|
||||
Reference in New Issue
Block a user