mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-03 12:58:11 +02:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| de37dfa09a | |||
| 5e6bb8511a | |||
| 0fee36e8f7 | |||
| e125467721 |
@@ -105,6 +105,10 @@ backend/data/*
|
||||
# the self-updater as a second-line integrity check when the release's
|
||||
# SHA256SUMS.txt asset can't be fetched.
|
||||
!backend/data/release_digests.json
|
||||
# Issue #244/#245/#246: one-shot carrier-position seed shipped with each
|
||||
# release. Used ONLY on first-ever startup to bootstrap carrier_cache.json;
|
||||
# after that the cache reflects this install's own GDELT observations.
|
||||
!backend/data/carrier_seed.json
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
{
|
||||
"_meta": {
|
||||
"as_of": "2026-03-09",
|
||||
"source": "USNI News Fleet & Marine Tracker",
|
||||
"source_url": "https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026",
|
||||
"note": "One-shot bootstrap for first-run carrier positions. Once carrier_cache.json exists in the runtime data volume, this seed file is never read again. All subsequent updates come from GDELT (and any future sources) and are written to carrier_cache.json. A year from now, your runtime cache reflects whatever your install has observed since first launch — not these snapshot positions."
|
||||
},
|
||||
"carriers": {
|
||||
"CVN-68": {
|
||||
"lat": 47.5535,
|
||||
"lng": -122.6400,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-76": {
|
||||
"lat": 47.5580,
|
||||
"lng": -122.6360,
|
||||
"heading": 90,
|
||||
"desc": "Bremerton, WA (Decommissioning)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-69": {
|
||||
"lat": 36.9465,
|
||||
"lng": -76.3265,
|
||||
"heading": 0,
|
||||
"desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-78": {
|
||||
"lat": 18.0,
|
||||
"lng": 39.5,
|
||||
"heading": 0,
|
||||
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-74": {
|
||||
"lat": 36.98,
|
||||
"lng": -76.43,
|
||||
"heading": 0,
|
||||
"desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-75": {
|
||||
"lat": 36.0,
|
||||
"lng": 15.0,
|
||||
"heading": 0,
|
||||
"desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-77": {
|
||||
"lat": 36.5,
|
||||
"lng": -74.0,
|
||||
"heading": 0,
|
||||
"desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-70": {
|
||||
"lat": 32.6840,
|
||||
"lng": -117.1290,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Homeport)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-71": {
|
||||
"lat": 32.6885,
|
||||
"lng": -117.1280,
|
||||
"heading": 180,
|
||||
"desc": "San Diego, CA (Maintenance)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-72": {
|
||||
"lat": 20.0,
|
||||
"lng": 64.0,
|
||||
"heading": 0,
|
||||
"desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
},
|
||||
"CVN-73": {
|
||||
"lat": 35.2830,
|
||||
"lng": 139.6700,
|
||||
"heading": 180,
|
||||
"desc": "Yokosuka, Japan (Forward deployed)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed"
|
||||
}
|
||||
}
|
||||
}
|
||||
+31
-3
@@ -1080,8 +1080,18 @@ def _public_mesh_log_size(entries: list[dict[str, Any]]) -> int:
|
||||
return sum(1 for item in entries if _public_mesh_log_entry(item) is not None)
|
||||
|
||||
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled", "transport", "anonymous_mode"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"profile", "wormhole_enabled"}
|
||||
# Issue #243 (tg12): the public redaction now exposes only the bare
|
||||
# "is Wormhole on?" boolean. Transport choice (tor/i2p/mixnet/direct),
|
||||
# anonymous-mode state, and the named privacy profile are all
|
||||
# operational posture and were leaking actionable recon to any
|
||||
# unauthenticated caller. They are now gated behind authenticated reads
|
||||
# (admin key or scoped-view token). Loopback Tauri shells and Docker
|
||||
# bridge frontend containers continue to see full status because the
|
||||
# Next.js catch-all proxy injects the configured ADMIN_KEY for
|
||||
# same-origin/non-browser callers (see PR #263), so legitimate operator
|
||||
# UX is unaffected.
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"wormhole_enabled"}
|
||||
_PRIVATE_LANE_CONTROL_FIELDS = {"private_lane_tier", "private_lane_policy"}
|
||||
_PUBLIC_RNS_STATUS_FIELDS = {"enabled", "ready", "configured_peers", "active_peers"}
|
||||
_NODE_PUBLIC_EVENT_HOOK_REGISTERED = False
|
||||
@@ -8810,9 +8820,14 @@ async def api_uw_flow(request: Request):
|
||||
from services.news_feed_config import get_feeds, save_feeds, reset_feeds
|
||||
|
||||
|
||||
@app.get("/api/settings/news-feeds")
|
||||
@app.get(
|
||||
"/api/settings/news-feeds",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_news_feeds(request: Request):
|
||||
"""Issue #252 (tg12): gated on local-operator. See the canonical
|
||||
handler in backend/routers/admin.py for the full rationale."""
|
||||
return get_feeds()
|
||||
|
||||
|
||||
@@ -9015,9 +9030,22 @@ class NodeSettingsUpdate(BaseModel):
|
||||
@app.get("/api/settings/node")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_node_settings(request: Request):
|
||||
"""Issue #243 (tg12): node mode and participant state are
|
||||
operational posture. Anonymous callers receive an empty stub —
|
||||
enough for the UI to know the endpoint exists but nothing
|
||||
fingerprintable. Authenticated callers see the full state.
|
||||
|
||||
Authenticated == local-operator (loopback / Docker bridge) OR an
|
||||
admin / scoped-view token. The Tauri shell and Docker frontend
|
||||
container both qualify via their existing transport (PR #263 +
|
||||
PR #278), so legitimate operator UX is unchanged.
|
||||
"""
|
||||
from services.node_settings import read_node_settings
|
||||
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
authenticated = _scoped_view_authenticated(request, "node")
|
||||
if not authenticated:
|
||||
return {}
|
||||
return {
|
||||
**data,
|
||||
"node_mode": _current_node_mode(),
|
||||
|
||||
@@ -82,9 +82,18 @@ async def api_get_keys_meta(request: Request):
|
||||
return get_env_path_info()
|
||||
|
||||
|
||||
@router.get("/api/settings/news-feeds")
|
||||
@router.get(
|
||||
"/api/settings/news-feeds",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_news_feeds(request: Request):
|
||||
"""Issue #252 (tg12): the curated feed inventory is configuration
|
||||
state, not a public data feed. Gated on local-operator so the
|
||||
Tauri shell, the Docker bridge frontend, and any caller with an
|
||||
admin key all see the full list; anonymous LAN/internet callers
|
||||
can no longer enumerate operator source URLs.
|
||||
"""
|
||||
from services.news_feed_config import get_feeds
|
||||
return get_feeds()
|
||||
|
||||
@@ -118,9 +127,18 @@ async def api_reset_news_feeds(request: Request):
|
||||
@router.get("/api/settings/node")
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_node_settings(request: Request):
|
||||
"""Issue #243 (tg12): node_mode and node_enabled are operational
|
||||
posture. Anonymous callers receive an empty stub; authenticated
|
||||
callers (local-operator or admin/scoped token) see the full
|
||||
state. See the canonical handler in backend/main.py for the full
|
||||
rationale.
|
||||
"""
|
||||
import asyncio
|
||||
from auth import _scoped_view_authenticated
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
if not _scoped_view_authenticated(request, "node"):
|
||||
return {}
|
||||
return {
|
||||
**data,
|
||||
"node_mode": _current_node_mode(),
|
||||
@@ -210,9 +228,19 @@ async def api_set_meshtastic_mqtt_settings(request: Request, body: MeshtasticMqt
|
||||
return _meshtastic_runtime_snapshot()
|
||||
|
||||
|
||||
@router.get("/api/settings/timemachine")
|
||||
@router.get(
|
||||
"/api/settings/timemachine",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
)
|
||||
@limiter.limit("30/minute")
|
||||
async def api_get_timemachine_settings(request: Request):
|
||||
"""Issue #253 (tg12): archival-capture posture is operationally
|
||||
sensitive — it tells a remote caller whether this deployment is
|
||||
retaining replayable historical surveillance data. Gated on
|
||||
local-operator so the Tauri shell and Docker bridge frontend
|
||||
still see the toggle state, but anonymous LAN/internet callers
|
||||
can no longer fingerprint Time Machine state.
|
||||
"""
|
||||
import asyncio
|
||||
from services.node_settings import read_node_settings
|
||||
data = await asyncio.to_thread(read_node_settings)
|
||||
|
||||
@@ -160,8 +160,13 @@ router = APIRouter()
|
||||
|
||||
# --- Constants ---
|
||||
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled", "transport", "anonymous_mode"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"profile", "wormhole_enabled"}
|
||||
# Issue #243 (tg12): the public redaction now exposes only the bare
|
||||
# "is this on?" boolean. Transport choice, anonymous-mode state, and
|
||||
# the named privacy profile were all leaking actionable recon to
|
||||
# unauthenticated callers and are now gated behind authenticated reads.
|
||||
# See the matching block in backend/main.py for the full rationale.
|
||||
_WORMHOLE_PUBLIC_SETTINGS_FIELDS = {"enabled"}
|
||||
_WORMHOLE_PUBLIC_PROFILE_FIELDS = {"wormhole_enabled"}
|
||||
_PRIVATE_LANE_CONTROL_FIELDS = {"private_lane_tier", "private_lane_policy"}
|
||||
_PUBLIC_RNS_STATUS_FIELDS = {"enabled", "ready", "configured_peers", "active_peers"}
|
||||
_NODE_PUBLIC_EVENT_HOOK_REGISTERED = False
|
||||
|
||||
+371
-173
@@ -1,46 +1,90 @@
|
||||
"""
|
||||
Carrier Strike Group OSINT Tracker
|
||||
===================================
|
||||
Scrapes multiple OSINT sources to maintain current estimated positions
|
||||
for US Navy Carrier Strike Groups. Updates on startup + 00:00 & 12:00 UTC.
|
||||
Maintains estimated positions for US Navy Carrier Strike Groups with
|
||||
honest provenance and freshness signals.
|
||||
|
||||
Sources:
|
||||
1. GDELT News API — recent carrier movement headlines
|
||||
2. WikiVoyage / public port-call databases
|
||||
3. Fallback — last-known or static OSINT estimates
|
||||
Issues #244 / #245 / #246 (tg12 external audit):
|
||||
|
||||
The previous implementation baked a snapshot of USNI News Fleet &
|
||||
Marine Tracker positions (March 9, 2026) into the registry as
|
||||
``fallback_lat``/``fallback_lng`` and stamped ``updated = now()``
|
||||
every time the dossier was rendered. That presented stale editorial
|
||||
data as live state. It also persisted GDELT-derived positions to the
|
||||
on-disk cache with no freshness signal, so a single news mention from
|
||||
months ago could keep overriding the (already-stale) registry default
|
||||
indefinitely.
|
||||
|
||||
Architecture after this PR:
|
||||
|
||||
::
|
||||
|
||||
backend/data/carrier_seed.json read-only, shipped with image,
|
||||
used ONCE on first-ever startup
|
||||
to bootstrap carrier_cache.json.
|
||||
|
||||
backend/data/carrier_cache.json mutable, lives in the runtime data
|
||||
volume, written by every GDELT
|
||||
refresh + any future source.
|
||||
|
||||
Startup flow:
|
||||
|
||||
1. ``carrier_cache.json`` exists? → load it.
|
||||
2. Otherwise, copy ``carrier_seed.json`` → ``carrier_cache.json``,
|
||||
then load it. (This happens once, ever, per install.)
|
||||
3. Background: GDELT fetch runs. Any carrier mentioned in fresh news
|
||||
gets its entry replaced with the news-derived position.
|
||||
``position_source_at`` is set to the news article timestamp.
|
||||
|
||||
Freshness is a *labelling* decision, not an eviction decision:
|
||||
|
||||
- ``position_source_at`` within the configurable freshness window
|
||||
(default 14 days) → ``position_confidence = "recent"``.
|
||||
- Older than that → ``position_confidence = "stale"``.
|
||||
- Bootstrapped from the seed file (never updated) → ``"seed"``.
|
||||
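- No cache entry at all (e.g. a carrier added to the registry after
first install) → carrier renders at its homeport with
``"homeport_default"``.
- Position inferred from a region name in a headline → ``"approximate"``,
which is reported as ``"stale_approximate"`` once it is older than the
freshness window.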
|
||||
|
||||
Carriers are never hidden, never teleported, never disappeared. The
|
||||
position the user sees is always the last position the system actually
|
||||
observed, with an honest "as-of" timestamp the UI can render however
|
||||
it likes. A year from now, the runtime cache reflects whatever this
|
||||
install has observed via GDELT — not the seed snapshot.
|
||||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
import random
|
||||
from datetime import datetime, timezone
|
||||
import shutil
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Carrier registry: hull number → metadata + fallback position
|
||||
# Carrier registry: hull number → identity only.
|
||||
#
|
||||
# Issue #244 (tg12): the previous registry carried hard-coded
|
||||
# ``fallback_lat``/``fallback_lng`` that were dated editorial
|
||||
# snapshots from a 2026-03-09 article. Those fields are DELETED. The
|
||||
# registry is now identity + homeport only; positions are sourced
|
||||
# exclusively from carrier_cache.json (and via that, from the
|
||||
# bootstrap seed or live OSINT).
|
||||
# -----------------------------------------------------------------
|
||||
CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
# Fallback positions sourced from USNI News Fleet & Marine Tracker (Mar 9, 2026)
|
||||
# https://news.usni.org/2026/03/09/usni-news-fleet-and-marine-tracker-march-9-2026
|
||||
# --- Bremerton, WA (Naval Base Kitsap) ---
|
||||
# Distinct pier positions along Sinclair Inlet so carriers don't stack
|
||||
"CVN-68": {
|
||||
"name": "USS Nimitz (CVN-68)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Nimitz",
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5535,
|
||||
"homeport_lng": -122.6400,
|
||||
"fallback_lat": 47.5535,
|
||||
"fallback_lng": -122.6400,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Maintenance)",
|
||||
},
|
||||
"CVN-76": {
|
||||
"name": "USS Ronald Reagan (CVN-76)",
|
||||
@@ -48,23 +92,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Bremerton, WA",
|
||||
"homeport_lat": 47.5580,
|
||||
"homeport_lng": -122.6360,
|
||||
"fallback_lat": 47.5580,
|
||||
"fallback_lng": -122.6360,
|
||||
"fallback_heading": 90,
|
||||
"fallback_desc": "Bremerton, WA (Decommissioning)",
|
||||
},
|
||||
# --- Norfolk, VA (Naval Station Norfolk) ---
|
||||
# Piers run N-S along Willoughby Bay; each carrier gets a distinct berth
|
||||
"CVN-69": {
|
||||
"name": "USS Dwight D. Eisenhower (CVN-69)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Dwight_D._Eisenhower",
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9465,
|
||||
"homeport_lng": -76.3265,
|
||||
"fallback_lat": 36.9465,
|
||||
"fallback_lng": -76.3265,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Norfolk, VA (Post-deployment maintenance)",
|
||||
},
|
||||
"CVN-78": {
|
||||
"name": "USS Gerald R. Ford (CVN-78)",
|
||||
@@ -72,10 +107,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9505,
|
||||
"homeport_lng": -76.3250,
|
||||
"fallback_lat": 18.0,
|
||||
"fallback_lng": 39.5,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
"CVN-74": {
|
||||
"name": "USS John C. Stennis (CVN-74)",
|
||||
@@ -83,10 +114,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9540,
|
||||
"homeport_lng": -76.3235,
|
||||
"fallback_lat": 36.98,
|
||||
"fallback_lng": -76.43,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Newport News, VA (RCOH refueling overhaul)",
|
||||
},
|
||||
"CVN-75": {
|
||||
"name": "USS Harry S. Truman (CVN-75)",
|
||||
@@ -94,10 +121,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9580,
|
||||
"homeport_lng": -76.3220,
|
||||
"fallback_lat": 36.0,
|
||||
"fallback_lng": 15.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Mediterranean Sea deployment (USNI Mar 9)",
|
||||
},
|
||||
"CVN-77": {
|
||||
"name": "USS George H.W. Bush (CVN-77)",
|
||||
@@ -105,23 +128,14 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Norfolk, VA",
|
||||
"homeport_lat": 36.9620,
|
||||
"homeport_lng": -76.3210,
|
||||
"fallback_lat": 36.5,
|
||||
"fallback_lng": -74.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Atlantic — Pre-deployment workups (USNI Mar 9)",
|
||||
},
|
||||
# --- San Diego, CA (Naval Base San Diego) ---
|
||||
# Carrier piers along the east shore of San Diego Bay, spread N-S
|
||||
"CVN-70": {
|
||||
"name": "USS Carl Vinson (CVN-70)",
|
||||
"wiki": "https://en.wikipedia.org/wiki/USS_Carl_Vinson",
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6840,
|
||||
"homeport_lng": -117.1290,
|
||||
"fallback_lat": 32.6840,
|
||||
"fallback_lng": -117.1290,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Homeport)",
|
||||
},
|
||||
"CVN-71": {
|
||||
"name": "USS Theodore Roosevelt (CVN-71)",
|
||||
@@ -129,10 +143,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6885,
|
||||
"homeport_lng": -117.1280,
|
||||
"fallback_lat": 32.6885,
|
||||
"fallback_lng": -117.1280,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "San Diego, CA (Maintenance)",
|
||||
},
|
||||
"CVN-72": {
|
||||
"name": "USS Abraham Lincoln (CVN-72)",
|
||||
@@ -140,10 +150,6 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "San Diego, CA",
|
||||
"homeport_lat": 32.6925,
|
||||
"homeport_lng": -117.1275,
|
||||
"fallback_lat": 20.0,
|
||||
"fallback_lng": 64.0,
|
||||
"fallback_heading": 0,
|
||||
"fallback_desc": "Arabian Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
},
|
||||
# --- Yokosuka, Japan (CFAY) ---
|
||||
"CVN-73": {
|
||||
@@ -152,16 +158,18 @@ CARRIER_REGISTRY: Dict[str, dict] = {
|
||||
"homeport": "Yokosuka, Japan",
|
||||
"homeport_lat": 35.2830,
|
||||
"homeport_lng": 139.6700,
|
||||
"fallback_lat": 35.2830,
|
||||
"fallback_lng": 139.6700,
|
||||
"fallback_heading": 180,
|
||||
"fallback_desc": "Yokosuka, Japan (Forward deployed)",
|
||||
},
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Region → approximate center coordinates
|
||||
# Used to map textual geographic descriptions to lat/lng
|
||||
# Region → approximate center coordinates.
|
||||
#
|
||||
# Issue #245 (tg12): converting a region name straight into precise
|
||||
# map coordinates is false precision. We still use this table to
|
||||
# infer a coarse position from a headline mention, but the resulting
|
||||
# carrier object is now stamped ``position_confidence = "approximate"``
|
||||
# so the UI can render an uncertainty radius / dimmed icon. The
|
||||
# centroid is a best-effort midpoint of the named body of water.
|
||||
# -----------------------------------------------------------------
|
||||
REGION_COORDS: Dict[str, tuple] = {
|
||||
# Oceans & Seas
|
||||
@@ -220,9 +228,39 @@ REGION_COORDS: Dict[str, tuple] = {
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Cache file for persisting positions between restarts
|
||||
# Files
|
||||
# -----------------------------------------------------------------
|
||||
CACHE_FILE = Path(__file__).parent.parent / "carrier_cache.json"
|
||||
#
|
||||
# The seed lives in the read-only image data dir (it ships with each
|
||||
# release). The cache lives in the same data dir but is written at
|
||||
# runtime; under Docker compose this dir is volume-mounted so the
|
||||
# cache persists across container restarts, which is the whole point
|
||||
# of the seed-then-observe model — the user's runtime observations
|
||||
# survive image upgrades.
|
||||
SEED_FILE = Path(__file__).parent.parent / "data" / "carrier_seed.json"
|
||||
CACHE_FILE = Path(__file__).parent.parent / "data" / "carrier_cache.json"
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Freshness window for position_confidence labeling. Issue #246 (tg12):
|
||||
# previously persisted cache entries had no freshness signal at all.
|
||||
# After this change, the position itself is preserved (we never lose
|
||||
# what was last observed) but the confidence label flips from
|
||||
# "recent" to "stale" once the underlying source is older than this
|
||||
# window. Operator-overridable via env var.
|
||||
# -----------------------------------------------------------------
|
||||
_DEFAULT_FRESHNESS_WINDOW_DAYS = 14
|
||||
|
||||
|
||||
def _freshness_window_days() -> int:
|
||||
raw = str(os.environ.get("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "") or "").strip()
|
||||
if not raw:
|
||||
return _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||
try:
|
||||
n = int(raw)
|
||||
return n if n > 0 else _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||
except (TypeError, ValueError):
|
||||
return _DEFAULT_FRESHNESS_WINDOW_DAYS
|
||||
|
||||
|
||||
_carrier_positions: Dict[str, dict] = {}
|
||||
_positions_lock = threading.Lock()
|
||||
@@ -234,25 +272,159 @@ _GDELT_REQUEST_DELAY_SECONDS = 1.25
|
||||
_GDELT_REQUEST_JITTER_SECONDS = 0.35
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _parse_iso(ts: str) -> Optional[datetime]:
|
||||
if not ts:
|
||||
return None
|
||||
try:
|
||||
# Python's fromisoformat accepts +00:00 but not 'Z' until 3.11.
|
||||
normalized = ts.replace("Z", "+00:00")
|
||||
dt = datetime.fromisoformat(normalized)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _compute_position_confidence(entry: dict, *, now: Optional[datetime] = None) -> str:
    """Return the public confidence label for a carrier cache entry.

    Labels, in order of precedence:

    - ``"seed"`` / ``"homeport_default"`` stored on the entry are
      returned unchanged. They describe how the position was obtained
      and never age.
    - ``"approximate"`` stored on the entry is returned unchanged unless
      ``position_source_at`` parses and is older than the freshness
      window, in which case ``"stale_approximate"`` is returned.
    - Any other entry is ``"recent"`` when ``position_source_at`` is
      within the freshness window and ``"stale"`` when it is older,
      missing or unparseable.

    Args:
        entry: Cache entry; ``position_confidence`` and
            ``position_source_at`` are the only keys read.
        now: Reference instant for the age check. Defaults to the
            current UTC time. A naive value is interpreted as UTC so it
            can be compared with the timezone-aware source timestamp.

    Returns:
        One of ``"seed"``, ``"homeport_default"``, ``"approximate"``,
        ``"stale_approximate"``, ``"recent"`` or ``"stale"``.
    """
    raw_label = str(entry.get("position_confidence", "") or "").strip()
    if raw_label in {"seed", "homeport_default"}:
        return raw_label

    source_at = _parse_iso(str(entry.get("position_source_at", "") or ""))
    if source_at is None:
        # Without a usable timestamp an approximate entry keeps its
        # label; everything else is reported as stale.
        return "approximate" if raw_label == "approximate" else "stale"

    reference = now or datetime.now(timezone.utc)
    if reference.tzinfo is None:
        # Subtracting an aware datetime from a naive one raises TypeError.
        reference = reference.replace(tzinfo=timezone.utc)
    aged_out = reference - source_at > timedelta(days=_freshness_window_days())

    if raw_label == "approximate":
        return "stale_approximate" if aged_out else "approximate"
    return "stale" if aged_out else "recent"
|
||||
|
||||
|
||||
def _load_seed() -> Dict[str, dict]:
    """Read the bundled seed file and return its ``carriers`` mapping.

    Only the hull → entry mapping is returned; the ``_meta`` wrapper is
    dropped. A missing file, unreadable or invalid JSON, or a
    ``carriers`` value that is not a mapping all produce an empty dict.
    """
    try:
        if not SEED_FILE.exists():
            logger.info("Carrier seed file not present at %s; first-run will fall back to homeport defaults", SEED_FILE)
            return {}
        payload = json.loads(SEED_FILE.read_text(encoding="utf-8"))
        seed_entries = {}
        if isinstance(payload, dict):
            seed_entries = payload.get("carriers", {})
        if isinstance(seed_entries, dict):
            logger.info("Carrier seed loaded: %d entries from %s", len(seed_entries), SEED_FILE)
            return seed_entries
        return {}
    except (IOError, OSError, json.JSONDecodeError, ValueError) as exc:
        logger.warning("Failed to load carrier seed file %s: %s", SEED_FILE, exc)
        return {}
|
||||
|
||||
|
||||
def _load_cache() -> Dict[str, dict]:
|
||||
"""Load cached carrier positions from disk."""
|
||||
"""Load the mutable cache (last-known positions persisted between restarts)."""
|
||||
try:
|
||||
if CACHE_FILE.exists():
|
||||
data = json.loads(CACHE_FILE.read_text())
|
||||
logger.info(f"Carrier cache loaded: {len(data)} carriers from {CACHE_FILE}")
|
||||
return data
|
||||
data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
|
||||
if isinstance(data, dict):
|
||||
logger.info("Carrier cache loaded: %d carriers from %s", len(data), CACHE_FILE)
|
||||
return data
|
||||
except (IOError, OSError, json.JSONDecodeError, ValueError) as e:
|
||||
logger.warning(f"Failed to load carrier cache: {e}")
|
||||
logger.warning("Failed to load carrier cache: %s", e)
|
||||
return {}
|
||||
|
||||
|
||||
def _save_cache(positions: Dict[str, dict]):
|
||||
"""Persist carrier positions to disk."""
|
||||
def _save_cache(positions: Dict[str, dict]) -> None:
|
||||
"""Persist the mutable cache. Atomic write (temp + rename) so a crash
|
||||
mid-write can't leave the file truncated."""
|
||||
try:
|
||||
CACHE_FILE.write_text(json.dumps(positions, indent=2))
|
||||
logger.info(f"Carrier cache saved: {len(positions)} carriers")
|
||||
CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = CACHE_FILE.with_suffix(CACHE_FILE.suffix + ".tmp")
|
||||
tmp.write_text(json.dumps(positions, indent=2), encoding="utf-8")
|
||||
# On Windows os.replace is atomic and overwrites existing files.
|
||||
os.replace(tmp, CACHE_FILE)
|
||||
logger.info("Carrier cache saved: %d carriers", len(positions))
|
||||
except (IOError, OSError) as e:
|
||||
logger.warning(f"Failed to save carrier cache: {e}")
|
||||
logger.warning("Failed to save carrier cache: %s", e)
|
||||
|
||||
|
||||
def _homeport_entry_for(hull: str) -> Optional[dict]:
    """Build the placeholder cache entry that places a hull at its homeport.

    Returns ``None`` when the hull has no (or an empty) registry record.
    Otherwise the entry uses the registry's homeport coordinates, a
    heading of 0, the current time as ``position_source_at`` and the
    ``"homeport_default"`` confidence label.
    """
    registry_row = CARRIER_REGISTRY.get(hull)
    if not registry_row:
        return None

    placeholder = {
        "lat": registry_row["homeport_lat"],
        "lng": registry_row["homeport_lng"],
        "heading": 0,
    }
    port_name = registry_row["homeport"]
    placeholder.update(
        desc=f"{port_name} (no observations yet)",
        source=f"Homeport default ({port_name})",
        source_url=registry_row.get("wiki", ""),
        position_source_at=_now_iso(),
        position_confidence="homeport_default",
    )
    return placeholder
|
||||
|
||||
|
||||
def _bootstrap_cache_if_missing() -> Dict[str, dict]:
    """Return the carrier cache, creating it on disk first when absent.

    - Cache file present: its contents are loaded and returned.
    - Cache file absent, seed available: the seed is written out as the
      initial cache and a copy of it is returned.
    - Cache file absent, no seed: a homeport-default entry is built for
      every registry hull, saved if non-empty, and returned.

    Deleting the cache file therefore re-runs this bootstrap on the next
    call.
    """
    if CACHE_FILE.exists():
        return _load_cache()

    seed = _load_seed()
    if seed:
        # Persist the seed as the first cache so later startups take the
        # early return above.
        _save_cache(seed)
        logger.info("Carrier cache bootstrapped from seed (first-ever startup)")
        return dict(seed)

    # No usable seed: fall back to one homeport placeholder per hull.
    candidates = ((hull, _homeport_entry_for(hull)) for hull in CARRIER_REGISTRY)
    homeports: Dict[str, dict] = {
        hull: entry for hull, entry in candidates if entry is not None
    }
    if homeports:
        _save_cache(homeports)
    return homeports
|
||||
|
||||
|
||||
def _match_region(text: str) -> Optional[tuple]:
|
||||
@@ -270,10 +442,8 @@ def _match_carrier(text: str) -> Optional[str]:
|
||||
for hull, info in CARRIER_REGISTRY.items():
|
||||
hull_check = hull.lower().replace("-", "")
|
||||
name_parts = info["name"].lower()
|
||||
# Match hull number (e.g., "CVN-78", "CVN78")
|
||||
if hull.lower() in text_lower or hull_check in text_lower.replace("-", ""):
|
||||
return hull
|
||||
# Match ship name (e.g., "Ford", "Eisenhower", "Vinson")
|
||||
ship_name = name_parts.split("(")[0].strip()
|
||||
last_name = ship_name.split()[-1] if ship_name else ""
|
||||
if last_name and len(last_name) > 3 and last_name in text_lower:
|
||||
@@ -323,8 +493,9 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
articles = data.get("articles", [])
|
||||
for art in articles:
|
||||
title = art.get("title", "")
|
||||
url = art.get("url", "")
|
||||
results.append({"title": title, "url": url})
|
||||
article_url = art.get("url", "")
|
||||
article_at = art.get("seendate") or art.get("date") or ""
|
||||
results.append({"title": title, "url": article_url, "seendate": article_at})
|
||||
except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
|
||||
logger.debug(f"GDELT search failed for '{term}': {e}")
|
||||
continue
|
||||
@@ -340,108 +511,139 @@ def _fetch_gdelt_carrier_news() -> List[dict]:
|
||||
return results
|
||||
|
||||
|
||||
def _gdelt_seendate_to_iso(seendate: str) -> Optional[str]:
|
||||
"""GDELT returns YYYYMMDDhhmmss (UTC). Convert to ISO8601 for
|
||||
position_source_at. Returns None if the input is unparseable."""
|
||||
raw = (seendate or "").strip()
|
||||
if len(raw) < 8 or not raw.isdigit():
|
||||
return None
|
||||
try:
|
||||
dt = datetime.strptime(raw[:14] if len(raw) >= 14 else raw[:8] + "000000", "%Y%m%d%H%M%S")
|
||||
return dt.replace(tzinfo=timezone.utc).isoformat()
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _parse_carrier_positions_from_news(articles: List[dict]) -> Dict[str, dict]:
|
||||
"""Parse carrier positions from news article titles and descriptions."""
|
||||
"""Parse carrier positions from news article titles.
|
||||
|
||||
Issue #245 (tg12): the position is a region centroid, which is
|
||||
coarse — we now stamp ``position_confidence = "approximate"`` so
|
||||
the UI can render that uncertainty. Issue #244: the
|
||||
``position_source_at`` field is the news article's actual seen
|
||||
date, NOT now(), so the freshness check correctly flips entries
|
||||
to "stale" once they age past the configured window.
|
||||
"""
|
||||
updates: Dict[str, dict] = {}
|
||||
|
||||
for article in articles:
|
||||
title = article.get("title", "")
|
||||
|
||||
# Try to match a carrier from the title
|
||||
hull = _match_carrier(title)
|
||||
if not hull:
|
||||
continue
|
||||
|
||||
# Try to match a region from the title
|
||||
coords = _match_region(title)
|
||||
if not coords:
|
||||
continue
|
||||
|
||||
# Only update if we haven't seen this carrier yet (first match wins — most recent)
|
||||
# First match wins (most recent article, GDELT returns newest first
|
||||
# per term).
|
||||
if hull not in updates:
|
||||
iso_at = _gdelt_seendate_to_iso(str(article.get("seendate", ""))) or _now_iso()
|
||||
updates[hull] = {
|
||||
"lat": coords[0],
|
||||
"lng": coords[1],
|
||||
"heading": 0,
|
||||
"desc": title[:100],
|
||||
"source": "GDELT News API",
|
||||
"source": "GDELT News API (headline region match — approximate)",
|
||||
"source_url": article.get("url", "https://api.gdeltproject.org"),
|
||||
"updated": datetime.now(timezone.utc).isoformat(),
|
||||
"position_source_at": iso_at,
|
||||
# Headline-to-centroid match is explicitly approximate.
|
||||
"position_confidence": "approximate",
|
||||
}
|
||||
logger.info(
|
||||
f"Carrier update: {CARRIER_REGISTRY[hull]['name']} → {coords} (from: {title[:80]})"
|
||||
"Carrier update: %s → %s (from: %s)",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
coords,
|
||||
title[:80],
|
||||
)
|
||||
|
||||
return updates
|
||||
|
||||
|
||||
def _load_carrier_fallbacks() -> Dict[str, dict]:
    """Assemble carrier positions from registry fallbacks plus the disk cache.

    Every hull in ``CARRIER_REGISTRY`` first receives its static fallback
    position, stamped with the current UTC time. Entries returned by
    ``_load_cache()`` then override lat/lng/desc/source/updated, but only for
    hulls already present and only when the cached ``source`` starts with
    ``"GDELT"`` or ``"News"``.

    Returns:
        Mapping of hull designation to position entry.
    """
    positions: Dict[str, dict] = {
        hull: {
            "name": info["name"],
            "lat": info["fallback_lat"],
            "lng": info["fallback_lng"],
            "heading": info["fallback_heading"],
            "desc": info["fallback_desc"],
            "wiki": info["wiki"],
            "source": "USNI News Fleet & Marine Tracker",
            "source_url": "https://news.usni.org/category/fleet-tracker",
            "updated": datetime.now(timezone.utc).isoformat(),
        }
        for hull, info in CARRIER_REGISTRY.items()
    }

    # Overlay cached positions from previous runs (may have GDELT data)
    for hull, cached_pos in _load_cache().items():
        current = positions.get(hull)
        if current is None:
            continue
        origin = cached_pos.get("source", "")
        if not origin.startswith(("GDELT", "News")):
            continue
        overlay = {
            "lat": cached_pos["lat"],
            "lng": cached_pos["lng"],
            "desc": cached_pos.get("desc", current["desc"]),
            "source": cached_pos.get("source", "Cached OSINT"),
            "updated": cached_pos.get("updated", ""),
        }
        current.update(overlay)
    return positions
|
||||
def _enrich_for_rendering(hull: str, entry: dict, *, now: Optional[datetime] = None) -> dict:
    """Build the public-facing carrier object from a persisted cache entry.

    The persisted ``entry`` is only read, never modified; a new dict is
    returned.

    Args:
        hull: Hull designation, used to look up name/wiki defaults in
            ``CARRIER_REGISTRY`` and as the last-resort display name.
        entry: Persisted cache entry; must contain ``lat`` and ``lng``.
        now: Forwarded to ``_compute_position_confidence`` as its ``now``
            argument.

    Returns:
        Dict with display fields, provenance fields, the computed
        ``position_confidence`` label and the derived ``is_fallback`` flag.
    """
    registry_row = CARRIER_REGISTRY.get(hull, {})
    label = _compute_position_confidence(entry, now=now)
    observed_at = entry.get("position_source_at", "")
    # Labels that mean "this is not a live OSINT position".
    fallback_labels = {"seed", "stale", "stale_approximate", "homeport_default"}

    public_view = {
        "name": entry.get("name", registry_row.get("name", hull)),
        "lat": entry["lat"],
        "lng": entry["lng"],
        "heading": entry.get("heading", 0),
        "desc": entry.get("desc", ""),
        "wiki": entry.get("wiki", registry_row.get("wiki", "")),
        "source": entry.get("source", "OSINT estimated position"),
        "source_url": entry.get("source_url", ""),
        "position_source_at": observed_at,
        "position_confidence": label,
        # Kept for the existing frontend popup; carries the source's observed
        # time rather than the time of this call.
        "last_osint_update": observed_at,
        # UI convenience flag (dimmed icons / badges).
        "is_fallback": label in fallback_labels,
    }
    return public_view
|
||||
|
||||
|
||||
def update_carrier_positions():
|
||||
"""Main update function — called on startup and every 12h.
|
||||
def update_carrier_positions() -> None:
|
||||
"""Refresh carrier positions.
|
||||
|
||||
Phase 1 (instant): publish fallback + cached positions so the map has carriers immediately.
|
||||
Phase 2 (slow): query GDELT for fresh OSINT positions and update in-place.
|
||||
Phase 1 (instant): publish whatever's in carrier_cache.json (or
|
||||
bootstrap from seed on first-ever run), so the map has carriers
|
||||
immediately.
|
||||
|
||||
Phase 2 (slow): query GDELT and replace position entries for any
|
||||
carrier mentioned in fresh news. Persist back to cache.
|
||||
"""
|
||||
global _last_update
|
||||
|
||||
# --- Phase 1: instant fallback + cache ---
|
||||
positions = _load_carrier_fallbacks()
|
||||
# --- Phase 1: instant cache (bootstrap from seed on first-ever run) ---
|
||||
positions = _bootstrap_cache_if_missing()
|
||||
|
||||
# Ensure every registered hull has SOMETHING in the cache. A hull
|
||||
# the seed didn't cover (e.g. added after install) renders at its
|
||||
# homeport with "homeport_default" confidence.
|
||||
for hull in CARRIER_REGISTRY:
|
||||
if hull not in positions:
|
||||
entry = _homeport_entry_for(hull)
|
||||
if entry is not None:
|
||||
positions[hull] = entry
|
||||
|
||||
with _positions_lock:
|
||||
# Only overwrite if positions are currently empty (first startup).
|
||||
# If we already have data from a previous cycle, keep it while GDELT runs.
|
||||
if not _carrier_positions:
|
||||
_carrier_positions.update(positions)
|
||||
_last_update = datetime.now(timezone.utc)
|
||||
logger.info(
|
||||
f"Carrier tracker: {len(positions)} carriers loaded from fallback/cache (GDELT enrichment starting...)"
|
||||
"Carrier tracker: %d carriers loaded from cache (GDELT enrichment starting...)",
|
||||
len(positions),
|
||||
)
|
||||
|
||||
# --- Phase 2: slow GDELT enrichment ---
|
||||
# --- Phase 2: GDELT enrichment ---
|
||||
try:
|
||||
articles = _fetch_gdelt_carrier_news()
|
||||
news_positions = _parse_carrier_positions_from_news(articles)
|
||||
for hull, pos in news_positions.items():
|
||||
if hull in positions:
|
||||
positions[hull].update(pos)
|
||||
logger.info(f"Carrier OSINT: updated {CARRIER_REGISTRY[hull]['name']} from news")
|
||||
# Always overwrite — newest GDELT mention wins. The previous
|
||||
# entry is discarded (carrier_cache.json is runtime data, not versioned) and the next
|
||||
# cycle either confirms or replaces it.
|
||||
positions[hull] = pos
|
||||
logger.info("Carrier OSINT: updated %s from news", CARRIER_REGISTRY[hull]["name"])
|
||||
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
||||
logger.warning(f"GDELT carrier fetch failed: {e}")
|
||||
logger.warning("GDELT carrier fetch failed: %s", e)
|
||||
|
||||
# Save and update the global state with enriched positions
|
||||
with _positions_lock:
|
||||
_carrier_positions.clear()
|
||||
_carrier_positions.update(positions)
|
||||
@@ -449,21 +651,15 @@ def update_carrier_positions():
|
||||
|
||||
_save_cache(positions)
|
||||
|
||||
sources = {}
|
||||
for p in positions.values():
|
||||
src = p.get("source", "unknown")
|
||||
sources[src] = sources.get(src, 0) + 1
|
||||
logger.info(f"Carrier tracker: {len(positions)} carriers updated. Sources: {sources}")
|
||||
confidences: Dict[str, int] = {}
|
||||
for entry in positions.values():
|
||||
label = _compute_position_confidence(entry)
|
||||
confidences[label] = confidences.get(label, 0) + 1
|
||||
logger.info("Carrier tracker: %d carriers updated. Confidence: %s", len(positions), confidences)
|
||||
|
||||
|
||||
def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
"""Offset carriers that share identical coordinates so they don't stack.
|
||||
|
||||
At port: offset along the pier axis (~500m / 0.004° apart).
|
||||
At sea: offset perpendicular to each other (~0.08° / ~9km apart)
|
||||
so they're visibly separate but clearly operating together.
|
||||
"""
|
||||
# Group by rounded lat/lng (within ~0.01° ≈ 1km = same spot)
|
||||
"""Offset carriers that share identical coordinates so they don't stack."""
|
||||
from collections import defaultdict
|
||||
|
||||
groups: dict[str, list[int]] = defaultdict(list)
|
||||
@@ -475,7 +671,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
if len(indices) < 2:
|
||||
continue
|
||||
n = len(indices)
|
||||
# Determine if this is a port (near a homeport) or at sea
|
||||
sample = result[indices[0]]
|
||||
at_port = any(
|
||||
abs(sample["lat"] - info.get("homeport_lat", 0)) < 0.05
|
||||
@@ -484,7 +679,6 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
)
|
||||
|
||||
if at_port:
|
||||
# Use each carrier's distinct homeport pier coordinates
|
||||
for idx in indices:
|
||||
carrier = result[idx]
|
||||
hull = None
|
||||
@@ -497,8 +691,7 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
carrier["lat"] = info["homeport_lat"]
|
||||
carrier["lng"] = info["homeport_lng"]
|
||||
else:
|
||||
# At sea: spread in a line perpendicular to travel (~0.08° apart)
|
||||
spacing = 0.08 # ~9km — close enough to see they're together
|
||||
spacing = 0.08
|
||||
start_offset = -(n - 1) * spacing / 2
|
||||
for j, idx in enumerate(indices):
|
||||
result[idx]["lng"] += start_offset + j * spacing
|
||||
@@ -507,36 +700,44 @@ def _deconflict_positions(result: List[dict]) -> List[dict]:
|
||||
|
||||
|
||||
def get_carrier_positions() -> List[dict]:
|
||||
"""Return current carrier positions for the data pipeline."""
|
||||
"""Return current carrier positions for the data pipeline.
|
||||
|
||||
Each entry has the full provenance + freshness fields; the UI can
|
||||
decide how to render them. Carriers are never hidden — only
|
||||
labeled.
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
with _positions_lock:
|
||||
result = []
|
||||
for hull, pos in _carrier_positions.items():
|
||||
info = CARRIER_REGISTRY.get(hull, {})
|
||||
result: List[dict] = []
|
||||
for hull, entry in _carrier_positions.items():
|
||||
enriched = _enrich_for_rendering(hull, entry, now=now)
|
||||
result.append(
|
||||
{
|
||||
"name": pos.get("name", info.get("name", hull)),
|
||||
"name": enriched["name"],
|
||||
"type": "carrier",
|
||||
"lat": pos["lat"],
|
||||
"lng": pos["lng"],
|
||||
"heading": None, # Heading unknown for carriers — OSINT cannot determine true heading
|
||||
"lat": enriched["lat"],
|
||||
"lng": enriched["lng"],
|
||||
"heading": None, # OSINT cannot determine true heading.
|
||||
"sog": 0,
|
||||
"cog": 0,
|
||||
"country": "United States",
|
||||
"desc": pos.get("desc", ""),
|
||||
"wiki": pos.get("wiki", info.get("wiki", "")),
|
||||
"desc": enriched["desc"],
|
||||
"wiki": enriched["wiki"],
|
||||
"estimated": True,
|
||||
"source": pos.get("source", "OSINT estimated position"),
|
||||
"source_url": pos.get(
|
||||
"source_url", "https://news.usni.org/category/fleet-tracker"
|
||||
),
|
||||
"last_osint_update": pos.get("updated", ""),
|
||||
"source": enriched["source"],
|
||||
"source_url": enriched["source_url"],
|
||||
"last_osint_update": enriched["last_osint_update"],
|
||||
# New fields (additive — existing UI continues to work):
|
||||
"position_source_at": enriched["position_source_at"],
|
||||
"position_confidence": enriched["position_confidence"],
|
||||
"is_fallback": enriched["is_fallback"],
|
||||
}
|
||||
)
|
||||
return _deconflict_positions(result)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily
|
||||
# Scheduler: runs at startup, then at 00:00 and 12:00 UTC daily.
|
||||
# -----------------------------------------------------------------
|
||||
_scheduler_thread: Optional[threading.Thread] = None
|
||||
_scheduler_stop = threading.Event()
|
||||
@@ -544,7 +745,6 @@ _scheduler_stop = threading.Event()
|
||||
|
||||
def _scheduler_loop():
|
||||
"""Background thread that triggers updates at 00:00 and 12:00 UTC."""
|
||||
# Initial update on startup
|
||||
try:
|
||||
update_carrier_positions()
|
||||
except Exception as e:
|
||||
@@ -552,7 +752,6 @@ def _scheduler_loop():
|
||||
|
||||
while not _scheduler_stop.is_set():
|
||||
now = datetime.now(timezone.utc)
|
||||
# Next target: 00:00 or 12:00 UTC, whichever is sooner
|
||||
hour = now.hour
|
||||
if hour < 12:
|
||||
next_hour = 12
|
||||
@@ -561,18 +760,17 @@ def _scheduler_loop():
|
||||
|
||||
next_run = now.replace(hour=next_hour % 24, minute=0, second=0, microsecond=0)
|
||||
if next_hour == 24:
|
||||
from datetime import timedelta
|
||||
|
||||
next_run = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
|
||||
wait_seconds = (next_run - now).total_seconds()
|
||||
logger.info(
|
||||
f"Carrier tracker: next update at {next_run.isoformat()} ({wait_seconds/3600:.1f}h)"
|
||||
"Carrier tracker: next update at %s (%.1fh)",
|
||||
next_run.isoformat(),
|
||||
wait_seconds / 3600,
|
||||
)
|
||||
|
||||
# Wait until next scheduled time, or until stop event
|
||||
if _scheduler_stop.wait(timeout=wait_seconds):
|
||||
break # Stop event was set
|
||||
break
|
||||
|
||||
try:
|
||||
update_carrier_positions()
|
||||
|
||||
@@ -4,7 +4,7 @@ import concurrent.futures
|
||||
from urllib.parse import quote
|
||||
import requests as _requests
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import fetch_with_curl, DEFAULT_USER_AGENT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -15,6 +15,25 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400)
|
||||
# Nominatim requires max 1 req/sec — track last call time
|
||||
_nominatim_last_call = 0.0
|
||||
|
||||
# Issue #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||
# clients to identify themselves with a stable User-Agent that includes
|
||||
# a contact path. Bare "python-requests/x.y" or generic strings violate
|
||||
# the policy and risk getting blocked. We send the project default UA
|
||||
# (operator-overridable via SHADOWBROKER_USER_AGENT) on EVERY outbound
|
||||
# Wikimedia request, plus the policy-recommended Api-User-Agent which
|
||||
# Wikimedia explicitly accepts on top of the regular UA.
|
||||
#
|
||||
# This is documented and stable so a Wikimedia operator who wants to
|
||||
# rate-limit or contact us has a fixed identifier to grep for.
|
||||
_WIKIMEDIA_REQUEST_HEADERS = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Api-User-Agent": (
|
||||
f"{DEFAULT_USER_AGENT} "
|
||||
"(+https://github.com/BigBodyCobain/Shadowbroker; "
|
||||
"report issues at /issues)"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
||||
"""Offline fallback via reverse_geocoder when external reverse geocoding is blocked."""
|
||||
@@ -121,7 +140,13 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
|
||||
"""
|
||||
url = f"https://query.wikidata.org/sparql?query={quote(sparql)}&format=json"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=6)
|
||||
# Issue #218 (tg12): Wikimedia's User-Agent policy requires
|
||||
# outbound API traffic to be identifiable. fetch_with_curl()
|
||||
# sends the project default, and we also add the Wikimedia-
|
||||
# specific Api-User-Agent that the policy specifically asks
|
||||
# for, since this request originates from a backend service
|
||||
# that proxies on behalf of (potentially many) browser users.
|
||||
res = fetch_with_curl(url, timeout=6, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
if res.status_code == 200:
|
||||
results = res.json().get("results", {}).get("bindings", [])
|
||||
if results:
|
||||
@@ -147,7 +172,9 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
|
||||
slug = quote(name.replace(" ", "_"))
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}"
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=5)
|
||||
# Issue #219 (tg12): identify ourselves to Wikimedia per
|
||||
# their UA policy; see _fetch_wikidata_leader above.
|
||||
res = fetch_with_curl(url, timeout=5, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
if data.get("type") != "disambiguation":
|
||||
|
||||
@@ -0,0 +1,677 @@
|
||||
{
|
||||
"_meta": {
|
||||
"issue": "#239",
|
||||
"note": "Snapshot of currently-tolerated duplicate route registrations. The test in test_no_new_duplicate_routes.py fails if any NEW (method, path) duplicate appears outside this list. Removing entries (by actually deduping) is fine and the test stays green. New entries here require explicit, reviewed updates.",
|
||||
"generated_with": "python -c 'see tests/test_no_new_duplicate_routes.py'"
|
||||
},
|
||||
"duplicates": {
|
||||
"DELETE /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/contact/{peer_id}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/invite/handles/{handle}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/cctv/media": [
|
||||
"main",
|
||||
"routers.cctv"
|
||||
],
|
||||
"GET /api/debug-latest": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/geocode/reverse": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/geocode/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/health": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/live-data": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/fast": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/slow": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/mesh/channels": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/prekey-bundle": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/pubkey": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/gate/list": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/event/{event_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/events": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/locator": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/merkle": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages/wait": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/node/{node_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/log": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/metrics": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/oracle/consensus": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets/more": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/predictions": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/profile": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/search": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/stakes/{message_id}": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/all": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/batch": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/rns/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/signals": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/trust/vouches": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/oracle/region-intel": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/radio/nearest": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/nearest-list": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/audio": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/calls/{sys_name}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/systems": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/top": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/refresh": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/region-dossier": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/route/{callsign}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/sentinel2/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/settings/api-keys": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/api-keys/meta": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole-status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/sigint/nearest-sdr": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/thermal/verify": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/tools/shodan/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/tools/uw/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/wormhole/dm/contacts": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite/handles": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/personas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/health": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PATCH /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/ais/feed": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/layers": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/mesh/dm/block": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/register": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/send": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/gate/create": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-pull": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/{gate_id}/message": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/revoke": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/rotate": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/ingest": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/oracle/predict": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve-stakes": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/stake": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/report": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/send": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/trust/vouch": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/vote": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/sentinel/tile": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/sentinel/token": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/settings/news-feeds/reset": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/sigint/transmit": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"POST /api/system/update": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/tools/shodan/count": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/host": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/congress": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/darkpool": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/flow": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/viewport": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/wormhole/connect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/disconnect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/build-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-tokens": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/invite/import": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/open-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/prekey/register": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/register-key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/reset": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sender-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/enter": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/grant": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/sign-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/messages/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/activate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/clear": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/create": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/retire": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/proof": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/state/export": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/identity/bootstrap": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/join": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/restart": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign-raw": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/envelope_policy": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/legacy_envelope_fallback": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/wormhole/dm/contact": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,389 @@
|
||||
"""Issues #244, #245, #246 (tg12 external audit): carrier tracker
|
||||
quality + provenance + freshness.
|
||||
|
||||
These tests pin the post-fix contract:
|
||||
|
||||
- **#244**: dated editorial snapshot positions no longer live in the
|
||||
registry. They live in a one-shot seed file that is consumed once
|
||||
on first-ever startup. After that, the runtime cache reflects only
|
||||
what THIS install has actually observed.
|
||||
|
||||
- **#245**: headline-derived positions (centroid of a region keyword)
|
||||
are stamped ``position_confidence = "approximate"`` so the UI can
|
||||
render them with appropriate uncertainty.
|
||||
|
||||
- **#246**: freshness is a *labelling* decision, not an eviction
|
||||
decision. Positions older than the configurable freshness window
|
||||
flip from ``"recent"`` to ``"stale"`` but are NEVER replaced with
|
||||
the registry default — that would teleport the carrier. The user
|
||||
always sees the last position the system actually observed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def fresh_tracker(tmp_path, monkeypatch):
    """Isolated carrier_tracker with seed/cache paths redirected to tmp.

    ``SEED_FILE`` and ``CACHE_FILE`` are monkeypatched to paths under
    ``tmp_path`` (neither file is created here; tests write them), the
    ``SHADOWBROKER_CARRIER_FRESHNESS_DAYS`` override is removed from the
    environment, and the module-level caches are reset both before and
    after the test so position state does not leak across cases.

    Yields the ``services.carrier_tracker`` module so tests can call its
    functions directly.
    """
    from services import carrier_tracker

    seed_path = tmp_path / "data" / "carrier_seed.json"
    cache_path = tmp_path / "carrier_cache.json"
    # Only the seed's parent directory is created up front.
    seed_path.parent.mkdir(parents=True, exist_ok=True)

    monkeypatch.setattr(carrier_tracker, "SEED_FILE", seed_path)
    monkeypatch.setattr(carrier_tracker, "CACHE_FILE", cache_path)
    monkeypatch.delenv("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", raising=False)

    def _reset_module_state() -> None:
        # Reset module-level mutable state.
        carrier_tracker._carrier_positions.clear()
        carrier_tracker._cached_gdelt_articles.clear()
        carrier_tracker._last_gdelt_fetch_at = 0.0

    _reset_module_state()

    yield carrier_tracker

    # Clean up so subsequent tests start fresh. The fetch timestamp is reset
    # here as well so teardown mirrors setup.
    _reset_module_state()
|
||||
|
||||
|
||||
def _write_seed(path: Path, hull: str = "CVN-78", **overrides) -> None:
|
||||
payload = {
|
||||
"_meta": {
|
||||
"as_of": "2026-03-09",
|
||||
"source": "USNI News Fleet & Marine Tracker",
|
||||
"source_url": "https://news.usni.org/...",
|
||||
"note": "test",
|
||||
},
|
||||
"carriers": {
|
||||
hull: {
|
||||
"lat": 18.0,
|
||||
"lng": 39.5,
|
||||
"heading": 0,
|
||||
"desc": "Red Sea — Operation Epic Fury (USNI Mar 9)",
|
||||
"source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
|
||||
"source_url": "https://news.usni.org/category/fleet-tracker",
|
||||
"position_source_at": "2026-03-09T00:00:00Z",
|
||||
"position_confidence": "seed",
|
||||
**overrides,
|
||||
}
|
||||
},
|
||||
}
|
||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #244 — first-run seed bootstrap, never re-seeds after that
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSeedBootstrap:
    """#244: the seed file is consulted only when no cache file exists."""

    def test_first_ever_startup_bootstraps_from_seed(self, fresh_tracker, tmp_path):
        """With a seed and no cache, bootstrap returns the seed entry and writes the cache."""
        _write_seed(fresh_tracker.SEED_FILE)
        # No cache exists yet.
        assert not fresh_tracker.CACHE_FILE.exists()

        positions = fresh_tracker._bootstrap_cache_if_missing()

        # The seed entry made it into the cache.
        assert "CVN-78" in positions
        assert positions["CVN-78"]["lat"] == 18.0
        assert positions["CVN-78"]["position_confidence"] == "seed"
        # And the cache file is now on disk so subsequent runs skip the seed.
        assert fresh_tracker.CACHE_FILE.exists()

    def test_subsequent_startup_ignores_seed(self, fresh_tracker, tmp_path):
        """With a cache already on disk, the seed's position must not replace it."""
        # Pre-seed a different position into the cache; the seed file says Red Sea.
        cache_data = {
            "CVN-78": {
                "lat": 25.0,
                "lng": 55.0,
                "heading": 0,
                "desc": "Persian Gulf — operator-observed",
                "source": "Operator log",
                "source_url": "",
                "position_source_at": "2026-04-15T12:00:00Z",
                "position_confidence": "recent",
            }
        }
        fresh_tracker.CACHE_FILE.write_text(json.dumps(cache_data))
        _write_seed(fresh_tracker.SEED_FILE)  # seed is present but should NOT be used

        positions = fresh_tracker._bootstrap_cache_if_missing()

        assert positions["CVN-78"]["lat"] == 25.0
        assert positions["CVN-78"]["desc"] == "Persian Gulf — operator-observed"

    def test_no_seed_no_cache_falls_back_to_homeport(self, fresh_tracker):
        """With neither file present, every registered hull gets a homeport entry."""
        # Neither seed nor cache. Must fall back to homeport defaults
        # (carrier never disappears).
        assert not fresh_tracker.SEED_FILE.exists()
        assert not fresh_tracker.CACHE_FILE.exists()

        positions = fresh_tracker._bootstrap_cache_if_missing()

        # Every registered carrier has SOMETHING.
        assert set(positions.keys()) == set(fresh_tracker.CARRIER_REGISTRY.keys())
        # All entries are labelled as homeport defaults.
        for hull, entry in positions.items():
            assert entry["position_confidence"] == "homeport_default"
            registry = fresh_tracker.CARRIER_REGISTRY[hull]
            assert entry["lat"] == registry["homeport_lat"]
            assert entry["lng"] == registry["homeport_lng"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #244 — no editorial fallbacks live in the registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRegistryShape:
    """#244: shape checks on ``CARRIER_REGISTRY`` entries."""

    def test_registry_has_no_dated_fallback_fields(self, fresh_tracker):
        """The Mar 9 editorial coordinates are gone from the registry.
        They live only in the seed file."""
        forbidden = {"fallback_lat", "fallback_lng", "fallback_heading", "fallback_desc"}
        for hull, entry in fresh_tracker.CARRIER_REGISTRY.items():
            offending = forbidden & set(entry.keys())
            assert not offending, f"{hull} still has dated registry fields: {offending}"

    def test_registry_keeps_homeport_for_every_hull(self, fresh_tracker):
        """Every registry entry carries homeport coordinates plus name and wiki."""
        for hull, entry in fresh_tracker.CARRIER_REGISTRY.items():
            assert "homeport_lat" in entry, f"{hull} missing homeport_lat"
            assert "homeport_lng" in entry, f"{hull} missing homeport_lng"
            assert "name" in entry
            assert "wiki" in entry
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #246 — freshness labelling, NOT eviction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFreshnessLabelling:
    """#246: age changes the confidence label, never the stored position."""

    def test_recent_observation_labels_recent(self, fresh_tracker):
        """A 3-day-old unlabelled observation is reported as "recent"."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        entry = {
            "lat": 25.0,
            "lng": 55.0,
            "position_source_at": (now - timedelta(days=3)).isoformat(),
        }
        assert fresh_tracker._compute_position_confidence(entry, now=now) == "recent"

    def test_aged_observation_flips_to_stale(self, fresh_tracker):
        """A 30-day-old unlabelled observation is reported as "stale"."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        entry = {
            "lat": 25.0,
            "lng": 55.0,
            "position_source_at": (now - timedelta(days=30)).isoformat(),
        }
        assert fresh_tracker._compute_position_confidence(entry, now=now) == "stale"

    def test_seed_label_is_preserved_explicitly(self, fresh_tracker):
        """An explicit "seed" label survives regardless of the entry's age."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        entry = {
            "lat": 18.0,
            "lng": 39.5,
            "position_source_at": "2026-03-09T00:00:00Z",
            "position_confidence": "seed",
        }
        # Even though the source is months old, the explicit "seed" label wins
        # so the UI can render the seed-specific badge instead of generic "stale".
        assert fresh_tracker._compute_position_confidence(entry, now=now) == "seed"

    def test_homeport_default_label_is_preserved(self, fresh_tracker):
        """An explicit "homeport_default" label is not upgraded to "recent"."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        entry = {
            "lat": 36.95,
            "lng": -76.32,
            "position_source_at": now.isoformat(),
            "position_confidence": "homeport_default",
        }
        assert fresh_tracker._compute_position_confidence(entry, now=now) == "homeport_default"

    def test_freshness_window_is_env_configurable(self, fresh_tracker, monkeypatch):
        """SHADOWBROKER_CARRIER_FRESHNESS_DAYS widens the "recent" window."""
        now = datetime(2026, 6, 1, tzinfo=timezone.utc)
        entry = {
            "lat": 25.0,
            "lng": 55.0,
            "position_source_at": (now - timedelta(days=20)).isoformat(),
        }
        # Default window = 14 days → 20-day-old entry is stale.
        assert fresh_tracker._compute_position_confidence(entry, now=now) == "stale"
        # Stretch to 30 days → same entry is now "recent".
        monkeypatch.setenv("SHADOWBROKER_CARRIER_FRESHNESS_DAYS", "30")
        assert fresh_tracker._compute_position_confidence(entry, now=now) == "recent"

    def test_aged_cache_entry_keeps_its_position_never_reverts(self, fresh_tracker):
        """The core regression test for the user's intent: a year-old
        cache entry must NOT be replaced with the seed or homeport.
        The PHYSICAL position the user sees is the last one observed;
        only the freshness LABEL changes."""
        a_year_ago = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
        cache_data = {
            "CVN-78": {
                "lat": 25.0,
                "lng": 55.0,
                "heading": 0,
                "desc": "Persian Gulf",
                "source": "GDELT News API",
                "source_url": "https://news.example/...",
                "position_source_at": a_year_ago,
                "position_confidence": "recent",  # was recent when written
            }
        }
        fresh_tracker.CACHE_FILE.write_text(json.dumps(cache_data))

        positions = fresh_tracker._bootstrap_cache_if_missing()
        enriched = fresh_tracker._enrich_for_rendering("CVN-78", positions["CVN-78"])

        # The position is preserved exactly.
        assert enriched["lat"] == 25.0
        assert enriched["lng"] == 55.0
        # But the live label has flipped to stale.
        assert enriched["position_confidence"] == "stale"
        assert enriched["is_fallback"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #245 — approximate confidence for region-centroid positions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApproximateConfidenceForNewsDerivedPositions:
    """#245: headline-derived positions are stamped "approximate"."""

    def test_news_parsing_stamps_approximate_confidence(self, fresh_tracker):
        """A headline naming a carrier and region yields an "approximate" entry."""
        articles = [
            {
                "title": "USS Ford carrier deployed in Mediterranean for joint exercise",
                "url": "https://news.example/ford-mediterranean",
                "seendate": "20260415120000",
            }
        ]
        updates = fresh_tracker._parse_carrier_positions_from_news(articles)
        assert "CVN-78" in updates
        entry = updates["CVN-78"]
        assert entry["position_confidence"] == "approximate"
        # And the source_at is the article's seen date, not now().
        assert entry["position_source_at"].startswith("2026-04-15")

    def test_gdelt_seendate_parser_handles_well_formed_input(self, fresh_tracker):
        """A 14-digit ``YYYYMMDDHHMMSS`` seendate converts to an ISO timestamp."""
        iso = fresh_tracker._gdelt_seendate_to_iso("20260415120000")
        assert iso is not None
        assert iso.startswith("2026-04-15T12:00:00")

    def test_gdelt_seendate_parser_returns_none_on_garbage(self, fresh_tracker):
        """Empty, non-numeric, and truncated inputs all return ``None``."""
        assert fresh_tracker._gdelt_seendate_to_iso("") is None
        assert fresh_tracker._gdelt_seendate_to_iso("not-a-date") is None
        assert fresh_tracker._gdelt_seendate_to_iso("2026") is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full enrichment → public API shape
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEnrichForRendering:
    """Checks the public fields produced by ``_enrich_for_rendering``."""

    def test_seed_entry_produces_expected_public_fields(self, fresh_tracker):
        """A seed entry keeps its legacy fields and is flagged as a fallback."""
        seed_entry = {
            "lat": 18.0,
            "lng": 39.5,
            "heading": 0,
            "desc": "Red Sea (USNI Mar 9)",
            "source": "USNI News Fleet & Marine Tracker (seed, as of 2026-03-09)",
            "source_url": "https://news.usni.org/category/fleet-tracker",
            "position_source_at": "2026-03-09T00:00:00Z",
            "position_confidence": "seed",
        }
        enriched = fresh_tracker._enrich_for_rendering("CVN-78", seed_entry)
        # Existing UI fields preserved.
        assert enriched["lat"] == 18.0
        assert enriched["lng"] == 39.5
        assert enriched["source"].startswith("USNI")
        assert enriched["last_osint_update"] == "2026-03-09T00:00:00Z"
        # New audit-required fields.
        assert enriched["position_confidence"] == "seed"
        assert enriched["position_source_at"] == "2026-03-09T00:00:00Z"
        assert enriched["is_fallback"] is True

    def test_recent_observation_is_not_fallback(self, fresh_tracker):
        """A 2-day-old "approximate" entry keeps its label and is not a fallback."""
        now = datetime.now(timezone.utc)
        recent_entry = {
            "lat": 25.0,
            "lng": 55.0,
            "heading": 0,
            "desc": "Persian Gulf",
            "source": "GDELT News API",
            "source_url": "https://news.example/...",
            "position_source_at": (now - timedelta(days=2)).isoformat(),
            "position_confidence": "approximate",
        }
        enriched = fresh_tracker._enrich_for_rendering("CVN-78", recent_entry, now=now)
        assert enriched["position_confidence"] == "approximate"
        # Approximate (from a recent headline) is honest precision, but the UI
        # treats it as live data — is_fallback only flips True for explicit
        # fallback categories (seed / stale / homeport_default).
        assert enriched["is_fallback"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regression: existing frontend fields are preserved
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPublicResponseShapeBackwardCompat:
    """The frontend ShipPopup expects `estimated`, `source`, `source_url`,
    `last_osint_update`. The new fields are additive and existing fields
    keep their meaning so the UI does not need updating to keep working."""

    def test_get_carrier_positions_preserves_existing_keys(self, fresh_tracker):
        """``get_carrier_positions`` returns both the legacy and the new keys."""
        _write_seed(fresh_tracker.SEED_FILE)
        fresh_tracker._bootstrap_cache_if_missing()
        # Install a single known entry under the module's lock.
        with fresh_tracker._positions_lock:
            fresh_tracker._carrier_positions.update(
                {
                    "CVN-78": {
                        "lat": 18.0,
                        "lng": 39.5,
                        "heading": 0,
                        "desc": "Red Sea (seed)",
                        "source": "Seed",
                        "source_url": "",
                        "position_source_at": "2026-03-09T00:00:00Z",
                        "position_confidence": "seed",
                    }
                }
            )

        out = fresh_tracker.get_carrier_positions()
        assert len(out) == 1
        c = out[0]
        # Old fields the frontend uses.
        for key in (
            "name",
            "type",
            "lat",
            "lng",
            "country",
            "desc",
            "wiki",
            "estimated",
            "source",
            "source_url",
            "last_osint_update",
        ):
            assert key in c, f"missing legacy field {key!r}"
        # New fields.
        for key in ("position_confidence", "position_source_at", "is_fallback"):
            assert key in c, f"missing audit-required field {key!r}"
        assert c["type"] == "carrier"
        assert c["estimated"] is True
|
||||
@@ -0,0 +1,208 @@
|
||||
"""Issue #239 (tg12): backend registers duplicate API routes in both
|
||||
``main.py`` and router modules, so request behavior depends on the
|
||||
order ``FastAPI`` happened to register them.
|
||||
|
||||
This test is the **CI guard** that locks in the invariant going forward.
|
||||
It does NOT delete any existing duplicates — those are tolerated via an
|
||||
explicit baseline file. What it DOES block is *new* duplicates appearing
|
||||
later, which is what the audit was actually asking for: a way to stop
|
||||
the drift before it gets worse.
|
||||
|
||||
Findings (empirically verified, see PR #286 description):
|
||||
|
||||
- ``main.app`` calls ``include_router(...)`` for every router at module
|
||||
import time around line 3316.
|
||||
- Every ``@app.get/post/put/...`` decorator inside ``main.py`` runs
|
||||
*after* those include_router calls, so the router handler is the one
|
||||
that actually serves requests. The duplicates in ``main.py`` are
|
||||
dead code at the route-resolution layer.
|
||||
- Behavior today is deterministic (router wins), but if someone later
|
||||
adds a NEW route only in ``main.py``, or edits one copy of an
|
||||
existing pair without the other, drift starts.
|
||||
|
||||
How this test works:
|
||||
|
||||
- Walks ``main.app.routes`` and records every ``(method, path)`` that
|
||||
appears more than once, along with which modules registered each
|
||||
copy.
|
||||
- Compares that set against the baseline in
|
||||
``backend/tests/data/duplicate_routes_baseline.json``.
|
||||
- **Fails** if any duplicate appears that is NOT in the baseline
|
||||
(or if the registering modules for an existing duplicate change).
|
||||
- **Stays green** when duplicates are *removed* by genuinely deduping
|
||||
the code. (The baseline is a ceiling, not a floor.)
|
||||
|
||||
To extend in the future:
|
||||
|
||||
- If you actually dedupe a route, leave the baseline alone — the test
|
||||
still passes. Subsequent regenerations of the baseline (``python -m
|
||||
scripts.regen_duplicate_routes_baseline`` or the snippet in this
|
||||
test's docstring) will shrink it.
|
||||
- If you legitimately need a new duplicate (you probably do not), add
|
||||
it to the baseline AND explain why in the PR description so reviewers
|
||||
can push back.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Tolerated-duplicates baseline, stored in the ``data`` directory that sits
# next to this test module.
BASELINE_PATH = Path(__file__).parent / "data" / "duplicate_routes_baseline.json"
|
||||
|
||||
|
||||
def _current_duplicates() -> dict[str, list[str]]:
|
||||
"""Walk ``main.app.routes`` and return ``{'METHOD /path': [module, ...]}``
|
||||
for every (method, path) registered more than once."""
|
||||
import main
|
||||
|
||||
by_key: dict[str, list[str]] = defaultdict(list)
|
||||
for route in main.app.routes:
|
||||
path = getattr(route, "path", None)
|
||||
methods = getattr(route, "methods", None)
|
||||
endpoint = getattr(route, "endpoint", None)
|
||||
if not path or not methods or endpoint is None:
|
||||
continue
|
||||
for method in methods:
|
||||
if method in ("HEAD", "OPTIONS"):
|
||||
continue
|
||||
by_key[f"{method} {path}"].append(endpoint.__module__)
|
||||
|
||||
return {
|
||||
key: sorted(modules) for key, modules in by_key.items() if len(modules) > 1
|
||||
}
|
||||
|
||||
|
||||
def _load_baseline() -> dict[str, list[str]]:
|
||||
if not BASELINE_PATH.exists():
|
||||
return {}
|
||||
raw = json.loads(BASELINE_PATH.read_text(encoding="utf-8"))
|
||||
dups = raw.get("duplicates", {})
|
||||
if not isinstance(dups, dict):
|
||||
return {}
|
||||
return {k: sorted(v) for k, v in dups.items()}
|
||||
|
||||
|
||||
def test_no_new_duplicate_route_registrations():
|
||||
"""Block any (method, path) duplicate not already in the baseline.
|
||||
|
||||
This is the primary CI guard: PRs that add a NEW shadowed
|
||||
``@app.get`` while a router module already serves the same route
|
||||
fail here with an actionable message.
|
||||
"""
|
||||
current = _current_duplicates()
|
||||
baseline = _load_baseline()
|
||||
|
||||
new_or_changed = []
|
||||
for key, modules in sorted(current.items()):
|
||||
if key not in baseline:
|
||||
new_or_changed.append(
|
||||
f" + {key} (NEW duplicate; registered in: {modules})"
|
||||
)
|
||||
continue
|
||||
if modules != baseline[key]:
|
||||
new_or_changed.append(
|
||||
f" ~ {key} "
|
||||
f"(modules changed: was {baseline[key]}, now {modules})"
|
||||
)
|
||||
|
||||
if new_or_changed:
|
||||
pytest.fail(
|
||||
"Issue #239 CI guard: detected duplicate route registrations "
|
||||
"that are NOT in the tolerated baseline.\n"
|
||||
"\n"
|
||||
"If you added a new @app.get/post/... in main.py for a path "
|
||||
"that a router module already serves, please move the handler "
|
||||
"into the router and delete the main.py copy — the router "
|
||||
"version wins on request routing anyway, so the main.py copy "
|
||||
"is dead code that just creates drift risk.\n"
|
||||
"\n"
|
||||
"Offending entries:\n"
|
||||
+ "\n".join(new_or_changed)
|
||||
+ "\n\n"
|
||||
"Baseline lives at "
|
||||
f"{BASELINE_PATH.relative_to(BASELINE_PATH.parent.parent.parent)}."
|
||||
)
|
||||
|
||||
|
||||
def test_baseline_only_lists_real_duplicates():
|
||||
"""Catch baseline drift in the other direction: if an entry in the
|
||||
baseline is no longer actually a duplicate (because someone deduped
|
||||
it manually), the baseline is stale and should be shrunk so future
|
||||
re-introductions of that duplicate get caught.
|
||||
|
||||
This test is informational — it does NOT fail the build today (the
|
||||
audit's main concern is *new* duplicates, not stale baseline
|
||||
entries). It prints a warning so the next baseline regeneration
|
||||
can clean things up.
|
||||
"""
|
||||
current = _current_duplicates()
|
||||
baseline = _load_baseline()
|
||||
stale = sorted(k for k in baseline if k not in current)
|
||||
if stale:
|
||||
# Use warnings instead of fail so this is friendly housekeeping,
|
||||
# not a CI blocker. The other test catches the actual safety
|
||||
# concern.
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
f"duplicate_routes_baseline.json contains {len(stale)} entry/entries "
|
||||
"no longer present in app.routes — consider regenerating the baseline. "
|
||||
f"Stale: {stale[:5]}{'...' if len(stale) > 5 else ''}",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
def test_router_handler_is_the_one_that_serves():
|
||||
"""Pin the empirical claim from PR #286: for every duplicated
|
||||
(method, path), the FIRST-registered handler is in a router
|
||||
module, not in main.py. If this ever flips — e.g. someone moves
|
||||
include_router calls to the bottom of main.py — duplicate routes
|
||||
start silently changing which handler runs. This catches that
|
||||
rearrangement immediately.
|
||||
"""
|
||||
import main
|
||||
|
||||
first_seen: dict[str, str] = {}
|
||||
for route in main.app.routes:
|
||||
path = getattr(route, "path", None)
|
||||
methods = getattr(route, "methods", None)
|
||||
endpoint = getattr(route, "endpoint", None)
|
||||
if not path or not methods or endpoint is None:
|
||||
continue
|
||||
for method in methods:
|
||||
if method in ("HEAD", "OPTIONS"):
|
||||
continue
|
||||
key = f"{method} {path}"
|
||||
if key not in first_seen:
|
||||
first_seen[key] = endpoint.__module__
|
||||
|
||||
main_winning = sorted(
|
||||
k for k, mod in first_seen.items() if mod == "main"
|
||||
)
|
||||
# The duplicates we tolerate are router-first. If main is the first
|
||||
# registered for any duplicated path, the router copy gets shadowed
|
||||
# instead, which would invalidate every assumption made in audit
|
||||
# rounds 5 and 6 about "the router version is canonical."
|
||||
baseline = _load_baseline()
|
||||
main_first_in_baseline = [k for k in main_winning if k in baseline]
|
||||
if main_first_in_baseline:
|
||||
pytest.fail(
|
||||
"Issue #239 invariant broken: for at least one duplicated "
|
||||
"(method, path), main.py is now registered FIRST and is "
|
||||
"serving requests instead of the router copy. Audit rounds "
|
||||
"5 and 6 assumed the router handler wins.\n"
|
||||
"\n"
|
||||
"Affected entries:\n"
|
||||
+ "\n".join(f" {k}" for k in main_first_in_baseline)
|
||||
+ "\n\n"
|
||||
"Most likely cause: someone moved app.include_router(...) "
|
||||
"calls in main.py to after the @app.get decorators. Move "
|
||||
"them back to before the @app routes (currently around "
|
||||
"line 3316)."
|
||||
)
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Issues #218 / #219 (tg12): outbound Wikipedia + Wikidata calls must
|
||||
identify ShadowBroker via the Wikimedia-recommended User-Agent /
|
||||
Api-User-Agent headers.
|
||||
|
||||
Before this fix, ``backend/services/region_dossier.py`` called
|
||||
``fetch_with_curl(url)`` with no explicit headers, falling back to the
|
||||
generic project default UA. That sent a too-anonymous identifier to
|
||||
Wikimedia. Per Wikimedia's policy
|
||||
(https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy)
|
||||
the API caller should send a stable, contactable identifier so Wikimedia
|
||||
operators can rate-limit or reach the project.
|
||||
|
||||
This test does NOT make network calls. It patches ``fetch_with_curl``
|
||||
and asserts the headers that get passed through.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _fake_resp(payload: dict, status: int = 200) -> MagicMock:
|
||||
r = MagicMock()
|
||||
r.status_code = status
|
||||
r.json.return_value = payload
|
||||
return r
|
||||
|
||||
|
||||
def test_wikidata_call_passes_wikimedia_request_headers():
    """``_fetch_wikidata_leader`` must pass identifying headers to ``fetch_with_curl``.

    ``fetch_with_curl`` is replaced with a recorder that captures the
    ``headers`` keyword argument, so no network call is made.
    """
    from services import region_dossier

    calls = []

    def fake_fetch(url, **kwargs):
        calls.append(kwargs.get("headers"))
        return _fake_resp({"results": {"bindings": []}})

    with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
        region_dossier._fetch_wikidata_leader("Testlandia")

    assert calls, "fetch_with_curl was not called"
    headers = calls[0] or {}
    assert "User-Agent" in headers
    assert "Api-User-Agent" in headers
    # Stable identifier should mention the project + a contact path.
    assert "Shadowbroker" in headers["Api-User-Agent"] or "ShadowBroker" in headers["Api-User-Agent"]
    assert "github.com" in headers["Api-User-Agent"].lower()
|
||||
|
||||
|
||||
def test_wikipedia_summary_call_passes_wikimedia_request_headers():
    """Every wikipedia.org request made by ``_fetch_local_wiki_summary``
    must carry both ``User-Agent`` and ``Api-User-Agent`` headers.

    ``fetch_with_curl`` is replaced with a recorder that captures each URL
    and its ``headers`` keyword argument, so no network call is made.
    """
    from services import region_dossier

    calls = []

    def fake_fetch(url, **kwargs):
        calls.append((url, kwargs.get("headers")))
        return _fake_resp(
            {
                "type": "standard",
                "description": "test desc",
                "extract": "test extract",
                "thumbnail": {"source": ""},
            }
        )

    with patch.object(region_dossier, "fetch_with_curl", side_effect=fake_fetch):
        region_dossier._fetch_local_wiki_summary("Paris", "France")

    # At least one Wikipedia REST call was issued.
    wikipedia_calls = [c for c in calls if "wikipedia.org" in c[0]]
    assert wikipedia_calls, "no Wikipedia call was issued"
    for url, headers in wikipedia_calls:
        headers = headers or {}
        assert "User-Agent" in headers, f"missing User-Agent on {url}"
        assert "Api-User-Agent" in headers, f"missing Api-User-Agent on {url}"
        assert "github.com" in headers["Api-User-Agent"].lower()
|
||||
|
||||
|
||||
def test_wikimedia_headers_constant_is_stable():
    """Regression guard: if someone removes the contact path from the
    Api-User-Agent we want a loud test failure, not a silent ToS drift.
    """
    from services.region_dossier import _WIKIMEDIA_REQUEST_HEADERS

    aua = _WIKIMEDIA_REQUEST_HEADERS.get("Api-User-Agent", "")
    assert "Shadowbroker" in aua or "ShadowBroker" in aua
    assert "github.com" in aua.lower()
    # Must include a path Wikimedia operators can use to contact us
    # (we use /issues against the public repo).
    assert "issues" in aua.lower()
|
||||
@@ -0,0 +1,263 @@
|
||||
"""Issues #243, #252, #253 (tg12): settings endpoints must not leak
|
||||
operational posture to unauthenticated callers.
|
||||
|
||||
- **#243**: ``GET /api/settings/wormhole``, ``/api/settings/privacy-profile``,
|
||||
and ``/api/settings/node`` were leaking transport choice, anonymous-mode
|
||||
state, the named privacy profile, and node-participant state to any
|
||||
unauthenticated caller. The fix tightens the redaction allowlists to
|
||||
expose ONLY a bare "is this feature on?" boolean and gates node mode
|
||||
behind authenticated reads.
|
||||
|
||||
- **#252**: ``GET /api/settings/news-feeds`` returned the operator's full
|
||||
curated feed inventory (names + URLs) to anyone. Now gated on
|
||||
local-operator.
|
||||
|
||||
- **#253**: ``GET /api/settings/timemachine`` returned whether archival
|
||||
capture is enabled to anyone. Now gated on local-operator.
|
||||
|
||||
Auth model: ``require_local_operator`` allows loopback (Tauri shell),
|
||||
the Docker bridge frontend container (via the hostname-bound trust from
|
||||
PR #278), and any caller that presents the configured admin key.
|
||||
Anonymous LAN or internet callers do NOT pass and either receive 403
|
||||
(news-feeds, timemachine) or a redacted minimum (wormhole / node).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
|
||||
# Fixture admin key: tests patch ``auth._current_admin_key`` to return this
# value and send it in the ``X-Admin-Key`` header for authenticated requests.
_ADMIN_KEY = "test-admin-key-for-round5-fixture-32+chars"
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """TestClient with the private-lane transport middleware disabled.

    Same shape as the oracle resolve fixture — the mesh privacy
    middleware returns 202 for ``/api/settings/*`` under TestClient
    because Wormhole is not actually running. Patching out the tier
    requirement lets requests reach the route's auth gate.

    The client is created with ``raise_server_exceptions=False``, and the
    ``main._minimum_transport_tier`` patch stays active for the whole test
    because the ``yield`` happens inside the ``with`` block.
    """
    import main

    with patch("main._minimum_transport_tier", return_value=None):
        yield TestClient(main.app, raise_server_exceptions=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #243: Wormhole posture redaction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWormholeSettingsRedaction:
    """``GET /api/settings/wormhole`` must NOT leak transport choice or
    anonymous-mode state to unauthenticated callers."""

    def _read_settings_payload(self):
        """Return the full settings dict the patched readers hand back."""
        return {
            "enabled": True,
            "transport": "tor_arti",
            "anonymous_mode": True,
            "privacy_profile": "high",
            "socks_proxy": "socks5h://127.0.0.1:9050",
        }

    def test_anonymous_caller_sees_only_enabled_bool(self, client):
        """Without an admin key the response exposes ``enabled`` and no posture fields."""
        # The reader is patched at each of the three import paths listed
        # below so the stub payload is used whichever one the handler calls.
        with (
            patch("main.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get("/api/settings/wormhole")
        assert r.status_code == 200
        body = r.json()
        # Only the bare "is Wormhole on?" boolean is exposed publicly.
        assert "enabled" in body
        assert body["enabled"] is True
        # Posture fields the audit flagged must be absent.
        assert "transport" not in body
        assert "anonymous_mode" not in body
        assert "privacy_profile" not in body
        assert "socks_proxy" not in body

    def test_authenticated_caller_sees_full_state(self, client):
        """With a matching ``X-Admin-Key`` header the posture fields are returned."""
        with (
            patch("main.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._read_settings_payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            r = client.get(
                "/api/settings/wormhole",
                headers={"X-Admin-Key": _ADMIN_KEY},
            )
        assert r.status_code == 200
        body = r.json()
        # All fields visible when authenticated.
        assert body["enabled"] is True
        assert body["transport"] == "tor_arti"
        assert body["anonymous_mode"] is True
        assert body["privacy_profile"] == "high"
|
||||
|
||||
|
||||
class TestPrivacyProfileRedaction:
    """Redaction contract for ``GET /api/settings/privacy-profile``.

    The named profile itself discloses operator intent, so callers
    without the admin key must not see it (nor the transport or the
    anonymous-mode flag).
    """

    def _payload(self):
        """Return a fresh fake Wormhole settings dict for the patched readers."""
        return dict(
            enabled=True,
            transport="tor_arti",
            anonymous_mode=True,
            privacy_profile="high",
        )

    def test_anonymous_caller_sees_only_wormhole_enabled_bool(self, client):
        with (
            patch("main.read_wormhole_settings", return_value=self._payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            response = client.get("/api/settings/privacy-profile")

        assert response.status_code == 200
        payload = response.json()
        assert "wormhole_enabled" in payload
        assert payload["wormhole_enabled"] is True
        # The named profile, transport, and anonymous mode must NOT
        # leak to anonymous callers. A present-but-null profile is tolerated.
        assert "profile" not in payload or payload.get("profile") is None
        for hidden_field in ("transport", "anonymous_mode"):
            assert hidden_field not in payload

    def test_authenticated_caller_sees_named_profile_and_transport(self, client):
        with (
            patch("main.read_wormhole_settings", return_value=self._payload()),
            patch("routers.wormhole.read_wormhole_settings", return_value=self._payload()),
            patch("services.wormhole_settings.read_wormhole_settings", return_value=self._payload()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            admin_headers = {"X-Admin-Key": _ADMIN_KEY}
            response = client.get("/api/settings/privacy-profile", headers=admin_headers)

        assert response.status_code == 200
        payload = response.json()
        assert payload["profile"] == "high"
        assert payload["wormhole_enabled"] is True
        assert payload["transport"] == "tor_arti"
        assert payload["anonymous_mode"] is True
|
||||
|
||||
|
||||
class TestNodeSettingsRedaction:
    """Redaction contract for ``GET /api/settings/node``.

    Neither ``node_mode`` nor ``node_enabled`` (nor any stored node
    setting) may be disclosed to callers without the admin key.
    """

    def _node_data(self):
        """Return a fresh fake node-settings dict with one sentinel field."""
        return dict(some_node_field="value")

    def test_anonymous_caller_sees_empty_stub(self, client):
        with (
            patch("services.node_settings.read_node_settings", return_value=self._node_data()),
            patch("routers.admin._current_node_mode", return_value="participant"),
            patch("routers.admin._participant_node_enabled", return_value=True),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            response = client.get("/api/settings/node")

        assert response.status_code == 200
        payload = response.json()
        # No posture fields.
        for hidden_field in ("node_mode", "node_enabled", "some_node_field"):
            assert hidden_field not in payload

    def test_authenticated_caller_sees_full_node_state(self, client):
        with (
            patch("services.node_settings.read_node_settings", return_value=self._node_data()),
            patch("routers.admin._current_node_mode", return_value="participant"),
            patch("routers.admin._participant_node_enabled", return_value=True),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            admin_headers = {"X-Admin-Key": _ADMIN_KEY}
            response = client.get("/api/settings/node", headers=admin_headers)

        assert response.status_code == 200
        payload = response.json()
        assert payload["node_mode"] == "participant"
        assert payload["node_enabled"] is True
        assert payload["some_node_field"] == "value"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #252: news-feeds auth gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNewsFeedsAuthGate:
    """Auth gate for ``GET /api/settings/news-feeds``: admin key required."""

    def _fake_feeds(self):
        """Return a fresh two-entry fake feed inventory."""
        custom_feed = {"name": "Custom Internal", "url": "https://internal.example/rss", "weight": 5}
        default_feed = {"name": "Default News", "url": "https://news.example/rss", "weight": 3}
        return [custom_feed, default_feed]

    def test_anonymous_caller_rejected(self, client):
        with (
            patch("services.news_feed_config.get_feeds", return_value=self._fake_feeds()) as get_feeds,
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            response = client.get("/api/settings/news-feeds")

        assert response.status_code == 403
        # Critically: the underlying config read must NOT have been performed
        # (else the response could leak the feed count via response timing).
        assert get_feeds.call_count == 0

    def test_authenticated_caller_sees_full_feed_inventory(self, client):
        with (
            patch("services.news_feed_config.get_feeds", return_value=self._fake_feeds()),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            admin_headers = {"X-Admin-Key": _ADMIN_KEY}
            response = client.get("/api/settings/news-feeds", headers=admin_headers)

        assert response.status_code == 200
        feeds = response.json()
        assert len(feeds) == 2
        first_feed = feeds[0]
        assert first_feed["name"] == "Custom Internal"
        assert first_feed["url"] == "https://internal.example/rss"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# #253: timemachine auth gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTimemachineAuthGate:
    """Auth gate for ``GET /api/settings/timemachine``: admin key required."""

    def test_anonymous_caller_rejected(self, client):
        stored_settings = {"timemachine_enabled": True}
        with (
            patch("services.node_settings.read_node_settings", return_value=stored_settings),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            response = client.get("/api/settings/timemachine")

        assert response.status_code == 403

    def test_authenticated_caller_sees_enabled_state(self, client):
        stored_settings = {"timemachine_enabled": True}
        with (
            patch("services.node_settings.read_node_settings", return_value=stored_settings),
            patch("auth._current_admin_key", return_value=_ADMIN_KEY),
        ):
            admin_headers = {"X-Admin-Key": _ADMIN_KEY}
            response = client.get("/api/settings/timemachine", headers=admin_headers)

        assert response.status_code == 200
        payload = response.json()
        assert payload["enabled"] is True
        assert "storage_warning" in payload
|
||||
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
* Issues #218 / #219 / #220 (tg12 external audit):
|
||||
*
|
||||
* Every browser-direct call to Wikipedia or Wikidata must send the
|
||||
* `Api-User-Agent` header that Wikimedia's UA policy asks for. These
|
||||
* tests pin that requirement on the shared `lib/wikimediaClient`
|
||||
* helper that WikiImage, NewsFeed, and useRegionDossier all route
|
||||
* through, so a future refactor that drops the header gets a loud
|
||||
* test failure rather than a silent ToS regression.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
WIKIMEDIA_API_USER_AGENT,
|
||||
fetchWikipediaSummary,
|
||||
fetchWikidataSparql,
|
||||
_resetWikimediaClientCacheForTests,
|
||||
} from '@/lib/wikimediaClient';
|
||||
|
||||
// Captured once so every test can put the real fetch back afterwards.
const originalFetch = globalThis.fetch;

describe('lib/wikimediaClient', () => {
  type RecordedCall = { url: string; init?: RequestInit };

  /** Serialise `payload` into a JSON `Response` with the given status. */
  const jsonReply = (payload: unknown, status = 200): Response =>
    new Response(JSON.stringify(payload), { status });

  /** Minimal "standard" page-summary payload as returned by the REST API. */
  const standardSummary = (
    title: string,
    description: string,
    extract: string,
    thumbnailSource: string,
  ) => ({
    type: 'standard',
    title,
    description,
    extract,
    thumbnail: { source: thumbnailSource },
  });

  /** Stub fetch with a mock that records every (url, init) pair it receives. */
  const installRecordingFetch = (payload: unknown): RecordedCall[] => {
    const recorded: RecordedCall[] = [];
    globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => {
      recorded.push({ url: String(url), init });
      return jsonReply(payload);
    }) as any;
    return recorded;
  };

  /** Stub fetch with a mock that counts invocations, optionally after a delay. */
  const installCountingFetch = (payload: unknown, delayMs = 0): { count: number } => {
    const counter = { count: 0 };
    globalThis.fetch = vi.fn(async () => {
      counter.count++;
      if (delayMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, delayMs));
      }
      return jsonReply(payload);
    }) as any;
    return counter;
  };

  beforeEach(() => {
    // Each test starts with an empty shared cache.
    _resetWikimediaClientCacheForTests();
  });

  afterEach(() => {
    globalThis.fetch = originalFetch;
    vi.restoreAllMocks();
  });

  it('exposes a stable Api-User-Agent identifier with a contact path', () => {
    const lowered = WIKIMEDIA_API_USER_AGENT.toLowerCase();
    expect(WIKIMEDIA_API_USER_AGENT).toContain('Shadowbroker');
    expect(lowered).toContain('github.com');
    expect(lowered).toContain('issues');
  });

  it('sends Api-User-Agent on Wikipedia summary fetch', async () => {
    const calls = installRecordingFetch(
      standardSummary('Boeing 747', 'aircraft', 'long extract', 'https://example.org/thumb.jpg'),
    );

    const summary = await fetchWikipediaSummary('Boeing 747');
    expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
    expect(calls).toHaveLength(1);
    const headers = (calls[0].init?.headers || {}) as Record<string, string>;
    expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT);
  });

  it('sends Api-User-Agent on Wikidata SPARQL fetch', async () => {
    const calls = installRecordingFetch({
      results: {
        bindings: [
          {
            leaderLabel: { value: 'Test Leader' },
            govTypeLabel: { value: 'Test Government' },
          },
        ],
      },
    });

    const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
    expect(bindings).toHaveLength(1);
    const headers = (calls[0].init?.headers || {}) as Record<string, string>;
    expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT);
    expect(headers['Accept']).toBe('application/sparql-results+json');
  });

  it('shares cache across consecutive callers for the same Wikipedia title', async () => {
    const fetches = installCountingFetch(
      standardSummary('Eiffel Tower', 'iron lattice tower', '...', 'https://example.org/eiffel.jpg'),
    );

    const a = await fetchWikipediaSummary('Eiffel Tower');
    const b = await fetchWikipediaSummary('Eiffel Tower');
    expect(fetches.count).toBe(1);
    expect(a?.thumbnail).toBe(b?.thumbnail);
  });

  it('deduplicates concurrent in-flight requests for the same title', async () => {
    // The 5 ms delay keeps the first request in flight while the others start.
    const fetches = installCountingFetch(
      standardSummary('Mount Fuji', 'stratovolcano', '...', 'https://example.org/fuji.jpg'),
      5,
    );

    const [a, b, c] = await Promise.all([
      fetchWikipediaSummary('Mount Fuji'),
      fetchWikipediaSummary('Mount Fuji'),
      fetchWikipediaSummary('Mount Fuji'),
    ]);
    expect(fetches.count).toBe(1);
    expect(a?.thumbnail).toBe('https://example.org/fuji.jpg');
    expect(b).toEqual(a);
    expect(c).toEqual(a);
  });

  it('returns null on disambiguation pages without throwing', async () => {
    globalThis.fetch = vi.fn(async () => jsonReply({ type: 'disambiguation' })) as any;
    const summary = await fetchWikipediaSummary('Mercury');
    expect(summary).toBeNull();
  });

  it('returns null on HTTP error without throwing', async () => {
    globalThis.fetch = vi.fn(async () => new Response('not found', { status: 404 })) as any;
    const summary = await fetchWikipediaSummary('Nonexistent Article 12345');
    expect(summary).toBeNull();
  });

  it('returns null on network error without throwing', async () => {
    globalThis.fetch = vi.fn(async () => {
      throw new Error('network down');
    }) as any;
    const summary = await fetchWikipediaSummary('Anything');
    expect(summary).toBeNull();
  });

  it('returns null on empty input', async () => {
    globalThis.fetch = vi.fn(async () => new Response('{}', { status: 200 })) as any;
    expect(await fetchWikipediaSummary('')).toBeNull();
    expect(await fetchWikipediaSummary(' ')).toBeNull();
    // Blank titles must short-circuit before any network call.
    expect(globalThis.fetch).not.toHaveBeenCalled();
  });
});
|
||||
@@ -5,6 +5,7 @@ import { motion, AnimatePresence } from 'framer-motion';
|
||||
import { AlertTriangle, Clock, Minus, Plus, ExternalLink, Brain, Loader2 } from 'lucide-react';
|
||||
import React, { useEffect, useRef, useCallback } from 'react';
|
||||
import WikiImage from '@/components/WikiImage';
|
||||
import { fetchWikipediaSummary } from '@/lib/wikimediaClient';
|
||||
import type { SelectedEntity, RegionDossier, FimiData } from "@/types/dashboard";
|
||||
import { useDataKeys } from '@/hooks/useDataStore';
|
||||
import { API_BASE } from '@/lib/api';
|
||||
@@ -203,34 +204,37 @@ function resolveAircraftWikiTitle(model: string | undefined): string | null {
|
||||
return AIRCRAFT_WIKI[model] || resolveAcTypeWiki(model);
|
||||
}
|
||||
|
||||
// Module-level cache for Wikipedia thumbnails (persists across re-renders)
|
||||
const _wikiThumbCache: Record<string, { url: string | null; loading: boolean }> = {};
|
||||
|
||||
// Issue #220 (tg12): the previous implementation kept its own
|
||||
// module-local Wikipedia thumbnail cache and issued anonymous fetches
|
||||
// without `Api-User-Agent`. We now delegate to lib/wikimediaClient,
|
||||
// which sends the policy-compliant header and shares one cache with
|
||||
// WikiImage and useRegionDossier.
|
||||
function useAircraftImage(model: string | undefined): { imgUrl: string | null; wikiUrl: string | null; loading: boolean } {
|
||||
const [, forceUpdate] = useState(0);
|
||||
const [imgUrl, setImgUrl] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const wikiTitle = resolveAircraftWikiTitle(model) || undefined;
|
||||
const wikiUrl = wikiTitle ? `https://en.wikipedia.org/wiki/${wikiTitle.replace(/ /g, '_')}` : null;
|
||||
|
||||
useEffect(() => {
|
||||
if (!wikiTitle) return;
|
||||
const key = wikiTitle;
|
||||
if (_wikiThumbCache[key]) return; // Already fetched or in-flight
|
||||
_wikiThumbCache[key] = { url: null, loading: true };
|
||||
fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(wikiTitle)}`)
|
||||
.then(r => r.json())
|
||||
.then(d => {
|
||||
_wikiThumbCache[key] = { url: d.thumbnail?.source || null, loading: false };
|
||||
forceUpdate(n => n + 1);
|
||||
})
|
||||
.catch(() => {
|
||||
_wikiThumbCache[key] = { url: null, loading: false };
|
||||
forceUpdate(n => n + 1);
|
||||
});
|
||||
let cancelled = false;
|
||||
if (!wikiTitle) {
|
||||
setImgUrl(null);
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
fetchWikipediaSummary(wikiTitle).then((summary) => {
|
||||
if (cancelled) return;
|
||||
setImgUrl(summary?.thumbnail || null);
|
||||
setLoading(false);
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [wikiTitle]);
|
||||
|
||||
if (!wikiTitle) return { imgUrl: null, wikiUrl: null, loading: false };
|
||||
const cached = _wikiThumbCache[wikiTitle];
|
||||
return { imgUrl: cached?.url || null, wikiUrl, loading: cached?.loading || false };
|
||||
return { imgUrl, wikiUrl, loading };
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
'use client';
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import ExternalImage from '@/components/ExternalImage';
|
||||
|
||||
// Module-level cache: Wikipedia article title → thumbnail URL
|
||||
const _cache: Record<string, { url: string | null; done: boolean }> = {};
|
||||
import { fetchWikipediaSummary } from '@/lib/wikimediaClient';
|
||||
|
||||
/**
|
||||
* WikiImage — displays a Wikipedia thumbnail for a given article URL.
|
||||
* Uses the Wikipedia REST API with a module-level cache (only fetches once per article).
|
||||
*
|
||||
* Issue #220 (tg12): this component previously had its own
|
||||
* module-local Wikipedia fetch + cache. It now delegates to
|
||||
* `lib/wikimediaClient`, which sends the policy-compliant
|
||||
* `Api-User-Agent` header and shares one cache across every UI
|
||||
* component that asks Wikipedia for an article summary (WikiImage,
|
||||
* NewsFeed, useRegionDossier).
|
||||
*
|
||||
* Props:
|
||||
* wikiUrl: Full Wikipedia URL, e.g. "https://en.wikipedia.org/wiki/Boeing_787_Dreamliner"
|
||||
@@ -26,32 +30,30 @@ export default function WikiImage({
|
||||
maxH?: string;
|
||||
accent?: string;
|
||||
}) {
|
||||
const [, forceUpdate] = useState(0);
|
||||
const [imgUrl, setImgUrl] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
// Extract article title from URL
|
||||
const title = wikiUrl.replace(/^https?:\/\/[^/]+\/wiki\//, '');
|
||||
|
||||
useEffect(() => {
|
||||
if (!title || _cache[title]?.done) return;
|
||||
if (_cache[title]) return; // In-flight
|
||||
_cache[title] = { url: null, done: false };
|
||||
|
||||
fetch(`https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`)
|
||||
.then((r) => r.json())
|
||||
.then((d) => {
|
||||
_cache[title] = { url: d.thumbnail?.source || d.originalimage?.source || null, done: true };
|
||||
forceUpdate((n) => n + 1);
|
||||
})
|
||||
.catch(() => {
|
||||
_cache[title] = { url: null, done: true };
|
||||
forceUpdate((n) => n + 1);
|
||||
});
|
||||
let cancelled = false;
|
||||
if (!title) {
|
||||
setImgUrl(null);
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
fetchWikipediaSummary(title).then((summary) => {
|
||||
if (cancelled) return;
|
||||
setImgUrl(summary?.thumbnail || null);
|
||||
setLoading(false);
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [title]);
|
||||
|
||||
const cached = _cache[title];
|
||||
const imgUrl = cached?.url;
|
||||
const loading = cached && !cached.done;
|
||||
|
||||
return (
|
||||
<div className="pb-2">
|
||||
{loading && (
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { useCallback, useState, useEffect } from 'react';
|
||||
import type { RegionDossier, SelectedEntity } from '@/types/dashboard';
|
||||
import { fetchWikipediaSummary, fetchWikidataSparql } from '@/lib/wikimediaClient';
|
||||
|
||||
// ─── CACHE ─────────────────────────────────────────────────────────────────
|
||||
// Simple in-memory cache keyed by rounded lat/lng (0.1° ≈ 11km grid), 24h TTL.
|
||||
@@ -114,7 +115,11 @@ async function fetchCountryData(countryCode: string) {
|
||||
return Array.isArray(data) ? data[0] || {} : data || {};
|
||||
}
|
||||
|
||||
/** Fetch head of state + government type from Wikidata SPARQL (direct browser call). */
|
||||
/** Fetch head of state + government type from Wikidata SPARQL.
|
||||
*
|
||||
* Issue #218 (tg12): routes through lib/wikimediaClient so the
|
||||
* Api-User-Agent header is set per Wikimedia's UA policy.
|
||||
*/
|
||||
async function fetchLeader(countryName: string) {
|
||||
if (!countryName) return { leader: 'Unknown', government_type: 'Unknown' };
|
||||
const safeName = countryName.replace(/"/g, '\\"').replace(/'/g, "\\'");
|
||||
@@ -127,13 +132,11 @@ async function fetchLeader(countryName: string) {
|
||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
||||
} LIMIT 1
|
||||
`;
|
||||
const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(sparql)}&format=json`;
|
||||
const res = await fetch(url, {
|
||||
headers: { Accept: 'application/sparql-results+json' },
|
||||
});
|
||||
if (!res.ok) throw new Error(`Wikidata HTTP ${res.status}`);
|
||||
const results = (await res.json()).results?.bindings || [];
|
||||
if (results.length > 0) {
|
||||
const results = await fetchWikidataSparql<{
|
||||
leaderLabel?: { value: string };
|
||||
govTypeLabel?: { value: string };
|
||||
}>(sparql);
|
||||
if (results && results.length > 0) {
|
||||
return {
|
||||
leader: results[0].leaderLabel?.value || 'Unknown',
|
||||
government_type: results[0].govTypeLabel?.value || 'Unknown',
|
||||
@@ -142,27 +145,25 @@ async function fetchLeader(countryName: string) {
|
||||
return { leader: 'Unknown', government_type: 'Unknown' };
|
||||
}
|
||||
|
||||
/** Fetch Wikipedia summary for a place (direct browser call). */
|
||||
/** Fetch Wikipedia summary for a place.
|
||||
*
|
||||
* Issue #219 (tg12): routes through lib/wikimediaClient so the
|
||||
* Api-User-Agent header is set per Wikimedia's UA policy, AND the
|
||||
* shared cache means consecutive useRegionDossier + WikiImage +
|
||||
* NewsFeed lookups for the same article all hit the same slot.
|
||||
*/
|
||||
async function fetchLocalWikiSummary(placeName: string, countryName = '') {
|
||||
if (!placeName) return {};
|
||||
const candidates = [placeName];
|
||||
if (countryName) candidates.push(`${placeName}, ${countryName}`);
|
||||
|
||||
for (const name of candidates) {
|
||||
try {
|
||||
const slug = encodeURIComponent(name.replace(/ /g, '_'));
|
||||
const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) continue;
|
||||
const data = await res.json();
|
||||
if (data.type === 'disambiguation') continue;
|
||||
const summary = await fetchWikipediaSummary(name);
|
||||
if (summary) {
|
||||
return {
|
||||
description: data.description || '',
|
||||
extract: data.extract || '',
|
||||
thumbnail: data.thumbnail?.source || '',
|
||||
description: summary.description,
|
||||
extract: summary.extract,
|
||||
thumbnail: summary.thumbnail,
|
||||
};
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* wikimediaClient — single fetch surface for Wikipedia / Wikidata.
|
||||
*
|
||||
* Issues #218, #219, #220 (tg12 external audit):
|
||||
*
|
||||
* Wikimedia's User-Agent policy asks API clients to identify themselves
|
||||
* via `Api-User-Agent` when calling from browser JavaScript (because the
|
||||
* browser does not let JS set `User-Agent` directly). Before this
|
||||
* module existed, three independent components issued anonymous browser
|
||||
* fetches against Wikipedia / Wikidata:
|
||||
*
|
||||
* - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary)
|
||||
* - WikiImage (Wikipedia REST summary)
|
||||
* - NewsFeed (Wikipedia REST summary)
|
||||
*
|
||||
* Each component shipped its own copy-pasted fetch + module-local cache.
|
||||
* Provider-policy compliance was missing in all three places.
|
||||
*
|
||||
* This module centralizes:
|
||||
*
|
||||
* 1. The `Api-User-Agent` header on every request.
|
||||
* 2. A single LRU cache for Wikipedia summary lookups (keyed by article
|
||||
* title). Multiple components asking for the same article share
|
||||
* one in-flight request and one cache slot.
|
||||
* 3. One predictable kill switch — if Wikimedia ever asks us to back
|
||||
* off, we change `WIKIMEDIA_API_USER_AGENT` here and the whole
|
||||
* frontend updates.
|
||||
*
|
||||
* This does NOT change end-user UX:
|
||||
*
|
||||
* - WikiImage still shows the same thumbnails.
|
||||
* - NewsFeed still shows aircraft thumbnails.
|
||||
* - useRegionDossier still returns the same place summary + leader.
|
||||
*
|
||||
* What changes:
|
||||
*
|
||||
* - Wikimedia can distinguish our traffic from the anonymous
|
||||
* browser visitor pool.
|
||||
* - Provider-policy fixes happen here once, not in three places.
|
||||
*/
|
||||
|
||||
// Stable identifier per Wikimedia UA policy. Includes a contact path so
|
||||
// Wikimedia's operators can reach the project if they need to rate-limit
|
||||
// or coordinate. Bump the version when the contact path changes.
|
||||
/**
 * Value sent in the `Api-User-Agent` header on every Wikipedia / Wikidata
 * request: product/version, project URL, and where to report issues.
 */
export const WIKIMEDIA_API_USER_AGENT = [
  'Shadowbroker/1.0 (+https://github.com/BigBodyCobain/Shadowbroker;',
  'report issues at /issues)',
].join(' ');
|
||||
|
||||
/** Normalised subset of a Wikipedia REST `page/summary` response. */
export interface WikipediaSummary {
  /** Article title as requested by the caller. */
  title: string;
  /** Short description; empty string when the API supplies none. */
  description: string;
  /** Extract text; empty string when the API supplies none. */
  extract: string;
  /** Thumbnail (or original image) URL; empty string when there is none. */
  thumbnail: string;
  /** Page type reported by the API: 'standard' | 'disambiguation' | etc. */
  type: string;
}

/** One slot of the shared summary cache. */
interface CacheEntry {
  /** Resolved summary, or `null` while pending / when the lookup yielded nothing. */
  summary: WikipediaSummary | null;
  /** The pending request while one is in flight, otherwise `null`. */
  inflight: Promise<WikipediaSummary | null> | null;
  /** `true` once the request has settled and `summary` is final. */
  loaded: boolean;
}

// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier.
// Keyed by Wikipedia article title rather than the URL slug, so the keys
// stay human-readable when debugging. Entries track in-flight state so
// concurrent callers for the same title share one network request.
const _summaryCache = new Map<string, CacheEntry>();

// Upper bound on the number of cached titles before eviction kicks in.
const SUMMARY_CACHE_MAX = 512;
|
||||
|
||||
/**
 * Trim the shared summary cache back down to `SUMMARY_CACHE_MAX` entries.
 *
 * A `Map` iterates its keys in insertion order, so the first key yielded is
 * the oldest slot. Entries are dropped from the front until the cache fits.
 * The key is never tested for truthiness: a falsy key (e.g. `''`) must be
 * evictable like any other, otherwise the cap would silently stop being
 * enforced.
 */
function evictIfOverCap(): void {
  for (const oldestKey of _summaryCache.keys()) {
    if (_summaryCache.size <= SUMMARY_CACHE_MAX) break;
    // Deleting the entry currently being visited is safe for Map iterators.
    _summaryCache.delete(oldestKey);
  }
}
|
||||
|
||||
/**
 * Fetch a Wikipedia article summary (titles, NOT URLs).
 *
 * Results are memoised in the shared module cache. The cache key is the
 * trimmed title with underscores folded to spaces, so `Boeing 747` and
 * `Boeing_747` (the form obtained when a title is cut out of a
 * `/wiki/...` URL) resolve to the same slot and the same network request.
 * A cache hit re-inserts the entry so the most recently used titles are
 * the last to be evicted.
 *
 * @param title Article title; surrounding whitespace is ignored.
 * @returns The normalised summary, or `null` for empty input, a non-2xx
 *   response, a disambiguation page, or a network / parse failure. Never
 *   rejects, so callers can render a fallback without a try/catch.
 *   `title` in the result echoes the title passed by the caller that
 *   triggered the fetch.
 */
export async function fetchWikipediaSummary(
  title: string,
): Promise<WikipediaSummary | null> {
  const trimmed = (title || '').trim();
  if (!trimmed) return null;

  // Wikipedia treats "_" and " " in titles as the same character; fold them
  // so every caller spelling of one article shares one cache slot.
  const cacheKey = trimmed.replace(/_/g, ' ');

  const cached = _summaryCache.get(cacheKey);
  if (cached?.loaded) {
    // Re-insert to move the entry to the "newest" end of the Map.
    _summaryCache.delete(cacheKey);
    _summaryCache.set(cacheKey, cached);
    return cached.summary;
  }
  if (cached?.inflight) return cached.inflight;

  const slug = encodeURIComponent(cacheKey.replace(/ /g, '_'));
  const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;

  const promise = fetch(url, {
    headers: { 'Api-User-Agent': WIKIMEDIA_API_USER_AGENT },
  })
    .then(async (r) => {
      if (!r.ok) return null;
      const d = await r.json();
      if (d?.type === 'disambiguation') return null;
      const summary: WikipediaSummary = {
        title: trimmed,
        description: d?.description || '',
        extract: d?.extract || '',
        thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '',
        type: d?.type || 'standard',
      };
      return summary;
    })
    // Network and JSON-parse failures degrade to "no summary".
    .catch(() => null)
    .then((summary) => {
      // Replace the in-flight marker with the settled result (null included).
      _summaryCache.set(cacheKey, { summary, inflight: null, loaded: true });
      evictIfOverCap();
      return summary;
    });

  // Publish the pending request so concurrent callers can await it.
  _summaryCache.set(cacheKey, { summary: null, inflight: promise, loaded: false });
  evictIfOverCap();
  return promise;
}
|
||||
|
||||
/**
 * Run a SPARQL query against the Wikidata query service.
 *
 * Thin wrapper whose job is to attach the `Api-User-Agent` header (plus
 * the SPARQL JSON `Accept` header) in exactly one place.
 *
 * @param sparql Query text; surrounding whitespace is ignored.
 * @returns The `results.bindings` array of the JSON response, or `null`
 *   for an empty query, a non-2xx response, a response without a bindings
 *   array, or any network / parse failure. Never rejects.
 */
export async function fetchWikidataSparql<T = Record<string, { value: string }>>(
  sparql: string,
): Promise<T[] | null> {
  const query = (sparql || '').trim();
  if (query === '') return null;

  const endpoint =
    'https://query.wikidata.org/sparql?query=' + encodeURIComponent(query) + '&format=json';
  const requestHeaders = {
    'Api-User-Agent': WIKIMEDIA_API_USER_AGENT,
    Accept: 'application/sparql-results+json',
  };

  try {
    const response = await fetch(endpoint, { headers: requestHeaders });
    if (!response.ok) return null;

    const payload = await response.json();
    const rows = payload?.results?.bindings;
    if (!Array.isArray(rows)) return null;
    return rows as T[];
  } catch {
    // Fail quietly: callers render a fallback instead of handling errors.
    return null;
  }
}
|
||||
|
||||
/**
 * Test-only hook: empties the shared Wikipedia summary cache so each test
 * starts with no cached or in-flight lookups.
 */
export function _resetWikimediaClientCacheForTests(): void {
  _summaryCache.clear();
}
|
||||
Reference in New Issue
Block a user