mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-03 12:58:11 +02:00
Compare commits
30 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f14d4feb6d | |||
| febcce9125 | |||
| 31ebcb5cd9 | |||
| b3fca3dc18 | |||
| 401f114e4f | |||
| 79b39e8985 | |||
| c3e38621fc | |||
| 9ef02dd06f | |||
| ba39d3b9aa | |||
| f91ddcf38b | |||
| 49151d8b9f | |||
| 767a2f6c00 | |||
| 2da739c9e8 | |||
| eca7f24e2c | |||
| 7bfaad17f0 | |||
| e3efcfd476 | |||
| 32b8421a1c | |||
| bc70cc3527 | |||
| 44e9b38ac2 | |||
| b01a69c172 | |||
| b041b5e97c | |||
| c54ea7fd9f | |||
| a3aa7b4dec | |||
| 19fb7f0b1e | |||
| 35cd4e4c71 | |||
| 31f79fd8e2 | |||
| fd7d6fa401 | |||
| 49621824b1 | |||
| 76750caa92 | |||
| c3ef9f4b9e |
@@ -7,6 +7,28 @@ on:
|
||||
branches: [main]
|
||||
workflow_call:
|
||||
|
||||
# CI flake mitigation:
|
||||
# ci.yml is triggered TWICE per PR on the same commit — once directly via
|
||||
# the `pull_request` trigger above ("Frontend Tests & Build" check) and once
|
||||
# via `workflow_call` from docker-publish.yml ("CI Gate / Frontend Tests &
|
||||
# Build" check). Both jobs land on the same Actions runner pool at the same
|
||||
# time and fight for CPU/RAM. Under contention, React's reconciliation in
|
||||
# `messagesViewFirstContact.test.tsx > removes an approved contact …`
|
||||
# overruns its 5s waitFor timeout — that's the single failure mode we've
|
||||
# seen flake on PRs #226, #237, #261, #262, #265, #294, #303, and the
|
||||
# fd7d6fa push. Backend tests and every other frontend test pass under
|
||||
# the same conditions, which is what made this look random.
|
||||
#
|
||||
# Pinning a concurrency group on the SHA (PR head, or the pushed commit
|
||||
# for main) serializes the two invocations so neither starves the other.
|
||||
# We use cancel-in-progress: false so the second one queues instead of
|
||||
# cancelling — cancelling could leave the PR check stuck "Expected" if
|
||||
# only one of the two ever finishes. Total CI time grows by ~2 min in
|
||||
# exchange for deterministic outcomes.
|
||||
concurrency:
|
||||
group: ci-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
frontend:
|
||||
name: Frontend Tests & Build
|
||||
|
||||
+29
@@ -261,3 +261,32 @@ backend/data/wormhole_stdout.log
|
||||
|
||||
# Compressed snapshot archives (can be 100 MB+)
|
||||
*.json.gz
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# AI assistant / coding-agent scratch
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Per-tool config + scratch directories. These are private to whichever
|
||||
# coding agent the operator happens to be using and have no business in
|
||||
# the repo. If a tool's instructions need to be canonical for the project,
|
||||
# we'll put them in docs/ explicitly — not let the agent dump them at the
|
||||
# repo root.
|
||||
|
||||
# OpenAI Codex CLI
|
||||
.codex/
|
||||
.codex-app-schema/
|
||||
.codex-app-ts/
|
||||
|
||||
# Per-agent instruction files dropped at repo root by various tools.
|
||||
# These are operator-side preferences, not part of the project contract.
|
||||
AGENTS.md
|
||||
GEMINI.md
|
||||
CLAUDE.md
|
||||
.github/copilot-instructions.md
|
||||
|
||||
# Stale AI-generated test file that referenced fields that don't exist in
|
||||
# the current `_parse_carrier_positions_from_news` implementation. Kept
|
||||
# ignored so it doesn't accidentally get committed if it shows up again
|
||||
# from a tool that's working off an out-of-date understanding of the
|
||||
# module. If a real test for that function is needed, write it under a
|
||||
# meaningful name in tests/test_carrier_tracker_quality.py.
|
||||
backend/tests/test_carrier_tracker_region_centers.py
|
||||
|
||||
+21
-7
@@ -24,14 +24,28 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
||||
# ALLOW_INSECURE_ADMIN=false
|
||||
|
||||
# Default outbound User-Agent for all third-party HTTP fetchers.
|
||||
# Project-generic by default — does NOT include any personal contact info or
|
||||
# operator-specific identifier. Override only if you run a public relay and
|
||||
# want upstreams to be able to reach you (e.g. Nominatim/OSM usage policy).
|
||||
# SHADOWBROKER_USER_AGENT=ShadowBroker-OSINT/0.9 (contact: ops@example.com)
|
||||
# Per-install operator handle. Round 7a: every outbound third-party API
|
||||
# call (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz, Broadcastify,
|
||||
# weather.gov, NUFORC, etc.) includes this handle in the User-Agent so
|
||||
# upstreams can rate-limit / contact the specific install instead of
|
||||
# treating every Shadowbroker user as one entity.
|
||||
#
|
||||
# Default empty -> a stable pseudonymous handle (e.g. "operator-7f3a92") is
|
||||
# auto-generated on first run and persisted to backend/data/operator_handle.json.
|
||||
# Operators who want a meaningful handle (real name, org, GitHub login) can
|
||||
# set it here. Special characters are sanitized to dashes.
|
||||
# OPERATOR_HANDLE=
|
||||
|
||||
# User-Agent for Nominatim geocoding requests (per OSM usage policy).
|
||||
# NOMINATIM_USER_AGENT=ShadowBroker/1.0
|
||||
# Default outbound User-Agent for all third-party HTTP fetchers. Operators
|
||||
# who run a public relay and want a completely custom UA can set this; it
|
||||
# bypasses the per-operator helper entirely. Most installs should leave it
|
||||
# unset and use OPERATOR_HANDLE instead.
|
||||
# SHADOWBROKER_USER_AGENT=
|
||||
|
||||
# Nominatim-specific User-Agent override (OSM usage policy). Leave unset to
|
||||
# use the per-install handle (default) — set only if you have a registered
|
||||
# Nominatim relay identity.
|
||||
# NOMINATIM_USER_AGENT=
|
||||
|
||||
# ── Third-party fetcher opt-ins ────────────────────────────────
|
||||
# These data sources phone home to politically/commercially sensitive
|
||||
|
||||
+105
-1
@@ -1,4 +1,108 @@
|
||||
"""Rate-limit key function for slowapi.
|
||||
|
||||
Issue #287 (tg12): the previous implementation used
|
||||
``slowapi.util.get_remote_address`` which only ever returns
|
||||
``request.client.host``. Behind the bundled Next.js proxy (or any other
|
||||
reverse proxy), every connected operator's ``client.host`` is the
|
||||
frontend container's bridge IP. ``@limiter.limit("120/minute")`` then
|
||||
collapses into one shared bucket for everybody on the same backend —
|
||||
one heavy tab can starve every other operator on the node.
|
||||
|
||||
This module replaces that key function with one that:
|
||||
|
||||
* Reads ``X-Forwarded-For`` ONLY when the immediate peer is a trusted
|
||||
frontend container (same allowlist used by the Docker bridge
|
||||
local-operator trust path — see ``backend/auth.py`` ``#250``).
|
||||
* Picks the FIRST entry in the XFF chain. That's the client end of
|
||||
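the proxy chain, which is the operator we want to bucket on. (This
assumes the trusted frontend overwrites or sanitises any client-supplied
X-Forwarded-For; if it only appends, the first entry is client-controlled.)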
|
||||
* Falls back to ``request.client.host`` for any peer that isn't on
|
||||
the trusted-frontend allowlist. Direct hits, unrelated containers,
|
||||
and unknown hosts are bucketed exactly like before — there is no
|
||||
way for an untrusted caller to spoof XFF and steal another
|
||||
operator's rate-limit bucket.
|
||||
|
||||
Single-operator nodes are unaffected: the frontend resolves to one IP,
|
||||
that IP is on the trust list, the XFF header is read, and you get one
|
||||
bucket per operator (i.e. you).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
|
||||
def _client_host(request: Any) -> str:
|
||||
"""Return the immediate peer's IP, normalised to a lowercase string."""
|
||||
client = getattr(request, "client", None)
|
||||
if client is None:
|
||||
return ""
|
||||
host = getattr(client, "host", "") or ""
|
||||
return host.lower()
|
||||
|
||||
|
||||
def _first_forwarded_for(value: str) -> str:
|
||||
"""Return the first non-empty entry from an ``X-Forwarded-For`` header.
|
||||
|
||||
RFC 7239 / de-facto XFF format is ``client, proxy1, proxy2, …``. The
|
||||
client end is what we want to bucket on. Empty parts (which appear
|
||||
in some malformed headers) are skipped so we don't end up keying on
|
||||
an empty string.
|
||||
"""
|
||||
for raw in value.split(","):
|
||||
candidate = raw.strip()
|
||||
if candidate:
|
||||
return candidate.lower()
|
||||
return ""
|
||||
|
||||
|
||||
def _is_trusted_frontend_peer(host: str) -> bool:
|
||||
"""True iff ``host`` is one of the resolved trusted-frontend IPs.
|
||||
|
||||
Imported lazily so this module stays usable in unit tests that
|
||||
don't want to pull the whole auth module into scope.
|
||||
"""
|
||||
if not host:
|
||||
return False
|
||||
try:
|
||||
from auth import _resolve_trusted_bridge_ips
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return False
|
||||
try:
|
||||
trusted_ips = _resolve_trusted_bridge_ips()
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return False
|
||||
return host in trusted_ips
|
||||
|
||||
|
||||
def shadowbroker_rate_limit_key(request: Any) -> str:
    """Rate-limit key function that honours XFF only from trusted frontends.

    Outcomes:

    * Immediate peer is not on the trusted-frontend allowlist →
      ``get_remote_address(request)``; the header is never consulted.
    * Peer is trusted and ``X-Forwarded-For`` carries a non-blank entry →
      the first such entry, lower-cased.
    * Peer is trusted but the request has no headers, no XFF header, or
      an XFF made only of blank entries → ``get_remote_address(request)``.

    NOTE(review): the first XFF entry is only as trustworthy as the
    frontend's handling of client-supplied ``X-Forwarded-For`` values.
    If the frontend appends instead of replacing, a client can choose
    its own bucket key — confirm against the proxy configuration.
    """
    if not _is_trusted_frontend_peer(_client_host(request)):
        return get_remote_address(request)

    header_map = getattr(request, "headers", None)
    if header_map is None:
        return get_remote_address(request)

    # Second lookup covers header containers that are case-sensitive.
    forwarded = header_map.get("x-forwarded-for") or header_map.get("X-Forwarded-For")
    operator_ip = _first_forwarded_for(forwarded) if forwarded else ""
    return operator_ip or get_remote_address(request)
|
||||
|
||||
|
||||
limiter = Limiter(key_func=shadowbroker_rate_limit_key)
|
||||
|
||||
+64
-11
@@ -1417,6 +1417,29 @@ def _peer_sync_response(peer_url: str, body: dict[str, Any]) -> dict[str, Any]:
|
||||
proxy = f"socks5h://127.0.0.1:{socks_port}"
|
||||
kwargs["proxies"] = {"http": proxy, "https": proxy}
|
||||
response = _requests.post(f"{normalized}/api/mesh/infonet/sync", **kwargs)
|
||||
# HTTP 429 must be surfaced as a typed exception carrying the
|
||||
# Retry-After value, so finish_sync can honor it and stop hammering
|
||||
# the upstream. Pre-fix this path just stringified the status into
|
||||
# a ValueError, which finish_sync then ignored — keeping the
|
||||
# upstream's rate-limit bucket full indefinitely.
|
||||
if response.status_code == 429:
|
||||
from services.mesh.mesh_infonet_sync_support import (
|
||||
PeerSyncRateLimited,
|
||||
parse_retry_after_header,
|
||||
)
|
||||
|
||||
retry_after_s = parse_retry_after_header(
|
||||
response.headers.get("Retry-After", "") or "",
|
||||
)
|
||||
try:
|
||||
body_text = response.text[:200]
|
||||
except Exception:
|
||||
body_text = ""
|
||||
raise PeerSyncRateLimited(
|
||||
f"HTTP 429 from {normalized} (retry_after={retry_after_s}s): {body_text}",
|
||||
retry_after_s=retry_after_s,
|
||||
status=429,
|
||||
)
|
||||
try:
|
||||
payload = response.json()
|
||||
except Exception as exc:
|
||||
@@ -1462,8 +1485,23 @@ def _hydrate_gate_store_from_chain(events: list[dict]) -> int:
|
||||
return count
|
||||
|
||||
|
||||
def _sync_from_peer(peer_url: str, *, page_limit: int = 100, max_rounds: int = 5) -> tuple[bool, str, bool]:
|
||||
def _sync_from_peer(
|
||||
peer_url: str,
|
||||
*,
|
||||
page_limit: int = 100,
|
||||
max_rounds: int = 5,
|
||||
) -> tuple[bool, str, bool, int]:
|
||||
"""Sync the local Infonet chain against ``peer_url``.
|
||||
|
||||
Returns ``(ok, error, forked, retry_after_s)``. The fourth tuple
|
||||
element is non-zero only when the peer responded with HTTP 429
|
||||
and supplied a parseable ``Retry-After`` header — see the typed
|
||||
``PeerSyncRateLimited`` exception in mesh_infonet_sync_support.py.
|
||||
Callers should pass that value to ``finish_sync(retry_after_s=...)``
|
||||
so the next attempt actually waits.
|
||||
"""
|
||||
from services.mesh.mesh_hashchain import infonet
|
||||
from services.mesh.mesh_infonet_sync_support import PeerSyncRateLimited
|
||||
|
||||
rounds = 0
|
||||
while rounds < max_rounds:
|
||||
@@ -1472,7 +1510,11 @@ def _sync_from_peer(peer_url: str, *, page_limit: int = 100, max_rounds: int = 5
|
||||
"locator": infonet.get_locator(),
|
||||
"limit": page_limit,
|
||||
}
|
||||
payload = _peer_sync_response(peer_url, body)
|
||||
try:
|
||||
payload = _peer_sync_response(peer_url, body)
|
||||
except PeerSyncRateLimited as exc:
|
||||
# Bubble up the retry-after so finish_sync can honor it.
|
||||
return False, str(exc), False, exc.retry_after_s
|
||||
if bool(payload.get("forked")):
|
||||
# Auto-recover small local forks: if the local chain is tiny
|
||||
# (< 20 events) and the remote has a longer chain, reset local
|
||||
@@ -1488,23 +1530,23 @@ def _sync_from_peer(peer_url: str, *, page_limit: int = 100, max_rounds: int = 5
|
||||
)
|
||||
infonet.reset_chain()
|
||||
continue # retry sync with clean genesis locator
|
||||
return False, "fork detected", True
|
||||
return False, "fork detected", True, 0
|
||||
events = payload.get("events", [])
|
||||
if not isinstance(events, list):
|
||||
return False, "peer sync events must be a list", False
|
||||
return False, "peer sync events must be a list", False, 0
|
||||
if not events:
|
||||
return True, "", False
|
||||
return True, "", False, 0
|
||||
result = infonet.ingest_events(events)
|
||||
_hydrate_gate_store_from_chain(events)
|
||||
rejected = list(result.get("rejected", []) or [])
|
||||
if rejected:
|
||||
return False, f"sync ingest rejected {len(rejected)} event(s)", False
|
||||
return False, f"sync ingest rejected {len(rejected)} event(s)", False, 0
|
||||
if int(result.get("accepted", 0) or 0) == 0 and int(result.get("duplicates", 0) or 0) >= len(events):
|
||||
return True, "", False
|
||||
return True, "", False, 0
|
||||
if len(events) < page_limit:
|
||||
return True, "", False
|
||||
return True, "", False, 0
|
||||
rounds += 1
|
||||
return True, "", False
|
||||
return True, "", False, 0
|
||||
|
||||
|
||||
def _run_public_sync_cycle() -> SyncWorkerState:
|
||||
@@ -1567,11 +1609,12 @@ def _run_public_sync_cycle() -> SyncWorkerState:
|
||||
with _NODE_RUNTIME_LOCK:
|
||||
set_sync_state(started)
|
||||
try:
|
||||
ok, error, forked = _sync_from_peer(record.peer_url)
|
||||
ok, error, forked, retry_after_s = _sync_from_peer(record.peer_url)
|
||||
except Exception as exc:
|
||||
ok = False
|
||||
error = str(exc or type(exc).__name__)
|
||||
forked = False
|
||||
retry_after_s = 0
|
||||
if ok:
|
||||
store.mark_seen(record.peer_url, "sync", now=time.time())
|
||||
store.mark_sync_success(record.peer_url, now=time.time())
|
||||
@@ -1618,6 +1661,12 @@ def _run_public_sync_cycle() -> SyncWorkerState:
|
||||
now=time.time(),
|
||||
interval_s=int(get_settings().MESH_SYNC_INTERVAL_S or 300),
|
||||
failure_backoff_s=failure_backoff_s,
|
||||
# 429 retry-storm fix: when the peer returned HTTP 429 with
|
||||
# a Retry-After header, finish_sync uses max(exponential,
|
||||
# retry_after) for next_sync_due_at — so we actually wait
|
||||
# the time the upstream asked for instead of hammering
|
||||
# every 60s and keeping its rate-limit bucket full forever.
|
||||
retry_after_s=retry_after_s,
|
||||
)
|
||||
with _NODE_RUNTIME_LOCK:
|
||||
set_sync_state(updated)
|
||||
@@ -8148,8 +8197,12 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
|
||||
|
||||
def _cctv_upstream_headers(request: Request, profile: _CCTVProxyProfile) -> dict[str, str]:
|
||||
# Round 7a: per-install operator handle. See routers/cctv.py for the
|
||||
# canonical handler; this duplicate stays in lockstep until the #239
|
||||
# dedup ladder removes it.
|
||||
from services.network_utils import outbound_user_agent
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)",
|
||||
"User-Agent": f"Mozilla/5.0 (compatible; {outbound_user_agent('cctv-proxy')})",
|
||||
**profile.headers,
|
||||
}
|
||||
range_header = request.headers.get("range")
|
||||
|
||||
@@ -13,7 +13,6 @@ dependencies = [
|
||||
"apscheduler==3.10.3",
|
||||
"beautifulsoup4>=4.9.0",
|
||||
"cachetools==5.5.2",
|
||||
"cloudscraper==1.2.71",
|
||||
"cryptography>=41.0.0",
|
||||
"defusedxml>=0.7.1",
|
||||
"fastapi==0.115.12",
|
||||
|
||||
@@ -82,6 +82,28 @@ async def api_get_keys_meta(request: Request):
|
||||
return get_env_path_info()
|
||||
|
||||
|
||||
@router.get("/api/settings/operator-handle", dependencies=[Depends(require_local_operator)])
@limiter.limit("60/minute")
async def api_get_operator_handle(request: Request):
    """Expose the per-install operator handle to the frontend.

    Serves ``{"handle": <str>}`` where the value comes from
    ``services.network_utils.get_operator_handle()``. The route is
    guarded by the ``require_local_operator`` dependency and limited to
    60 requests per minute per rate-limit key.

    ``request`` is not read in the body; the limiter decorator needs it
    in the signature.
    """
    from services.network_utils import get_operator_handle

    handle = get_operator_handle()
    return {"handle": handle}
|
||||
|
||||
|
||||
@router.get(
|
||||
"/api/settings/news-feeds",
|
||||
dependencies=[Depends(require_local_operator)],
|
||||
|
||||
+200
-40
@@ -18,6 +18,12 @@ from auth import require_local_operator, require_openclaw_or_local
|
||||
from limiter import limiter
|
||||
from services.fetchers._store import latest_data as _latest_data
|
||||
|
||||
|
||||
|
||||
def _ai_intel_user_agent() -> str:
    """User-Agent string for outbound requests made by the AI-intel routes.

    Delegates to ``services.network_utils.outbound_user_agent`` with the
    component tag ``"ai-intel"``. The helper is imported at call time,
    not at module import.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("ai-intel")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
@@ -447,7 +453,7 @@ async def ai_satellite_images(
|
||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||
json=search_payload,
|
||||
timeout=10,
|
||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (ai-intel)"},
|
||||
headers={"User-Agent": _ai_intel_user_agent()},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
features = resp.json().get("features", [])
|
||||
@@ -2515,45 +2521,85 @@ async def api_capabilities(request: Request):
|
||||
# OpenClaw Connection Management (local-operator only — NOT via HMAC)
|
||||
# These endpoints manage the HMAC secret itself, so they MUST require
|
||||
# local operator access to prevent privilege escalation.
|
||||
#
|
||||
# Issue #302 (tg12): pre-fix, GET /api/ai/connect-info had two problems:
|
||||
#
|
||||
# 1. ``?reveal=true`` made the full secret travel through every operator
|
||||
# page-load that opened the Connect modal. Even gated to
|
||||
# ``require_local_operator``, that put the secret into browser
|
||||
# history, dev-tools network panels, browser disk caches, HAR
|
||||
# exports, and screen captures. Every time the modal opened.
|
||||
#
|
||||
# 2. The same GET endpoint auto-bootstrapped (generated + persisted)
|
||||
# the secret on first read. Side effects on a GET are a footgun:
|
||||
# browser prefetchers, mirror tools, and casual curl-from-history
|
||||
# would all silently mint+persist a fresh secret. (Gated, but
|
||||
# still surprising — and noisy in the audit log.)
|
||||
#
|
||||
# Resolution:
|
||||
#
|
||||
# GET /api/ai/connect-info — always returns the MASKED
|
||||
# secret. No ?reveal param.
|
||||
# No auto-bootstrap; if the
|
||||
# secret is missing,
|
||||
# ``hmac_secret_set: false``
|
||||
# tells the frontend to call
|
||||
# /bootstrap.
|
||||
#
|
||||
# POST /api/ai/connect-info/bootstrap — NEW. Generates + persists the
|
||||
# secret if missing. Idempotent.
|
||||
# Returns metadata only, never
|
||||
# the full secret.
|
||||
#
|
||||
# POST /api/ai/connect-info/reveal — NEW. Returns the full secret in
|
||||
# the body with strict
|
||||
# ``Cache-Control: no-store,
|
||||
# no-cache, must-revalidate``
|
||||
# + ``Pragma: no-cache`` so
|
||||
# it does not land in browser
|
||||
# caches. POST means it does
|
||||
# not land in URL history.
|
||||
#
|
||||
# POST /api/ai/connect-info/regenerate — keeps existing one-time-reveal
|
||||
# behavior (regenerate IS a
|
||||
# deliberate destructive action
|
||||
# the operator triggered, so
|
||||
# displaying the new secret
|
||||
# once is the only path that
|
||||
# makes the operation useful).
|
||||
# Same no-store headers added.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def get_connect_info(request: Request, reveal: bool = False):
|
||||
"""Return connection details for the OpenClaw Connect modal.
|
||||
# Cache-Control headers that should accompany every response carrying the
|
||||
# full HMAC secret. Reused across the reveal + regenerate endpoints so a
|
||||
# future refactor that splits or renames them can't forget the headers.
|
||||
_NO_STORE_HEADERS = {
|
||||
"Cache-Control": "no-store, no-cache, must-revalidate, private",
|
||||
"Pragma": "no-cache",
|
||||
"Expires": "0",
|
||||
}
|
||||
|
||||
The HMAC secret is masked by default. Pass ?reveal=true to see the full key.
|
||||
Private keys are NEVER returned.
|
||||
|
||||
def _mask_hmac_secret(secret: str) -> str:
|
||||
"""Return a fingerprint-style mask (first6 + bullets + last4) suitable
|
||||
for display in the UI before the operator clicks Reveal."""
|
||||
if not secret:
|
||||
return ""
|
||||
if len(secret) > 10:
|
||||
return secret[:6] + "••••••••" + secret[-4:]
|
||||
return "••••••••"
|
||||
|
||||
|
||||
def _connect_info_metadata(settings) -> dict:
|
||||
"""Return everything the Connect modal needs EXCEPT the secret itself.
|
||||
|
||||
Shared between GET /api/ai/connect-info (where the full secret is
|
||||
masked) and POST /api/ai/connect-info/bootstrap (where the operator
|
||||
just generated a secret but we don't return it inline — they have to
|
||||
call /reveal to see it).
|
||||
"""
|
||||
import os
|
||||
import secrets
|
||||
from services.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
hmac_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
|
||||
access_tier = str(settings.OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
|
||||
# Auto-generate if not set
|
||||
if not hmac_secret:
|
||||
hmac_secret = secrets.token_hex(24) # 48 chars
|
||||
_write_env_value("OPENCLAW_HMAC_SECRET", hmac_secret)
|
||||
# Clear settings cache so next read picks up the new value
|
||||
get_settings.cache_clear()
|
||||
|
||||
masked = hmac_secret[:6] + "••••••••" + hmac_secret[-4:] if len(hmac_secret) > 10 else "••••••••"
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"hmac_secret": hmac_secret if reveal else masked,
|
||||
"hmac_secret_set": bool(hmac_secret),
|
||||
"bootstrap_behavior": {
|
||||
"auto_generates_when_missing": True,
|
||||
"auto_generated_this_call": not bool(settings.OPENCLAW_HMAC_SECRET or ""),
|
||||
"notes": [
|
||||
"If no HMAC secret exists yet, this endpoint bootstraps one and persists it to .env.",
|
||||
"Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
|
||||
],
|
||||
},
|
||||
"access_tier": access_tier,
|
||||
"trust_model": {
|
||||
"remote_http_principal": "holder_of_openclaw_hmac_secret",
|
||||
@@ -2607,24 +2653,138 @@ async def get_connect_info(request: Request, reveal: bool = False):
|
||||
}
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("5/minute")
|
||||
async def regenerate_hmac_secret(request: Request):
|
||||
"""Generate a new HMAC secret. Old secret immediately stops working."""
|
||||
@router.get("/api/ai/connect-info", dependencies=[Depends(require_local_operator)])
@limiter.limit("30/minute")
async def get_connect_info(request: Request):
    """Serve the data the OpenClaw Connect modal needs, secret masked.

    The response carries ``masked_hmac_secret`` (output of
    ``_mask_hmac_secret``) and never the raw value; the raw value is
    served only by ``POST /api/ai/connect-info/reveal`` (see #302).
    ``hmac_secret_set`` is false when no secret is configured, in which
    case the frontend should call ``POST /api/ai/connect-info/bootstrap``.
    This handler itself never creates or persists a secret.

    The remaining keys come from ``_connect_info_metadata(settings)`` and
    are merged last, so a key it shares with the base payload takes the
    metadata's value.
    """
    from services.config import get_settings

    settings = get_settings()
    current_secret = str(settings.OPENCLAW_HMAC_SECRET or "").strip()

    bootstrap_notes = [
        "Call POST /api/ai/connect-info/bootstrap to mint a secret on first use.",
        "Call POST /api/ai/connect-info/reveal to see the full secret (no-store).",
        "Regenerating the HMAC secret revokes all existing direct-mode OpenClaw callers at once.",
    ]
    payload = {
        "ok": True,
        "masked_hmac_secret": _mask_hmac_secret(current_secret),
        "hmac_secret_set": bool(current_secret),
        "bootstrap_behavior": {
            "auto_generates_when_missing": False,
            "notes": bootstrap_notes,
        },
    }
    payload.update(_connect_info_metadata(settings))
    return payload
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/bootstrap", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def bootstrap_hmac_secret(request: Request):
    """Mint and persist the OpenClaw HMAC secret if it isn't already set.

    Idempotent: when a secret already exists the handler returns
    ``generated: false`` and leaves it untouched. Otherwise it generates
    a 48-character hex secret, writes it via ``_write_env_value`` and
    clears the settings cache so the next read sees the new value.

    The response never contains the secret itself, only its mask. The
    operator calls ``POST /api/ai/connect-info/reveal`` to see it. The
    block as rendered also returned ``"hmac_secret"`` and a second, stale
    ``"detail"`` message in the success payload; both contradicted this
    contract and are removed.
    """
    import secrets
    from services.config import get_settings

    settings = get_settings()
    existing = str(settings.OPENCLAW_HMAC_SECRET or "").strip()
    if existing:
        return {
            "ok": True,
            "generated": False,
            "hmac_secret_set": True,
            "masked_hmac_secret": _mask_hmac_secret(existing),
            "detail": "HMAC secret already configured. Use /reveal to see it.",
        }

    new_secret = secrets.token_hex(24)  # 48 chars
    _write_env_value("OPENCLAW_HMAC_SECRET", new_secret)
    # Drop the cached settings so later reads pick up the new secret.
    get_settings.cache_clear()

    return {
        "ok": True,
        "generated": True,
        "hmac_secret_set": True,
        "masked_hmac_secret": _mask_hmac_secret(new_secret),
        "detail": "HMAC secret generated. Call /reveal to copy it into your OpenClaw config.",
    }
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/reveal", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def reveal_hmac_secret(request: Request):
    """Hand back the unmasked HMAC secret in the response body.

    Exposed as POST so the request carries no secret-related query
    string. The response is a ``JSONResponse`` with ``_NO_STORE_HEADERS``
    attached (``Cache-Control``, ``Pragma`` and ``Expires`` set to
    forbid caching).

    Raises ``HTTPException`` 404 when no secret is configured; the
    frontend should call ``POST /api/ai/connect-info/bootstrap`` first.
    """
    from services.config import get_settings

    current_secret = str(get_settings().OPENCLAW_HMAC_SECRET or "").strip()
    if current_secret:
        body = {
            "ok": True,
            "hmac_secret": current_secret,
            "masked_hmac_secret": _mask_hmac_secret(current_secret),
        }
        return JSONResponse(content=body, headers=_NO_STORE_HEADERS)

    raise HTTPException(
        404,
        "No HMAC secret configured. Call POST /api/ai/connect-info/bootstrap first.",
    )
|
||||
|
||||
|
||||
@router.post("/api/ai/connect-info/regenerate", dependencies=[Depends(require_local_operator)])
@limiter.limit("5/minute")
async def regenerate_hmac_secret(request: Request):
    """Replace the HMAC secret with a freshly generated one.

    A new 48-character hex secret is written via ``_write_env_value``
    and the settings cache is cleared, so the previous secret stops
    being the configured value. The new secret is returned once in the
    body, together with its mask, so the operator can update their
    OpenClaw configuration. The response carries ``_NO_STORE_HEADERS``
    to forbid caching.
    """
    import secrets
    from services.config import get_settings

    fresh_secret = secrets.token_hex(24)  # 48 chars
    _write_env_value("OPENCLAW_HMAC_SECRET", fresh_secret)
    get_settings.cache_clear()

    payload = {
        "ok": True,
        "hmac_secret": fresh_secret,
        "masked_hmac_secret": _mask_hmac_secret(fresh_secret),
        "detail": "HMAC secret regenerated. Update your OpenClaw agent configuration.",
    }
    return JSONResponse(content=payload, headers=_NO_STORE_HEADERS)
|
||||
|
||||
|
||||
@router.put("/api/ai/connect-info/access-tier", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def set_access_tier(request: Request, body: dict):
|
||||
|
||||
@@ -165,7 +165,13 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
|
||||
|
||||
def _cctv_upstream_headers(request: Request, profile: _CCTVProxyProfile) -> dict:
|
||||
headers = {"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)", **profile.headers}
|
||||
# Round 7a: per-install operator handle. Mozilla/5.0 prefix retained
|
||||
# because many CCTV endpoints sniff for a browser-like prefix.
|
||||
from services.network_utils import outbound_user_agent
|
||||
headers = {
|
||||
"User-Agent": f"Mozilla/5.0 (compatible; {outbound_user_agent('cctv-proxy')})",
|
||||
**profile.headers,
|
||||
}
|
||||
range_header = request.headers.get("range")
|
||||
if range_header:
|
||||
headers["Range"] = range_header
|
||||
|
||||
+105
-10
@@ -98,6 +98,88 @@ def _current_etag(prefix: str = "") -> str:
|
||||
return f"{prefix}v{get_data_version()}-l{get_active_layers_version()}"
|
||||
|
||||
|
||||
# ── Issue #288: viewport-aware payloads ─────────────────────────────────────
|
||||
# Heavy, density-driven, time-sensitive layers that benefit from bbox
|
||||
# filtering. Light reference layers (datacenters, military_bases,
|
||||
# power_plants, satellites, weather, news, etc.) are intentionally NOT
|
||||
# in these sets — they ship world-scale even when bounds are supplied so
|
||||
# panning never reveals an "empty world" of static infrastructure.
|
||||
#
|
||||
# When the caller does NOT pass s/w/n/e, none of this runs and the response
|
||||
# is byte-for-byte identical to the pre-#288 behavior.
|
||||
_FAST_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"commercial_flights",
|
||||
"military_flights",
|
||||
"private_flights",
|
||||
"private_jets",
|
||||
"tracked_flights",
|
||||
"ships",
|
||||
"cctv",
|
||||
"uavs",
|
||||
"liveuamap",
|
||||
"gps_jamming",
|
||||
"sigint",
|
||||
"trains",
|
||||
)
|
||||
_SLOW_BBOX_HEAVY_KEYS: tuple[str, ...] = (
|
||||
"gdelt",
|
||||
"firms_fires",
|
||||
"kiwisdr",
|
||||
"scanners",
|
||||
"psk_reporter",
|
||||
)
|
||||
|
||||
|
||||
def _has_full_bbox(s, w, n, e) -> bool:
|
||||
return None not in (s, w, n, e)
|
||||
|
||||
|
||||
def _bbox_etag_suffix(s, w, n, e) -> str:
    """Return the ETag fragment for a viewport, quantized to whole degrees.

    South/west are floored and north/east are ceiled, so the quantized box
    always contains the requested one. Quantizing means small mouse drags
    don't blow the ETag cache on the client.

    Returns ``""`` (no suffix) when:
      * any of the four bounds is missing,
      * a bound cannot be converted to a finite number, or
      * the window is world-scale (>= 300 degrees of longitude or >= 120
        degrees of latitude), so world-zoomed clients share one ETag and
        keep benefiting from the existing 304 path.

    Otherwise returns ``"|bbox=<s>,<w>,<n>,<e>"`` with integer bounds.
    """
    if not _has_full_bbox(s, w, n, e):
        return ""
    try:
        ss = math.floor(float(s))
        ww = math.floor(float(w))
        nn = math.ceil(float(n))
        ee = math.ceil(float(e))
    except (TypeError, ValueError, OverflowError):
        # TypeError/ValueError: unparsable input or NaN.
        # OverflowError: math.floor/ceil on +/-infinity.
        return ""
    # World-scale windows are treated as "no bbox" for caching purposes.
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    if lng_span >= 300 or lat_span >= 120:
        return ""
    return f"|bbox={ss},{ww},{nn},{ee}"
|
||||
|
||||
|
||||
def _apply_bbox_to_payload(payload: dict, heavy_keys: tuple[str, ...],
                           s: float, w: float, n: float, e: float) -> dict:
    """Clip the *heavy_keys* collections of *payload* to a viewport, in place.

    Only keys whose value is a non-empty ``list`` are touched; each such list
    is replaced by ``_bbox_filter(items, s, w, n, e)``. Missing keys, empty
    lists and non-list values are left exactly as they were.

    World-scale requests (>= 300 degrees of longitude or >= 120 degrees of
    latitude) skip filtering entirely, so the response matches the no-params
    shape.

    Returns the same *payload* object that was passed in.

    NOTE(review): _bbox_etag_suffix keys the ETag on bounds quantized to whole
    degrees, while filtering here uses the raw bounds. Confirm a 304 cannot
    leave a zoomed-in client holding data clipped to an earlier sub-degree
    viewport.
    """
    lat_span, lng_span = _bbox_spans(s, w, n, e)
    is_world_scale = lng_span >= 300 or lat_span >= 120
    if not is_world_scale:
        for layer_name in heavy_keys:
            layer_items = payload.get(layer_name)
            if isinstance(layer_items, list) and layer_items:
                payload[layer_name] = _bbox_filter(layer_items, s, w, n, e)
    return payload
|
||||
|
||||
|
||||
def _json_safe(value):
|
||||
if isinstance(value, float):
|
||||
return value if math.isfinite(value) else None
|
||||
@@ -479,13 +561,14 @@ async def bootstrap_critical(request: Request):
|
||||
@limiter.limit("120/minute")
|
||||
async def live_data_fast(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (vessels, aircraft, sigint, CCTV, …) are filtered to this viewport with 20% padding. Static reference layers (satellites, etc.) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
initial: bool = Query(False, description="Return a capped startup payload for first paint"),
|
||||
):
|
||||
etag = _current_etag(prefix="fast|initial|" if initial else "fast|full|")
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix=("fast|initial|" if initial else "fast|full|") + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -525,6 +608,11 @@ async def live_data_fast(
|
||||
payload = _cap_fast_startup_payload(payload)
|
||||
else:
|
||||
payload = _cap_fast_dashboard_payload(payload)
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Without bounds, behaviour is byte-for-byte identical
|
||||
# to the pre-#288 implementation.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
|
||||
@@ -533,12 +621,13 @@ async def live_data_fast(
|
||||
@limiter.limit("60/minute")
|
||||
async def live_data_slow(
|
||||
request: Request,
|
||||
s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
|
||||
s: float = Query(None, description="South bound — when all four bounds are supplied, heavy/dense layers (gdelt, firms_fires, kiwisdr, scanners, psk_reporter) are filtered to this viewport with 20% padding. Static reference layers (datacenters, military bases, power plants, weather, news, …) always ship world-scale.", ge=-90, le=90),
|
||||
w: float = Query(None, description="West bound (see s)", ge=-180, le=180),
|
||||
n: float = Query(None, description="North bound (see s)", ge=-90, le=90),
|
||||
e: float = Query(None, description="East bound (see s)", ge=-180, le=180),
|
||||
):
|
||||
etag = _current_etag(prefix="slow|full|")
|
||||
bbox_suffix = _bbox_etag_suffix(s, w, n, e)
|
||||
etag = _current_etag(prefix="slow|full|" + bbox_suffix.lstrip("|") + ("|" if bbox_suffix else ""))
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import (active_layers, get_latest_data_subset_refs, get_source_timestamps_snapshot)
|
||||
@@ -592,6 +681,12 @@ async def live_data_slow(
|
||||
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
||||
"freshness": freshness,
|
||||
}
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
# are supplied. Static reference layers (datacenters, military bases,
|
||||
# power_plants, etc.) deliberately stay world-scale so panning never
|
||||
# hides the infrastructure overlay the operator already has on screen.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
media_type="application/json",
|
||||
|
||||
@@ -85,6 +85,64 @@ async def infonet_peer_push(request: Request):
|
||||
return {"ok": True, **result}
|
||||
|
||||
|
||||
@router.post("/api/mesh/dm/replicate-envelope")
@limiter.limit("60/minute")
async def dm_replicate_envelope(request: Request):
    """Accept a DM envelope replicated from a peer relay (cross-node mailbox).

    Companion endpoint to ``DMRelay.replicate_to_peers`` (outbound, in
    ``mesh_dm_relay.py``). The sender's relay POSTs an encrypted DM
    envelope here after a successful local ``deposit``; the envelope is
    handed to ``dm_relay.accept_replica`` and that call's result is returned.

    The per-(sender, recipient) anti-spam cap is the network rule: a hostile
    sender's relay can spool extras locally, but every honest peer enforces
    the cap on inbound replication, so recipient polling from any honest
    peer never sees more than ``MESH_DM_PENDING_PER_SENDER_LIMIT`` pending
    from any one sender.
    NOTE(review): the cap itself is presumably applied inside
    ``accept_replica`` -- it is not visible in this handler; confirm.

    Same HMAC auth pattern as ``infonet_peer_push`` and ``gate_peer_push``.

    Responses produced here:
      * 413 -- declared or actual body size exceeds 64 KB.
      * 403 -- peer HMAC missing or invalid.
      * 400 -- body is not valid JSON.
      * 200 ``{"ok": False, ...}`` -- body is not a JSON object, or its
        ``envelope`` field is not an object.
      * otherwise whatever ``dm_relay.accept_replica`` returns.
    """
    # DM envelopes are bounded by MESH_DM_MAX_MSG_BYTES + envelope
    # overhead; 64 KB is a generous ceiling.
    max_body_bytes = 65_536

    def _too_large() -> Response:
        return Response(
            content='{"ok":false,"detail":"Request body too large (max 64KB)"}',
            status_code=413, media_type="application/json",
        )

    # Cheap early rejection based on the declared size.
    content_length = request.headers.get("content-length")
    if content_length:
        try:
            if int(content_length) > max_body_bytes:
                return _too_large()
        except (ValueError, TypeError):
            # Malformed header: ignore it; the measured size is checked below.
            pass

    body_bytes = await request.body()
    # Content-Length may be absent (chunked transfer) or wrong, so the cap
    # is also enforced against what was actually received.
    if len(body_bytes) > max_body_bytes:
        return _too_large()

    if not _verify_peer_push_hmac(request, body_bytes):
        return Response(
            content='{"ok":false,"detail":"Invalid or missing peer HMAC"}',
            status_code=403, media_type="application/json",
        )

    try:
        body = json_mod.loads(body_bytes or b"{}")
    except (ValueError, TypeError):
        return Response(
            content='{"ok":false,"detail":"Invalid JSON body"}',
            status_code=400, media_type="application/json",
        )

    # Valid JSON is not necessarily an object (could be a list, string, ...);
    # treat any non-object body as carrying no envelope instead of crashing.
    envelope = body.get("envelope") if isinstance(body, dict) else None
    if not isinstance(envelope, dict):
        return {"ok": False, "detail": "envelope must be an object"}

    originating_peer = _peer_hmac_url_from_request(request) or ""

    from services.mesh.mesh_dm_relay import dm_relay
    result = dm_relay.accept_replica(
        envelope=envelope,
        originating_peer_url=originating_peer,
    )
    return result
|
||||
|
||||
|
||||
@router.post("/api/mesh/gate/peer-push")
|
||||
@limiter.limit("30/minute")
|
||||
async def gate_peer_push(request: Request):
|
||||
|
||||
+87
-10
@@ -85,7 +85,30 @@ async def api_geocode_reverse(
|
||||
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
||||
|
||||
|
||||
@router.get("/api/sentinel2/search")
|
||||
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
|
||||
# These three endpoints relay external Sentinel / Planetary Computer
|
||||
# requests through the backend to avoid browser CORS blocks. They are
|
||||
# operator-only helpers — they MUST NOT be callable by anonymous remote
|
||||
# users, because:
|
||||
#
|
||||
# * /api/sentinel/token — caller supplies their own Sentinel client_id +
|
||||
# client_secret. Without operator gating, the backend becomes a free
|
||||
# anonymous OAuth-mint relay for any Copernicus account.
|
||||
# * /api/sentinel/tile — same shape as the token route but for tile
|
||||
# imagery. Without gating, the backend acts as an anonymous quota and
|
||||
# bandwidth relay for Sentinel Hub Process API calls.
|
||||
# * /api/sentinel2/search — hits the Planetary Computer STAC search API
|
||||
# and falls back to Esri imagery. No caller credentials are involved,
|
||||
# but the route is still an anonymous external-search relay. We gate
|
||||
# it the same way for consistency with the rest of the operator-only
|
||||
# helper surface.
|
||||
#
|
||||
# Gating is via require_local_operator (loopback / bridge / admin key),
|
||||
# matching the same allowlist already used by /api/region-dossier and
|
||||
# the other operator helpers further up this file. Single-operator nodes
|
||||
# see no behavior change — their dashboard already lives on loopback or
|
||||
# the trusted Docker bridge, so it still resolves.
|
||||
@router.get("/api/sentinel2/search", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
def api_sentinel2_search(
|
||||
request: Request,
|
||||
@@ -97,18 +120,60 @@ def api_sentinel2_search(
|
||||
return search_sentinel2_scene(lat, lng)
|
||||
|
||||
|
||||
@router.post("/api/sentinel/token")
|
||||
# Issue #298 (tg12): Sentinel credentials moved server-side
|
||||
# ---------------------------------------------------------------------------
|
||||
# Previously the frontend kept Copernicus CDSE client_id + client_secret in
|
||||
# browser localStorage / sessionStorage and forwarded them on every tile
|
||||
# request through this proxy. That exposed real third-party credentials to
|
||||
# any same-origin script (XSS, malicious browser extension, dev-tools HAR
|
||||
# export).
|
||||
#
|
||||
# Resolution order (first match wins):
|
||||
# 1. Request body — kept for back-compat. A small number of legacy
|
||||
# operator setups may still post credentials; we don't break them.
|
||||
# 2. Backend .env — SENTINEL_CLIENT_ID / SENTINEL_CLIENT_SECRET, managed
|
||||
# through the existing /api/settings/api-keys flow (admin-gated).
|
||||
#
|
||||
# The frontend in ``sentinelHub.ts`` no longer reads browser storage and no
|
||||
# longer forwards credentials — every dashboard request now lands in (2).
|
||||
# The require_local_operator gate (added in #303/PR #303) stays — both layers
|
||||
# are independent: the gate blocks anonymous callers, the env fallback lets
|
||||
# legitimate (gated) callers omit credentials from the body.
|
||||
# ---------------------------------------------------------------------------
|
||||
def _resolve_sentinel_credentials(body_id: str, body_secret: str) -> tuple[str, str]:
|
||||
"""Return (client_id, client_secret) using body values when present,
|
||||
otherwise falling back to backend .env. Empty strings if neither is set."""
|
||||
import os as _os
|
||||
cid = (body_id or "").strip() or (_os.environ.get("SENTINEL_CLIENT_ID", "") or "").strip()
|
||||
csec = (body_secret or "").strip() or (_os.environ.get("SENTINEL_CLIENT_SECRET", "") or "").strip()
|
||||
return cid, csec
|
||||
|
||||
|
||||
@router.post("/api/sentinel/token", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("60/minute")
|
||||
async def api_sentinel_token(request: Request):
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block)."""
|
||||
"""Proxy Copernicus CDSE OAuth2 token request (avoids browser CORS block).
|
||||
|
||||
Credentials are resolved by ``_resolve_sentinel_credentials`` — body
|
||||
fields are honored for back-compat, otherwise the backend .env values
|
||||
populated through ``/api/settings/api-keys`` are used.
|
||||
"""
|
||||
import requests as req
|
||||
body = await request.body()
|
||||
from urllib.parse import parse_qs
|
||||
params = parse_qs(body.decode("utf-8"))
|
||||
client_id = params.get("client_id", [""])[0]
|
||||
client_secret = params.get("client_secret", [""])[0]
|
||||
body_id = params.get("client_id", [""])[0]
|
||||
body_secret = params.get("client_secret", [""])[0]
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
if not client_id or not client_secret:
|
||||
raise HTTPException(400, "client_id and client_secret required")
|
||||
# Friendly, non-hostile error — points the operator at the place
|
||||
# they configure other API keys instead of just saying "required".
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
|
||||
try:
|
||||
resp = await asyncio.to_thread(req.post, token_url,
|
||||
@@ -152,7 +217,7 @@ import os as _os
|
||||
_SH_TOKEN_CACHE_HMAC_KEY = _os.urandom(32)
|
||||
|
||||
|
||||
@router.post("/api/sentinel/tile")
|
||||
@router.post("/api/sentinel/tile", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("300/minute")
|
||||
async def api_sentinel_tile(request: Request):
|
||||
"""Proxy Sentinel Hub Process API tile request (avoids CORS block)."""
|
||||
@@ -163,8 +228,11 @@ async def api_sentinel_tile(request: Request):
|
||||
except Exception:
|
||||
return JSONResponse(status_code=422, content={"ok": False, "detail": "invalid JSON body"})
|
||||
|
||||
client_id = body.get("client_id", "")
|
||||
client_secret = body.get("client_secret", "")
|
||||
# Issue #298: same resolution order as /api/sentinel/token — body
|
||||
# values for back-compat, otherwise backend .env.
|
||||
body_id = body.get("client_id", "")
|
||||
body_secret = body.get("client_secret", "")
|
||||
client_id, client_secret = _resolve_sentinel_credentials(body_id, body_secret)
|
||||
preset = body.get("preset", "TRUE-COLOR")
|
||||
date_str = body.get("date", "")
|
||||
z = body.get("z", 0)
|
||||
@@ -172,7 +240,16 @@ async def api_sentinel_tile(request: Request):
|
||||
y = body.get("y", 0)
|
||||
|
||||
if not client_id or not client_secret or not date_str:
|
||||
raise HTTPException(400, "client_id, client_secret, and date required")
|
||||
# Distinguish "no creds" from "no date" so the operator knows
|
||||
# what to fix. Same friendly pointer as the /token route.
|
||||
if not client_id or not client_secret:
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Sentinel client_id/client_secret are not configured. "
|
||||
"Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in the "
|
||||
"API Keys panel (Settings → API Keys) or your backend .env.",
|
||||
)
|
||||
raise HTTPException(400, "date required")
|
||||
|
||||
now = _time.time()
|
||||
credential_fp = _credential_fingerprint(client_id, client_secret)
|
||||
|
||||
@@ -20,7 +20,17 @@ OUT_PATH = Path(__file__).parent.parent / "data" / "power_plants.json"
|
||||
|
||||
def main() -> None:
|
||||
print(f"Downloading WRI Global Power Plant Database from GitHub...")
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
# Round 7a: release-time data refresher. Uses the per-operator UA if
|
||||
# available, otherwise a release-script-specific identifier. This
|
||||
# script is run by the maintainer at release time, NOT at runtime,
|
||||
# so an aggregate UA is acceptable; we still use the helper so the
|
||||
# behavior matches the rest of the project.
|
||||
try:
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua = outbound_user_agent("release-script-power-plants")
|
||||
except Exception:
|
||||
ua = "Shadowbroker/0.9 (release-script-power-plants; +https://github.com/BigBodyCobain/Shadowbroker/issues)"
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": ua})
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
raw = resp.read().decode("utf-8")
|
||||
|
||||
|
||||
@@ -150,6 +150,31 @@ API_REGISTRY = [
|
||||
"url": "https://finnhub.io/register",
|
||||
"required": False,
|
||||
},
|
||||
# Issue #298 (tg12): Sentinel Hub / Copernicus Data Space Ecosystem
|
||||
# credentials were previously held in browser localStorage / sessionStorage
|
||||
# by the Settings panel. Moved server-side to the same .env-backed
|
||||
# store every other third-party API key lives in. The Sentinel proxy
|
||||
# routes (POST /api/sentinel/token, /tile) now fall back to these
|
||||
# env values when the request body omits credentials — see
|
||||
# backend/routers/tools.py for the resolution order.
|
||||
{
|
||||
"id": "sentinel_client_id",
|
||||
"env_key": "SENTINEL_CLIENT_ID",
|
||||
"name": "Sentinel Hub / Copernicus — Client ID",
|
||||
"description": "OAuth2 client ID for Copernicus Data Space Ecosystem (CDSE). Required for the Sentinel-2 imagery overlay and the right-click Sentinel-2 Intel Card. Sign in at dataspace.copernicus.eu and create OAuth credentials.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"id": "sentinel_client_secret",
|
||||
"env_key": "SENTINEL_CLIENT_SECRET",
|
||||
"name": "Sentinel Hub / Copernicus — Client Secret",
|
||||
"description": "OAuth2 client secret paired with the Client ID above. Used by the backend to mint short-lived access tokens against the CDSE identity provider. Stored in the backend .env; never sent to the browser.",
|
||||
"category": "Imagery",
|
||||
"url": "https://dataspace.copernicus.eu/",
|
||||
"required": False,
|
||||
},
|
||||
]
|
||||
|
||||
ALLOWED_ENV_KEYS = {
|
||||
|
||||
@@ -627,20 +627,56 @@ def update_carrier_positions() -> None:
|
||||
_carrier_positions.update(positions)
|
||||
_last_update = datetime.now(timezone.utc)
|
||||
logger.info(
|
||||
"Carrier tracker: %d carriers loaded from cache (GDELT enrichment starting...)",
|
||||
"Carrier tracker: %d carriers loaded from cache (USNI + GDELT enrichment starting...)",
|
||||
len(positions),
|
||||
)
|
||||
|
||||
# --- Phase 2: GDELT enrichment ---
|
||||
# --- Phase 2: USNI Fleet & Marine Tracker (PRIMARY source) ---
|
||||
#
|
||||
# USNI publishes a weekly editorial tracker with each carrier's
|
||||
# actual operating area, parsed from explicit prose like
|
||||
# "The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
# These positions are tagged ``position_confidence: "recent"`` because
|
||||
# they reflect actual reporting, not headline-keyword centroids.
|
||||
# USNI updates are preferred over GDELT — they're authoritative on
|
||||
# US Navy positions where GDELT is just article-title text mining.
|
||||
try:
|
||||
from services.fetchers.usni_fleet_tracker import (
|
||||
fetch_latest_fleet_tracker_positions,
|
||||
)
|
||||
usni_positions = fetch_latest_fleet_tracker_positions()
|
||||
for hull, pos in usni_positions.items():
|
||||
positions[hull] = pos
|
||||
logger.info(
|
||||
"Carrier USNI update: %s → %s",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
pos.get("desc", ""),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("USNI fleet-tracker fetch failed: %s", e)
|
||||
|
||||
# --- Phase 3: GDELT enrichment (SECONDARY — fills gaps) ---
|
||||
#
|
||||
# Used only to backfill carriers USNI didn't mention this week. The
|
||||
# position is stamped ``approximate`` so the UI knows it's a
|
||||
# headline-centroid match (Issue #245).
|
||||
try:
|
||||
articles = _fetch_gdelt_carrier_news()
|
||||
news_positions = _parse_carrier_positions_from_news(articles)
|
||||
for hull, pos in news_positions.items():
|
||||
# Always overwrite — newest GDELT mention wins. The previous
|
||||
# entry's position is preserved in git history and the next
|
||||
# cycle either confirms or replaces it.
|
||||
# Only overwrite if the existing entry is NOT a recent USNI
|
||||
# observation. A "recent" USNI position is higher-confidence
|
||||
# than a GDELT headline-centroid match — don't let GDELT
|
||||
# demote a real position to an approximate one.
|
||||
existing = positions.get(hull, {})
|
||||
existing_conf = _compute_position_confidence(existing)
|
||||
if existing_conf == "recent":
|
||||
continue
|
||||
positions[hull] = pos
|
||||
logger.info("Carrier OSINT: updated %s from news", CARRIER_REGISTRY[hull]["name"])
|
||||
logger.info(
|
||||
"Carrier OSINT: updated %s from GDELT news",
|
||||
CARRIER_REGISTRY[hull]["name"],
|
||||
)
|
||||
except (ValueError, KeyError, json.JSONDecodeError, OSError) as e:
|
||||
logger.warning("GDELT carrier fetch failed: %s", e)
|
||||
|
||||
|
||||
@@ -116,6 +116,21 @@ class Settings(BaseSettings):
|
||||
MESH_DM_REQUEST_MAILBOX_LIMIT: int = 12
|
||||
MESH_DM_SHARED_MAILBOX_LIMIT: int = 48
|
||||
MESH_DM_SELF_MAILBOX_LIMIT: int = 12
|
||||
# Anti-spam: cap on distinct UNACKED messages a single sender can have
|
||||
# parked in a single recipient's mailbox at any one time. Once the
|
||||
# recipient pulls (acks) a message, the sender's quota for that pair
|
||||
# frees up. Default 2 — a sender who wants to deliver more must wait
|
||||
# for the recipient to actually read the prior messages.
|
||||
#
|
||||
# This cap is enforced TWICE: once on the local deposit path (the
|
||||
# sender's own node refuses to spool the 3rd message) AND once on
|
||||
# the replication-acceptance path (honest peer relays refuse to
|
||||
# accept inbound replicas that would put them over the cap). The
|
||||
# double enforcement makes the rule a NETWORK rule — patching out
|
||||
# the local check on a hostile sender's relay doesn't let extras
|
||||
# propagate, because every honest peer enforces the same cap on
|
||||
# inbound replication.
|
||||
MESH_DM_PENDING_PER_SENDER_LIMIT: int = 2
|
||||
MESH_BLOCK_LEGACY_AGENT_ID_LOOKUP: bool = True
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT: bool = False
|
||||
MESH_ALLOW_COMPAT_DM_INVITE_IMPORT_UNTIL: str = ""
|
||||
@@ -295,6 +310,19 @@ class Settings(BaseSettings):
|
||||
# service operator can identify per-install traffic instead of a generic
|
||||
# "ShadowBroker" aggregate.
|
||||
MESHTASTIC_OPERATOR_CALLSIGN: str = ""
|
||||
# Per-install operator handle used in the User-Agent for EVERY third-party
|
||||
# API the backend calls (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz,
|
||||
# Broadcastify, weather.gov, NUFORC, etc.). The default is empty, in which
|
||||
# case backend/services/network_utils.py auto-generates a stable
|
||||
# pseudonymous handle like "operator-7f3a92" on first use and caches it.
|
||||
# Operators who want to identify themselves with a real handle can set
|
||||
# this; operators who want to stay pseudonymous can leave it empty.
|
||||
#
|
||||
# The handle is sent ONLY to public third-party APIs. It is NEVER mixed
|
||||
# into mesh / Wormhole / Infonet identity (those have their own crypto
|
||||
# identity layer; conflating the two would leak public attribution into
|
||||
# private mesh state).
|
||||
OPERATOR_HANDLE: str = ""
|
||||
|
||||
# SAR (Synthetic Aperture Radar) data layer
|
||||
# Mode A — free catalog metadata, no account, default-on
|
||||
|
||||
@@ -16,8 +16,15 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _feed_ingester_user_agent() -> str:
    """Return the User-Agent used when fetching operator-curated feed URLs.

    Round 7a: built by ``outbound_user_agent`` with the ``feed-ingester``
    tag so requests carry per-install attribution.
    """
    user_agent = outbound_user_agent("feed-ingester")
    return user_agent
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -157,7 +164,7 @@ def _fetch_layer_feed(layer: dict[str, Any]) -> None:
|
||||
resp = requests.get(
|
||||
feed_url,
|
||||
timeout=_FETCH_TIMEOUT,
|
||||
headers={"User-Agent": "ShadowBroker-FeedIngester/1.0"},
|
||||
headers={"User-Agent": _feed_ingester_user_agent()},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -21,6 +21,13 @@ from typing import Any
|
||||
import defusedxml.ElementTree as ET
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _aircraft_db_user_agent() -> str:
    """Return the User-Agent for aircraft-database downloads.

    Round 7a: ``outbound_user_agent`` is imported lazily, at call time, so
    the per-install operator handle is included.
    """
    from services.network_utils import outbound_user_agent as _build_user_agent

    return _build_user_agent("aircraft-database")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BUCKET_LIST_URL = (
|
||||
@@ -44,7 +51,7 @@ def _latest_snapshot_key() -> str:
|
||||
response = requests.get(
|
||||
_BUCKET_LIST_URL,
|
||||
timeout=_LIST_TIMEOUT_S,
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.text)
|
||||
@@ -71,7 +78,7 @@ def _stream_csv_index(url: str) -> dict[str, dict[str, str]]:
|
||||
url,
|
||||
timeout=_DOWNLOAD_TIMEOUT_S,
|
||||
stream=True,
|
||||
headers={"User-Agent": _USER_AGENT},
|
||||
headers={"User-Agent": _aircraft_db_user_agent()},
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
line_iter = (
|
||||
|
||||
@@ -15,7 +15,11 @@ import time
|
||||
import heapq
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from services.network_utils import external_curl_fallback_enabled, fetch_with_curl
|
||||
from services.network_utils import (
|
||||
external_curl_fallback_enabled,
|
||||
fetch_with_curl,
|
||||
outbound_user_agent,
|
||||
)
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.nuforc_enrichment import enrich_sighting
|
||||
from services.fetchers.retry import with_retry
|
||||
@@ -279,13 +283,13 @@ def fetch_weather_alerts():
|
||||
return
|
||||
alerts = []
|
||||
try:
|
||||
# weather.gov requires a User-Agent per their API policy, but it
|
||||
# need not identify the operator. Use a project-generic string and
|
||||
# let the user override via SHADOWBROKER_USER_AGENT if needed.
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
# weather.gov requires a User-Agent per their API policy. Round 7a:
|
||||
# send the per-install operator handle so they can rate-limit per
|
||||
# operator instead of treating "Shadowbroker" as one entity.
|
||||
from services.network_utils import outbound_user_agent
|
||||
url = "https://api.weather.gov/alerts/active?status=actual"
|
||||
headers = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"User-Agent": outbound_user_agent("weather-gov"),
|
||||
"Accept": "application/geo+json",
|
||||
}
|
||||
response = fetch_with_curl(url, timeout=15, headers=headers)
|
||||
@@ -713,7 +717,12 @@ _NUFORC_LIVE_NONCE_RE = re.compile(
|
||||
r'id=["\']wdtNonceFrontendServerSide_1["\'][^>]*value=["\']([a-f0-9]+)["\']'
|
||||
)
|
||||
_NUFORC_LIVE_SIGHTING_ID_RE = re.compile(r"id=(\d+)")
|
||||
_NUFORC_LIVE_USER_AGENT = "Mozilla/5.0 (ShadowBroker-OSINT NUFORC-fetcher)"
|
||||
# Round 7a: NUFORC's site is sensitive to non-browser UAs but we send a
|
||||
# per-install operator handle prefixed by Mozilla/5.0 so we're identifiable
|
||||
# without being aggregately blocked. Operators who want stricter privacy
|
||||
# can override the entire UA via SHADOWBROKER_USER_AGENT.
|
||||
def _nuforc_live_user_agent() -> str:
    """Return the browser-prefixed User-Agent for live NUFORC requests.

    The value from ``outbound_user_agent('nuforc-live')`` is wrapped as
    ``Mozilla/5.0 (<value>)``.
    """
    operator_part = outbound_user_agent('nuforc-live')
    return "Mozilla/5.0 ({})".format(operator_part)
|
||||
_NUFORC_LIVE_SESSION_COOKIES = _NUFORC_DATA_DIR / "nuforc_session.cookies"
|
||||
|
||||
# Sample grid covering continental US, Alaska, Hawaii, Canada, UK, Australia
|
||||
@@ -957,7 +966,7 @@ def _photon_lookup(query: str) -> list[float] | None:
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (NUFORC-UAP-layer)",
|
||||
"User-Agent": outbound_user_agent("nuforc-uap-geocode"),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=10,
|
||||
@@ -1053,7 +1062,7 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
index_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-A", _nuforc_live_user_agent(),
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
index_url,
|
||||
@@ -1089,7 +1098,7 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
ajax_res = subprocess.run(
|
||||
[
|
||||
curl_bin, "-sL",
|
||||
"-A", _NUFORC_LIVE_USER_AGENT,
|
||||
"-A", _nuforc_live_user_agent(),
|
||||
"-c", str(cookie_jar),
|
||||
"-b", str(cookie_jar),
|
||||
"-X", "POST",
|
||||
|
||||
@@ -459,6 +459,18 @@ def _classify_and_publish(all_adsb_flights):
|
||||
|
||||
ac_category = "heli" if model_upper in _HELI_TYPES_BACKEND else "plane"
|
||||
|
||||
# Source attribution: prefer the explicit ``source`` tag stamped
|
||||
# at fetch time (adsb.lol, OpenSky). If absent, fall back to the
|
||||
# legacy ``supplemental_source`` (airplanes.live, adsb.fi) so
|
||||
# supplementals are still attributed without changing their
|
||||
# tagger. Final fallback "adsb.lol" preserves prior behavior for
|
||||
# any caller that synthesizes records without going through one
|
||||
# of our fetchers (e.g. tests).
|
||||
source = (
|
||||
f.get("source")
|
||||
or f.get("supplemental_source")
|
||||
or "adsb.lol"
|
||||
)
|
||||
flights.append(
|
||||
{
|
||||
"callsign": flight_str,
|
||||
@@ -480,6 +492,7 @@ def _classify_and_publish(all_adsb_flights):
|
||||
"airline_code": airline_code,
|
||||
"aircraft_category": ac_category,
|
||||
"nac_p": f.get("nac_p"),
|
||||
"source": source,
|
||||
}
|
||||
)
|
||||
except (ValueError, TypeError, KeyError, AttributeError) as loop_e:
|
||||
@@ -849,7 +862,15 @@ def _fetch_adsb_lol_regions():
|
||||
res = fetch_with_curl(url, timeout=10)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
return data.get("ac", [])
|
||||
aircraft = data.get("ac", [])
|
||||
# Stamp the source at the fetch site so attribution survives
|
||||
# the OpenSky/supplemental dedupe-by-hex merge downstream.
|
||||
# Previously adsb.lol records carried no marker while OpenSky
|
||||
# records got ``is_opensky: True`` — which made flight tooltips
|
||||
# look like everything came from OpenSky.
|
||||
for a in aircraft:
|
||||
a["source"] = "adsb.lol"
|
||||
return aircraft
|
||||
except (
|
||||
requests.RequestException,
|
||||
ConnectionError,
|
||||
@@ -932,6 +953,7 @@ def _enrich_with_opensky_and_supplemental(adsb_flights):
|
||||
"gs": (s[9] * 1.94384) if s[9] else 0,
|
||||
"t": "Unknown",
|
||||
"is_opensky": True,
|
||||
"source": "OpenSky",
|
||||
}
|
||||
)
|
||||
elif os_res.status_code == 429:
|
||||
|
||||
@@ -6,7 +6,7 @@ import heapq
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.retry import with_retry
|
||||
|
||||
@@ -29,7 +29,7 @@ def _geocode_region(region_name: str, country_name: str) -> tuple:
|
||||
|
||||
query = urllib.parse.quote(f"{region_name}, {country_name}")
|
||||
url = f"https://nominatim.openstreetmap.org/search?q={query}&format=json&limit=1"
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": "ShadowBroker-OSINT/1.0"})
|
||||
response = fetch_with_curl(url, timeout=8, headers={"User-Agent": outbound_user_agent("infrastructure-data")})
|
||||
if response.status_code == 200:
|
||||
results = response.json()
|
||||
if results:
|
||||
|
||||
@@ -191,8 +191,13 @@ def fetch_meshtastic_nodes():
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
||||
).strip().lower() not in {"0", "false", "no", "off", ""}
|
||||
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
ua_base = f"{DEFAULT_USER_AGENT}; 24h polling"
|
||||
# Round 7a: outbound_user_agent already includes the per-install handle.
|
||||
# The optional Meshtastic callsign is appended as additional context so
|
||||
# meshtastic.liamcottle.net's operator can identify both the install AND
|
||||
# the registered radio operator (when MESHTASTIC_OPERATOR_CALLSIGN is set
|
||||
# and MESHTASTIC_SEND_CALLSIGN_HEADER is true; see issue #203).
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua_base = f"{outbound_user_agent('meshtastic-map')}; 24h polling"
|
||||
if callsign and send_callsign_header:
|
||||
user_agent = f"{ua_base}; node={callsign}"
|
||||
else:
|
||||
|
||||
@@ -171,6 +171,7 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "adsb.lol"
|
||||
all_mil_ac.append(a)
|
||||
except Exception as e:
|
||||
logger.warning(f"adsb.lol mil fetch failed: {e}")
|
||||
@@ -182,6 +183,7 @@ def fetch_military_flights():
|
||||
h = a.get("hex", "").lower()
|
||||
if h and h not in seen_hex:
|
||||
seen_hex.add(h)
|
||||
a["source"] = "airplanes.live"
|
||||
all_mil_ac.append(a)
|
||||
logger.info(f"airplanes.live mil: +{len(resp2.json().get('ac', []))} raw, {len(all_mil_ac)} total unique")
|
||||
except Exception as e:
|
||||
@@ -234,6 +236,7 @@ def fetch_military_flights():
|
||||
"registration": f.get("r", "N/A"),
|
||||
"icao24": icao_hex,
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
})
|
||||
continue
|
||||
|
||||
@@ -258,7 +261,8 @@ def fetch_military_flights():
|
||||
"model": f.get("t", "Unknown"),
|
||||
"icao24": icao_hex,
|
||||
"speed_knots": speed_knots,
|
||||
"squawk": f.get("squawk", "")
|
||||
"squawk": f.get("squawk", ""),
|
||||
"source": f.get("source") or "adsb.lol",
|
||||
})
|
||||
except Exception as loop_e:
|
||||
logger.error(f"Mil flight interpolation error: {loop_e}")
|
||||
|
||||
@@ -17,6 +17,12 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def _route_db_user_agent() -> str:
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("route-database")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ROUTES_URL = "https://vrs-standing-data.adsb.lol/routes.csv.gz"
|
||||
@@ -37,7 +43,7 @@ def _fetch_csv_gz(url: str) -> list[dict[str, str]]:
|
||||
response = requests.get(
|
||||
url,
|
||||
timeout=_HTTP_TIMEOUT_S,
|
||||
headers={"User-Agent": _USER_AGENT, "Accept-Encoding": "gzip"},
|
||||
headers={"User-Agent": _route_db_user_agent(), "Accept-Encoding": "gzip"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
text = gzip.decompress(response.content).decode("utf-8-sig")
|
||||
|
||||
@@ -10,6 +10,12 @@ from datetime import datetime, timezone
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _trains_user_agent() -> str:
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("trains")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_EARTH_RADIUS_KM = 6371.0
|
||||
@@ -379,7 +385,7 @@ def _fetch_digitraffic() -> list[dict]:
|
||||
timeout=15,
|
||||
headers={
|
||||
"Accept-Encoding": "gzip",
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
||||
"User-Agent": _trains_user_agent(),
|
||||
},
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
|
||||
@@ -0,0 +1,457 @@
|
||||
"""USNI News Fleet & Marine Tracker — authoritative weekly carrier
|
||||
position publication.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
The previous carrier_tracker pipeline relied on GDELT headline matching
|
||||
(``api.gdeltproject.org``) to derive positions from text like "USS Ford
|
||||
in the Mediterranean" → centroid of "Mediterranean Sea". That was
|
||||
- low-precision (audit issue #245 — false precision from text mentions),
|
||||
- unreliable (``api.gdeltproject.org`` is sometimes unreachable from
|
||||
certain network paths, including Docker Desktop on some Windows hosts).
|
||||
|
||||
USNI publishes a weekly tracker that explicitly lists where every U.S.
|
||||
carrier is operating. The article body uses extremely consistent phrasing:
|
||||
|
||||
"The Gerald R. Ford Carrier Strike Group is operating in the Red Sea"
|
||||
"Aircraft carrier USS George Washington (CVN-73) is in port in
|
||||
Yokosuka, Japan."
|
||||
"USS Dwight D. Eisenhower (CVN-69) sails down the Elizabeth River"
|
||||
|
||||
Those are deterministic to parse. This module:
|
||||
|
||||
1. Pulls the WordPress RSS feeds (both site-wide and category) — the
|
||||
site-wide feed often has fresher posts before the category feed
|
||||
catches up, so we union them.
|
||||
2. Picks the most recent post by parsed ``pubDate``.
|
||||
3. For each carrier in the registry, scans the article body for a
|
||||
"is operating in / is in port in / departed from" pattern near
|
||||
the carrier's name.
|
||||
4. Maps the extracted region phrase to coordinates via the carrier
|
||||
tracker's existing REGION_COORDS.
|
||||
|
||||
The result is a ``{hull: position_entry}`` dict that the carrier tracker
|
||||
consumes as a high-confidence source — ``position_confidence: "recent"``
|
||||
with ``position_source_at`` set to the article's actual publication
|
||||
timestamp (not ``now()``).
|
||||
|
||||
Politeness
|
||||
----------
|
||||
We send the per-install operator handle via ``outbound_user_agent``
|
||||
(Round 7a) so USNI can rate-limit / contact the specific install if
|
||||
needed. Article-body pages return 403 to non-browser UAs (Cloudflare),
|
||||
but WordPress RSS feeds are open and serve the full article in
|
||||
``<content:encoded>`` — that's the supported path for aggregators and
|
||||
the one we use. We do not spoof browser headers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import Iterable
|
||||
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Feeds polled for fleet-tracker posts; they are fetched in this order and
# the results are merged (de-duplicated by article link) by the caller.
_RSS_URLS: tuple[str, ...] = (
    # Site-wide feed often has the freshest posts before the category
    # feed catches up. We try this first.
    "https://news.usni.org/feed",
    # Category feed has older fleet trackers for backfill.
    "https://news.usni.org/category/fleet-tracker/feed",
)

# Namespace map that lets ElementTree resolve the ``content:encoded`` element.
_RSS_NS = {"content": "http://purl.org/rss/1.0/modules/content/"}

# Case-insensitive match for titles containing "fleet and marine tracker";
# items whose title does not match are ignored.
_FLEET_TRACKER_TITLE_RE = re.compile(
    r"fleet\s+and\s+marine\s+tracker", re.IGNORECASE
)
|
||||
|
||||
_TAG_STRIP_RE = re.compile(r"<[^>]+>")
|
||||
_WHITESPACE_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
|
||||
text = _TAG_STRIP_RE.sub(" ", html or "")
|
||||
return _WHITESPACE_RE.sub(" ", text).strip()
|
||||
|
||||
|
||||
def _request_headers() -> dict[str, str]:
    """Build the HTTP headers sent with every USNI RSS request.

    The User-Agent comes from ``outbound_user_agent("usni-fleet-tracker")``;
    no browser User-Agent is spoofed. ``Accept`` prefers RSS/XML and falls
    back to anything, and ``Referer`` is the fleet-tracker category page.

    Returns:
        A fresh header dict on every call.
    """
    headers: dict[str, str] = {}
    headers["User-Agent"] = outbound_user_agent("usni-fleet-tracker")
    headers["Accept"] = "application/rss+xml, application/xml;q=0.9, */*;q=0.1"
    headers["Accept-Language"] = "en-US,en;q=0.5"
    headers["Referer"] = "https://news.usni.org/category/fleet-tracker"
    return headers
|
||||
|
||||
|
||||
def _parse_pubdate(raw: str) -> datetime | None:
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
dt = parsedate_to_datetime(raw)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _iter_fleet_tracker_items(rss_urls: Iterable[str]) -> list[dict]:
    """Collect the fleet-tracker posts visible in the given RSS feeds.

    Each feed is fetched in turn. A feed that raises, returns a non-200
    status or an empty body, or fails XML parsing is logged and skipped.
    Items are kept only when their title matches
    ``_FLEET_TRACKER_TITLE_RE``; the first item seen for a given link wins.

    Args:
        rss_urls: Feed URLs to poll, in order.

    Returns:
        A list of dicts with keys ``title``, ``link``, ``pub_date``
        (``datetime`` or ``None``) and ``body`` (plain text taken from
        ``content:encoded``, falling back to ``description``).
    """
    collected: dict[str, dict] = {}
    for feed_url in rss_urls:
        try:
            resp = fetch_with_curl(feed_url, timeout=15, headers=_request_headers())
        except Exception as exc:
            logger.debug("USNI RSS %s exception: %s", feed_url, exc)
            continue

        usable = bool(resp) and resp.status_code == 200 and bool(resp.text)
        if not usable:
            status = getattr(resp, "status_code", "?")
            body_len = len(getattr(resp, "text", "") or "")
            logger.debug(
                "USNI RSS %s returned status=%s body=%d",
                feed_url,
                status,
                body_len,
            )
            continue

        try:
            tree_root = ET.fromstring(resp.text)
        except ET.ParseError as exc:
            logger.warning("USNI RSS parse error from %s: %s", feed_url, exc)
            continue

        for node in tree_root.findall(".//item"):
            title = (node.findtext("title") or "").strip()
            if _FLEET_TRACKER_TITLE_RE.search(title) is None:
                continue

            link = (node.findtext("link") or "").strip()
            if link == "" or link in collected:
                # No usable key, or already captured from an earlier feed.
                continue

            published = _parse_pubdate(node.findtext("pubDate") or "")

            # Prefer the full article body; fall back to the summary.
            raw_body = node.findtext("content:encoded", default="", namespaces=_RSS_NS)
            if not raw_body:
                raw_body = node.findtext("description", default="")
            if not raw_body:
                raw_body = ""

            collected[link] = {
                "title": title,
                "link": link,
                "pub_date": published,
                "body": _strip_html(raw_body),
            }
    return [entry for entry in collected.values()]
|
||||
|
||||
|
||||
# Map USNI region phrases to keys in carrier_tracker.REGION_COORDS.
|
||||
# The carrier_tracker table already covers most named bodies of water and
|
||||
# major ports — we just need to teach this module to RECOGNIZE the
|
||||
# specific phrases USNI's editorial style uses, which sometimes spell
|
||||
# the same body of water differently.
|
||||
_USNI_REGION_ALIASES: tuple[tuple[str, str], ...] = (
|
||||
# USNI phrase (lowercase) -> REGION_COORDS key
|
||||
("eastern mediterranean", "eastern mediterranean"),
|
||||
("western mediterranean", "western mediterranean"),
|
||||
("mediterranean sea", "mediterranean"),
|
||||
("the mediterranean", "mediterranean"),
|
||||
("red sea", "red sea"),
|
||||
("arabian sea area of responsibility", "arabian sea"),
|
||||
("north arabian sea", "north arabian sea"),
|
||||
("arabian sea", "arabian sea"),
|
||||
("persian gulf", "persian gulf"),
|
||||
("gulf of oman", "gulf of oman"),
|
||||
("strait of hormuz", "strait of hormuz"),
|
||||
("south china sea", "south china sea"),
|
||||
("east china sea", "east china sea"),
|
||||
("philippine sea", "philippine sea"),
|
||||
("sea of japan", "sea of japan"),
|
||||
("taiwan strait", "taiwan strait"),
|
||||
("western pacific", "western pacific"),
|
||||
("pacific ocean", "pacific"),
|
||||
("indian ocean", "indian ocean"),
|
||||
("north atlantic", "north atlantic"),
|
||||
("western atlantic", "atlantic"),
|
||||
("eastern atlantic", "atlantic"),
|
||||
("atlantic ocean", "atlantic"),
|
||||
("gulf of aden", "gulf of aden"),
|
||||
("horn of africa", "horn of africa"),
|
||||
("bab el-mandeb", "bab el-mandeb"),
|
||||
("suez canal", "suez canal"),
|
||||
("baltic sea", "baltic sea"),
|
||||
("north sea", "north sea"),
|
||||
("black sea", "black sea"),
|
||||
("south atlantic", "south atlantic"),
|
||||
("coral sea", "coral sea"),
|
||||
("gulf of mexico", "gulf of mexico"),
|
||||
("caribbean sea", "caribbean"),
|
||||
("caribbean", "caribbean"),
|
||||
# Specific ports
|
||||
("naval station norfolk", "norfolk"),
|
||||
("norfolk naval shipyard", "newport news"),
|
||||
("newport news shipbuilding", "newport news"),
|
||||
("newport news", "newport news"),
|
||||
# USNI tags Norfolk mentions with state suffix; match both.
|
||||
("norfolk, va", "norfolk"),
|
||||
("norfolk", "norfolk"),
|
||||
("naval station everett", "puget sound"),
|
||||
("naval base kitsap", "bremerton"),
|
||||
("bremerton", "bremerton"),
|
||||
("puget sound", "puget sound"),
|
||||
("naval base san diego", "san diego"),
|
||||
("san diego, calif", "san diego"),
|
||||
("san diego", "san diego"),
|
||||
("yokosuka, japan", "yokosuka"),
|
||||
("yokosuka", "yokosuka"),
|
||||
("pearl harbor", "pearl harbor"),
|
||||
("apra harbor, guam", "guam"),
|
||||
("guam", "guam"),
|
||||
("bahrain", "bahrain"),
|
||||
("naval station rota", "rota"),
|
||||
("rota, spain", "rota"),
|
||||
("naples, italy", "naples"),
|
||||
# Fleets / AORs
|
||||
("5th fleet", "5th fleet"),
|
||||
("6th fleet", "6th fleet"),
|
||||
("7th fleet", "7th fleet"),
|
||||
("3rd fleet", "3rd fleet"),
|
||||
("2nd fleet", "2nd fleet"),
|
||||
("centcom", "centcom"),
|
||||
("indo-pacific command", "indopacom"),
|
||||
("eucom", "eucom"),
|
||||
("southcom", "southcom"),
|
||||
)
|
||||
|
||||
|
||||
def _resolve_region_phrase(phrase: str) -> tuple[str, str] | None:
    """Translate a free-text USNI location phrase into a known region.

    The phrase is lower-cased and trimmed, then checked against
    ``_USNI_REGION_ALIASES`` in table order using substring containment;
    the first alias found inside the phrase wins.

    Args:
        phrase: Location text extracted from the article; may be empty.

    Returns:
        ``(canonical_key, display)`` where ``canonical_key`` is the
        REGION_COORDS key and ``display`` is the matched alias text, or
        ``None`` when the phrase is empty or nothing matches.
    """
    needle = (phrase or "").lower().strip()
    if needle == "":
        return None
    hits = (
        (canonical, alias)
        for alias, canonical in _USNI_REGION_ALIASES
        if alias in needle
    )
    return next(hits, None)
|
||||
|
||||
|
||||
# Operating-verb phrases USNI uses, with a capture group for the region
|
||||
# phrase that immediately follows. Each pattern is designed to swallow
|
||||
# the optional editorial filler that often appears between verb and
|
||||
# location (e.g. "returned Friday to Norfolk" — "Friday" goes in the
|
||||
# filler; "Norfolk" is the location).
|
||||
#
|
||||
# Order matters: most-specific patterns first, so e.g. "is in port in"
|
||||
# wins over the generic "is".
|
||||
_DAY_FILLER = r"(?:[A-Z][a-z]+(?:day)?,?\s+)?" # optional "Friday" / "Monday" / etc.
|
||||
_LOC_CAPTURE = r"([A-Za-z][A-Za-z0-9\s,\.\-']{2,80})"
|
||||
|
||||
_OPERATING_PATTERNS: tuple[re.Pattern, ...] = (
|
||||
# "is operating in [the] {REGION}" / "is also operating in [the] {REGION}"
|
||||
re.compile(r"\bis\s+(?:also\s+|now\s+)?operating\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is conducting <stuff> in [the] {REGION}"
|
||||
re.compile(r"\bis\s+conducting\s+[A-Za-z0-9\-\s]{2,40}\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port in {LOCATION}"
|
||||
re.compile(r"\bis\s+in\s+port\s+in\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is in port" (no location — degenerate, use carrier's homeport via separate path)
|
||||
# → not captured here; falls through to homeport
|
||||
# "is underway in [the] {REGION}"
|
||||
re.compile(r"\bis\s+underway\s+in\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is deployed to [the] {REGION}" / "deployed in"
|
||||
re.compile(r"\bis\s+deployed\s+(?:to|in)\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "returned [Day] to {LOCATION}" / "returned [Day] from {REGION}"
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"to\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\breturned\s+" + _DAY_FILLER + r"from\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "arrived [Day] in/at {LOCATION}"
|
||||
re.compile(r"\barrived\s+" + _DAY_FILLER + r"(?:in|at)\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "departed [Day] from {LOCATION}"
|
||||
re.compile(r"\bdeparted\s+" + _DAY_FILLER + r"(?:from\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "transiting [the] {REGION}" / "sailing through [the] {REGION}"
|
||||
re.compile(r"\btransiting\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
re.compile(r"\bsailing\s+through\s+(?:the\s+)?" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
# "is homeported at {LOCATION}"
|
||||
re.compile(r"\bis\s+homeported\s+at\s+" + _LOC_CAPTURE, re.IGNORECASE),
|
||||
)
|
||||
|
||||
|
||||
def _extract_region_for_carrier(
    body: str,
    carrier_names: list[str],
    hull_code: str,
) -> str | None:
    """Return the first location phrase found after a mention of a carrier.

    Algorithm:
      1. Build one case-insensitive, word-bounded regex matching any name
         variant of at least 4 characters, or the hull code.
      2. For each mention in document order, take the 320 characters that
         follow it and cut them at the first sentence break.
      3. Try ``_OPERATING_PATTERNS`` in order against that window and return
         the first captured phrase, with trailing editorial filler removed.

    The first hit in document order wins, even if a later mention would
    match a higher-priority pattern.

    Args:
        body: Plain-text article body.
        carrier_names: Name variants to look for; entries shorter than
            4 characters are ignored.
        hull_code: Hull designation; also accepted as a mention.

    Returns:
        The extracted location phrase, or ``None`` when the body is empty,
        there is nothing to search for, or no pattern matched.
    """
    if not body:
        return None

    # Master mention regex covering every usable name variant + the hull.
    candidates = [re.escape(name) for name in carrier_names if name and len(name) >= 4]
    if hull_code:
        candidates.append(re.escape(hull_code))
    if not candidates:
        return None
    mention_re = re.compile(r"\b(?:" + "|".join(candidates) + r")\b", re.IGNORECASE)

    window_chars = 320
    for mention in mention_re.finditer(body):
        end = mention.end()
        window = body[end : end + window_chars]
        # Cut the window at the FIRST sentence break: terminal punctuation,
        # whitespace, then an uppercase letter. "Norfolk, Va., according..."
        # survives because a comma follows the period, but an abbreviation
        # followed directly by a capitalised word (e.g. "U.S. Navy") also
        # counts as a break and shortens the window.
        sent_break = re.search(r"[\.!?]\s+[A-Z]", window)
        if sent_break:
            window = window[: sent_break.start() + 1]
        # Try patterns in priority order.
        for pat in _OPERATING_PATTERNS:
            m = pat.search(window)
            if not m:
                continue
            phrase = m.group(1).strip().rstrip(",.;: ")
            if not phrase:
                continue
            # Strip trailing editorial filler — USNI often writes
            # "Norfolk, Va., according to ship spotters" or
            # "Yokosuka, Japan, according to..."
            phrase = re.split(
                r",\s+(?:according|as of|for|while|where|in support|in the)",
                phrase,
                maxsplit=1,
            )[0].strip()
            return phrase
    return None
|
||||
|
||||
|
||||
def fetch_latest_fleet_tracker_positions(
    carrier_registry: dict | None = None,
    region_coords: dict | None = None,
) -> dict[str, dict]:
    """Return ``{hull: position_entry}`` for the latest USNI fleet tracker.

    Entries look like::

        {
            "lat": 18.0, "lng": 39.5, "heading": 0,
            "desc": "Red Sea (USNI May 18, 2026)",
            "source": "USNI News Fleet & Marine Tracker (May 18, 2026)",
            "source_url": "https://news.usni.org/2026/05/18/...",
            "position_source_at": "2026-05-18T18:58:44+00:00",
            "position_confidence": "recent",
        }

    Carriers whose section can't be parsed, whose phrase is not a known
    region, or whose region has no coordinates are simply absent from the
    result.

    Args:
        carrier_registry: ``{hull: info}`` where ``info["name"]`` looks like
            ``"USS Gerald R. Ford (CVN-78)"``. ``None`` means "use
            ``services.carrier_tracker.CARRIER_REGISTRY``"; an explicitly
            passed empty dict is respected as-is.
        region_coords: ``{region_key: (lat, lng, ...)}``. ``None`` means "use
            ``services.carrier_tracker.REGION_COORDS``".

    Returns:
        The position dict described above; ``{}`` when no feed item or body
        is available.
    """
    # Only fall back to the module tables for arguments that were omitted.
    # Truthiness-based defaulting would silently replace an empty dict passed
    # in deliberately (e.g. by a test).
    if carrier_registry is None or region_coords is None:
        from services.carrier_tracker import CARRIER_REGISTRY, REGION_COORDS

        if carrier_registry is None:
            carrier_registry = CARRIER_REGISTRY
        if region_coords is None:
            region_coords = REGION_COORDS

    items = _iter_fleet_tracker_items(_RSS_URLS)
    if not items:
        logger.warning("USNI fleet-tracker: no parseable RSS items")
        return {}

    # Pick the most recent by parsed pubDate. Items without a parseable date
    # rank as the epoch, i.e. oldest. ``max`` keeps the first item on ties.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    latest = max(items, key=lambda it: it["pub_date"] or epoch)

    pub_dt: datetime | None = latest["pub_date"]
    pub_iso = pub_dt.isoformat() if pub_dt else ""
    pub_human = pub_dt.strftime("%b %d, %Y") if pub_dt else "unknown date"

    body = latest["body"]
    if not body:
        logger.warning("USNI fleet-tracker: latest item has empty body")
        return {}

    positions: dict[str, dict] = {}
    for hull, info in carrier_registry.items():
        # Build name variants we'll try in the body.
        full_name = str(info.get("name") or "")  # "USS Gerald R. Ford (CVN-78)"
        without_hull = full_name.split("(")[0].strip()  # "USS Gerald R. Ford"
        # Drop the "USS " prefix only when it is really there; blind slicing
        # would mangle names that lack it.
        if without_hull.upper().startswith("USS "):
            ship_only = without_hull[4:].strip()  # "Gerald R. Ford"
        else:
            ship_only = without_hull
        # An empty name yields no variants; the hull code can still match.
        last_word = without_hull.split()[-1] if without_hull else ""  # "Ford"

        # Variants ordered most-specific first.
        variants: list[str] = []
        for v in (without_hull, ship_only, last_word):
            if v and v not in variants and len(v) >= 4:
                variants.append(v)

        phrase = _extract_region_for_carrier(body, variants, hull)
        if not phrase:
            continue
        resolved = _resolve_region_phrase(phrase)
        if not resolved:
            logger.debug(
                "USNI: %s region phrase %r did not match any known region",
                hull, phrase,
            )
            continue
        canonical_key, display_phrase = resolved
        coords = region_coords.get(canonical_key)
        if not coords:
            continue

        positions[hull] = {
            "lat": coords[0],
            "lng": coords[1],
            "heading": 0,
            "desc": f"{display_phrase.title()} (USNI {pub_human})",
            "source": f"USNI News Fleet & Marine Tracker ({pub_human})",
            "source_url": latest["link"],
            "position_source_at": pub_iso,
            "position_confidence": "recent",
        }

    if positions:
        logger.info(
            "USNI fleet-tracker: parsed %d/%d carrier positions from %s",
            len(positions), len(carrier_registry), latest["link"],
        )
    else:
        logger.warning(
            "USNI fleet-tracker: latest article %s yielded zero parseable carriers",
            latest["link"],
        )
    return positions
|
||||
@@ -21,9 +21,17 @@ _cache_lock = threading.Lock()
|
||||
_local_search_cache: List[Dict[str, Any]] | None = None
|
||||
_local_search_lock = threading.Lock()
|
||||
|
||||
_USER_AGENT = os.environ.get(
|
||||
"NOMINATIM_USER_AGENT", "ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)"
|
||||
)
|
||||
# Round 7a: per-install operator handle threads through every Nominatim
|
||||
# call. NOMINATIM_USER_AGENT env override is still honored for operators
|
||||
# who run a custom relay / known good identity, but the default uses the
|
||||
# per-install handle so OpenStreetMap can rate-limit per install instead
|
||||
# of treating "Shadowbroker" as one big offender.
|
||||
def _nominatim_user_agent() -> str:
|
||||
override = os.environ.get("NOMINATIM_USER_AGENT", "").strip()
|
||||
if override:
|
||||
return override
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("nominatim")
|
||||
|
||||
|
||||
def _get_cache(key: str):
|
||||
@@ -178,7 +186,7 @@ def search_geocode(query: str, limit: int = 5, local_only: bool = False) -> List
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _USER_AGENT,
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
@@ -241,7 +249,7 @@ def reverse_geocode(lat: float, lng: float, local_only: bool = False) -> Dict[st
|
||||
res = fetch_with_curl(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": _USER_AGENT,
|
||||
"User-Agent": _nominatim_user_agent(),
|
||||
"Accept-Language": "en",
|
||||
},
|
||||
timeout=6,
|
||||
|
||||
@@ -8,6 +8,13 @@ from datetime import datetime
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
|
||||
|
||||
def _geopolitics_user_agent() -> str:
|
||||
"""Round 7a: GDELT geopolitics fetcher attribution."""
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("geopolitics-gdelt")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
||||
@@ -316,7 +323,7 @@ def _fetch_article_title(url):
|
||||
resp = requests.get(
|
||||
current_url,
|
||||
timeout=4,
|
||||
headers={"User-Agent": "Mozilla/5.0 (compatible; OSINT Dashboard/1.0)"},
|
||||
headers={"User-Agent": _geopolitics_user_agent()},
|
||||
stream=True,
|
||||
allow_redirects=False,
|
||||
)
|
||||
@@ -521,10 +528,29 @@ def _parse_gdelt_export_zip(zip_bytes, conflict_codes, seen_locs, features, loc_
|
||||
logger.warning(f"Failed to parse GDELT export zip: {e}")
|
||||
|
||||
|
||||
# GDELT's data.gdeltproject.org is a CNAME to a Google Cloud Storage
|
||||
# bucket of the same name. GCS returns the wildcard ``*.storage.googleapis.com``
|
||||
# certificate, which legitimately does NOT cover the GDELT custom domain
|
||||
# — Python's TLS verification correctly refuses it. Some networks/POPs
|
||||
# happen to route through a path where this works; many do not (notably
|
||||
# Docker Desktop's outbound NAT on local installs).
|
||||
#
|
||||
# Fix: rewrite the URL to hit GCS directly with a path-style bucket
|
||||
# reference, where the standard GCS cert is genuinely valid. Same data,
|
||||
# verified TLS, no operator-side workaround needed.
|
||||
def _gcs_direct_gdelt_url(url: str) -> str:
|
||||
"""If ``url`` points at data.gdeltproject.org, return the equivalent
|
||||
GCS-direct URL. Otherwise return the URL unchanged."""
|
||||
prefix = "://data.gdeltproject.org/"
|
||||
if prefix in url:
|
||||
return url.replace(prefix, "://storage.googleapis.com/data.gdeltproject.org/", 1)
|
||||
return url
|
||||
|
||||
|
||||
def _download_gdelt_export(url):
|
||||
"""Download a single GDELT export file, return bytes or None."""
|
||||
try:
|
||||
res = fetch_with_curl(url, timeout=15)
|
||||
res = fetch_with_curl(_gcs_direct_gdelt_url(url), timeout=15)
|
||||
if res.status_code == 200:
|
||||
return res.content
|
||||
except (ConnectionError, TimeoutError, OSError): # non-critical
|
||||
@@ -620,8 +646,12 @@ def fetch_global_military_incidents():
|
||||
# HTTPS is used to prevent passive network observers from injecting
|
||||
# poisoned export records into the global incident map via MITM.
|
||||
# GDELT serves the same content over HTTPS as HTTP.
|
||||
# Use the GCS-direct URL because data.gdeltproject.org's CNAME
|
||||
# serves a wildcard *.storage.googleapis.com cert that legitimately
|
||||
# doesn't cover the GDELT hostname. See _gcs_direct_gdelt_url above.
|
||||
index_res = fetch_with_curl(
|
||||
"https://data.gdeltproject.org/gdeltv2/lastupdate.txt", timeout=10
|
||||
_gcs_direct_gdelt_url("https://data.gdeltproject.org/gdeltv2/lastupdate.txt"),
|
||||
timeout=10,
|
||||
)
|
||||
if index_res.status_code != 200:
|
||||
logger.error(f"GDELT lastupdate failed: {index_res.status_code}")
|
||||
|
||||
@@ -317,6 +317,39 @@ class DMRelay:
|
||||
def _self_mailbox_limit(self) -> int:
|
||||
return max(1, int(self._settings().MESH_DM_SELF_MAILBOX_LIMIT))
|
||||
|
||||
def _per_sender_pending_limit(self) -> int:
|
||||
"""Anti-spam cap on UNACKED messages a single sender can have parked
|
||||
in a single recipient mailbox at any one time. See ``config.py``
|
||||
``MESH_DM_PENDING_PER_SENDER_LIMIT`` for the threat model — this
|
||||
rule is enforced both at ``deposit`` (local) and at
|
||||
``accept_replica`` (peer push acceptance), making it a network
|
||||
rule rather than a client-side honor system."""
|
||||
try:
|
||||
limit = int(getattr(self._settings(), "MESH_DM_PENDING_PER_SENDER_LIMIT", 2) or 2)
|
||||
except (TypeError, ValueError):
|
||||
limit = 2
|
||||
return max(1, limit)
|
||||
|
||||
def _per_sender_pending_count(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
sender_block_ref: str,
|
||||
) -> int:
|
||||
"""Count UNACKED messages from ``sender_block_ref`` currently parked
|
||||
in ``mailbox_key``. Caller already holds ``self._lock``.
|
||||
|
||||
Messages that have been claimed/acked are removed from the mailbox
|
||||
list (see ``claim_message_ids``), so anything still here is by
|
||||
definition unacked. We count by exact ``sender_block_ref`` match
|
||||
— that's the per-pair sender identity used for blocking too, so
|
||||
the cap is naturally per-(sender, recipient).
|
||||
"""
|
||||
if not mailbox_key or not sender_block_ref:
|
||||
return 0
|
||||
messages = self._mailboxes.get(mailbox_key, [])
|
||||
return sum(1 for m in messages if m.sender_block_ref == sender_block_ref)
|
||||
|
||||
def _nonce_ttl_seconds(self) -> int:
|
||||
return max(30, int(self._settings().MESH_DM_NONCE_TTL_S))
|
||||
|
||||
@@ -1515,6 +1548,29 @@ class DMRelay:
|
||||
if len(self._mailboxes[mailbox_key]) >= self._mailbox_limit_for_class(delivery_class):
|
||||
metrics_inc("dm_drop_full")
|
||||
return {"ok": False, "detail": "Recipient mailbox full"}
|
||||
# Anti-spam: per-(sender, recipient) cap on unacked messages.
|
||||
# A sender who already has the configured number of messages
|
||||
# parked in this mailbox can't deposit more until the recipient
|
||||
# pulls (acks) at least one. The same cap is re-enforced on
|
||||
# inbound replication in ``accept_replica`` so this rule isn't
|
||||
# bypassable by patching out the local check on a hostile
|
||||
# sender's relay — see config.py
|
||||
# MESH_DM_PENDING_PER_SENDER_LIMIT for the threat model.
|
||||
per_sender_limit = self._per_sender_pending_limit()
|
||||
pending = self._per_sender_pending_count(
|
||||
mailbox_key=mailbox_key,
|
||||
sender_block_ref=sender_block_ref,
|
||||
)
|
||||
if pending >= per_sender_limit:
|
||||
metrics_inc("dm_drop_per_sender_cap")
|
||||
return {
|
||||
"ok": False,
|
||||
"detail": (
|
||||
f"Recipient already has {pending} unread message"
|
||||
f"{'s' if pending != 1 else ''} from you. Wait for "
|
||||
"them to read your messages before sending more."
|
||||
),
|
||||
}
|
||||
if not msg_id:
|
||||
msg_id = f"dm_{int(time.time() * 1000)}_{secrets.token_hex(6)}"
|
||||
elif any(m.msg_id == msg_id for m in self._mailboxes[mailbox_key]):
|
||||
@@ -1539,8 +1595,245 @@ class DMRelay:
|
||||
)
|
||||
self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
|
||||
self._save()
|
||||
# Cross-node mailbox replication: push the freshly-stored
|
||||
# envelope to every authenticated relay peer so the recipient
|
||||
# can log into ANY node and find their messages. The push is
|
||||
# async (fire-and-forget thread) so deposit() returns
|
||||
# immediately — slow Tor peers can't block the sender's UX.
|
||||
# Each receiving peer re-enforces the per-sender cap on
|
||||
# acceptance, so hostile relays can't widen the cap.
|
||||
try:
|
||||
envelope_for_push = self.envelope_for_replication(
|
||||
mailbox_key=mailbox_key, msg_id=msg_id,
|
||||
)
|
||||
if envelope_for_push:
|
||||
self._replicate_envelope_to_peers_async(
|
||||
envelope=envelope_for_push,
|
||||
)
|
||||
except Exception:
|
||||
metrics_inc("dm_replication_push_error")
|
||||
return {"ok": True, "msg_id": msg_id}
|
||||
|
||||
def accept_replica(
    self,
    *,
    envelope: dict[str, Any],
    originating_peer_url: str = "",
) -> dict[str, Any]:
    """Ingest a DM envelope that a peer relay replicated to this node.

    This is the receiving side of cross-node mailbox replication: after
    a sender's relay accepts a ``deposit`` it pushes the envelope to its
    peers, and each peer stores it through this method so the recipient
    can poll any node.

    The mailbox-size limit and the per-(sender, recipient) pending cap
    are both re-checked here, so a relay that skips its own ``deposit``
    checks still cannot get extra messages accepted by this node.

    Args:
        envelope: Wire-form message as produced by
            ``envelope_for_replication``. ``msg_id``, ``mailbox_key``,
            ``sender_block_ref`` and ``ciphertext`` are required; the
            remaining fields fall back to defaults.
        originating_peer_url: Accepted for interface compatibility; this
            method does not read it.

    Returns:
        ``{"ok": True, "msg_id": ...}`` on acceptance (with
        ``"duplicate": True`` when the id was already stored), or
        ``{"ok": False, "detail": ...}`` on rejection. A per-sender cap
        rejection additionally carries ``cap_violation``, ``pending``
        and ``limit``.
    """
    if not isinstance(envelope, dict):
        return {"ok": False, "detail": "envelope must be an object"}
    msg_id = str(envelope.get("msg_id", "") or "").strip()
    mailbox_key = str(envelope.get("mailbox_key", "") or "").strip()
    sender_block_ref = str(envelope.get("sender_block_ref", "") or "").strip()
    ciphertext = str(envelope.get("ciphertext", "") or "")
    if not msg_id or not mailbox_key or not sender_block_ref or not ciphertext:
        return {"ok": False, "detail": "envelope missing required fields"}

    with self._lock:
        self._refresh_from_shared_relay()
        self._cleanup_expired()

        # Idempotent: the same envelope may arrive over several paths or
        # round-trip back to us. Accept silently without storing twice.
        if any(m.msg_id == msg_id for m in self._mailboxes.get(mailbox_key, [])):
            metrics_inc("dm_replica_duplicate")
            return {"ok": True, "msg_id": msg_id, "duplicate": True}

        # Same per-class mailbox limit as the deposit path. Unknown or
        # missing classes are sized with the shared-mailbox limit.
        delivery_class = str(envelope.get("delivery_class", "") or "")
        if delivery_class in ("request", "shared", "self"):
            class_limit = self._mailbox_limit_for_class(delivery_class)
        else:
            class_limit = self._shared_mailbox_limit()
        if len(self._mailboxes.get(mailbox_key, [])) >= class_limit:
            metrics_inc("dm_replica_drop_full")
            return {"ok": False, "detail": "Recipient mailbox full"}

        # Per-(sender, recipient) anti-spam cap, enforced on replicas too.
        per_sender_limit = self._per_sender_pending_limit()
        pending = self._per_sender_pending_count(
            mailbox_key=mailbox_key,
            sender_block_ref=sender_block_ref,
        )
        if pending >= per_sender_limit:
            metrics_inc("dm_replica_drop_per_sender_cap")
            # Structured rejection so the pushing relay can tell a cap
            # violation apart from other failures.
            return {
                "ok": False,
                "detail": (
                    "Per-sender cap reached on this relay; refusing replica"
                ),
                "cap_violation": True,
                "pending": pending,
                "limit": per_sender_limit,
            }

        # The timestamp is peer-supplied and therefore untrusted. A
        # missing/falsy value has always meant "now"; a value that is not
        # a finite number is treated the same way instead of raising out
        # of the replication endpoint.
        now = time.time()
        raw_timestamp = envelope.get("timestamp")
        try:
            timestamp = float(raw_timestamp) if raw_timestamp else now
        except (TypeError, ValueError, OverflowError):
            timestamp = now
        if timestamp != timestamp or timestamp in (float("inf"), float("-inf")):
            timestamp = now
        # NOTE(review): a far-future timestamp is still accepted as-is;
        # confirm how _cleanup_expired treats such messages.

        # Accept the replica into the local mailbox.
        self._mailboxes[mailbox_key].append(
            DMMessage(
                sender_id=str(envelope.get("sender_id", "") or ""),
                ciphertext=ciphertext,
                timestamp=timestamp,
                msg_id=msg_id,
                delivery_class=str(envelope.get("delivery_class", "shared") or "shared"),
                sender_seal=str(envelope.get("sender_seal", "") or ""),
                relay_salt=str(envelope.get("relay_salt", "") or ""),
                sender_block_ref=sender_block_ref,
                payload_format=str(envelope.get("payload_format", "dm1") or "dm1"),
                session_welcome=str(envelope.get("session_welcome", "") or ""),
            )
        )
        self._stats["messages_in_memory"] = sum(len(v) for v in self._mailboxes.values())
        self._save()
        metrics_inc("dm_replica_accepted")
        return {"ok": True, "msg_id": msg_id}
|
||||
|
||||
def _replicate_envelope_to_peers_async(
    self,
    *,
    envelope: dict[str, Any],
) -> None:
    """Push ``envelope`` to every relay peer from a background thread.

    Fire-and-forget: a daemon thread named ``dm-replicate-push`` is
    started and this method returns immediately, so ``deposit`` never
    waits on a slow peer. Nothing is raised to the caller; outcomes are
    recorded only as metrics (``dm_replication_push_ok`` for HTTP 200,
    ``dm_replication_push_rejected`` for any other status,
    ``dm_replication_push_error`` for exceptions).

    The peer list comes from ``authenticated_push_peer_urls()``. When a
    key is resolved for a peer, the request carries ``X-Peer-Url`` and
    an HMAC-SHA256 of the JSON body in ``X-Peer-HMAC``; when no key is
    resolved the request is still sent, without those headers.
    """
    import threading

    def _push_to_all_peers():
        try:
            import hashlib
            import hmac
            import requests as _requests

            from services.mesh.mesh_crypto import (
                normalize_peer_url,
                resolve_peer_key_for_url,
            )
            from services.mesh.mesh_router import (
                authenticated_push_peer_urls,
            )

            targets = authenticated_push_peer_urls()
            if not targets:
                return

            # Serialise once; the exact bytes are both sent and signed.
            body = json.dumps(
                {"envelope": envelope},
                separators=(",", ":"),
                ensure_ascii=False,
            ).encode("utf-8")

            configured_timeout = getattr(
                self._settings(), "MESH_RELAY_PUSH_TIMEOUT_S", 10
            )
            timeout_s = max(1, int(configured_timeout or 10))

            for target in targets:
                try:
                    canonical = normalize_peer_url(target)
                    request_headers = {"Content-Type": "application/json"}
                    signing_key = resolve_peer_key_for_url(canonical)
                    if signing_key:
                        request_headers["X-Peer-Url"] = canonical
                        request_headers["X-Peer-HMAC"] = hmac.new(
                            signing_key, body, hashlib.sha256
                        ).hexdigest()
                    endpoint = f"{target}/api/mesh/dm/replicate-envelope"
                    response = _requests.post(
                        endpoint,
                        data=body,
                        timeout=timeout_s,
                        headers=request_headers,
                    )
                    # Any non-200 answer is counted as a rejection.
                    if response.status_code == 200:
                        metrics_inc("dm_replication_push_ok")
                    else:
                        metrics_inc("dm_replication_push_rejected")
                except Exception:
                    # One failing peer must not stop delivery to the rest.
                    metrics_inc("dm_replication_push_error")
        except Exception:
            # Outer guard: setup failures (imports, serialisation,
            # settings) are counted and never escape the thread.
            metrics_inc("dm_replication_push_error")

    worker = threading.Thread(
        target=_push_to_all_peers,
        name="dm-replicate-push",
        daemon=True,
    )
    worker.start()
|
||||
|
||||
def envelope_for_replication(
|
||||
self,
|
||||
*,
|
||||
mailbox_key: str,
|
||||
msg_id: str,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Return the wire-form envelope for a stored message, suitable
|
||||
for POSTing to a peer relay's replicate-envelope endpoint.
|
||||
|
||||
Returns ``None`` if the message isn't in the mailbox (already
|
||||
acked, expired, never existed). The caller holds the
|
||||
responsibility for transport security (Tor SOCKS for .onion
|
||||
peers, per-peer HMAC) and for not leaking the envelope to
|
||||
clearnet peers when private transport is required.
|
||||
"""
|
||||
with self._lock:
|
||||
for m in self._mailboxes.get(mailbox_key, []):
|
||||
if m.msg_id == msg_id:
|
||||
return {
|
||||
"msg_id": m.msg_id,
|
||||
"mailbox_key": mailbox_key,
|
||||
"sender_id": m.sender_id,
|
||||
"sender_block_ref": m.sender_block_ref,
|
||||
"sender_seal": m.sender_seal,
|
||||
"ciphertext": m.ciphertext,
|
||||
"timestamp": m.timestamp,
|
||||
"delivery_class": m.delivery_class,
|
||||
"relay_salt": m.relay_salt,
|
||||
"payload_format": m.payload_format,
|
||||
"session_welcome": m.session_welcome,
|
||||
}
|
||||
return None
|
||||
|
||||
def is_blocked(self, recipient_id: str, sender_id: str) -> bool:
|
||||
with self._lock:
|
||||
self._refresh_from_shared_relay()
|
||||
|
||||
@@ -2,10 +2,64 @@ from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from email.utils import parsedate_to_datetime
|
||||
from datetime import timezone
|
||||
|
||||
from services.mesh.mesh_peer_store import PeerRecord
|
||||
|
||||
|
||||
class PeerSyncRateLimited(Exception):
    """Raised when an upstream peer answers with HTTP 429.

    The exception carries the parsed ``Retry-After`` delay so the caller
    can hand it to ``finish_sync(retry_after_s=...)`` and wait at least
    that long before the next attempt.

    Attributes:
        retry_after_s: Non-negative delay in seconds; 0 means the peer
            supplied no usable header and the caller should rely on its
            own backoff.
        status: HTTP status code, 429 unless another truthy value is
            given.
    """

    def __init__(self, message: str, retry_after_s: int = 0, status: int = 429):
        super().__init__(message)
        # Falsy values (None, 0, "") collapse to the defaults.
        wait_s = int(retry_after_s) if retry_after_s else 0
        self.retry_after_s = wait_s if wait_s > 0 else 0
        self.status = int(status) if status else 429
|
||||
|
||||
|
||||
def parse_retry_after_header(header_value: str, *, now: float | None = None) -> int:
    """Parse an HTTP ``Retry-After`` header into seconds to wait from now.

    Two forms are accepted:

    * Delay-seconds: a non-negative decimal integer (``Retry-After: 120``)
    * HTTP-date: an absolute time
      (``Retry-After: Wed, 21 Oct 2026 07:28:00 GMT``)

    Args:
        header_value: Raw header text; ``None`` and empty strings are
            treated as "no header".
        now: Reference time as a Unix timestamp, used for the HTTP-date
            form. Defaults to ``time.time()``.

    Returns:
        Seconds to wait, clamped to ``0..3600``. Empty or unparseable
        input returns 0 so the caller falls back to its own backoff.
        This function never raises on malformed header text, because
        the value is supplied by a remote peer.
    """
    value = str(header_value or "").strip()
    if not value:
        return 0
    upper_bound = 3600  # never let a peer silence us for more than 1h

    # Form 1: delay-seconds. ``isascii`` is required because
    # ``str.isdigit`` also accepts characters such as "²" that ``int``
    # rejects with ValueError.
    if value.isascii() and value.isdigit():
        significant = value.lstrip("0")
        # More significant digits than the bound has means the value is
        # above the bound. Deciding on length avoids calling ``int`` on
        # an arbitrarily long digit string, which newer Python versions
        # refuse with ValueError.
        if len(significant) > len(str(upper_bound)):
            return upper_bound
        return min(int(significant or "0"), upper_bound)

    # Form 2: HTTP-date.
    try:
        target = parsedate_to_datetime(value)
        if target is None:
            return 0
        if target.tzinfo is None:
            # A date without zone information is interpreted as UTC.
            target = target.replace(tzinfo=timezone.utc)
        current = float(now if now is not None else time.time())
        delta = int(target.timestamp() - current)
        return min(max(0, delta), upper_bound)
    except (TypeError, ValueError, OverflowError):
        return 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SyncWorkerState:
|
||||
last_sync_started_at: int = 0
|
||||
@@ -72,6 +126,59 @@ def begin_sync(
|
||||
)
|
||||
|
||||
|
||||
def _failure_backoff_seconds(
|
||||
*,
|
||||
base_backoff_s: int,
|
||||
consecutive_failures: int,
|
||||
retry_after_s: int,
|
||||
cap_s: int = 1800,
|
||||
) -> int:
|
||||
"""Compute the next-attempt delay after a failed sync.
|
||||
|
||||
Two inputs combine:
|
||||
|
||||
* ``retry_after_s`` — when an upstream peer answered HTTP 429
|
||||
with a ``Retry-After`` header, we honor it exactly. Continuing
|
||||
to hammer the upstream every 60s is the bug this fix exists to
|
||||
close: it keeps the upstream's rate-limit bucket full
|
||||
indefinitely and no sync ever lands.
|
||||
|
||||
* Exponential growth on ``consecutive_failures`` — even without an
|
||||
explicit Retry-After, repeated failures should slow us down. The
|
||||
first failure waits ``base`` (preserves pre-fix behavior for
|
||||
one-off blips). Each subsequent failure doubles the wait, capped
|
||||
to ``cap_s`` (default 30 minutes). With base=60 and cap=1800,
|
||||
the schedule is 60s → 120s → 240s → 480s → 960s → 1800s →
|
||||
1800s → … .
|
||||
|
||||
The actual delay is the MAX of the two — whichever asks for more
|
||||
patience wins. ``retry_after_s == 0`` (no header) falls back to
|
||||
pure exponential. An aggressive ``Retry-After`` (say 600s while
|
||||
we're only at 1 failure) wins over the exponential ladder.
|
||||
"""
|
||||
base = max(0, int(base_backoff_s or 0))
|
||||
failures = max(0, int(consecutive_failures or 0))
|
||||
cap = max(0, int(cap_s or 0))
|
||||
retry_after = max(0, int(retry_after_s or 0))
|
||||
# ``cap_s=0`` explicitly disables the exponential ladder entirely
|
||||
# — operators who want the pre-fix "honor Retry-After only" behavior
|
||||
# can set this. The default cap of 1800s is what saturates the
|
||||
# ladder at the 5th-6th failure for base=60.
|
||||
if cap == 0:
|
||||
return retry_after
|
||||
# 2^(failures-1) — so failure #1 = base (preserves the pre-fix
|
||||
# default for transient blips), failure #2 = 2*base, etc. Cap on
|
||||
# the exponent (16) is defense against integer overflow on a
|
||||
# hostile or very large failures counter.
|
||||
if base > 0 and failures > 0:
|
||||
exponent = min(max(0, failures - 1), 16)
|
||||
grown = base * (2 ** exponent)
|
||||
else:
|
||||
grown = 0
|
||||
exponential = min(max(0, grown), cap)
|
||||
return max(exponential, retry_after)
|
||||
|
||||
|
||||
def finish_sync(
|
||||
state: SyncWorkerState,
|
||||
*,
|
||||
@@ -83,7 +190,26 @@ def finish_sync(
|
||||
now: float | None = None,
|
||||
interval_s: int = 300,
|
||||
failure_backoff_s: int = 60,
|
||||
retry_after_s: int = 0,
|
||||
failure_backoff_cap_s: int = 1800,
|
||||
) -> SyncWorkerState:
|
||||
"""Finalise a sync attempt and compute when the next one should run.
|
||||
|
||||
New args (added for the 429 retry storm fix):
|
||||
|
||||
* ``retry_after_s`` — if the peer responded with HTTP 429 + a
|
||||
``Retry-After`` header, pass that value here. ``finish_sync``
|
||||
will use ``max(exponential, retry_after_s)`` for the delay so
|
||||
we never hammer a peer that asked us to back off.
|
||||
* ``failure_backoff_cap_s`` — upper bound on the exponential
|
||||
ladder. Default 1800 (30 min) — keeps a sync queue from going
|
||||
silent for hours while still cutting the request rate to
|
||||
something the upstream can absorb.
|
||||
|
||||
The pre-fix behavior (constant 60s on every failure) is recoverable
|
||||
by passing ``failure_backoff_cap_s=0`` and ``retry_after_s=0``, but
|
||||
there's no reason to.
|
||||
"""
|
||||
timestamp = int(now if now is not None else time.time())
|
||||
if ok:
|
||||
return SyncWorkerState(
|
||||
@@ -99,17 +225,25 @@ def finish_sync(
|
||||
consecutive_failures=0,
|
||||
)
|
||||
|
||||
next_failures = state.consecutive_failures + 1
|
||||
delay_s = _failure_backoff_seconds(
|
||||
base_backoff_s=failure_backoff_s,
|
||||
consecutive_failures=next_failures,
|
||||
retry_after_s=retry_after_s,
|
||||
cap_s=failure_backoff_cap_s,
|
||||
)
|
||||
|
||||
return SyncWorkerState(
|
||||
last_sync_started_at=state.last_sync_started_at,
|
||||
last_sync_finished_at=timestamp,
|
||||
last_sync_ok_at=state.last_sync_ok_at,
|
||||
next_sync_due_at=timestamp + max(0, int(failure_backoff_s or 0)),
|
||||
next_sync_due_at=timestamp + delay_s,
|
||||
last_peer_url=peer_url or state.last_peer_url,
|
||||
last_error=str(error or "").strip(),
|
||||
last_outcome="fork" if fork_detected else "error",
|
||||
current_head=current_head or state.current_head,
|
||||
fork_detected=bool(fork_detected),
|
||||
consecutive_failures=state.consecutive_failures + 1,
|
||||
consecutive_failures=next_failures,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,9 @@ import subprocess
|
||||
import shutil
|
||||
import time
|
||||
import threading
|
||||
import uuid
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
@@ -20,14 +22,211 @@ _session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
|
||||
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
||||
|
||||
|
||||
# Default outbound User-Agent. Generic by design — does NOT include any
|
||||
# personal contact info or a fork-specific repo URL. Operators who run a
|
||||
# public-facing relay and want to identify themselves to upstreams (e.g.
|
||||
# for Nominatim / weather.gov usage-policy compliance) can override this
|
||||
# via the SHADOWBROKER_USER_AGENT env var.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-operator outbound identification
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Issues #289 / #290 / #291 and the retrofit of PR #284 (#218 / #219 / #220):
|
||||
# every third-party API the backend calls used to identify itself with a
|
||||
# single "Shadowbroker" aggregate User-Agent. From the upstream's
|
||||
# perspective, that meant every Shadowbroker install in the world looked
|
||||
# like one giant entity hammering them. If one install misbehaved, the
|
||||
# upstream's only recourse was to block "Shadowbroker" as a whole — which
|
||||
# would take out every other install too.
|
||||
#
|
||||
# Fix: give each install a stable pseudonymous handle and include it in
|
||||
# the User-Agent. Now an upstream can rate-limit or block the offending
|
||||
# operator without affecting anyone else.
|
||||
#
|
||||
# The handle:
|
||||
#
|
||||
# - Is auto-generated on first call if no `OPERATOR_HANDLE` is configured
|
||||
# (looks like "operator-7f3a92" — 6 hex chars from uuid4()).
|
||||
# - Is persisted to ``backend/data/operator_handle.json`` so it survives
|
||||
# restarts. Under Docker compose that file lives in the volume mount
|
||||
# alongside `carrier_cache.json` and the other persistent state.
|
||||
# - Can be overridden by the operator via the `OPERATOR_HANDLE` setting
|
||||
# (env var or settings UI). Operators with their own GitHub handle,
|
||||
# organization name, etc. can use that for traceability.
|
||||
# - Is NEVER mixed into mesh / Wormhole / Infonet identity. This layer is
|
||||
# strictly for public third-party API attribution.
|
||||
|
||||
_SHADOWBROKER_VERSION = "0.9"
|
||||
_OPERATOR_HANDLE_FILE = (
|
||||
Path(__file__).parent.parent / "data" / "operator_handle.json"
|
||||
)
|
||||
_OPERATOR_HANDLE_CACHE: str = ""
|
||||
_OPERATOR_HANDLE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _generate_operator_handle() -> str:
|
||||
"""Produce a stable pseudonymous handle for first-launch installs.
|
||||
|
||||
Format: ``operator-7f3a92`` (6 hex chars from a fresh uuid4()).
|
||||
Distinct per install. Carries no real-world identity by default —
|
||||
operators who want one can override via ``OPERATOR_HANDLE``.
|
||||
|
||||
Note: the prefix is deliberately neutral. Earlier drafts used
|
||||
``shadow-`` which, while accurate to the project name, looks
|
||||
exactly like the kind of pattern a third-party abuse-detection
|
||||
system would auto-block as suspicious. ``operator-`` describes
|
||||
what the value actually is and doesn't pattern-match malware.
|
||||
"""
|
||||
return f"operator-{uuid.uuid4().hex[:6]}"
|
||||
|
||||
|
||||
def _load_persisted_operator_handle() -> str:
    """Return the handle saved in ``operator_handle.json``, or ``""``.

    Any problem reading the file yields an empty string so the caller
    generates a fresh handle instead of failing the request. That covers
    a missing or unreadable file, invalid UTF-8, invalid JSON, and JSON
    that parses but is not an object (for example a bare list or
    string), which would otherwise fail on ``.get``.
    """
    try:
        if not _OPERATOR_HANDLE_FILE.exists():
            return ""
        data = json.loads(_OPERATOR_HANDLE_FILE.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, ValueError):
        # ValueError also covers UnicodeDecodeError from read_text.
        return ""
    if not isinstance(data, dict):
        # Well-formed JSON of the wrong shape: treat as "nothing saved".
        return ""
    return str(data.get("handle", "") or "").strip()
|
||||
|
||||
|
||||
def _persist_operator_handle(handle: str) -> None:
    """Save ``handle`` to ``operator_handle.json`` via write-then-rename.

    The JSON document is first written to a sibling ``.tmp`` file and
    then moved over the real path with ``os.replace``, so a reader never
    sees a half-written file. An ``OSError`` at any step is logged at
    debug level and swallowed: the process keeps using its in-memory
    handle and may generate a different one after a restart.
    """
    destination = _OPERATOR_HANDLE_FILE
    try:
        destination.parent.mkdir(parents=True, exist_ok=True)
        staging = destination.with_suffix(destination.suffix + ".tmp")
        document = {
            "handle": handle,
            "_meta": {
                "purpose": "Per-install operator handle for outbound third-party API attribution.",
                "see": "backend/services/network_utils.py:outbound_user_agent",
            },
        }
        staging.write_text(json.dumps(document, indent=2), encoding="utf-8")
        os.replace(staging, destination)
    except OSError as exc:
        logger.debug("Could not persist operator_handle (continuing in-memory): %s", exc)
|
||||
|
||||
|
||||
def get_operator_handle() -> str:
    """Return the per-install operator handle used for outbound requests.

    Sources are tried in this order, all under ``_OPERATOR_HANDLE_LOCK``:

    1. The ``OPERATOR_HANDLE`` setting, when non-empty. It is normalized
       and returned without touching the cache.
    2. The value cached in this process by an earlier call.
    3. The value stored in ``operator_handle.json``, normalized and then
       cached.
    4. A newly generated handle, which is written to disk and cached.

    Failure to load the settings module is treated as "no override".
    """
    global _OPERATOR_HANDLE_CACHE
    with _OPERATOR_HANDLE_LOCK:
        # An explicit override always wins and is re-read on every call.
        try:
            from services.config import get_settings

            override = str(getattr(get_settings(), "OPERATOR_HANDLE", "") or "").strip()
        except Exception:
            override = ""
        if override:
            return _normalize_handle(override)

        if not _OPERATOR_HANDLE_CACHE:
            saved = _load_persisted_operator_handle()
            if saved:
                # Reuse the handle from a previous run.
                _OPERATOR_HANDLE_CACHE = _normalize_handle(saved)
            else:
                # First launch: mint a handle and try to remember it.
                generated = _generate_operator_handle()
                _persist_operator_handle(generated)
                _OPERATOR_HANDLE_CACHE = generated
        return _OPERATOR_HANDLE_CACHE
|
||||
|
||||
|
||||
def _normalize_handle(raw: str) -> str:
|
||||
"""Strip whitespace, lowercase, replace unsafe characters with dashes."""
|
||||
safe = "".join(
|
||||
ch if (ch.isalnum() or ch in "-_") else "-"
|
||||
for ch in raw.strip().lower()
|
||||
)
|
||||
# Collapse runs of dashes and trim to a reasonable length so an
|
||||
# operator can't make our outbound logs unreadable.
|
||||
while "--" in safe:
|
||||
safe = safe.replace("--", "-")
|
||||
safe = safe.strip("-")
|
||||
return safe[:48] if safe else "anonymous"
|
||||
|
||||
|
||||
_CONTACT_URL = "https://github.com/BigBodyCobain/Shadowbroker/issues"
|
||||
|
||||
|
||||
def outbound_user_agent(purpose: str = "") -> str:
    """Build the User-Agent for an outbound third-party HTTP request.

    With a purpose the result looks like::

        Shadowbroker/0.9 (operator: operator-7f3a92; purpose: wikipedia; +https://github.com/BigBodyCobain/Shadowbroker/issues)

    and without one the ``purpose: ...;`` part is left out.

    Args:
        purpose: Optional short label for the feature making the call
            (``wikipedia``, ``openmhz``, ``nominatim``, ...). It is
            passed through ``_normalize_handle`` before use, so it is
            lowercased and restricted to header-safe characters.

    Returns:
        The User-Agent string, containing the project version, the
        per-install operator handle from ``get_operator_handle()`` and
        the project contact URL.

    Keeping this in one helper means the contact URL and version are
    changed in one place, and an upstream can rate-limit or block a
    single install by its handle rather than the whole project.
    """
    handle = get_operator_handle()
    details = [f"operator: {handle}"]
    if purpose:
        details.append(f"purpose: {_normalize_handle(purpose)}")
    details.append(f"+{_CONTACT_URL}")
    return f"Shadowbroker/{_SHADOWBROKER_VERSION} ({'; '.join(details)})"
|
||||
|
||||
|
||||
def _reset_operator_handle_cache_for_tests() -> None:
    """Test helper: clear the process-level operator-handle cache.

    After this call the next ``get_operator_handle()`` resolves the
    handle from settings or disk again instead of reusing the cached
    value. The cache is cleared while holding ``_OPERATOR_HANDLE_LOCK``.
    """
    global _OPERATOR_HANDLE_CACHE
    _OPERATOR_HANDLE_LOCK.acquire()
    try:
        _OPERATOR_HANDLE_CACHE = ""
    finally:
        _OPERATOR_HANDLE_LOCK.release()
|
||||
|
||||
|
||||
# Default outbound User-Agent. Retained for backwards compatibility with
|
||||
# call sites that haven't been migrated to ``outbound_user_agent()`` yet.
|
||||
# Operators who want full per-install attribution should set the
|
||||
# ``OPERATOR_HANDLE`` setting and migrate call sites incrementally.
|
||||
#
|
||||
# Operators who run a public-facing relay can also override the whole UA
|
||||
# string via the ``SHADOWBROKER_USER_AGENT`` env var. That override
|
||||
# completely bypasses the per-operator helper; only use it if you know
|
||||
# what you're doing.
|
||||
DEFAULT_USER_AGENT = os.environ.get(
|
||||
"SHADOWBROKER_USER_AGENT",
|
||||
"ShadowBroker-OSINT/0.9",
|
||||
f"Shadowbroker/{_SHADOWBROKER_VERSION}",
|
||||
)
|
||||
|
||||
# Find bash for curl fallback — Git bash's curl has the TLS features
|
||||
|
||||
@@ -2,14 +2,34 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import logging
|
||||
from cachetools import cached, TTLCache
|
||||
import cloudscraper
|
||||
import reverse_geocoder as rg
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_OPENMHZ_AUDIO_HOSTS = {"media.openmhz.com", "media2.openmhz.com", "media3.openmhz.com"}
|
||||
|
||||
|
||||
# Round 7a / Issues #289, #290, #291 (tg12 audit):
|
||||
# We previously sent a spoofed Chrome User-Agent and (for OpenMHz) used
|
||||
# cloudscraper to bypass anti-bot challenges. Both are dishonest and ToS-
|
||||
# unfriendly. We now send the per-install Shadowbroker UA — the upstream
|
||||
# can identify us, rate-limit us per install, and contact us if needed.
|
||||
#
|
||||
# If the upstream actively blocks our honest UA, the feature degrades
|
||||
# gracefully (returns an empty list / cached results) rather than
|
||||
# escalating to deception.
|
||||
|
||||
|
||||
def _broadcastify_user_agent() -> str:
    """Return the per-install User-Agent tagged with purpose ``broadcastify``."""
    user_agent = outbound_user_agent(purpose="broadcastify")
    return user_agent
|
||||
|
||||
|
||||
def _openmhz_user_agent() -> str:
    """Return the per-install User-Agent tagged with purpose ``openmhz``."""
    user_agent = outbound_user_agent(purpose="openmhz")
    return user_agent
|
||||
|
||||
# Cache the top feeds for 5 minutes so we don't hammer Broadcastify
|
||||
radio_cache = TTLCache(maxsize=1, ttl=300)
|
||||
|
||||
@@ -22,8 +42,12 @@ def get_top_broadcastify_feeds():
|
||||
"""
|
||||
logger.info("Scraping Broadcastify Top Feeds (Cache Miss)")
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
||||
# Issue #289 (tg12) + Round 7a: identify ourselves honestly as a
|
||||
# per-install Shadowbroker scraper. Broadcastify can rate-limit
|
||||
# us per install or block us; either way we stop pretending to be
|
||||
# a browser. If they block, the panel degrades gracefully.
|
||||
"User-Agent": _broadcastify_user_agent(),
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
}
|
||||
|
||||
@@ -89,21 +113,32 @@ openmhz_systems_cache = TTLCache(maxsize=1, ttl=3600)
|
||||
|
||||
@cached(openmhz_systems_cache)
|
||||
def get_openmhz_systems():
|
||||
"""Fetches the full directory of OpenMHZ systems."""
|
||||
logger.info("Scraping OpenMHZ Systems (Cache Miss)")
|
||||
scraper = cloudscraper.create_scraper(
|
||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
||||
)
|
||||
"""Fetches the full directory of OpenMHZ systems.
|
||||
|
||||
Issue #290 (tg12) + Round 7a: replaced cloudscraper-based Chrome
|
||||
impersonation with an honest per-install Shadowbroker User-Agent.
|
||||
If OpenMHz's Cloudflare layer blocks honest traffic, we accept
|
||||
that degradation (return empty list) rather than spoof a browser.
|
||||
"""
|
||||
logger.info("Fetching OpenMHZ Systems (Cache Miss)")
|
||||
try:
|
||||
res = scraper.get("https://api.openmhz.com/systems", timeout=15)
|
||||
res = requests.get(
|
||||
"https://api.openmhz.com/systems",
|
||||
timeout=15,
|
||||
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
# Return list of systems
|
||||
return data.get("systems", []) if isinstance(data, dict) else []
|
||||
if res.status_code in (403, 503):
|
||||
logger.warning(
|
||||
"OpenMHZ returned %s for systems directory — Cloudflare may "
|
||||
"be blocking our honest UA. Feature degrades to empty result.",
|
||||
res.status_code,
|
||||
)
|
||||
return []
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"OpenMHZ Systems Scrape Exception: {e}")
|
||||
logger.error(f"OpenMHZ Systems Fetch Exception: {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -113,21 +148,25 @@ openmhz_calls_cache = TTLCache(maxsize=100, ttl=20)
|
||||
|
||||
@cached(openmhz_calls_cache)
|
||||
def get_recent_openmhz_calls(sys_name: str):
|
||||
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata')."""
|
||||
logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")
|
||||
scraper = cloudscraper.create_scraper(
|
||||
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
||||
)
|
||||
"""Fetches the actual audio burst .m4a URLs for a specific system (e.g., 'wmata').
|
||||
|
||||
Issue #290 (tg12) + Round 7a: same honest-UA model as
|
||||
``get_openmhz_systems``.
|
||||
"""
|
||||
logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")
|
||||
try:
|
||||
url = f"https://api.openmhz.com/{sys_name}/calls"
|
||||
res = scraper.get(url, timeout=15)
|
||||
res = requests.get(
|
||||
url,
|
||||
timeout=15,
|
||||
headers={"User-Agent": _openmhz_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
return data.get("calls", []) if isinstance(data, dict) else []
|
||||
return []
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"OpenMHZ Calls Scrape Exception ({sys_name}): {e}")
|
||||
logger.error(f"OpenMHZ Calls Fetch Exception ({sys_name}): {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -163,9 +202,11 @@ def openmhz_audio_response(target_url: str):
|
||||
timeout=(5, 20),
|
||||
allow_redirects=False,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0",
|
||||
# Issue #291 (tg12) + Round 7a: drop spoofed Mozilla
|
||||
# UA and the fake first-party Referer. Identify as
|
||||
# the per-install Shadowbroker proxy honestly.
|
||||
"User-Agent": _openmhz_user_agent(),
|
||||
"Accept": "audio/mpeg,audio/*,*/*;q=0.8",
|
||||
"Referer": "https://openmhz.com/",
|
||||
},
|
||||
)
|
||||
if upstream.is_redirect or upstream.status_code in (301, 302, 303, 307, 308):
|
||||
|
||||
@@ -4,7 +4,7 @@ import concurrent.futures
|
||||
from urllib.parse import quote
|
||||
import requests as _requests
|
||||
from cachetools import TTLCache
|
||||
from services.network_utils import fetch_with_curl, DEFAULT_USER_AGENT
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -15,24 +15,30 @@ dossier_cache = TTLCache(maxsize=500, ttl=86400)
|
||||
# Nominatim requires max 1 req/sec — track last call time
|
||||
_nominatim_last_call = 0.0
|
||||
|
||||
# Issue #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||
# Issues #218 / #219 (tg12): Wikimedia's User-Agent policy requires API
|
||||
# clients to identify themselves with a stable User-Agent that includes
|
||||
# a contact path. Bare "python-requests/x.y" or generic strings violate
|
||||
# the policy and risk getting blocked. We send the project default UA
|
||||
# (operator-overridable via SHADOWBROKER_USER_AGENT) on EVERY outbound
|
||||
# Wikimedia request, plus the policy-recommended Api-User-Agent which
|
||||
# Wikimedia explicitly accepts on top of the regular UA.
|
||||
# a contact path.
|
||||
#
|
||||
# This is documented and stable so a Wikimedia operator who wants to
|
||||
# rate-limit or contact us has a fixed identifier to grep for.
|
||||
_WIKIMEDIA_REQUEST_HEADERS = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Api-User-Agent": (
|
||||
f"{DEFAULT_USER_AGENT} "
|
||||
"(+https://github.com/BigBodyCobain/Shadowbroker; "
|
||||
"report issues at /issues)"
|
||||
),
|
||||
}
|
||||
# Round 7a: the original fix in PR #284 used a single project-wide
|
||||
# identifier, which from Wikimedia's perspective made every Shadowbroker
|
||||
# install in the world look like one giant scraper. If one install
|
||||
# misbehaved, their only recourse was to block "Shadowbroker" as a
|
||||
# whole. We now build the headers from ``outbound_user_agent('wikimedia')``
|
||||
# which embeds the per-install operator handle (auto-generated or
|
||||
# operator-chosen), so Wikimedia can rate-limit / contact the specific
|
||||
# install instead of the project.
|
||||
|
||||
|
||||
def _wikimedia_request_headers() -> dict[str, str]:
|
||||
ua = outbound_user_agent("wikimedia")
|
||||
return {
|
||||
"User-Agent": ua,
|
||||
# Browser-JS-style header that Wikimedia's policy explicitly
|
||||
# accepts on top of (or instead of) User-Agent. We send both so
|
||||
# whichever the upstream prefers, the per-operator handle is
|
||||
# always available.
|
||||
"Api-User-Agent": ua,
|
||||
}
|
||||
|
||||
|
||||
def _reverse_geocode_offline(lat: float, lng: float) -> dict:
|
||||
@@ -64,9 +70,7 @@ def _reverse_geocode(lat: float, lng: float) -> dict:
|
||||
f"https://nominatim.openstreetmap.org/reverse?"
|
||||
f"lat={lat}&lon={lng}&format=json&zoom=10&addressdetails=1&accept-language=en"
|
||||
)
|
||||
headers = {
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard; contact@shadowbroker.app)"
|
||||
}
|
||||
headers = {"User-Agent": outbound_user_agent("nominatim")}
|
||||
|
||||
for attempt in range(2):
|
||||
# Enforce Nominatim's 1 req/sec policy
|
||||
@@ -146,7 +150,7 @@ def _fetch_wikidata_leader(country_name: str) -> dict:
|
||||
# specific Api-User-Agent that the policy specifically asks
|
||||
# for, since this request originates from a backend service
|
||||
# that proxies on behalf of (potentially many) browser users.
|
||||
res = fetch_with_curl(url, timeout=6, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
res = fetch_with_curl(url, timeout=6, headers=_wikimedia_request_headers())
|
||||
if res.status_code == 200:
|
||||
results = res.json().get("results", {}).get("bindings", [])
|
||||
if results:
|
||||
@@ -174,7 +178,7 @@ def _fetch_local_wiki_summary(place_name: str, country_name: str = "") -> dict:
|
||||
try:
|
||||
# Issue #219 (tg12): identify ourselves to Wikimedia per
|
||||
# their UA policy; see _fetch_wikidata_leader above.
|
||||
res = fetch_with_curl(url, timeout=5, headers=_WIKIMEDIA_REQUEST_HEADERS)
|
||||
res = fetch_with_curl(url, timeout=5, headers=_wikimedia_request_headers())
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
if data.get("type") != "disambiguation":
|
||||
|
||||
@@ -34,6 +34,11 @@ from services.sar.sar_config import (
|
||||
copernicus_token,
|
||||
earthdata_token,
|
||||
)
|
||||
|
||||
|
||||
def _sar_user_agent() -> str:
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("sar-products")
|
||||
from services.sar.sar_normalize import (
|
||||
SarAnomaly,
|
||||
evidence_hash_for_payload,
|
||||
@@ -442,7 +447,7 @@ def _fetch_unosat_packages() -> list[dict[str, Any]]:
|
||||
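# HDX CKAN returns 406 without an explicit Accept header; it was also observed to want a browser-ish UA — NOTE(review): confirm the per-install UA below is still accepted.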
|
||||
hdx_headers = {
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "Mozilla/5.0 (compatible; ShadowBroker-SAR/1.0)",
|
||||
"User-Agent": _sar_user_agent(),
|
||||
}
|
||||
try:
|
||||
resp = fetch_with_curl(url, timeout=20, headers=hdx_headers)
|
||||
|
||||
@@ -11,12 +11,21 @@ import requests
|
||||
from datetime import datetime, timedelta
|
||||
from cachetools import TTLCache
|
||||
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache by rounded lat/lon (0.02° grid ~= 2km), TTL 1 hour
|
||||
_sentinel_cache = TTLCache(maxsize=200, ttl=3600)
|
||||
|
||||
|
||||
def _planetary_user_agent() -> str:
|
||||
# Round 7a: per-install handle so Microsoft Planetary Computer can
|
||||
# attribute requests to the specific operator rather than treating
|
||||
# the whole Shadowbroker user base as one entity.
|
||||
return outbound_user_agent("sentinel2-planetary-computer")
|
||||
|
||||
|
||||
def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
||||
lat_span = 0.18
|
||||
lng_span = 0.24
|
||||
@@ -64,7 +73,7 @@ def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
||||
"https://planetarycomputer.microsoft.com/api/stac/v1/search",
|
||||
json=search_payload,
|
||||
timeout=8,
|
||||
headers={"User-Agent": "ShadowBroker-OSINT/1.0 (live-risk-dashboard)"},
|
||||
headers={"User-Agent": _planetary_user_agent()},
|
||||
)
|
||||
search_res.raise_for_status()
|
||||
data = search_res.json()
|
||||
|
||||
@@ -20,7 +20,11 @@ from cachetools import TTLCache
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SHODAN_BASE = "https://api.shodan.io"
|
||||
_USER_AGENT = "ShadowBroker/0.9.79 local Shodan connector"
|
||||
# Round 7a: per-install attribution. Shodan already has the operator API
|
||||
# key for billing, but the UA still identifies the install.
|
||||
def _shodan_user_agent():
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("shodan")
|
||||
_REQUEST_TIMEOUT = 15
|
||||
_MIN_INTERVAL_SECONDS = 1.05 # Shodan docs say API plans are rate limited to ~1 req/sec.
|
||||
_DEFAULT_SEARCH_PAGES = 1
|
||||
@@ -179,7 +183,7 @@ def _request(path: str, *, params: dict[str, Any], cache: TTLCache[str, dict[str
|
||||
f"{_SHODAN_BASE}{path}",
|
||||
params=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
||||
headers={"User-Agent": _shodan_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
finally:
|
||||
_last_request_at = time.monotonic()
|
||||
|
||||
@@ -19,6 +19,13 @@ from pathlib import Path
|
||||
import requests
|
||||
from sgp4.api import Satrec, WGS72, jday
|
||||
|
||||
|
||||
|
||||
def _tinygs_user_agent(purpose: str) -> str:
|
||||
"""Round 7a: per-install handle for CelesTrak / TinyGS attribution."""
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent(f"tinygs-{purpose}")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -113,7 +120,7 @@ def _fetch_celestrak_tles() -> list[dict]:
|
||||
params={"GROUP": group, "FORMAT": "json"},
|
||||
timeout=20,
|
||||
headers={
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0 (CelesTrak fair-use)",
|
||||
"User-Agent": _tinygs_user_agent("celestrak"),
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
@@ -259,7 +266,7 @@ def _fetch_tinygs_telemetry() -> None:
|
||||
timeout=15,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "ShadowBroker-OSINT/1.0",
|
||||
"User-Agent": _tinygs_user_agent("tinygs"),
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
@@ -24,7 +24,9 @@ from cachetools import TTLCache
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_FINNHUB_BASE = "https://finnhub.io/api/v1"
|
||||
_USER_AGENT = "ShadowBroker/0.9.79 Finnhub connector"
|
||||
def _finnhub_user_agent():
|
||||
from services.network_utils import outbound_user_agent
|
||||
return outbound_user_agent("finnhub")
|
||||
_REQUEST_TIMEOUT = 12
|
||||
_MIN_INTERVAL_SECONDS = 0.35 # Stay well under 60 calls/min
|
||||
|
||||
@@ -89,7 +91,7 @@ def _request(path: str, params: dict[str, Any] | None = None) -> Any:
|
||||
f"{_FINNHUB_BASE}{path}",
|
||||
params=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
headers={"User-Agent": _USER_AGENT, "Accept": "application/json"},
|
||||
headers={"User-Agent": _finnhub_user_agent(), "Accept": "application/json"},
|
||||
)
|
||||
finally:
|
||||
_last_request_at = time.monotonic()
|
||||
|
||||
@@ -0,0 +1,677 @@
|
||||
{
|
||||
"_meta": {
|
||||
"issue": "#239",
|
||||
"note": "Snapshot of currently-tolerated duplicate route registrations. The test in test_no_new_duplicate_routes.py fails if any NEW (method, path) duplicate appears outside this list. Removing entries (by actually deduping) is fine and the test stays green. New entries here require explicit, reviewed updates.",
|
||||
"generated_with": "python -c 'see tests/test_no_new_duplicate_routes.py'"
|
||||
},
|
||||
"duplicates": {
|
||||
"DELETE /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/contact/{peer_id}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"DELETE /api/wormhole/dm/invite/handles/{handle}": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/cctv/media": [
|
||||
"main",
|
||||
"routers.cctv"
|
||||
],
|
||||
"GET /api/debug-latest": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/geocode/reverse": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/geocode/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/health": [
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/live-data": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/fast": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/slow": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/mesh/channels": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/prekey-bundle": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/pubkey": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/mesh/gate/list": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/gate/{gate_id}/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/event/{event_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/events": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/locator": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/merkle": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/messages/wait": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/node/{node_id}": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/log": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/messages": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/metrics": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/oracle/consensus": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/markets/more": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/predictions": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/profile": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/search": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/oracle/stakes/{message_id}": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"GET /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/all": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/reputation/batch": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/rns/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/signals": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/status": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"GET /api/mesh/trust/vouches": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"GET /api/oracle/region-intel": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/radio/nearest": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/nearest-list": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/audio": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/calls/{sys_name}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/openmhz/systems": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/radio/top": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/refresh": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/region-dossier": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/route/{callsign}": [
|
||||
"main",
|
||||
"routers.radio"
|
||||
],
|
||||
"GET /api/sentinel2/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/settings/api-keys": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/api-keys/meta": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"GET /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/settings/wormhole-status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/sigint/nearest-sdr": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/thermal/verify": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"GET /api/tools/shodan/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/tools/uw/status": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"GET /api/wormhole/dm/contacts": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/dm/invite/handles": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/gate/{gate_id}/personas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/health": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/identity": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"GET /api/wormhole/status": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PATCH /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/ais/feed": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/layers": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/mesh/dm/block": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/count": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/poll": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/register": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/send": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/dm/witness": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/gate/create": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-pull": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/gate/{gate_id}/message": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/revoke": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/identity/rotate": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/ingest": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/infonet/peer-push": [
|
||||
"main",
|
||||
"routers.mesh_peer_sync"
|
||||
],
|
||||
"POST /api/mesh/infonet/sync": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/oracle/predict": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/resolve-stakes": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/oracle/stake": [
|
||||
"main",
|
||||
"routers.mesh_oracle"
|
||||
],
|
||||
"POST /api/mesh/peers": [
|
||||
"main",
|
||||
"routers.mesh_operator",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/report": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/send": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/mesh/trust/vouch": [
|
||||
"main",
|
||||
"routers.mesh_dm"
|
||||
],
|
||||
"POST /api/mesh/vote": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"POST /api/sentinel/tile": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/sentinel/token": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/settings/news-feeds/reset": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/sigint/transmit": [
|
||||
"main",
|
||||
"routers.sigint"
|
||||
],
|
||||
"POST /api/system/update": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"POST /api/tools/shodan/count": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/host": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/shodan/search": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/congress": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/darkpool": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/tools/uw/flow": [
|
||||
"main",
|
||||
"routers.tools"
|
||||
],
|
||||
"POST /api/viewport": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"POST /api/wormhole/connect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/disconnect": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/bootstrap-encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/build-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/dead-drop-tokens": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/encrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/invite/import": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/open-seal": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/pairwise-alias/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/prekey/register": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/register-key": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/reset": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sas": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/dm/sender-token": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/enter": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/grant": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/key/rotate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/compose": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/post-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/message/sign-encrypted": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/messages/decrypt": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/activate": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/clear": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/create": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/persona/retire": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/proof": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/gate/state/export": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/identity/bootstrap": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/join": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/leave": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/restart": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"POST /api/wormhole/sign-raw": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/envelope_policy": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/mesh/gate/{gate_id}/legacy_envelope_fallback": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
],
|
||||
"PUT /api/settings/news-feeds": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/node": [
|
||||
"main",
|
||||
"routers.admin"
|
||||
],
|
||||
"PUT /api/settings/privacy-profile": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/settings/wormhole": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
],
|
||||
"PUT /api/wormhole/dm/contact": [
|
||||
"main",
|
||||
"routers.wormhole"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,261 @@
|
||||
"""Infonet sync respects upstream HTTP 429 + applies exponential backoff.
|
||||
|
||||
Background
|
||||
----------
|
||||
Before this fix, ``finish_sync`` used a constant 60s ``failure_backoff_s``
|
||||
regardless of how many consecutive failures had preceded it. When an upstream
|
||||
peer (e.g. the seed onion) returned HTTP 429 "Too Many Requests", the
|
||||
sync worker would:
|
||||
|
||||
1. Receive 429
|
||||
2. Stringify the status into a generic ``ValueError``
|
||||
3. Call ``finish_sync(error=str(exc))`` -- losing the status code
|
||||
4. Schedule next attempt for ``now + 60s``
|
||||
5. Retry. Upstream's rate-limit bucket is still full. 429 again. Loop.
|
||||
|
||||
Net effect: a node with one transient 429 would hammer the upstream
|
||||
every 60s forever, keeping the bucket full and never recovering. This
|
||||
is what kept the user's Infonet node from reaching the seed peer.
|
||||
|
||||
What the fix does
|
||||
-----------------
|
||||
* New typed exception ``PeerSyncRateLimited`` carries the parsed
|
||||
``Retry-After`` value out of the HTTP layer.
|
||||
* ``_sync_from_peer`` returns ``(ok, error, forked, retry_after_s)``
|
||||
instead of the old 3-tuple.
|
||||
* ``finish_sync`` honors ``retry_after_s`` AND applies exponential
|
||||
backoff: ``delay = max(retry_after_s, base * 2^failures, cap=1800)``.
|
||||
* ``parse_retry_after_header`` handles both RFC 7231 forms (delay
|
||||
seconds, and HTTP-date).
|
||||
|
||||
These tests pin every part of the new contract.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_retry_after_header — both RFC 7231 forms + edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestParseRetryAfter:
    """Pins ``parse_retry_after_header`` for both header forms and its edge cases."""

    @staticmethod
    def _parser():
        """Import the function under test lazily and hand it back."""
        from services.mesh.mesh_infonet_sync_support import parse_retry_after_header

        return parse_retry_after_header

    def test_integer_seconds(self):
        """Delay-seconds form: digits parse as-is, surrounding whitespace is tolerated."""
        parse = self._parser()
        for header, expected in (("120", 120), (" 30 ", 30), ("0", 0)):
            assert parse(header) == expected

    def test_http_date(self):
        """RFC 7231 §7.1.3 explicitly allows ``Retry-After: <HTTP-date>``.

        The parser converts the date to seconds-from-now, so callers read
        one field no matter which form the upstream picked.
        """
        parse = self._parser()
        # Pin "now" so the test is deterministic.
        pinned_now = 1_700_000_000.0  # 2023-11-14T22:13:20Z
        # 300 seconds in the future, formatted per RFC 7231.
        five_minutes_later = "Tue, 14 Nov 2023 22:18:20 GMT"
        result = parse(five_minutes_later, now=pinned_now)
        assert 295 <= result <= 305, f"expected ~300s, got {result}"

    def test_http_date_in_past_returns_zero(self):
        """An HTTP-date earlier than ``now`` yields 0."""
        parse = self._parser()
        pinned_now = 1_700_000_000.0
        already_elapsed = "Mon, 13 Nov 2023 00:00:00 GMT"
        assert parse(already_elapsed, now=pinned_now) == 0

    def test_empty_and_whitespace_return_zero(self):
        """Empty and whitespace-only header values yield 0."""
        parse = self._parser()
        for header in ("", " "):
            assert parse(header) == 0

    def test_malformed_returns_zero(self):
        """Values that are neither digits nor an HTTP-date yield 0."""
        parse = self._parser()
        for header in ("not a header", "xyz"):
            assert parse(header) == 0

    def test_clamps_to_one_hour(self):
        """A hostile peer can't silence us for a day by advertising a
        24h Retry-After; the parsed value is capped at 1 hour."""
        parse = self._parser()
        assert parse("86400") == 3600  # 24h -> 1h
        assert parse("99999999") == 3600

    def test_negative_returns_zero(self):
        """RFC 7231 defines ``Retry-After`` as a non-negative integer;
        a leading minus is treated as a non-digit and yields 0 here."""
        parse = self._parser()
        assert parse("-10") == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _failure_backoff_seconds — exponential growth, retry-after override, cap
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFailureBackoffSeconds:
    """Pins ``_failure_backoff_seconds``: doubling, Retry-After override, cap."""

    @staticmethod
    def _backoff(**kwargs):
        """Call ``_failure_backoff_seconds`` with exactly the given keywords."""
        from services.mesh.mesh_infonet_sync_support import _failure_backoff_seconds

        return _failure_backoff_seconds(**kwargs)

    def test_exponential_growth(self):
        """The first failure waits the base (same as before the fix, so a
        one-off blip is not penalised); every further failure doubles the
        wait until the 1800s cap. With base=60 that is
        60, 120, 240, 480, 960, 1800, 1800, 1800."""
        delays = []
        for failures in range(1, 9):
            delays.append(
                self._backoff(
                    base_backoff_s=60,
                    consecutive_failures=failures,
                    retry_after_s=0,
                    cap_s=1800,
                )
            )
        assert delays == [60, 120, 240, 480, 960, 1800, 1800, 1800], delays

    def test_retry_after_wins_when_larger(self):
        """Upstream sends ``Retry-After: 600`` while exponential would
        only ask for 60s (one failure): the upstream value is honored."""
        delay = self._backoff(
            base_backoff_s=60,
            consecutive_failures=1,
            retry_after_s=600,
            cap_s=1800,
        )
        assert delay == 600

    def test_exponential_wins_when_larger(self):
        """Exponential is at 1800s (6+ failures) but upstream only sent
        ``Retry-After: 30``: exponential is honored. The 30s was the
        upstream's view at one moment; the exponential value reflects
        sustained failure."""
        result = self._backoff(
            base_backoff_s=60,
            consecutive_failures=7,
            retry_after_s=30,
            cap_s=1800,
        )
        assert result == 1800

    def test_cap_zero_disables_exponential(self):
        """Operators who want pre-fix behavior can set cap=0; then only
        the upstream's Retry-After is respected. (Pre-fix had no
        exponential growth at all.)"""
        delay = self._backoff(
            base_backoff_s=60,
            consecutive_failures=10,
            retry_after_s=120,
            cap_s=0,
        )
        assert delay == 120

    def test_zero_inputs_return_zero(self):
        """All-zero inputs, with ``cap_s`` left at its default, yield 0."""
        delay = self._backoff(
            base_backoff_s=0,
            consecutive_failures=0,
            retry_after_s=0,
        )
        assert delay == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# finish_sync end-to-end — failure path with retry-after + growing counter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFinishSyncBackoff:
    """End-to-end checks of ``finish_sync``: failure counter, backoff delay,
    Retry-After handling and the reset on success."""

    def _state(self, **overrides):
        """Build a ``SyncWorkerState`` with idle defaults.

        Any keyword in ``overrides`` replaces the matching default field.
        """
        from services.mesh.mesh_infonet_sync_support import SyncWorkerState

        base = {
            "last_sync_started_at": 0,
            "last_sync_finished_at": 0,
            "last_sync_ok_at": 0,
            "next_sync_due_at": 0,
            "last_peer_url": "",
            "last_error": "",
            "last_outcome": "idle",
            "current_head": "",
            "fork_detected": False,
            "consecutive_failures": 0,
        }
        base.update(overrides)
        return SyncWorkerState(**base)

    def test_first_failure_uses_base_unchanged(self):
        """One failure means consecutive_failures becomes 1, which uses
        ``base * 2^0 = base``. Preserves the pre-fix behavior so a
        single transient upstream blip doesn't suddenly take 2 minutes
        to retry — that change has to be earned by sustained failure."""
        from services.mesh.mesh_infonet_sync_support import finish_sync

        result = finish_sync(
            self._state(),
            ok=False,
            error="some upstream blip",
            now=1000.0,
            failure_backoff_s=60,
        )
        assert result.consecutive_failures == 1
        assert result.next_sync_due_at == 1000 + 60
        assert result.last_error == "some upstream blip"
        assert result.last_outcome == "error"

    def test_consecutive_failures_grow_the_delay(self):
        """After 5 prior failures already in state, the next failure
        sets consecutive=6. The uncapped delay would be
        ``60 * 2^5 = 1920s``, so the expected delay is the 1800s cap
        (no cap is passed here, so it comes from ``finish_sync``'s
        default)."""
        from services.mesh.mesh_infonet_sync_support import finish_sync

        result = finish_sync(
            self._state(consecutive_failures=5),
            ok=False,
            error="HTTP 429",
            now=2000.0,
            failure_backoff_s=60,
        )
        assert result.consecutive_failures == 6
        assert result.next_sync_due_at == 2000 + 1800

    def test_retry_after_honored_at_low_failure_count(self):
        """When the upstream says ``Retry-After: 900`` but we'd
        otherwise only wait 480s (4 failures = 60 * 2^3), wait 900s."""
        from services.mesh.mesh_infonet_sync_support import finish_sync

        result = finish_sync(
            self._state(consecutive_failures=3),
            ok=False,
            error="HTTP 429",
            now=5000.0,
            failure_backoff_s=60,
            retry_after_s=900,
        )
        assert result.consecutive_failures == 4
        assert result.next_sync_due_at == 5000 + 900

    def test_success_resets_consecutive_failures(self):
        """A successful sync zeroes the failure counter and schedules the
        next sync one regular interval after ``now``."""
        from services.mesh.mesh_infonet_sync_support import finish_sync

        result = finish_sync(
            self._state(consecutive_failures=4),
            ok=True,
            now=7000.0,
            interval_s=300,
        )
        assert result.consecutive_failures == 0
        assert result.next_sync_due_at == 7000 + 300
        assert result.last_outcome == "ok"

    def test_last_error_carries_status_string(self):
        """The pre-fix path stringified exceptions into ``last_error``
        but the string was often empty (HTTP layer raised ValueError
        with no message). We now require callers to pass something
        meaningful — see the typed exception path in main.py."""
        from services.mesh.mesh_infonet_sync_support import finish_sync

        result = finish_sync(
            self._state(),
            ok=False,
            error="HTTP 429 from peer (retry_after=120s): rate-limited",
            now=1000.0,
            failure_backoff_s=60,
            retry_after_s=120,
        )
        assert "HTTP 429" in result.last_error
        assert "retry_after=120s" in result.last_error
|
||||
@@ -89,6 +89,34 @@ import pytest
|
||||
# relay through the backend. 60/minute rate limit is not enough on
|
||||
# a streaming endpoint.
|
||||
("get", "/api/radio/openmhz/audio?url=https%3A%2F%2Fmedia.openmhz.com%2Faudio%2Fabc.mp3", None),
|
||||
# Issue #299 (tg12): /api/sentinel/token relays Copernicus CDSE
|
||||
# OAuth token requests for caller-supplied client_id/secret.
|
||||
# Anonymous access turns the backend into a free OAuth-mint relay.
|
||||
(
|
||||
"post",
|
||||
"/api/sentinel/token",
|
||||
None, # body sent via raw form-encoded data — None lets the
|
||||
# remote_client wrapper send an empty body; the auth
|
||||
# check fires before the form parser runs.
|
||||
),
|
||||
# Issue #300 (tg12): /api/sentinel/tile relays Sentinel Hub Process
|
||||
# API tile fetches. Anonymous access is a bandwidth/quota relay
|
||||
# for any caller's Copernicus account.
|
||||
(
|
||||
"post",
|
||||
"/api/sentinel/tile",
|
||||
{
|
||||
"client_id": "ignored",
|
||||
"client_secret": "ignored",
|
||||
"preset": "TRUE-COLOR",
|
||||
"date": "2026-01-01",
|
||||
"z": 6, "x": 30, "y": 20,
|
||||
},
|
||||
),
|
||||
# Issue #301 (tg12): /api/sentinel2/search hits Planetary Computer
|
||||
# STAC + Esri fallback. Anonymous access is a free external-search
|
||||
# relay even though no caller credentials are involved.
|
||||
("get", "/api/sentinel2/search?lat=0&lng=0", None),
|
||||
],
|
||||
)
|
||||
def test_remote_control_surface_rejects_without_local_operator_or_admin(
|
||||
|
||||
@@ -0,0 +1,270 @@
|
||||
"""Per-(sender, recipient) anti-spam cap on the DM relay.
|
||||
|
||||
The user-stated rule: a single sender can have at most N UNACKED messages
|
||||
parked in a single recipient's mailbox at any one time (N=2 by default).
|
||||
Once the recipient pulls a message, the sender's quota for that pair
|
||||
frees up.
|
||||
|
||||
Network rule, not local rule
|
||||
-----------------------------
|
||||
The cap is enforced TWICE:
|
||||
|
||||
1. ``DMRelay.deposit(...)`` -- local check on the sender's own node.
|
||||
Refuses to spool the (N+1)th message before it can be replicated.
|
||||
|
||||
2. ``DMRelay.accept_replica(...)`` -- replication-acceptance check on
|
||||
every receiving peer. Refuses to accept an inbound replica that
|
||||
would put the local mailbox over the cap, even if the originating
|
||||
peer claims it had cap room.
|
||||
|
||||
The double enforcement matters because cap (1) is client-side -- a
|
||||
hostile relay could patch it out and continue to spool extras locally.
|
||||
Cap (2) means those extras can't propagate: every honest peer rejects
|
||||
them on the way in. A recipient who polls from honest peers therefore
|
||||
never sees more than N pending from any one sender, regardless of how
|
||||
many spam attempts the sender's own relay accepted.
|
||||
|
||||
These tests pin both halves of the rule.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def relay():
    """Provide a new ``DMRelay`` for each test, with its mailboxes, blocks
    and in-memory message counter reset."""
    from services.mesh.mesh_dm_relay import DMRelay

    fresh = DMRelay()
    for container in (fresh._mailboxes, fresh._blocks):
        container.clear()
    fresh._stats = {"messages_in_memory": 0}
    return fresh
|
||||
|
||||
|
||||
def _deposit(
    relay,
    *,
    sender: str = "alice",
    recipient_token: str = "bob_mailbox_token_abc",
    ciphertext: str = "ciphertext-blob",
    msg_id: str = "",
):
    """Deposit one message through ``relay.deposit`` using the ``shared``
    delivery class.

    ``sender`` is used for both ``sender_id`` and ``raw_sender_id``; the
    recipient id is always ``"bob"``. Returns whatever ``deposit`` returns.
    """
    deposit_kwargs = {
        "sender_id": sender,
        "raw_sender_id": sender,
        "recipient_id": "bob",
        "ciphertext": ciphertext,
        "msg_id": msg_id,
        "delivery_class": "shared",
        "recipient_token": recipient_token,
    }
    return relay.deposit(**deposit_kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Local cap on ``deposit``
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDepositCap:
    """Local half of the rule: ``deposit`` refuses a sender's message once
    that sender is at the pending limit for the recipient's mailbox."""

    def test_two_deposits_from_same_sender_succeed(self, relay):
        """Two deposits are accepted and receive distinct message ids."""
        first = _deposit(relay)
        second = _deposit(relay)
        assert first["ok"] is True
        assert second["ok"] is True
        assert first["msg_id"] != second["msg_id"]

    def test_third_deposit_from_same_sender_rejected(self, relay):
        """The third deposit is refused with a detail about unread messages."""
        _deposit(relay)
        _deposit(relay)
        third = _deposit(relay)
        assert third["ok"] is False
        detail = third["detail"].lower()
        assert "unread" in detail or "read your messages" in detail

    def test_different_senders_have_independent_quotas(self, relay):
        """alice filling her quota does not use up carol's; carol still
        hits her own limit on her third deposit."""
        for who in ("alice", "carol"):
            for _attempt in range(2):
                assert _deposit(relay, sender=who)["ok"] is True
        assert _deposit(relay, sender="carol")["ok"] is False

    def test_different_recipients_have_independent_quotas(self, relay):
        """One sender gets a separate quota per recipient mailbox token."""
        for token in ("bob_token", "dave_token"):
            for _attempt in range(2):
                assert _deposit(relay, sender="alice", recipient_token=token)["ok"] is True

    def test_ack_frees_quota(self, relay):
        """Removing one pending message lets the sender deposit again."""
        first = _deposit(relay)
        _deposit(relay)
        assert _deposit(relay)["ok"] is False

        # Simulate the recipient acking the first message by dropping it
        # from the mailbox and recomputing the in-memory counter.
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")
        acked_id = first["msg_id"]
        remaining = []
        for stored in relay._mailboxes[mailbox_key]:
            if stored.msg_id != acked_id:
                remaining.append(stored)
        relay._mailboxes[mailbox_key] = remaining
        relay._stats["messages_in_memory"] = sum(
            len(box) for box in relay._mailboxes.values()
        )

        retry = _deposit(relay)
        assert retry["ok"] is True, f"expected quota free after ack, got: {retry}"

    def test_cap_is_env_tunable(self, relay, monkeypatch):
        """Forcing ``_per_sender_pending_limit`` to 1 makes the second
        deposit fail."""
        import services.mesh.mesh_dm_relay as mdr

        def _limit_of_one(self):
            return 1

        monkeypatch.setattr(mdr.DMRelay, "_per_sender_pending_limit", _limit_of_one)

        assert _deposit(relay)["ok"] is True
        assert _deposit(relay)["ok"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Replication-acceptance cap (the half that makes this a network rule)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAcceptReplicaCap:
|
||||
def _envelope(self, *, msg_id: str, sender_block_ref: str, mailbox_key: str):
|
||||
return {
|
||||
"msg_id": msg_id,
|
||||
"mailbox_key": mailbox_key,
|
||||
"sender_block_ref": sender_block_ref,
|
||||
"sender_id": "alice",
|
||||
"sender_seal": "",
|
||||
"ciphertext": f"ciphertext-{msg_id}",
|
||||
"timestamp": time.time(),
|
||||
"delivery_class": "shared",
|
||||
"relay_salt": "",
|
||||
"payload_format": "dm1",
|
||||
"session_welcome": "",
|
||||
}
|
||||
|
||||
def test_replica_accepted_under_cap(self, relay):
|
||||
env = self._envelope(
|
||||
msg_id="dm_replica_1",
|
||||
sender_block_ref="alice_block_ref",
|
||||
mailbox_key="mailbox_xyz",
|
||||
)
|
||||
result = relay.accept_replica(envelope=env)
|
||||
assert result["ok"] is True
|
||||
|
||||
def test_replica_idempotent_on_duplicate_msg_id(self, relay):
|
||||
mailbox_key = "mailbox_xyz"
|
||||
env = self._envelope(
|
||||
msg_id="dm_dup_1",
|
||||
sender_block_ref="alice_block_ref",
|
||||
mailbox_key=mailbox_key,
|
||||
)
|
||||
r1 = relay.accept_replica(envelope=env)
|
||||
r2 = relay.accept_replica(envelope=env)
|
||||
assert r1["ok"] is True
|
||||
assert r2["ok"] is True
|
||||
assert r2.get("duplicate") is True
|
||||
assert len(relay._mailboxes[mailbox_key]) == 1
|
||||
|
||||
def test_replica_rejected_when_local_count_already_at_cap(self, relay):
|
||||
mailbox_key = "mailbox_xyz"
|
||||
for i in (1, 2):
|
||||
relay.accept_replica(envelope=self._envelope(
|
||||
msg_id=f"dm_seeded_{i}",
|
||||
sender_block_ref="alice_block_ref",
|
||||
mailbox_key=mailbox_key,
|
||||
))
|
||||
|
||||
result = relay.accept_replica(envelope=self._envelope(
|
||||
msg_id="dm_overcap_3",
|
||||
sender_block_ref="alice_block_ref",
|
||||
mailbox_key=mailbox_key,
|
||||
))
|
||||
assert result["ok"] is False
|
||||
assert result.get("cap_violation") is True
|
||||
assert result.get("pending") == 2
|
||||
assert result.get("limit") == 2
|
||||
assert len(relay._mailboxes[mailbox_key]) == 2
|
||||
|
||||
def test_replica_from_different_sender_passes_when_one_is_at_cap(self, relay):
|
||||
mailbox_key = "mailbox_xyz"
|
||||
for i in (1, 2):
|
||||
relay.accept_replica(envelope=self._envelope(
|
||||
msg_id=f"dm_alice_{i}",
|
||||
sender_block_ref="alice_block_ref",
|
||||
mailbox_key=mailbox_key,
|
||||
))
|
||||
assert relay.accept_replica(envelope=self._envelope(
|
||||
msg_id="dm_alice_3",
|
||||
sender_block_ref="alice_block_ref",
|
||||
mailbox_key=mailbox_key,
|
||||
))["ok"] is False
|
||||
assert relay.accept_replica(envelope=self._envelope(
|
||||
msg_id="dm_carol_1",
|
||||
sender_block_ref="carol_block_ref",
|
||||
mailbox_key=mailbox_key,
|
||||
))["ok"] is True
|
||||
|
||||
def test_replica_rejects_malformed_envelopes(self, relay):
|
||||
for bad in (
|
||||
{},
|
||||
{"msg_id": "x"},
|
||||
{"msg_id": "x", "mailbox_key": "y"},
|
||||
"not an object at all",
|
||||
):
|
||||
result = relay.accept_replica(envelope=bad)
|
||||
assert result["ok"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ``envelope_for_replication`` -- helper for the outbound replication path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEnvelopeForReplication:
    """Checks ``envelope_for_replication``, the helper used on the outbound
    replication path."""

    def test_returns_envelope_for_stored_message(self, relay):
        """A deposited message can be exported as an envelope that keeps
        its id, mailbox key, ciphertext and delivery class."""
        r = _deposit(relay, ciphertext="hello-ciphertext")
        msg_id = r["msg_id"]
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")

        env = relay.envelope_for_replication(mailbox_key=mailbox_key, msg_id=msg_id)
        assert env is not None
        assert env["msg_id"] == msg_id
        assert env["mailbox_key"] == mailbox_key
        assert env["ciphertext"] == "hello-ciphertext"
        assert env["delivery_class"] == "shared"
        # These four fields must be present and non-empty.
        for k in ("msg_id", "mailbox_key", "sender_block_ref", "ciphertext"):
            assert env.get(k), f"envelope missing required field {k!r}"

    def test_returns_none_for_unknown_message(self, relay):
        """An unknown mailbox/message pair yields ``None``."""
        env = relay.envelope_for_replication(
            mailbox_key="never_existed", msg_id="never_existed",
        )
        assert env is None

    def test_envelope_round_trips_through_accept_replica(self, relay):
        """An envelope exported by one relay is accepted by a second relay
        and stored there with the same id and ciphertext."""
        from services.mesh.mesh_dm_relay import DMRelay

        receiver_relay = DMRelay()
        # Reset the receiver the same way the ``relay`` fixture resets the
        # sender, including ``_blocks``, so both start from the same state.
        receiver_relay._mailboxes.clear()
        receiver_relay._blocks.clear()
        receiver_relay._stats = {"messages_in_memory": 0}

        r = _deposit(relay)
        msg_id = r["msg_id"]
        mailbox_key = relay._hashed_mailbox_token("bob_mailbox_token_abc")
        env = relay.envelope_for_replication(
            mailbox_key=mailbox_key, msg_id=msg_id,
        )
        assert env is not None

        result = receiver_relay.accept_replica(envelope=env)
        assert result["ok"] is True
        stored = receiver_relay._mailboxes.get(mailbox_key, [])
        assert len(stored) == 1
        assert stored[0].msg_id == msg_id
        assert stored[0].ciphertext == "ciphertext-blob"
|
||||
@@ -0,0 +1,150 @@
|
||||
"""POST /api/mesh/dm/replicate-envelope — receiving side of cross-node DM
|
||||
mailbox replication.
|
||||
|
||||
This is the endpoint that peer relays call when they want to hand off an
|
||||
encrypted DM envelope to us (so the recipient can log into our node and
|
||||
find their messages). It re-enforces the per-(sender, recipient) anti-spam
|
||||
cap so hostile sender relays can't widen the cap by skipping the local
|
||||
check on their own deposit path.
|
||||
|
||||
The endpoint:
|
||||
|
||||
* authenticates the caller via the existing per-peer HMAC pattern
|
||||
(same one /api/mesh/infonet/peer-push and /api/mesh/gate/peer-push
|
||||
use, introduced in #256 — ``X-Peer-Url`` + ``X-Peer-HMAC`` headers
|
||||
keyed off ``resolve_peer_key_for_url``)
|
||||
* rejects bodies > 64 KB (DM envelope size is bounded by
|
||||
``MESH_DM_MAX_MSG_BYTES`` — 64KB ceiling has generous headroom)
|
||||
* rejects requests without a valid peer HMAC with 403
|
||||
* passes the envelope to ``DMRelay.accept_replica`` which enforces
|
||||
the cap
|
||||
|
||||
This file pins the endpoint contract. The cap enforcement itself is
|
||||
tested in ``test_dm_relay_per_sender_cap.py`` against the relay's
|
||||
``accept_replica`` method directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
|
||||
@pytest.fixture
def remote_client():
    """Yield a synchronous wrapper around an ASGI client whose peer address
    is 1.2.3.4 — never on the local-operator allowlist.

    Used to prove the endpoint isn't accidentally reachable by random
    remote callers without peer HMAC. The wrapper owns a private event
    loop, which is closed after the test.
    """
    from main import app

    class _RemoteClient:
        """Runs each POST to completion on a dedicated event loop."""

        def __init__(self):
            self._loop = asyncio.new_event_loop()
            self._transport = ASGITransport(app=app, client=("1.2.3.4", 12345))
            self._base = "http://1.2.3.4:8000"

        async def _post_async(self, url, options):
            # A new AsyncClient per request, reusing the shared transport.
            async with AsyncClient(transport=self._transport, base_url=self._base) as session:
                return await session.post(url, **options)

        def post(self, url, **kw):
            return self._loop.run_until_complete(self._post_async(url, kw))

        def close(self):
            self._loop.close()

    client = _RemoteClient()
    yield client
    client.close()
|
||||
|
||||
|
||||
class TestReplicateEndpointAuth:
    """Authentication and size-limit contract of
    ``POST /api/mesh/dm/replicate-envelope``."""

    def test_rejects_request_without_peer_hmac(self, remote_client):
        """A peer push that does NOT carry X-Peer-Url + X-Peer-HMAC
        must be rejected with 403 before the envelope is ever passed
        to the relay. Same gate the existing infonet/gate peer-push
        endpoints enforce."""
        payload = {
            "envelope": {
                "msg_id": "dm_unauth_1",
                "mailbox_key": "mb",
                "sender_block_ref": "sender",
                "ciphertext": "x",
            },
        }
        r = remote_client.post(
            "/api/mesh/dm/replicate-envelope",
            json=payload,
        )
        assert r.status_code == 403
        # Case-insensitive match; this also covers the exact "peer HMAC" spelling.
        assert "peer hmac" in r.text.lower()

    def test_rejects_wrong_peer_hmac(self, remote_client, monkeypatch):
        """A request with a peer HMAC header keyed off the WRONG secret
        is rejected. Confirms the HMAC is actually verified — a tampered
        body or a key-substitution attack doesn't sneak through."""
        # Plant a known peer secret. The request will sign with a
        # DIFFERENT key, so verification must fail.
        from services.config import get_settings

        monkeypatch.setenv("MESH_PEER_PUSH_SECRET", "real-secret-32-chars-min-padding-padding")
        get_settings.cache_clear()
        try:
            body = json.dumps({
                "envelope": {
                    "msg_id": "dm_wronghmac",
                    "mailbox_key": "mb",
                    "sender_block_ref": "sender",
                    "ciphertext": "x",
                },
            }).encode("utf-8")
            wrong_hmac = hmac.new(b"wrong-key", body, hashlib.sha256).hexdigest()
            r = remote_client.post(
                "/api/mesh/dm/replicate-envelope",
                content=body,
                headers={
                    "Content-Type": "application/json",
                    "X-Peer-Url": "http://example-peer.onion:8000",
                    "X-Peer-HMAC": wrong_hmac,
                },
            )
            assert r.status_code == 403
        finally:
            # monkeypatch restores the env var after the test, but settings
            # cached during this test would still hold the planted secret.
            # Drop them so the next get_settings() call re-reads the
            # restored environment.
            get_settings.cache_clear()

    def test_rejects_oversize_body(self, remote_client):
        """64 KB ceiling — anything bigger doesn't even get parsed.
        Defends against memory amplification via giant ciphertexts."""
        # 100 KB body is well over the 64 KB cap.
        big = b"{" + b"x" * 100_000 + b"}"
        r = remote_client.post(
            "/api/mesh/dm/replicate-envelope",
            content=big,
            headers={
                "Content-Type": "application/json",
                "Content-Length": str(len(big)),
            },
        )
        assert r.status_code in (400, 413), (
            f"oversize body should be rejected with 400/413, got {r.status_code}"
        )
|
||||
|
||||
|
||||
class TestReplicateEndpointRegistered:
    """Guards against the replicate-envelope route being dropped from the app."""

    def test_route_present_in_app(self):
        """Static check that the route is really wired into the app, so a
        later refactor that loses the router include or the endpoint
        itself fails here."""
        from main import app

        # Routes without ``methods`` (or with a falsy value) contribute nothing.
        registered = {
            (method, getattr(route, "path", None))
            for route in app.routes
            for method in (getattr(route, "methods", set()) or set())
        }

        assert ("POST", "/api/mesh/dm/replicate-envelope") in registered, (
            "POST /api/mesh/dm/replicate-envelope is not registered on the app"
        )
|
||||
@@ -0,0 +1,354 @@
|
||||
"""Per-flight source attribution.
|
||||
|
||||
Background
|
||||
----------
|
||||
Pre-fix, adsb.lol records (the primary source for most flights) carried
|
||||
no source marker. OpenSky records got ``is_opensky: True`` and
|
||||
supplementals got ``supplemental_source``, so any UI that wanted to show
|
||||
which provider a flight came from saw OpenSky/airplanes.live records as
|
||||
explicitly tagged and adsb.lol records as "unlabeled" — making it look
|
||||
like adsb.lol wasn't even being used.
|
||||
|
||||
This caused user confusion ("only military planes have adsb.lol
|
||||
telemetry") that was diagnostic noise, not a real bug. The actual fix:
|
||||
stamp ``source`` at every fetch site so the downstream consumer can
|
||||
attribute the provider with no guesswork.
|
||||
|
||||
These tests pin:
|
||||
|
||||
* adsb.lol regional records get ``source: "adsb.lol"`` at fetch time
|
||||
(synthesized via the published flight dict).
|
||||
* OpenSky records get ``source: "OpenSky"`` (alongside the existing
|
||||
``is_opensky: True`` for backwards compat).
|
||||
* Supplementals (airplanes.live, adsb.fi) flow through with their
|
||||
``supplemental_source`` honored.
|
||||
* The military fetcher tags ``source`` on military_flights and uavs.
|
||||
* The published flight dict carries ``source`` so downstream code
|
||||
can render attribution.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _classify_and_publish — source field flows into published flight dict
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClassifyAndPublishSource:
|
||||
def _reset_store(self):
|
||||
"""Clear store before each test so we get deterministic state."""
|
||||
from services.fetchers._store import latest_data, _data_lock
|
||||
with _data_lock:
|
||||
for key in (
|
||||
"flights", "commercial_flights", "private_flights",
|
||||
"private_jets", "military_flights", "tracked_flights",
|
||||
):
|
||||
latest_data[key] = []
|
||||
return latest_data
|
||||
|
||||
def test_adsb_lol_record_tagged_in_published_flight(self, monkeypatch):
|
||||
"""A raw adsb.lol record (carrying ``source: 'adsb.lol'`` from the
|
||||
fetch site) flows through ``_classify_and_publish`` and the
|
||||
published flight dict carries the same ``source`` field."""
|
||||
from services.fetchers import flights as flights_module
|
||||
from services.fetchers._store import latest_data, _data_lock
|
||||
|
||||
self._reset_store()
|
||||
|
||||
# Patch route + type lookups so they don't try to hit the network.
|
||||
monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
|
||||
monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")
|
||||
|
||||
flights_module._classify_and_publish(
|
||||
[
|
||||
{
|
||||
"hex": "ad7701",
|
||||
"flight": "JBU711",
|
||||
"r": "N967JT",
|
||||
"t": "A321",
|
||||
"lat": 40.0,
|
||||
"lon": -100.0,
|
||||
"alt_baro": 36000,
|
||||
"gs": 401.6,
|
||||
"nac_p": 9,
|
||||
"source": "adsb.lol", # stamped at fetch site
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
with _data_lock:
|
||||
published = list(latest_data.get("flights", []))
|
||||
assert len(published) == 1
|
||||
assert published[0]["source"] == "adsb.lol"
|
||||
# nac_p still flows through too — sanity check that adding source
|
||||
# didn't break the existing GPS jamming signal.
|
||||
assert published[0]["nac_p"] == 9
|
||||
|
||||
def test_opensky_record_tagged_in_published_flight(self, monkeypatch):
|
||||
"""OpenSky-sourced records carry ``source: 'OpenSky'`` (plus the
|
||||
existing ``is_opensky: True`` for back-compat)."""
|
||||
from services.fetchers import flights as flights_module
|
||||
from services.fetchers._store import latest_data, _data_lock
|
||||
|
||||
self._reset_store()
|
||||
monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
|
||||
monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")
|
||||
|
||||
flights_module._classify_and_publish(
|
||||
[
|
||||
{
|
||||
"hex": "a12345",
|
||||
"flight": "UAL100",
|
||||
"r": "N100UA",
|
||||
"t": "Unknown",
|
||||
"lat": 41.0,
|
||||
"lon": -87.0,
|
||||
"alt_baro": 35000,
|
||||
"gs": 450,
|
||||
# No nac_p — OpenSky doesn't carry it.
|
||||
"is_opensky": True,
|
||||
"source": "OpenSky",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
with _data_lock:
|
||||
published = list(latest_data.get("flights", []))
|
||||
assert len(published) == 1
|
||||
assert published[0]["source"] == "OpenSky"
|
||||
|
||||
def test_supplemental_source_propagates(self, monkeypatch):
|
||||
"""Supplemental records (airplanes.live, adsb.fi) have their
|
||||
legacy ``supplemental_source`` field promoted to the unified
|
||||
``source`` field in the published dict — so consumers don't have
|
||||
to inspect two different keys."""
|
||||
from services.fetchers import flights as flights_module
|
||||
from services.fetchers._store import latest_data, _data_lock
|
||||
|
||||
self._reset_store()
|
||||
monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
|
||||
monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")
|
||||
|
||||
flights_module._classify_and_publish(
|
||||
[
|
||||
{
|
||||
"hex": "b22222",
|
||||
"flight": "DAL200",
|
||||
"r": "N200DL",
|
||||
"t": "B738",
|
||||
"lat": 42.0,
|
||||
"lon": -90.0,
|
||||
"alt_baro": 32000,
|
||||
"gs": 420,
|
||||
"supplemental_source": "airplanes.live",
|
||||
# No explicit "source" — should fall through to
|
||||
# supplemental_source.
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
with _data_lock:
|
||||
published = list(latest_data.get("flights", []))
|
||||
assert len(published) == 1
|
||||
assert published[0]["source"] == "airplanes.live"
|
||||
|
||||
def test_explicit_source_wins_over_supplemental_source(self, monkeypatch):
|
||||
"""If both fields are present, explicit ``source`` wins (it's the
|
||||
newer canonical tag)."""
|
||||
from services.fetchers import flights as flights_module
|
||||
from services.fetchers._store import latest_data, _data_lock
|
||||
|
||||
self._reset_store()
|
||||
monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
|
||||
monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")
|
||||
|
||||
flights_module._classify_and_publish(
|
||||
[
|
||||
{
|
||||
"hex": "c33333",
|
||||
"flight": "AAL300",
|
||||
"r": "N300AA",
|
||||
"t": "A321",
|
||||
"lat": 33.0,
|
||||
"lon": -97.0,
|
||||
"alt_baro": 34000,
|
||||
"gs": 430,
|
||||
"source": "adsb.lol",
|
||||
"supplemental_source": "adsb.fi",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
with _data_lock:
|
||||
published = list(latest_data.get("flights", []))
|
||||
assert published[0]["source"] == "adsb.lol"
|
||||
|
||||
def test_untagged_record_defaults_to_adsb_lol(self, monkeypatch):
    """An untagged record is published with ``source == "adsb.lol"``.

    The synthetic record carries neither ``source`` nor
    ``supplemental_source`` (e.g. synthesized by a test, or produced by a
    fetcher that has not been migrated yet). The expected defensive
    default is ``"adsb.lol"`` — the historical primary source — rather
    than an empty string.
    """
    from services.fetchers import flights as flights_module
    from services.fetchers._store import latest_data, _data_lock

    self._reset_store()
    # Replace both lookups with constant stubs for the duration of the test.
    monkeypatch.setattr(flights_module, "lookup_route", lambda _: None)
    monkeypatch.setattr(flights_module, "lookup_aircraft_type", lambda _: "")

    flights_module._classify_and_publish(
        [
            {
                "hex": "d44444",
                "flight": "SWA400",
                "r": "N400SW",
                "t": "B737",
                "lat": 32.0,
                "lon": -110.0,
                "alt_baro": 30000,
                "gs": 410,
            }
        ]
    )

    # Snapshot under the lock so the assertions run on a private copy.
    with _data_lock:
        published = list(latest_data.get("flights", []))

    # Guard before indexing: a dropped or duplicated record should fail
    # with a readable assertion rather than an IndexError (or pass
    # silently when extra records are present).
    assert len(published) == 1
    assert published[0]["source"] == "adsb.lol"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# adsb.lol regional fetcher tags at fetch time
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAdsbLolRegionalTagging:
    """Exercises ``flights._fetch_adsb_lol_regions`` with ``fetch_with_curl``
    replaced by canned responses."""

    def test_fetch_region_stamps_source_on_each_aircraft(self, monkeypatch):
        """Each record returned by the regional fetcher must carry
        ``source: 'adsb.lol'``, so attribution survives later reshuffling
        of the record (e.g. dedupe-by-hex during the OpenSky merge)."""
        from services.fetchers import flights as flights_module

        # (hex, lat, lon, nac_p) for three aircraft; none carries a source.
        rows = (
            ("a1", 40.0, -100.0, 8),
            ("a2", 40.1, -100.1, 9),
            ("a3", 40.2, -100.2, 10),
        )

        class FakeResp:
            status_code = 200

            def json(self):
                # Build fresh dicts on every call, like a real decoded body.
                return {
                    "ac": [
                        {"hex": hex_code, "lat": lat, "lon": lon, "nac_p": nac_p}
                        for hex_code, lat, lon, nac_p in rows
                    ]
                }

        def _canned_fetch(*a, **kw):
            return FakeResp()

        monkeypatch.setattr(flights_module, "fetch_with_curl", _canned_fetch)

        results = flights_module._fetch_adsb_lol_regions()

        assert len(results) >= 3
        # Collect the distinct source tags across everything returned.
        sources = set()
        for aircraft in results:
            sources.add(aircraft.get("source"))
        assert sources == {"adsb.lol"}, (
            f"adsb.lol regional fetcher must stamp source on every record; "
            f"got: {sources}"
        )

    def test_fetch_region_failure_returns_empty_without_crashing(self, monkeypatch):
        """A non-200 answer from adsb.lol must yield ``[]`` — a sanity
        check that source tagging adds no new failure mode."""
        from services.fetchers import flights as flights_module

        class FakeResp:
            status_code = 500

            def json(self):
                return {}

        def _failing_fetch(*a, **kw):
            return FakeResp()

        monkeypatch.setattr(flights_module, "fetch_with_curl", _failing_fetch)

        results = flights_module._fetch_adsb_lol_regions()

        assert results == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Military fetcher tags source on output dicts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMilitarySourceTagging:
    """Checks that ``military.fetch_military_flights`` publishes an
    explicit ``source`` on its output records."""

    def test_military_output_carries_source_field(self, monkeypatch):
        """One synthetic aircraft served from an adsb.lol URL must be
        published in ``military_flights`` with ``source == "adsb.lol"``.
        Every other URL answers with an empty aircraft list."""
        from services.fetchers import military as mil_module
        from services.fetchers import _store as store_module
        from services.fetchers._store import latest_data, _data_lock

        # Start from empty collections for everything this fetch may touch.
        with _data_lock:
            for store_key in ("military_flights", "uavs", "tracked_flights"):
                latest_data[store_key] = []

        # The original note says the military module imports
        # is_any_active inline at call time, so the stub goes on the
        # _store module itself rather than on the military module.
        monkeypatch.setattr(store_module, "is_any_active", lambda *_: True)

        class _RespMil:
            status_code = 200

            def json(self):
                return {
                    "ac": [
                        {
                            "hex": "ae6c1d",
                            "flight": "CRUSH52",
                            "r": "170281",
                            "t": "C30J",
                            "lat": 47.594,
                            "lon": -124.879,
                            "alt_baro": 9025,
                            "gs": 162.8,
                            "track": 334.5,
                            "nac_p": 10,
                        }
                    ]
                }

        class _RespEmpty:
            status_code = 200

            def json(self):
                return {"ac": []}

        def _fake_fetch(url, *a, **kw):
            # Only adsb.lol URLs get the synthetic military aircraft.
            return _RespMil() if "adsb.lol" in url else _RespEmpty()

        # fetch_with_curl plus the downstream enrichment hooks, each
        # swapped for a constant-returning stand-in.
        stubs = {
            "fetch_with_curl": _fake_fetch,
            "enrich_with_plane_alert": lambda mf: None,
            "_enrich_country": lambda hex_, flag: ("US", "USAF"),
            "_classify_military_type": lambda t: "transport",
            "_classify_uav": lambda m, c: (False, "", ""),
            "get_emissions_info": lambda model: None,
            "_mark_fresh": lambda *keys: None,
        }
        for attr_name, stand_in in stubs.items():
            monkeypatch.setattr(mil_module, attr_name, stand_in)

        mil_module.fetch_military_flights()

        with _data_lock:
            mil_published = list(latest_data.get("military_flights", []))

        assert len(mil_published) == 1
        assert mil_published[0]["source"] == "adsb.lol"
|
||||
@@ -0,0 +1,83 @@
|
||||
"""GDELT's ``data.gdeltproject.org`` is a CNAME to a Google Cloud Storage
|
||||
bucket. GCS responds with the wildcard ``*.storage.googleapis.com``
|
||||
certificate, which legitimately does NOT cover the GDELT custom
|
||||
domain, so Python's TLS verification refuses the connection. Some
|
||||
networks happen to route through a path where this works; many
|
||||
(notably Docker Desktop's outbound NAT on local installs) do not.
|
||||
|
||||
The fix in ``services.geopolitics._gcs_direct_gdelt_url`` rewrites any
|
||||
URL pointing at ``data.gdeltproject.org`` to its GCS-direct equivalent
|
||||
(``storage.googleapis.com/data.gdeltproject.org/...``), where the
|
||||
standard GCS certificate is genuinely valid. ``api.gdeltproject.org``
|
||||
and every other host are left untouched.
|
||||
|
||||
These tests pin that behavior so a future refactor that drops the
|
||||
helper or accidentally rewrites the wrong host gets a loud failure.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_rewrites_data_gdeltproject_https():
    """An https URL on ``data.gdeltproject.org`` is rewritten to the
    ``storage.googleapis.com/data.gdeltproject.org`` form."""
    from services.geopolitics import _gcs_direct_gdelt_url

    original = "https://data.gdeltproject.org/gdeltv2/lastupdate.txt"
    expected = "https://storage.googleapis.com/data.gdeltproject.org/gdeltv2/lastupdate.txt"

    assert _gcs_direct_gdelt_url(original) == expected
|
||||
|
||||
|
||||
def test_rewrites_data_gdeltproject_http():
    """Plain ``http://`` URLs (which GDELT's lastupdate.txt sometimes
    lists) are rewritten as well; the scheme itself is kept as-is by the
    helper and, per the original note, upgraded to https downstream."""
    from services.geopolitics import _gcs_direct_gdelt_url

    original = "http://data.gdeltproject.org/gdeltv2/20260301120000.export.CSV.zip"
    expected = "http://storage.googleapis.com/data.gdeltproject.org/gdeltv2/20260301120000.export.CSV.zip"

    assert _gcs_direct_gdelt_url(original) == expected
|
||||
|
||||
|
||||
def test_rewrites_preserve_query_string_and_path():
    """The rewrite only swaps the host portion: the deep path and the
    percent-encoded query string must come through untouched."""
    from services.geopolitics import _gcs_direct_gdelt_url

    expected = (
        "https://storage.googleapis.com/data.gdeltproject.org"
        "/some/deep/path?a=1&b=2&c=hello%20world"
    )

    rewritten = _gcs_direct_gdelt_url(
        "https://data.gdeltproject.org/some/deep/path?a=1&b=2&c=hello%20world"
    )

    assert rewritten == expected
|
||||
|
||||
|
||||
def test_does_not_touch_api_gdeltproject_org():
    """``api.gdeltproject.org`` must pass through unchanged — per the
    original note it is not a CNAME to GCS, so rewriting it would break
    the real GDELT API endpoint."""
    from services.geopolitics import _gcs_direct_gdelt_url

    api_url = "https://api.gdeltproject.org/api/v2/doc/doc?query=carrier"
    result = _gcs_direct_gdelt_url(api_url)

    assert result == api_url
|
||||
|
||||
|
||||
def test_does_not_touch_other_hosts():
    """URLs on unrelated hosts — including one already on
    ``storage.googleapis.com`` — are returned exactly as given."""
    from services.geopolitics import _gcs_direct_gdelt_url

    unrelated_urls = (
        "https://en.wikipedia.org/wiki/Boeing_747",
        "https://query.wikidata.org/sparql",
        "https://storage.googleapis.com/already-correct/path",
        "https://nominatim.openstreetmap.org/search",
    )

    for url in unrelated_urls:
        result = _gcs_direct_gdelt_url(url)
        assert result == url
|
||||
|
||||
|
||||
def test_does_not_partially_match_strings():
    """``data.gdeltproject.org`` must be matched as the URL's host only.

    Two look-alike inputs are expected to come back unchanged:

    * a different host that merely ends with the GDELT host name
      (``example-data.gdeltproject.org``), and
    * a URL on an unrelated host that mentions ``data.gdeltproject.org``
      inside a query parameter
      (``https://example.com/?ref=data.gdeltproject.org/x``).

    The original version of this test described the second case in its
    docstring but only asserted the first; both are pinned here.
    """
    from services.geopolitics import _gcs_direct_gdelt_url

    look_alikes = (
        # Per the original note, the match requires ``://`` immediately
        # before the host, so this is treated as a different host.
        "https://example-data.gdeltproject.org/path",
        # Host name appears only in the query string, not as the host.
        "https://example.com/?ref=data.gdeltproject.org/x",
    )
    for url in look_alikes:
        assert _gcs_direct_gdelt_url(url) == url
|
||||
@@ -0,0 +1,273 @@
|
||||
"""Tests for issue #288: viewport bbox filtering on /api/live-data/{fast,slow}.
|
||||
|
||||
Behaviour contract:
|
||||
* Without s/w/n/e params, the response is byte-for-byte identical to the
|
||||
pre-#288 implementation. (No filtering, no extra fields, no ETag change.)
|
||||
* With s/w/n/e supplied, heavy/dense layers are filtered to that viewport
|
||||
with a 20% padding box.
|
||||
* Light reference layers (datacenters, military_bases, power_plants,
|
||||
satellites, news, weather, …) are NEVER filtered, even when bounds are
|
||||
supplied — panning must never reveal an "empty world" of infrastructure.
|
||||
* World-scale bounds (lng_span >= 300 OR lat_span >= 120) short-circuit
|
||||
filtering and share the global ETag.
|
||||
* The ETag includes a 1°-quantized bbox so two viewports never poison each
|
||||
other's 304 cache.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ───────────────────────── /api/live-data/fast ─────────────────────────────
|
||||
|
||||
|
||||
class TestFastBboxFiltering:
    """Viewport (s/w/n/e) filtering behaviour of ``/api/live-data/fast``.

    Each test seeds the shared store with a handful of fixtures spread
    across the globe and then inspects the JSON body and/or ``ETag``
    header of the endpoint's response.
    """

    def _seed_fast(self, monkeypatch):
        """Plant deterministic heavy + light fixtures across the globe.

        Writes go through ``monkeypatch.setitem`` so pytest restores the
        store entries when the test ends.
        """
        from services.fetchers import _store

        # Heavy collections: dense across the world.
        commercial = [
            {"lat": -60.0, "lng": -120.0, "id": "f-sw"},  # south Pacific
            {"lat": 35.0, "lng": -75.0, "id": "f-ne"},  # eastern US
            {"lat": 35.0, "lng": 100.0, "id": "f-asia"},  # Asia
        ]
        ships = [
            {"lat": -60.0, "lng": -120.0, "id": "s-sw"},
            {"lat": 35.0, "lng": -75.0, "id": "s-ne"},
        ]
        cctv = [{"lat": 35.0, "lng": -75.0, "id": "c-1"}]

        # Sigint heavy collection.
        sigint = [
            {"source": "meshtastic", "lat": 35.0, "lng": -75.0, "id": "sig-east"},
            {"source": "meshtastic", "lat": 35.0, "lng": 100.0, "id": "sig-asia"},
        ]

        # Light/reference layer — must NEVER be filtered.
        satellites = [
            {"lat": -60.0, "lng": -120.0, "id": "sat-sw"},
            {"lat": 35.0, "lng": -75.0, "id": "sat-ne"},
            {"lat": 35.0, "lng": 100.0, "id": "sat-asia"},
        ]

        monkeypatch.setitem(_store.latest_data, "commercial_flights", commercial)
        monkeypatch.setitem(_store.latest_data, "ships", ships)
        monkeypatch.setitem(_store.latest_data, "cctv", cctv)
        monkeypatch.setitem(_store.latest_data, "sigint", sigint)
        monkeypatch.setitem(_store.latest_data, "satellites", satellites)
        # Ensure all layers are on so the response includes them.
        for layer in (
            "flights", "ships_military", "ships_cargo", "ships_civilian",
            "ships_passenger", "ships_tracked_yachts", "cctv",
            "sigint_meshtastic", "sigint_aprs", "satellites",
        ):
            monkeypatch.setitem(_store.active_layers, layer, True)

    def test_no_bbox_returns_world_data(self, client, monkeypatch):
        """Without any bounds, every seeded fixture is returned."""
        self._seed_fast(monkeypatch)
        r = client.get("/api/live-data/fast")
        assert r.status_code == 200
        data = r.json()
        # All heavy fixtures pass through unchanged.
        assert len(data["commercial_flights"]) == 3
        assert len(data["ships"]) == 2
        assert len(data["sigint"]) == 2
        # Light layer also full.
        assert len(data["satellites"]) == 3

    def test_bbox_filters_heavy_layers(self, client, monkeypatch):
        """A regional box keeps only the heavy fixtures inside it."""
        self._seed_fast(monkeypatch)
        # Box tightly around the eastern-US fixture (lat 35, lng -75).
        # ±5° → after 20% padding inside _bbox_filter, ~±6° window.
        r = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        assert r.status_code == 200
        data = r.json()
        # Heavy layers: only the eastern-US fixture survives.
        assert {f["id"] for f in data["commercial_flights"]} == {"f-ne"}
        assert {s["id"] for s in data["ships"]} == {"s-ne"}
        assert {c["id"] for c in data["cctv"]} == {"c-1"}
        assert {s["id"] for s in data["sigint"]} == {"sig-east"}

    def test_bbox_does_not_filter_light_layers(self, client, monkeypatch):
        """The same regional box must leave the satellites layer whole."""
        self._seed_fast(monkeypatch)
        r = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        assert r.status_code == 200
        data = r.json()
        # Satellites are a reference layer — must NOT be bbox-filtered.
        assert len(data["satellites"]) == 3

    def test_world_scale_bbox_skips_filtering(self, client, monkeypatch):
        """Whole-world bounds behave like a request without bounds."""
        self._seed_fast(monkeypatch)
        # lng_span = 360 → treated as world-scale; same as no bbox.
        r = client.get("/api/live-data/fast?s=-90&w=-180&n=90&e=180")
        assert r.status_code == 200
        data = r.json()
        assert len(data["commercial_flights"]) == 3
        assert len(data["ships"]) == 2

    def test_partial_bbox_is_treated_as_no_bbox(self, client, monkeypatch):
        """Supplying fewer than four bounds must not trigger filtering."""
        self._seed_fast(monkeypatch)
        # Only three of four bounds → filtering must NOT engage.
        r = client.get("/api/live-data/fast?s=30&w=-80&n=40")
        assert r.status_code == 200
        data = r.json()
        assert len(data["commercial_flights"]) == 3

    def test_etag_changes_with_bbox(self, client, monkeypatch):
        """World and regional responses must carry different ETags."""
        self._seed_fast(monkeypatch)
        r_world = client.get("/api/live-data/fast")
        r_local = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        assert r_world.status_code == 200
        assert r_local.status_code == 200
        etag_world = r_world.headers.get("etag")
        etag_local = r_local.headers.get("etag")
        assert etag_world and etag_local
        assert etag_world != etag_local, (
            "ETag must differ between world and regional bbox to prevent "
            "304 cache poisoning across viewports"
        )

    def test_etag_stable_for_subdegree_pan(self, client, monkeypatch):
        """Two boxes that differ by less than a degree share one ETag."""
        self._seed_fast(monkeypatch)
        # Sub-degree pan should land in the same 1°-quantized bucket.
        r_a = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        r_b = client.get("/api/live-data/fast?s=30.3&w=-79.8&n=39.7&e=-70.4")
        etag_a = r_a.headers.get("etag")
        etag_b = r_b.headers.get("etag")
        # Without this guard the comparison below would pass vacuously
        # (None == None) if the endpoint stopped emitting ETags at all.
        assert etag_a, "regional response must carry an ETag header"
        assert etag_a == etag_b

    def test_if_none_match_returns_304_for_same_bbox(self, client, monkeypatch):
        """Replaying the ETag for the same box yields ``304 Not Modified``."""
        self._seed_fast(monkeypatch)
        r1 = client.get("/api/live-data/fast?s=30&w=-80&n=40&e=-70")
        assert r1.status_code == 200
        etag = r1.headers.get("etag")
        # Fail here with a clear message rather than sending a missing
        # header value in the conditional request below.
        assert etag, "first response must carry an ETag header"
        r2 = client.get(
            "/api/live-data/fast?s=30&w=-80&n=40&e=-70",
            headers={"If-None-Match": etag},
        )
        assert r2.status_code == 304
|
||||
|
||||
|
||||
# ───────────────────────── /api/live-data/slow ─────────────────────────────
|
||||
|
||||
|
||||
class TestSlowBboxFiltering:
    """Viewport (s/w/n/e) filtering behaviour of ``/api/live-data/slow``."""

    # Query string for a box around the eastern-US fixtures (lat 35, lng -75).
    _EAST_BOX = "/api/live-data/slow?s=30&w=-80&n=40&e=-70"

    def _seed_slow(self, monkeypatch):
        """Seed the store with heavy and reference fixtures and switch
        the matching layers on. All writes use ``monkeypatch.setitem``."""
        from services.fetchers import _store

        fixtures = {
            # Heavy collections.
            "gdelt": [
                {"lat": 35.0, "lng": -75.0, "id": "g-east"},
                {"lat": 35.0, "lng": 100.0, "id": "g-asia"},
            ],
            "firms_fires": [
                {"lat": 35.0, "lng": -75.0, "id": "fire-east"},
                {"lat": -10.0, "lng": 120.0, "id": "fire-ido"},
            ],
            # Light/reference layers — must always ship in full.
            "datacenters": [
                {"lat": 35.0, "lng": -75.0, "id": "dc-east"},
                {"lat": 35.0, "lng": 100.0, "id": "dc-asia"},
                {"lat": -10.0, "lng": 120.0, "id": "dc-ido"},
            ],
            "military_bases": [
                {"lat": 35.0, "lng": -75.0, "id": "mb-east"},
                {"lat": -10.0, "lng": 120.0, "id": "mb-ido"},
            ],
            "power_plants": [
                {"lat": 35.0, "lng": -75.0, "id": "pp-east"},
                {"lat": 35.0, "lng": 100.0, "id": "pp-asia"},
            ],
        }
        for store_key, rows in fixtures.items():
            monkeypatch.setitem(_store.latest_data, store_key, rows)

        enabled_layers = (
            "global_incidents", "firms", "datacenters", "military_bases", "power_plants",
        )
        for layer in enabled_layers:
            monkeypatch.setitem(_store.active_layers, layer, True)

    def test_no_bbox_returns_world_data(self, client, monkeypatch):
        """Without bounds, all seeded rows come back."""
        self._seed_slow(monkeypatch)
        response = client.get("/api/live-data/slow")
        assert response.status_code == 200
        body = response.json()
        assert len(body["gdelt"]) == 2
        assert len(body["firms_fires"]) == 2
        assert len(body["datacenters"]) == 3

    def test_bbox_filters_heavy_layers(self, client, monkeypatch):
        """A regional box keeps only the eastern-US heavy rows."""
        self._seed_slow(monkeypatch)
        response = client.get(self._EAST_BOX)
        assert response.status_code == 200
        body = response.json()
        gdelt_ids = {row["id"] for row in body["gdelt"]}
        fire_ids = {row["id"] for row in body["firms_fires"]}
        assert gdelt_ids == {"g-east"}
        assert fire_ids == {"fire-east"}

    def test_bbox_leaves_reference_layers_untouched(self, client, monkeypatch):
        """Infrastructure overlays (datacenters, bases, power plants) stay
        world-scale under a regional box, so panning never hides them."""
        self._seed_slow(monkeypatch)
        response = client.get(self._EAST_BOX)
        assert response.status_code == 200
        body = response.json()
        assert len(body["datacenters"]) == 3
        assert len(body["military_bases"]) == 2
        assert len(body["power_plants"]) == 2

    def test_antimeridian_bbox(self, client, monkeypatch):
        """A box with ``w=170`` and ``e=-170`` wraps across the
        antimeridian: points at lng ±175 are kept, lng 0 is dropped."""
        from services.fetchers import _store

        # Two points either side of the antimeridian plus one far away.
        events = [
            {"lat": 0.0, "lng": 175.0, "id": "in-west"},
            {"lat": 0.0, "lng": -175.0, "id": "in-east"},
            {"lat": 0.0, "lng": 0.0, "id": "out-mid"},
        ]
        monkeypatch.setitem(_store.latest_data, "gdelt", events)
        monkeypatch.setitem(_store.active_layers, "global_incidents", True)

        response = client.get("/api/live-data/slow?s=-10&w=170&n=10&e=-170")
        assert response.status_code == 200

        ids = set()
        for row in response.json()["gdelt"]:
            ids.add(row["id"])
        assert "in-west" in ids
        assert "in-east" in ids
        assert "out-mid" not in ids
|
||||
|
||||
|
||||
# ─────────────────── Direct helper coverage (defensive) ─────────────────────
|
||||
|
||||
|
||||
class TestHelpers:
    """Direct unit checks on the bbox helpers in ``routers.data``."""

    def test_has_full_bbox(self):
        """True only when all four bounds are supplied; a ``None`` in any
        single position makes it false."""
        from routers.data import _has_full_bbox

        assert _has_full_bbox(1, 2, 3, 4)
        # Blank out each position in turn: s, w, n, e.
        for missing in range(4):
            bounds = [1, 2, 3, 4]
            bounds[missing] = None
            assert not _has_full_bbox(*bounds)

    def test_bbox_etag_suffix_quantizes(self):
        """Two boxes less than a degree apart produce the same suffix,
        and that suffix begins with ``|bbox=``."""
        from routers.data import _bbox_etag_suffix

        first = _bbox_etag_suffix(30.1, -79.6, 39.9, -70.1)
        second = _bbox_etag_suffix(30.4, -79.2, 39.4, -70.8)

        assert first == second, "Sub-degree pan must collapse to the same ETag suffix"
        assert first.startswith("|bbox=")

    def test_bbox_etag_suffix_world_collapses(self):
        """Whole-world bounds give an empty suffix (shares the global ETag)."""
        from routers.data import _bbox_etag_suffix

        suffix = _bbox_etag_suffix(-90, -180, 90, 180)
        assert suffix == ""

    def test_bbox_etag_suffix_partial_is_empty(self):
        """A missing bound gives an empty suffix."""
        from routers.data import _bbox_etag_suffix

        suffix = _bbox_etag_suffix(None, -180, 90, 180)
        assert suffix == ""

    def test_apply_bbox_preserves_non_list_values(self):
        """String and dict values in the payload pass through
        ``_apply_bbox_to_payload`` unchanged."""
        from routers.data import _apply_bbox_to_payload, _FAST_BBOX_HEAVY_KEYS

        payload = {
            "commercial_flights": [{"lat": 35, "lng": -75, "id": "x"}],
            "satellite_source": "tle",  # not a list, must pass through
            "sigint_totals": {"total": 1},  # dict — must pass through
        }
        # Hand the helper a shallow copy, keeping `payload` itself intact.
        working_copy = dict(payload)

        out = _apply_bbox_to_payload(working_copy, _FAST_BBOX_HEAVY_KEYS, 30, -80, 40, -70)

        assert out["satellite_source"] == "tle"
        assert out["sigint_totals"] == {"total": 1}
|
||||
@@ -0,0 +1,208 @@
|
||||
"""Issue #239 (tg12): backend registers duplicate API routes in both
|
||||
``main.py`` and router modules, so request behavior depends on the
|
||||
order ``FastAPI`` happened to register them.
|
||||
|
||||
This test is the **CI guard** that locks in the invariant going forward.
|
||||
It does NOT delete any existing duplicates — those are tolerated via an
|
||||
explicit baseline file. What it DOES block is *new* duplicates appearing
|
||||
later, which is what the audit was actually asking for: a way to stop
|
||||
the drift before it gets worse.
|
||||
|
||||
Findings (empirically verified, see PR #286 description):
|
||||
|
||||
- ``main.app`` calls ``include_router(...)`` for every router at module
|
||||
import time around line 3316.
|
||||
- Every ``@app.get/post/put/...`` decorator inside ``main.py`` runs
|
||||
*after* those include_router calls, so the router handler is the one
|
||||
that actually serves requests. The duplicates in ``main.py`` are
|
||||
dead code at the route-resolution layer.
|
||||
- Behavior today is deterministic (router wins), but if someone later
|
||||
adds a NEW route only in ``main.py``, or edits one copy of an
|
||||
existing pair without the other, drift starts.
|
||||
|
||||
How this test works:
|
||||
|
||||
- Walks ``main.app.routes`` and records every ``(method, path)`` that
|
||||
appears more than once, along with which modules registered each
|
||||
copy.
|
||||
- Compares that set against the baseline in
|
||||
``backend/tests/data/duplicate_routes_baseline.json``.
|
||||
- **Fails** if any duplicate appears that is NOT in the baseline
|
||||
(or if the registering modules for an existing duplicate change).
|
||||
- **Stays green** when duplicates are *removed* by genuinely deduping
|
||||
the code. (The baseline is a ceiling, not a floor.)
|
||||
|
||||
To extend in the future:
|
||||
|
||||
- If you actually dedupe a route, leave the baseline alone — the test
|
||||
still passes. Subsequent regenerations of the baseline (``python -m
|
||||
scripts.regen_duplicate_routes_baseline`` or the snippet in this
|
||||
test's docstring) will shrink it.
|
||||
- If you legitimately need a new duplicate (you probably do not), add
|
||||
it to the baseline AND explain why in the PR description so reviewers
|
||||
can push back.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Location of the tolerated-duplicates baseline: a ``data`` directory that
# sits next to this test module.
BASELINE_PATH = Path(__file__).parent.joinpath(
    "data", "duplicate_routes_baseline.json"
)
|
||||
|
||||
|
||||
def _current_duplicates() -> dict[str, list[str]]:
    """Collect every ``'METHOD /path'`` that ``main.app.routes`` contains
    more than once.

    Returns a mapping from that key to the sorted list of
    ``endpoint.__module__`` values, one entry per registration. Routes
    lacking a path, methods, or endpoint are skipped, as are the HEAD and
    OPTIONS methods.
    """
    import main

    ignored_methods = {"HEAD", "OPTIONS"}
    registrations: dict[str, list[str]] = {}

    for route in main.app.routes:
        path = getattr(route, "path", None)
        methods = getattr(route, "methods", None)
        endpoint = getattr(route, "endpoint", None)
        if path and methods and endpoint is not None:
            for method in methods:
                if method not in ignored_methods:
                    registrations.setdefault(f"{method} {path}", []).append(
                        endpoint.__module__
                    )

    duplicates: dict[str, list[str]] = {}
    for key, modules in registrations.items():
        if len(modules) > 1:
            duplicates[key] = sorted(modules)
    return duplicates
|
||||
|
||||
|
||||
def _load_baseline() -> dict[str, list[str]]:
    """Read the tolerated duplicates from ``BASELINE_PATH``.

    Returns the ``"duplicates"`` mapping from the JSON file with each
    module list sorted. Returns an empty dict when the file does not
    exist or when ``"duplicates"`` is not a mapping.
    """
    if BASELINE_PATH.exists():
        raw = json.loads(BASELINE_PATH.read_text(encoding="utf-8"))
        dups = raw.get("duplicates", {})
        if isinstance(dups, dict):
            return {route_key: sorted(modules) for route_key, modules in dups.items()}
    return {}
|
||||
|
||||
|
||||
def test_no_new_duplicate_route_registrations():
    """Fail when a duplicated (method, path) is absent from the baseline,
    or when the modules registering a baselined duplicate have changed.

    This is the primary CI guard: a PR that adds a new ``@app.get`` for a
    route a router module already serves is rejected here with an
    actionable message.
    """
    current = _current_duplicates()
    baseline = _load_baseline()

    new_or_changed = []
    for key in sorted(current):
        modules = current[key]
        if key not in baseline:
            new_or_changed.append(
                f" + {key} (NEW duplicate; registered in: {modules})"
            )
        elif modules != baseline[key]:
            new_or_changed.append(
                f" ~ {key} "
                f"(modules changed: was {baseline[key]}, now {modules})"
            )

    if not new_or_changed:
        return

    # Show the baseline path relative to the directory two levels above
    # the baseline file's own folder.
    baseline_display = BASELINE_PATH.relative_to(BASELINE_PATH.parent.parent.parent)
    explanation = (
        "Issue #239 CI guard: detected duplicate route registrations "
        "that are NOT in the tolerated baseline.\n"
        "\n"
        "If you added a new @app.get/post/... in main.py for a path "
        "that a router module already serves, please move the handler "
        "into the router and delete the main.py copy — the router "
        "version wins on request routing anyway, so the main.py copy "
        "is dead code that just creates drift risk.\n"
        "\n"
        "Offending entries:\n"
    )
    offenders = "\n".join(new_or_changed)
    footer = "\n\n" "Baseline lives at " f"{baseline_display}."
    pytest.fail(explanation + offenders + footer)
|
||||
|
||||
|
||||
def test_baseline_only_lists_real_duplicates():
    """Warn when the baseline names routes that are no longer duplicated.

    A stale entry means someone deduped a route by hand; shrinking the
    baseline lets a later re-introduction of that duplicate get caught.
    This check is informational only — it emits a warning and never fails
    the build, since the other test covers the real safety concern.
    """
    current = _current_duplicates()
    baseline = _load_baseline()

    stale = sorted(set(baseline) - set(current))
    if not stale:
        return

    # A warning keeps this as friendly housekeeping rather than a CI
    # blocker.
    import warnings

    preview = stale[:5]
    overflow_marker = "..." if len(stale) > 5 else ""
    warnings.warn(
        f"duplicate_routes_baseline.json contains {len(stale)} entry/entries "
        "no longer present in app.routes — consider regenerating the baseline. "
        f"Stale: {preview}{overflow_marker}",
        stacklevel=2,
    )
|
||||
|
||||
|
||||
def test_router_handler_is_the_one_that_serves():
    """For every baselined duplicate, the first-registered handler must
    not live in ``main``.

    Pins the claim from PR #286 that router handlers are registered ahead
    of the ``main.py`` copies. If registration order flips (for example
    because the include_router calls move to the bottom of main.py), this
    test fails and lists the affected routes.
    """
    import main

    skipped_methods = {"HEAD", "OPTIONS"}

    # Module of the first registration seen for each 'METHOD /path'.
    first_seen: dict[str, str] = {}
    for route in main.app.routes:
        path = getattr(route, "path", None)
        methods = getattr(route, "methods", None)
        endpoint = getattr(route, "endpoint", None)
        if path and methods and endpoint is not None:
            for method in methods:
                if method in skipped_methods:
                    continue
                key = f"{method} {path}"
                if key not in first_seen:
                    first_seen[key] = endpoint.__module__

    main_winning = sorted(
        key for key in first_seen if first_seen[key] == "main"
    )

    # Tolerated duplicates are expected to be router-first. A baselined
    # route whose first registration is in main means the router copy is
    # the shadowed one, contradicting the "router version is canonical"
    # assumption from audit rounds 5 and 6.
    baseline = _load_baseline()
    main_first_in_baseline = [k for k in main_winning if k in baseline]
    if not main_first_in_baseline:
        return

    affected = "\n".join(f" {k}" for k in main_first_in_baseline)
    headline = (
        "Issue #239 invariant broken: for at least one duplicated "
        "(method, path), main.py is now registered FIRST and is "
        "serving requests instead of the router copy. Audit rounds "
        "5 and 6 assumed the router handler wins.\n"
        "\n"
        "Affected entries:\n"
    )
    remedy = (
        "\n\n"
        "Most likely cause: someone moved app.include_router(...) "
        "calls in main.py to after the @app.get decorators. Move "
        "them back to before the @app routes (currently around "
        "line 3316)."
    )
    pytest.fail(headline + affected + remedy)
|
||||
@@ -0,0 +1,334 @@
|
||||
"""Issue #302 (tg12): OpenClaw connect-info HMAC secret disclosure.
|
||||
|
||||
Before this change, ``GET /api/ai/connect-info?reveal=true`` returned the
|
||||
full HMAC secret in the response body on every modal open AND the same
|
||||
GET endpoint auto-bootstrapped (generated + persisted) the secret on a
|
||||
mere read. Even gated to ``require_local_operator``, that put the full
|
||||
secret into:
|
||||
|
||||
* browser visit history
|
||||
* dev-tools network panel
|
||||
* browser disk cache
|
||||
* HAR exports
|
||||
* screen captures / shoulder-surfing
|
||||
|
||||
Every single time the OpenClaw Connect modal opened.
|
||||
|
||||
After this change:
|
||||
|
||||
GET /api/ai/connect-info — always returns the MASKED
|
||||
fingerprint. No ?reveal param.
|
||||
No side effects (auto-bootstrap
|
||||
gone).
|
||||
POST /api/ai/connect-info/bootstrap — mints+persists the secret if
|
||||
missing. Idempotent. Never
|
||||
returns the full secret.
|
||||
POST /api/ai/connect-info/reveal — returns the full secret with
|
||||
strict Cache-Control: no-store
|
||||
headers. POST so the body
|
||||
doesn't land in URL history.
|
||||
POST /api/ai/connect-info/regenerate — keeps the one-time-disclosure
|
||||
for the new secret (regen IS a
|
||||
deliberate destructive action).
|
||||
Same no-store headers added.
|
||||
|
||||
These tests pin every property.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Loopback test client. ``require_local_operator`` resolves true for
|
||||
# request.client.host == "127.0.0.1"; FastAPI's TestClient sets it to
|
||||
# "testclient" which isn't on the allowlist. Use raw ASGITransport.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _PeerClient:
    """Synchronous test client that talks to the ASGI app from a chosen peer IP.

    Wraps ``httpx.AsyncClient`` + ``ASGITransport`` and drives each request to
    completion on a private event loop, so tests can stay plain (non-async)
    functions. The ``client=(peer_ip, port)`` tuple passed to the transport is
    what the application sees as ``request.client``.

    Args:
        app: the ASGI application under test.
        peer_ip: the address the app should see as the connecting peer.
    """

    def __init__(self, app, peer_ip: str = "127.0.0.1"):
        self._loop = asyncio.new_event_loop()
        self._transport = ASGITransport(app=app, client=(peer_ip, 12345))
        self._base = f"http://{peer_ip}:8000"

    def _do(self, method: str, url: str, **kw):
        """Issue one request and block until the response is available."""

        async def go():
            async with AsyncClient(transport=self._transport, base_url=self._base) as ac:
                return await ac.request(method, url, **kw)

        return self._loop.run_until_complete(go())

    def get(self, url, **kw):
        return self._do("GET", url, **kw)

    def post(self, url, **kw):
        return self._do("POST", url, **kw)

    def close(self):
        """Release the private event loop."""
        self._loop.close()


@pytest.fixture
def loopback():
    """Client whose requests appear to originate from 127.0.0.1."""
    # Imported lazily so collecting this module does not import the app.
    from main import app

    c = _PeerClient(app, "127.0.0.1")
    try:
        yield c
    finally:
        c.close()


@pytest.fixture
def remote():
    """Client whose requests appear to originate from the non-local 1.2.3.4."""
    from main import app

    c = _PeerClient(app, "1.2.3.4")
    try:
        yield c
    finally:
        c.close()
|
||||
|
||||
|
||||
@pytest.fixture
def stub_env(monkeypatch):
    """Back the connect-info settings with an in-memory dict for one test.

    What this fixture does:

    * replaces ``services.config.get_settings`` with a function returning a
      stub object whose ``OPENCLAW_HMAC_SECRET`` / ``OPENCLAW_ACCESS_TIER``
      properties read from a per-test dict (defaults: ``""`` and
      ``"restricted"``);
    * replaces ``routers.ai_intel._write_env_value`` with a function that
      stores the value in that same dict instead of writing anywhere else.

    The dict is yielded so a test can pre-seed a secret or assert that a
    write happened.
    """
    import routers.ai_intel as ai_intel
    import services.config as config

    state: dict[str, str] = {}

    class _StubSettings:
        # Properties (not plain attributes) so every read reflects the
        # current contents of ``state``.
        OPENCLAW_HMAC_SECRET = property(
            lambda self: state.get("OPENCLAW_HMAC_SECRET", "")
        )
        OPENCLAW_ACCESS_TIER = property(
            lambda self: state.get("OPENCLAW_ACCESS_TIER", "restricted")
        )

    stub_settings = _StubSettings()

    def _fake_get_settings():
        return stub_settings

    def _noop_cache_clear():
        return None

    # The route code calls ``get_settings.cache_clear()`` after writing the
    # env, so the replacement needs a ``cache_clear`` attribute as well.
    _fake_get_settings.cache_clear = _noop_cache_clear  # type: ignore[attr-defined]

    monkeypatch.setattr(config, "get_settings", _fake_get_settings)

    def _record_env_write(key: str, value: str) -> None:
        state[key] = value

    monkeypatch.setattr(ai_intel, "_write_env_value", _record_env_write)

    yield state
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/ai/connect-info — always masked, no auto-bootstrap
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetConnectInfoMasking:
    """GET /api/ai/connect-info: masked fingerprint only, no side effects."""

    def test_returns_masked_when_secret_set(self, loopback, stub_env):
        full_secret = f"abcdef{'0' * 38}wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = full_secret

        resp = loopback.get("/api/ai/connect-info")
        assert resp.status_code == 200
        payload = resp.json()
        # The raw response text is searched so the secret cannot hide in
        # any field, nested or otherwise.
        assert full_secret not in resp.text, (
            "GET /api/ai/connect-info MUST NOT include the full HMAC "
            "secret. Response body contained the secret value."
        )
        assert payload["hmac_secret_set"] is True
        masked = payload["masked_hmac_secret"]
        assert masked.startswith("abcdef")
        assert masked.endswith("wxyz")
        assert "•" in masked
        # The pre-fix ``hmac_secret`` key must be absent altogether.
        assert "hmac_secret" not in payload

    def test_no_auto_bootstrap_when_secret_missing(self, loopback, stub_env):
        """A GET with no secret configured must not mint one; minting
        requires an explicit POST to the bootstrap endpoint."""
        resp = loopback.get("/api/ai/connect-info")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["hmac_secret_set"] is False
        assert payload["masked_hmac_secret"] == ""
        # The response advertises that nothing is auto-generated.
        assert payload["bootstrap_behavior"]["auto_generates_when_missing"] is False
        # The stubbed env writer was never invoked for the secret.
        assert "OPENCLAW_HMAC_SECRET" not in stub_env

    def test_no_reveal_query_param(self, loopback, stub_env):
        """``?reveal=true`` on GET must not bring back the full secret."""
        full_secret = f"abcdef{'0' * 38}wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = full_secret

        resp = loopback.get("/api/ai/connect-info?reveal=true")
        assert resp.status_code == 200
        assert full_secret not in resp.text, (
            "?reveal=true must be a no-op on GET — the full secret "
            "MUST NOT come back in the response body."
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /api/ai/connect-info/bootstrap
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBootstrap:
    """POST /api/ai/connect-info/bootstrap: mint if missing, never disclose."""

    def test_mints_when_missing(self, loopback, stub_env):
        resp = loopback.post("/api/ai/connect-info/bootstrap")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["generated"] is True
        assert payload["hmac_secret_set"] is True
        # Either the key is absent or its value is falsy.
        assert "hmac_secret" not in payload or not payload.get("hmac_secret")
        assert "•" in payload["masked_hmac_secret"]
        # The stubbed env writer stored a non-empty secret.
        assert stub_env.get("OPENCLAW_HMAC_SECRET")
        # The stored value does not appear anywhere in the raw response.
        minted = stub_env["OPENCLAW_HMAC_SECRET"]
        assert minted not in resp.text

    def test_idempotent_when_already_set(self, loopback, stub_env):
        seeded = f"abcdef{'0' * 38}wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = seeded

        resp = loopback.post("/api/ai/connect-info/bootstrap")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["generated"] is False
        assert payload["hmac_secret_set"] is True
        # The seeded secret was left untouched.
        assert stub_env["OPENCLAW_HMAC_SECRET"] == seeded
        # The seeded secret is not echoed back.
        assert seeded not in resp.text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /api/ai/connect-info/reveal
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestReveal:
    """POST /api/ai/connect-info/reveal: full secret, strictly uncacheable."""

    def test_returns_full_secret_when_set(self, loopback, stub_env):
        secret = "abcdef" + "0" * 38 + "wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = secret

        r = loopback.post("/api/ai/connect-info/reveal")
        assert r.status_code == 200
        body = r.json()
        assert body["ok"] is True
        assert body["hmac_secret"] == secret

    def test_strict_cache_control_headers(self, loopback, stub_env):
        """The reveal response must carry headers that tell every cache
        layer not to persist the secret."""
        secret = "abcdef" + "0" * 38 + "wxyz"
        stub_env["OPENCLAW_HMAC_SECRET"] = secret

        r = loopback.post("/api/ai/connect-info/reveal")
        # Make sure the headers being inspected belong to the success
        # response rather than to an error page.
        assert r.status_code == 200
        cc = r.headers.get("cache-control", "")
        assert "no-store" in cc, (
            f"reveal MUST set Cache-Control: no-store — got {cc!r}"
        )
        assert "no-cache" in cc
        # Pragma + Expires as well for HTTP/1.0 caches.
        assert r.headers.get("pragma", "").lower() == "no-cache"
        assert r.headers.get("expires") == "0"

    def test_404_when_no_secret_configured(self, loopback, stub_env):
        r = loopback.post("/api/ai/connect-info/reveal")
        assert r.status_code == 404
        # The error should point the caller at the bootstrap endpoint.
        # ``str()`` keeps the check meaningful if ``detail`` is ever a
        # structured value instead of a plain string.
        detail = r.json().get("detail", "")
        assert "bootstrap" in str(detail).lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /api/ai/connect-info/regenerate — still returns the new secret
|
||||
# inline (deliberate destructive action), but with no-store headers.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRegenerate:
    """POST /api/ai/connect-info/regenerate: new secret inline, uncacheable."""

    def test_returns_new_secret_with_no_store_headers(self, loopback, stub_env):
        # Seed a known secret so the test can prove the value changed.
        previous = f"oldold{'0' * 38}1234"
        stub_env["OPENCLAW_HMAC_SECRET"] = previous

        resp = loopback.post("/api/ai/connect-info/regenerate")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload["ok"] is True
        assert payload["hmac_secret"]
        assert payload["hmac_secret"] != previous
        # The freshly minted secret is in the body, so the response must
        # be marked uncacheable.
        cache_control = resp.headers.get("cache-control", "")
        assert "no-store" in cache_control and "no-cache" in cache_control
        assert resp.headers.get("pragma", "").lower() == "no-cache"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth-gate regression — every endpoint still rejects anonymous remote
|
||||
# callers. This is the property we already enforce for the rest of the
|
||||
# operator-only surface; adding the three new endpoints to the audit
|
||||
# coverage prevents a future refactor from dropping the dependency.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAnonymousRejection:
    """Every connect-info endpoint answers 403 to the non-local ``remote`` client."""

    @pytest.mark.parametrize(
        "method,path,body",
        [
            ("get", "/api/ai/connect-info", None),
            ("post", "/api/ai/connect-info/bootstrap", None),
            ("post", "/api/ai/connect-info/reveal", None),
            ("post", "/api/ai/connect-info/regenerate", None),
        ],
    )
    def test_remote_rejected(self, remote, method, path, body):
        send = getattr(remote, method)
        # Only attach a JSON payload when the case supplies one.
        if body is not None:
            resp = send(path, json=body)
        else:
            resp = send(path)
        assert resp.status_code == 403, (
            f"{method.upper()} {path} must reject anonymous remote callers; "
            f"got {resp.status_code}"
        )
|
||||
@@ -0,0 +1,277 @@
|
||||
"""Round 7a: per-install operator handle threads through every outbound
|
||||
third-party API call.
|
||||
|
||||
Background: before this change every Shadowbroker install identified
|
||||
itself to Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz, Broadcastify,
|
||||
weather.gov, NUFORC, etc. with a single project-wide ``Shadowbroker``
|
||||
User-Agent. From the upstream's perspective, every install in the world
|
||||
looked like one giant scraper. If one install misbehaved, the upstream's
|
||||
only recourse was to block ``Shadowbroker`` as a whole, taking out every
|
||||
other install.
|
||||
|
||||
Fix: each install gets a stable pseudonymous handle (auto-generated like
|
||||
``operator-7f3a92`` or operator-overridden via ``OPERATOR_HANDLE``) that
|
||||
gets embedded in the User-Agent for every outbound call. Upstreams can
|
||||
now rate-limit / contact the specific operator instead of the project.
|
||||
|
||||
These tests pin:
|
||||
|
||||
1. The handle is auto-generated on first call if no override exists.
|
||||
2. The handle survives process restart (persisted to disk).
|
||||
3. ``OPERATOR_HANDLE`` env var override wins over the auto-gen handle.
|
||||
4. The handle is sanitized (whitespace, special chars, length).
|
||||
5. Every previously-MONSTER-UA call site now sends the per-operator UA.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_handle(tmp_path, monkeypatch):
    """Give each test a private handle file, a clean env, and empty caches.

    Yields the ``services.network_utils`` module with its
    ``_OPERATOR_HANDLE_FILE`` pointed at a file under ``tmp_path``. The
    handle cache and the settings cache are cleared both before and after
    the test.
    """
    from services import network_utils

    monkeypatch.setattr(
        network_utils,
        "_OPERATOR_HANDLE_FILE",
        tmp_path / "operator_handle.json",
    )
    network_utils._reset_operator_handle_cache_for_tests()
    monkeypatch.delenv("OPERATOR_HANDLE", raising=False)

    # Clear the settings cache so the OPERATOR_HANDLE removal is observed.
    from services.config import get_settings

    get_settings.cache_clear()

    yield network_utils

    # Leave nothing cached for whichever test runs next.
    network_utils._reset_operator_handle_cache_for_tests()
    get_settings.cache_clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core handle generation / persistence / override
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestOperatorHandleGeneration:
    """Generation, persistence, env override and sanitization of the handle."""

    def test_auto_generates_on_first_call(self, isolated_handle):
        import re

        h = isolated_handle.get_operator_handle()
        # Prefix is "operator-" (deliberately neutral; "shadow-" looked
        # exactly like a pattern abuse-detection systems would auto-block).
        assert h.startswith("operator-")
        assert len(h) == len("operator-") + 6
        # Suffix must be exactly six hex digits. ``int(suffix, 16)`` is too
        # lenient for this: it also accepts forms such as "0x1f2a",
        # "+abcde" and "a_bcde".
        suffix = h.split("-", 1)[1]
        assert re.fullmatch(r"[0-9a-fA-F]{6}", suffix), (
            f"handle suffix is not 6 hex digits: {suffix!r}"
        )

    def test_persists_to_disk_so_handle_survives_restart(self, isolated_handle):
        first = isolated_handle.get_operator_handle()
        # Simulate process restart: clear in-memory cache, then ask again.
        isolated_handle._reset_operator_handle_cache_for_tests()
        second = isolated_handle.get_operator_handle()
        assert second == first
        # The file actually exists and records the same handle.
        assert isolated_handle._OPERATOR_HANDLE_FILE.exists()
        body = json.loads(isolated_handle._OPERATOR_HANDLE_FILE.read_text())
        assert body["handle"] == first

    def test_env_override_wins_over_auto_generated(self, isolated_handle, monkeypatch):
        # First call without env var auto-generates.
        auto = isolated_handle.get_operator_handle()
        assert auto.startswith("operator-")
        # Setting the env var changes the resolved handle once both caches
        # are cleared.
        monkeypatch.setenv("OPERATOR_HANDLE", "alice")
        from services.config import get_settings

        get_settings.cache_clear()
        isolated_handle._reset_operator_handle_cache_for_tests()
        assert isolated_handle.get_operator_handle() == "alice"

    def test_handle_is_sanitized(self, isolated_handle, monkeypatch):
        # Sanitization tests run against the normalizer directly so the
        # empty-string case can be asserted independently of the env-var
        # resolution path (where empty means "use auto-gen", not "use
        # 'anonymous'").
        from services.network_utils import _normalize_handle

        cases = [
            ("Alice Smith", "alice-smith"),
            ("user@example.com", "user-example-com"),
            (" whitespace ", "whitespace"),
            ("UPPER-CASE", "upper-case"),
            ("multiple---dashes", "multiple-dashes"),
            ("/leading/slash", "leading-slash"),
            ("trailing-", "trailing"),
            ("", "anonymous"),
        ]
        for raw, expected in cases:
            got = _normalize_handle(raw)
            assert got == expected, f"{raw!r} -> {got!r}, expected {expected!r}"
            assert got == got.lower()
            for ch in got:
                assert ch.isalnum() or ch in "-_", f"unsafe char {ch!r} in {got!r}"
            assert "--" not in got

    def test_handle_is_length_capped(self, isolated_handle, monkeypatch):
        from services.config import get_settings

        monkeypatch.setenv("OPERATOR_HANDLE", "x" * 1000)
        get_settings.cache_clear()
        isolated_handle._reset_operator_handle_cache_for_tests()
        got = isolated_handle.get_operator_handle()
        assert len(got) <= 48
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# outbound_user_agent() builds the right header
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestOutboundUserAgentString:
    """Shape of the string returned by ``outbound_user_agent()``."""

    def test_includes_operator_handle(self, isolated_handle):
        user_agent = isolated_handle.outbound_user_agent()
        current_handle = isolated_handle.get_operator_handle()
        assert f"operator: {current_handle}" in user_agent

    def test_includes_purpose_when_provided(self, isolated_handle):
        user_agent = isolated_handle.outbound_user_agent("wikipedia")
        assert "purpose: wikipedia" in user_agent

    def test_includes_contact_path(self, isolated_handle):
        # Compared case-insensitively: only the presence of the two
        # substrings is checked.
        lowered = isolated_handle.outbound_user_agent().lower()
        assert "github.com" in lowered
        assert "shadowbroker" in lowered

    def test_version_prefix(self, isolated_handle):
        user_agent = isolated_handle.outbound_user_agent()
        assert user_agent.startswith("Shadowbroker/")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Wikipedia / Wikidata — retroactive fix for PR #284's MONSTER pattern
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWikimediaCallsAreNowPerOperator:
    """Wikidata / Wikipedia fetches must send the per-operator User-Agent.

    Both tests swap ``region_dossier.fetch_with_curl`` for a recorder that
    captures the ``headers`` keyword argument and returns a canned response.
    """

    def test_wikidata_call_uses_per_operator_ua(self, isolated_handle, monkeypatch):
        from services import region_dossier

        seen_headers = []

        class _EmptyBindings:
            status_code = 200

            def json(self):
                return {"results": {"bindings": []}}

        def _recording_fetch(url, **kwargs):
            seen_headers.append(kwargs.get("headers") or {})
            return _EmptyBindings()

        monkeypatch.setattr(region_dossier, "fetch_with_curl", _recording_fetch)
        region_dossier._fetch_wikidata_leader("Testlandia")

        assert seen_headers, "Wikidata fetcher was not called"
        headers = seen_headers[0]
        assert "User-Agent" in headers
        assert "Api-User-Agent" in headers
        handle = isolated_handle.get_operator_handle()
        for header_name in ("User-Agent", "Api-User-Agent"):
            header_value = headers[header_name]
            assert f"operator: {handle}" in header_value, (
                f"Wikimedia UA must include the per-operator handle; got {header_value!r}"
            )

    def test_wikipedia_summary_uses_per_operator_ua(self, isolated_handle, monkeypatch):
        from services import region_dossier

        calls = []

        class _SummaryResp:
            status_code = 200

            def json(self):
                return {
                    "type": "standard",
                    "description": "x",
                    "extract": "y",
                    "thumbnail": {"source": ""},
                }

        def _recording_fetch(url, **kwargs):
            calls.append((url, kwargs.get("headers") or {}))
            return _SummaryResp()

        monkeypatch.setattr(region_dossier, "fetch_with_curl", _recording_fetch)
        region_dossier._fetch_local_wiki_summary("Paris", "France")

        # Only requests that went to a wikipedia.org URL are of interest.
        wikipedia_hits = [(url, hdrs) for url, hdrs in calls if "wikipedia.org" in url]
        assert wikipedia_hits, "Wikipedia summary fetch was not called"
        for _url, hdrs in wikipedia_hits:
            handle = isolated_handle.get_operator_handle()
            assert f"operator: {handle}" in hdrs.get("User-Agent", "")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generic round-7a regression guard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNoMonsterUserAgentRemains:
    """Scan the backend source tree for the OLD aggregate User-Agent
    literals and fail if one is used as a header value again.

    The strings may still appear in:
    - ``#`` comment lines (audit prose, change-log notes)
    - anything under ``tests/``
    - non-Python files such as .env.example (only ``*.py`` is scanned)

    A line is reported only when it contains a banned literal AND a quoted
    ``User-Agent`` header key on that same line.
    """

    BANNED_LITERALS = (
        "ShadowBroker-OSINT/1.0",
        "ShadowBroker-OSINT/0.9",
        "ShadowBroker-FeedIngester/1.0",
        "ShadowBroker/0.9.79 local Shodan connector",
        "ShadowBroker/0.9.79 Finnhub connector",
        "Mozilla/5.0 (compatible; ShadowBroker CCTV proxy)",
    )

    def test_no_banned_aggregate_user_agent_strings(self):
        from pathlib import Path

        backend_root = Path(__file__).parent.parent
        offenders = []
        for py in backend_root.rglob("*.py"):
            # Test files are allowed to mention the old literals.
            rel = py.relative_to(backend_root).as_posix()
            if rel.startswith("tests/"):
                continue
            text = py.read_text(encoding="utf-8", errors="ignore")
            # Cheap whole-file pre-filter before walking individual lines.
            if not any(banned in text for banned in self.BANNED_LITERALS):
                continue
            for i, line in enumerate(text.splitlines(), 1):
                stripped = line.strip()
                # Comment lines are allowed.
                if stripped.startswith("#"):
                    continue
                if not any(banned in line for banned in self.BANNED_LITERALS):
                    continue
                # Heuristic: the literal must share its line with a quoted
                # User-Agent key. Header names are case-insensitive in
                # HTTP, so "user-agent" counts exactly like "User-Agent".
                lowered = line.lower()
                if '"user-agent"' in lowered or "'user-agent'" in lowered:
                    # One entry per offending line, however many literals
                    # it happens to contain.
                    offenders.append(f"{rel}:{i}: {stripped[:120]}")
        assert not offenders, (
            "Round 7a regression: the following lines reintroduced an "
            "aggregate Shadowbroker User-Agent. Use "
            "outbound_user_agent('purpose') instead so the per-install "
            "operator handle is embedded.\n"
            + "\n".join(offenders)
        )
|
||||
@@ -0,0 +1,186 @@
|
||||
"""Tests for issue #287: proxy-aware slowapi key function.
|
||||
|
||||
Contract:
|
||||
* Untrusted peer → key is the peer IP (matches old get_remote_address).
|
||||
* Trusted frontend peer with X-Forwarded-For → key is first XFF entry.
|
||||
* Trusted frontend peer without X-Forwarded-For → key is the peer IP
|
||||
(fail-soft: no behaviour change vs. before #287).
|
||||
* XFF from an untrusted peer is IGNORED — there must be no way to
|
||||
spoof another operator's bucket by sending XFF directly.
|
||||
* The first XFF entry is used (not the last — that's the trusted
|
||||
proxy talking to the backend, not the actual operator).
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class _FakeClient:
|
||||
def __init__(self, host: str):
|
||||
self.host = host
|
||||
|
||||
|
||||
class _FakeRequest:
|
||||
"""Minimal slowapi-compatible request shim — has ``client`` and
|
||||
``headers`` attributes, which is all the key_func touches."""
|
||||
|
||||
def __init__(self, client_host: str, headers: dict | None = None):
|
||||
self.client = _FakeClient(client_host) if client_host is not None else None
|
||||
self.headers = dict(headers or {})
|
||||
# slowapi's get_remote_address also tries request.client; we
|
||||
# exercise both branches via the same shim.
|
||||
|
||||
|
||||
# ───────────────────────── untrusted peers ──────────────────────────────
|
||||
|
||||
|
||||
class TestUntrustedPeer:
    """Peers outside the trusted set are always keyed on their own IP."""

    @staticmethod
    def _set_trusted(monkeypatch, *ips):
        """Make the trusted-bridge resolver return exactly ``ips``."""
        trusted = frozenset(ips)
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: trusted)

    def test_direct_loopback_uses_client_host(self, monkeypatch):
        """Direct hit from 127.0.0.1 with no XFF keys on the peer IP."""
        from limiter import shadowbroker_rate_limit_key

        # Empty trusted set: nothing qualifies as a trusted frontend.
        self._set_trusted(monkeypatch)
        request = _FakeRequest("127.0.0.1")
        assert shadowbroker_rate_limit_key(request) == "127.0.0.1"

    def test_xff_from_untrusted_peer_is_ignored(self, monkeypatch):
        """An X-Forwarded-For sent by an untrusted caller is not used as
        the key, so it cannot select another operator's bucket."""
        from limiter import shadowbroker_rate_limit_key

        # The trusted set deliberately does NOT include 1.2.3.4.
        self._set_trusted(monkeypatch, "172.20.0.5")
        request = _FakeRequest("1.2.3.4", {"X-Forwarded-For": "9.9.9.9"})
        # The key is the peer IP, not the forwarded 9.9.9.9.
        assert shadowbroker_rate_limit_key(request) == "1.2.3.4"

    def test_unknown_host_with_xff_uses_peer_host(self, monkeypatch):
        from limiter import shadowbroker_rate_limit_key

        self._set_trusted(monkeypatch)
        request = _FakeRequest("10.0.0.5", {"X-Forwarded-For": "1.1.1.1"})
        assert shadowbroker_rate_limit_key(request) == "10.0.0.5"
|
||||
|
||||
|
||||
# ───────────────────────── trusted frontend peers ───────────────────────
|
||||
|
||||
|
||||
class TestTrustedFrontendPeer:
    """Requests arriving via the trusted frontend are keyed on the first
    usable X-Forwarded-For entry, falling back to the peer IP."""

    def test_trusted_peer_with_xff_uses_first_xff_entry(self, monkeypatch):
        """When the immediate peer is the trusted frontend container and
        XFF carries the operator's chain, we key on the operator."""
        from limiter import shadowbroker_rate_limit_key
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "203.0.113.7"})
        assert shadowbroker_rate_limit_key(req) == "203.0.113.7"

    def test_first_xff_entry_picked_in_chain(self, monkeypatch):
        """`client, proxy1, proxy2` → we pick the client, not the proxies.
        Picking the last entry would mean every operator behind the same
        upstream gets bucketed together, which is the bug we're fixing."""
        from limiter import shadowbroker_rate_limit_key
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest(
            "172.20.0.5",
            {"X-Forwarded-For": "203.0.113.7, 198.51.100.1, 10.0.0.1"},
        )
        assert shadowbroker_rate_limit_key(req) == "203.0.113.7"

    def test_trusted_peer_without_xff_falls_back_to_peer(self, monkeypatch):
        """If the trusted frontend forgot to forward XFF (legacy clients,
        broken deploys), don't crash — bucket on the bridge IP exactly
        like the pre-#287 behaviour."""
        from limiter import shadowbroker_rate_limit_key
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest("172.20.0.5", headers={})
        assert shadowbroker_rate_limit_key(req) == "172.20.0.5"

    def test_trusted_peer_with_empty_xff_falls_back(self, monkeypatch):
        """``X-Forwarded-For: , ,`` → no usable entries → falls back."""
        from limiter import shadowbroker_rate_limit_key
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": " , , "})
        assert shadowbroker_rate_limit_key(req) == "172.20.0.5"

    def test_xff_header_case_insensitive(self, monkeypatch):
        """HTTP header names are case-insensitive. The shim's ``headers``
        is a plain case-sensitive dict, so both spellings are exercised
        explicitly here."""
        from limiter import shadowbroker_rate_limit_key
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset({"172.20.0.5"}))
        for header_name in ("x-forwarded-for", "X-Forwarded-For"):
            req = _FakeRequest("172.20.0.5", {header_name: "203.0.113.7"})
            assert shadowbroker_rate_limit_key(req) == "203.0.113.7", (
                f"XFF lookup failed for header spelling {header_name!r}"
            )
|
||||
|
||||
|
||||
# ───────────────────────── isolation guarantees ─────────────────────────
|
||||
|
||||
|
||||
class TestIsolation:
    """Bucket-isolation properties of the rate-limit key function."""

    _FRONTEND_IP = "172.20.0.5"

    @classmethod
    def _trust_frontend(cls, monkeypatch):
        """Make the trusted-bridge resolver return only the frontend IP."""
        trusted = frozenset({cls._FRONTEND_IP})
        monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: trusted)

    def test_two_operators_behind_same_proxy_get_different_keys(self, monkeypatch):
        """Two operators arriving through the SAME trusted proxy must be
        keyed into DIFFERENT buckets."""
        from limiter import shadowbroker_rate_limit_key

        self._trust_frontend(monkeypatch)
        first_operator = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.1"})
        second_operator = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.2"})
        first_key = shadowbroker_rate_limit_key(first_operator)
        second_key = shadowbroker_rate_limit_key(second_operator)
        assert first_key != second_key
        assert first_key == "10.1.1.1"
        assert second_key == "10.1.1.2"

    def test_no_xff_spoof_from_outside(self, monkeypatch):
        """A caller on an untrusted IP must not land in a victim's bucket
        by sending the victim's address in its own XFF header."""
        from limiter import shadowbroker_rate_limit_key

        # Only the frontend container IP is trusted; the attacker sits on
        # a different address and forges the victim's IP in XFF.
        self._trust_frontend(monkeypatch)
        spoofer = _FakeRequest("203.0.113.66", {"X-Forwarded-For": "10.1.1.1"})
        genuine = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "10.1.1.1"})
        assert shadowbroker_rate_limit_key(spoofer) == "203.0.113.66"
        assert shadowbroker_rate_limit_key(genuine) == "10.1.1.1"
        # The two requests resolve to different keys.
        assert shadowbroker_rate_limit_key(spoofer) != shadowbroker_rate_limit_key(
            genuine
        )

    def test_limiter_object_uses_proxy_aware_key(self):
        """Smoke check: the module exports both a ``limiter`` object and a
        callable key function."""
        from limiter import limiter, shadowbroker_rate_limit_key

        # slowapi's internal attribute holding the key function is not
        # inspected on purpose; only reachability is checked.
        assert callable(shadowbroker_rate_limit_key)
        assert limiter is not None
|
||||
|
||||
|
||||
# ───────────────────────── defensive corners ────────────────────────────
|
||||
|
||||
|
||||
class TestDefensive:
|
||||
def test_no_client_object(self, monkeypatch):
|
||||
"""Some upstream middleware paths (websocket, ASGI lifespan)
|
||||
produce requests with no ``client`` attribute — must not raise."""
|
||||
from limiter import shadowbroker_rate_limit_key
|
||||
monkeypatch.setattr("auth._resolve_trusted_bridge_ips", lambda: frozenset())
|
||||
|
||||
class _NoClient:
|
||||
def __init__(self):
|
||||
self.client = None
|
||||
self.headers = {}
|
||||
|
||||
# slowapi's get_remote_address returns "127.0.0.1" as a default
|
||||
# in this case, so we just ensure no exception escapes.
|
||||
result = shadowbroker_rate_limit_key(_NoClient())
|
||||
assert isinstance(result, str)
|
||||
|
||||
def test_resolver_raises_is_treated_as_untrusted(self, monkeypatch):
|
||||
"""If DNS blows up inside the trusted-bridge resolver, we MUST
|
||||
fall back to peer IP — never accept XFF blindly."""
|
||||
from limiter import shadowbroker_rate_limit_key
|
||||
|
||||
def _explode():
|
||||
raise RuntimeError("DNS down")
|
||||
|
||||
monkeypatch.setattr("auth._resolve_trusted_bridge_ips", _explode)
|
||||
req = _FakeRequest("172.20.0.5", {"X-Forwarded-For": "9.9.9.9"})
|
||||
# XFF must be ignored when we can't confirm peer is trusted.
|
||||
assert shadowbroker_rate_limit_key(req) == "172.20.0.5"
|
||||
@@ -77,15 +77,25 @@ def test_wikipedia_summary_call_passes_wikimedia_request_headers():
|
||||
assert "github.com" in headers["Api-User-Agent"].lower()
|
||||
|
||||
|
||||
def test_wikimedia_headers_constant_is_stable():
|
||||
"""Regression guard: if someone removes the contact path from the
|
||||
Api-User-Agent we want a loud test failure, not a silent ToS drift.
|
||||
"""
|
||||
from services.region_dossier import _WIKIMEDIA_REQUEST_HEADERS
|
||||
def test_wikimedia_headers_helper_is_stable():
|
||||
"""Regression guard: if someone removes the contact path or the
|
||||
per-operator handle from the Wikimedia headers, we want a loud
|
||||
test failure, not a silent ToS drift.
|
||||
|
||||
aua = _WIKIMEDIA_REQUEST_HEADERS.get("Api-User-Agent", "")
|
||||
assert "Shadowbroker" in aua or "ShadowBroker" in aua
|
||||
assert "github.com" in aua.lower()
|
||||
# Must include a path Wikimedia operators can use to contact us
|
||||
# (we use /issues against the public repo).
|
||||
assert "issues" in aua.lower()
|
||||
Round 7a: the original ``_WIKIMEDIA_REQUEST_HEADERS`` constant was
|
||||
replaced with the ``_wikimedia_request_headers()`` function so the
|
||||
per-install operator handle is embedded at call time. This test
|
||||
pins both the project identifier AND the contact path AND the
|
||||
per-operator format.
|
||||
"""
|
||||
from services.region_dossier import _wikimedia_request_headers
|
||||
|
||||
headers = _wikimedia_request_headers()
|
||||
aua = headers.get("Api-User-Agent", "")
|
||||
ua = headers.get("User-Agent", "")
|
||||
for h, label in ((ua, "User-Agent"), (aua, "Api-User-Agent")):
|
||||
assert "Shadowbroker" in h or "ShadowBroker" in h, f"{label} missing project id"
|
||||
assert "github.com" in h.lower(), f"{label} missing contact URL"
|
||||
assert "issues" in h.lower(), f"{label} missing /issues contact path"
|
||||
# Round 7a: must include the per-operator handle.
|
||||
assert "operator:" in h, f"{label} missing per-operator handle: {h!r}"
|
||||
|
||||
@@ -0,0 +1,277 @@
|
||||
"""Issue #298 (tg12): Sentinel credentials must live server-side.
|
||||
|
||||
Before the fix, ``frontend/src/components/SettingsPanel.tsx`` stored
|
||||
``client_id`` and ``client_secret`` in ``localStorage`` /
|
||||
``sessionStorage`` via the privacy storage helper, and the proxy routes
|
||||
in ``backend/routers/tools.py`` REQUIRED those values to come in the
|
||||
request body. Any same-origin script (XSS, malicious extension,
|
||||
dev-tools HAR export) had read access to real third-party Sentinel
|
||||
credentials.
|
||||
|
||||
After the fix:
|
||||
|
||||
* ``SENTINEL_CLIENT_ID`` and ``SENTINEL_CLIENT_SECRET`` are entries
|
||||
in the ``api_settings.API_REGISTRY`` and are persisted via the
|
||||
existing ``/api/settings/api-keys`` flow (admin-gated, .env-backed,
|
||||
never returned to the browser).
|
||||
* The proxy routes prefer request-body values for back-compat but
|
||||
fall back to ``os.environ.get("SENTINEL_CLIENT_ID")`` /
|
||||
``os.environ.get("SENTINEL_CLIENT_SECRET")`` when the body omits
|
||||
them. The dashboard's ``sentinelHub.ts`` no longer sends credentials
|
||||
in the body — every request now hits the env path.
|
||||
* When neither source has a value, the route returns a 400 with a
|
||||
pointer to the API Keys panel rather than a curt "client_id and
|
||||
client_secret required" message.
|
||||
|
||||
These tests cover the resolution order and the registry surface.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper: loopback ASGI client fixture used by the tests below. No module
|
||||
# reload is needed for monkey-patched environment variables to take effect:
|
||||
# the route's os.environ.get lookup is per-request, not at import time, so
|
||||
# each request sees the values the test has just set.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def loopback_client():
|
||||
"""ASGI client with peer IP 127.0.0.1 so the Sentinel routes' (post-#303)
|
||||
``require_local_operator`` gate passes.
|
||||
|
||||
Built without a context manager so the privacy-core lifespan check
|
||||
doesn't run in the test env.
|
||||
"""
|
||||
import asyncio
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
from main import app
|
||||
|
||||
class _Loop:
|
||||
def __init__(self):
|
||||
self._loop = asyncio.new_event_loop()
|
||||
self._transport = ASGITransport(app=app, client=("127.0.0.1", 12345))
|
||||
self._base = "http://127.0.0.1:8000"
|
||||
|
||||
def _do(self, method: str, url: str, **kw):
|
||||
async def go():
|
||||
async with AsyncClient(transport=self._transport, base_url=self._base) as ac:
|
||||
return await ac.request(method, url, **kw)
|
||||
return self._loop.run_until_complete(go())
|
||||
|
||||
def get(self, url, **kw): return self._do("GET", url, **kw)
|
||||
def post(self, url, **kw): return self._do("POST", url, **kw)
|
||||
def put(self, url, **kw): return self._do("PUT", url, **kw)
|
||||
|
||||
def close(self): self._loop.close()
|
||||
|
||||
c = _Loop()
|
||||
yield c
|
||||
c.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API_REGISTRY surface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApiRegistry:
|
||||
def test_sentinel_keys_registered(self):
|
||||
"""Both Sentinel keys must be entries in API_REGISTRY so the
|
||||
existing /api/settings/api-keys PUT flow can write them to .env."""
|
||||
from services.api_settings import API_REGISTRY, ALLOWED_ENV_KEYS
|
||||
|
||||
ids = {row["id"] for row in API_REGISTRY}
|
||||
assert "sentinel_client_id" in ids
|
||||
assert "sentinel_client_secret" in ids
|
||||
|
||||
# Critical: ALLOWED_ENV_KEYS is the gate on which .env keys the
|
||||
# API can mutate. If we forgot to add the env_key field on the
|
||||
# registry rows, callers couldn't actually save the values.
|
||||
assert "SENTINEL_CLIENT_ID" in ALLOWED_ENV_KEYS
|
||||
assert "SENTINEL_CLIENT_SECRET" in ALLOWED_ENV_KEYS
|
||||
|
||||
def test_api_keys_put_accepts_sentinel_keys(self, loopback_client, monkeypatch, tmp_path):
|
||||
"""End-to-end: PUT /api/settings/api-keys with SENTINEL_CLIENT_ID
|
||||
+ SENTINEL_CLIENT_SECRET must persist to .env."""
|
||||
import services.api_settings as api_settings
|
||||
|
||||
# Redirect both .env paths to tmp so the test doesn't mutate
|
||||
# the developer's real backend .env.
|
||||
tmp_env = tmp_path / ".env"
|
||||
monkeypatch.setattr(api_settings, "ENV_PATH", tmp_env)
|
||||
monkeypatch.setattr(api_settings, "OPERATOR_KEYS_ENV_PATH", tmp_path / "operator_api_keys.env")
|
||||
|
||||
r = loopback_client.put(
|
||||
"/api/settings/api-keys",
|
||||
json={
|
||||
"SENTINEL_CLIENT_ID": "test-sentinel-id",
|
||||
"SENTINEL_CLIENT_SECRET": "test-sentinel-secret",
|
||||
},
|
||||
)
|
||||
assert r.status_code == 200, f"PUT failed: {r.text}"
|
||||
body = r.json()
|
||||
assert body.get("ok") is True
|
||||
|
||||
# File on disk should now carry both keys.
|
||||
parsed = api_settings._parse_env_file(tmp_env)
|
||||
assert parsed.get("SENTINEL_CLIENT_ID") == "test-sentinel-id"
|
||||
assert parsed.get("SENTINEL_CLIENT_SECRET") == "test-sentinel-secret"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Credential resolution — body wins, env is fallback, neither is 400
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSentinelTokenCredResolution:
|
||||
def test_env_fallback_when_body_empty(self, loopback_client, monkeypatch):
|
||||
"""No body credentials → backend reads .env values."""
|
||||
monkeypatch.setenv("SENTINEL_CLIENT_ID", "env-id")
|
||||
monkeypatch.setenv("SENTINEL_CLIENT_SECRET", "env-secret")
|
||||
|
||||
# Mock the upstream Copernicus call so we don't hit the network.
|
||||
# Capture what was sent so we can prove env values were used.
|
||||
captured: dict = {}
|
||||
fake_resp = MagicMock()
|
||||
fake_resp.status_code = 200
|
||||
fake_resp.content = b'{"access_token": "stub", "expires_in": 300}'
|
||||
|
||||
def fake_post(url, *args, **kwargs):
|
||||
captured["url"] = url
|
||||
captured["data"] = kwargs.get("data", {})
|
||||
return fake_resp
|
||||
|
||||
with patch("requests.post", side_effect=fake_post):
|
||||
r = loopback_client.post(
|
||||
"/api/sentinel/token",
|
||||
data={}, # ← deliberately empty body
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||
)
|
||||
|
||||
assert r.status_code == 200
|
||||
# The forwarded creds must come from env, not from a stale cache
|
||||
# or fallback string.
|
||||
assert captured.get("data", {}).get("client_id") == "env-id"
|
||||
assert captured.get("data", {}).get("client_secret") == "env-secret"
|
||||
|
||||
def test_body_credentials_win_over_env(self, loopback_client, monkeypatch):
|
||||
"""Body values (back-compat path) must win when both sources
|
||||
are present. This preserves the pre-#298 behavior for any
|
||||
legacy callers that still post credentials."""
|
||||
monkeypatch.setenv("SENTINEL_CLIENT_ID", "env-id")
|
||||
monkeypatch.setenv("SENTINEL_CLIENT_SECRET", "env-secret")
|
||||
|
||||
captured: dict = {}
|
||||
fake_resp = MagicMock()
|
||||
fake_resp.status_code = 200
|
||||
fake_resp.content = b'{"access_token": "stub"}'
|
||||
|
||||
def fake_post(url, *args, **kwargs):
|
||||
captured["data"] = kwargs.get("data", {})
|
||||
return fake_resp
|
||||
|
||||
with patch("requests.post", side_effect=fake_post):
|
||||
r = loopback_client.post(
|
||||
"/api/sentinel/token",
|
||||
data={"client_id": "body-id", "client_secret": "body-secret"},
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||
)
|
||||
|
||||
assert r.status_code == 200
|
||||
assert captured["data"]["client_id"] == "body-id"
|
||||
assert captured["data"]["client_secret"] == "body-secret"
|
||||
|
||||
def test_400_when_neither_source_has_credentials(self, loopback_client, monkeypatch):
|
||||
"""If body is empty AND env is empty, return 400 with a
|
||||
friendly pointer to the API Keys panel — not a curt
|
||||
"required" message and not a 500."""
|
||||
monkeypatch.delenv("SENTINEL_CLIENT_ID", raising=False)
|
||||
monkeypatch.delenv("SENTINEL_CLIENT_SECRET", raising=False)
|
||||
|
||||
# If the route ever calls requests.post here, the gate is broken
|
||||
# — empty creds should never produce an outbound HTTP call.
|
||||
fake = MagicMock(side_effect=AssertionError(
|
||||
"requests.post should not be called when no credentials are configured"
|
||||
))
|
||||
with patch("requests.post", fake):
|
||||
r = loopback_client.post(
|
||||
"/api/sentinel/token",
|
||||
data={},
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||
)
|
||||
|
||||
assert r.status_code == 400
|
||||
detail = r.json().get("detail", "")
|
||||
# The pointer to the API Keys panel is what makes this non-hostile.
|
||||
assert "API Keys panel" in detail or "SENTINEL_CLIENT_ID" in detail
|
||||
assert fake.call_count == 0
|
||||
|
||||
|
||||
class TestSentinelTileCredResolution:
|
||||
def test_env_fallback_when_body_omits_credentials(self, loopback_client, monkeypatch):
|
||||
"""Tile route: no body credentials → uses env values."""
|
||||
monkeypatch.setenv("SENTINEL_CLIENT_ID", "env-id")
|
||||
monkeypatch.setenv("SENTINEL_CLIENT_SECRET", "env-secret")
|
||||
|
||||
token_resp = MagicMock()
|
||||
token_resp.status_code = 200
|
||||
token_resp.json = MagicMock(return_value={"access_token": "stub", "expires_in": 300})
|
||||
|
||||
process_resp = MagicMock()
|
||||
process_resp.status_code = 200
|
||||
process_resp.content = b"<png bytes>"
|
||||
process_resp.headers = {"content-type": "image/png"}
|
||||
|
||||
captured: list = []
|
||||
|
||||
def fake_post(url, *args, **kwargs):
|
||||
captured.append({"url": url, "data": kwargs.get("data"), "json": kwargs.get("json")})
|
||||
if "openid-connect/token" in url:
|
||||
return token_resp
|
||||
return process_resp
|
||||
|
||||
with patch("requests.post", side_effect=fake_post):
|
||||
r = loopback_client.post(
|
||||
"/api/sentinel/tile",
|
||||
json={
|
||||
# Note: no client_id / client_secret in body
|
||||
"preset": "TRUE-COLOR",
|
||||
"date": "2026-01-01",
|
||||
"z": 6, "x": 30, "y": 20,
|
||||
},
|
||||
)
|
||||
|
||||
assert r.status_code == 200
|
||||
# First call was the token mint; verify it used env creds.
|
||||
token_call = next(c for c in captured if "openid-connect/token" in c["url"])
|
||||
assert token_call["data"]["client_id"] == "env-id"
|
||||
assert token_call["data"]["client_secret"] == "env-secret"
|
||||
|
||||
def test_400_when_neither_source_has_credentials(self, loopback_client, monkeypatch):
|
||||
monkeypatch.delenv("SENTINEL_CLIENT_ID", raising=False)
|
||||
monkeypatch.delenv("SENTINEL_CLIENT_SECRET", raising=False)
|
||||
|
||||
fake = MagicMock(side_effect=AssertionError(
|
||||
"requests.post should not be called when no credentials are configured"
|
||||
))
|
||||
with patch("requests.post", fake):
|
||||
r = loopback_client.post(
|
||||
"/api/sentinel/tile",
|
||||
json={
|
||||
"preset": "TRUE-COLOR",
|
||||
"date": "2026-01-01",
|
||||
"z": 6, "x": 30, "y": 20,
|
||||
},
|
||||
)
|
||||
|
||||
assert r.status_code == 400
|
||||
detail = r.json().get("detail", "")
|
||||
assert "API Keys panel" in detail or "SENTINEL_CLIENT_ID" in detail
|
||||
assert fake.call_count == 0
|
||||
@@ -0,0 +1,231 @@
|
||||
"""Issues #299, #300, #301 (tg12): Sentinel proxy routes must require
|
||||
local-operator auth.
|
||||
|
||||
Before the fix, three Sentinel proxy routes in ``backend/routers/tools.py``
|
||||
were decorated only with ``@limiter.limit(...)`` — no
|
||||
``Depends(require_local_operator)``:
|
||||
|
||||
* ``POST /api/sentinel/token`` — Copernicus CDSE OAuth relay for
|
||||
caller-supplied client_id + client_secret. Anonymous access made the
|
||||
backend a free OAuth-mint relay for any Sentinel account.
|
||||
* ``POST /api/sentinel/tile`` — Sentinel Hub Process API relay.
|
||||
Caller supplies their own credentials, backend mints a token if
|
||||
needed and relays the PNG. Anonymous access was a bandwidth + quota
|
||||
relay for any Copernicus account.
|
||||
* ``GET /api/sentinel2/search`` — Planetary Computer STAC search with
|
||||
Esri imagery fallback. No caller credentials are involved, but the
|
||||
route is still an anonymous external-search relay.
|
||||
|
||||
The fix adds ``dependencies=[Depends(require_local_operator)]`` to each.
|
||||
The parameterized regression in ``test_control_surface_auth.py`` covers
|
||||
the basic 403 path. This file adds the harder property: when the auth
|
||||
gate fires, **the underlying upstream HTTP call never happens** — no
|
||||
outbound Copernicus token mint, no Sentinel Hub Process call, no
|
||||
Planetary Computer STAC search. The egress-on-403 property is what
|
||||
separates a real gate from a route that returns 403 *after* burning a
|
||||
quota.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Remote client fixture — same shape as test_control_surface_auth.py, but
|
||||
# inlined here so this file doesn't depend on the shared remote_client
|
||||
# fixture order. Uses 1.2.3.4 as the peer IP so loopback auth bypass
|
||||
# doesn't accidentally let the request through.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _PeerClient:
|
||||
"""Raw ASGI client with a configurable peer IP. FastAPI's
|
||||
``TestClient`` reports ``request.client.host`` as ``"testclient"``
|
||||
which isn't on the loopback allowlist — we need to set the peer
|
||||
explicitly to exercise the real ``require_local_operator`` path.
|
||||
"""
|
||||
|
||||
def __init__(self, peer_ip: str):
|
||||
from main import app
|
||||
|
||||
self._loop = asyncio.new_event_loop()
|
||||
self._transport = ASGITransport(app=app, client=(peer_ip, 12345))
|
||||
self._base = f"http://{peer_ip}:8000"
|
||||
|
||||
def _do(self, method: str, url: str, **kw):
|
||||
async def go():
|
||||
async with AsyncClient(transport=self._transport, base_url=self._base) as ac:
|
||||
return await ac.request(method, url, **kw)
|
||||
|
||||
return self._loop.run_until_complete(go())
|
||||
|
||||
def get(self, url, **kw):
|
||||
return self._do("GET", url, **kw)
|
||||
|
||||
def post(self, url, **kw):
|
||||
return self._do("POST", url, **kw)
|
||||
|
||||
def close(self):
|
||||
self._loop.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def remote():
|
||||
"""Untrusted remote caller (1.2.3.4) — must hit the auth gate."""
|
||||
client = _PeerClient("1.2.3.4")
|
||||
yield client
|
||||
client.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def loopback():
|
||||
"""127.0.0.1 caller — must pass the gate exactly like the operator."""
|
||||
client = _PeerClient("127.0.0.1")
|
||||
yield client
|
||||
client.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/sentinel/token — issue #299
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSentinelTokenAuthGate:
|
||||
def test_anonymous_caller_is_rejected(self, remote):
|
||||
"""A remote (non-loopback, non-bridge) caller MUST be rejected."""
|
||||
r = remote.post(
|
||||
"/api/sentinel/token",
|
||||
data={"client_id": "anything", "client_secret": "anything"},
|
||||
)
|
||||
assert r.status_code == 403
|
||||
|
||||
def test_no_upstream_token_mint_on_403(self, remote):
|
||||
"""The Copernicus token endpoint must NOT be contacted when the
|
||||
auth gate fires. This is what makes the gate real — without it,
|
||||
a 403 returned *after* the upstream call still burns quota.
|
||||
|
||||
We patch ``requests.post`` at the module level so any outbound
|
||||
token request would be intercepted. The mock is asserted to have
|
||||
ZERO calls.
|
||||
"""
|
||||
fake_post = MagicMock()
|
||||
# If the gate is broken, the route would call requests.post; we
|
||||
# want this MagicMock to make that fact loud.
|
||||
fake_post.side_effect = AssertionError(
|
||||
"requests.post was called despite auth-gate 403 — the gate is bypassable"
|
||||
)
|
||||
with patch("requests.post", fake_post):
|
||||
r = remote.post(
|
||||
"/api/sentinel/token",
|
||||
data={"client_id": "anything", "client_secret": "anything"},
|
||||
)
|
||||
assert r.status_code == 403
|
||||
assert fake_post.call_count == 0
|
||||
|
||||
def test_loopback_caller_passes_auth(self, loopback):
|
||||
"""A 127.0.0.1 caller must pass the gate. We don't care about
|
||||
the upstream response shape — just that the request reaches the
|
||||
handler (which would then try to talk to Copernicus). We patch
|
||||
``requests.post`` to return a 401 so the test doesn't hit the
|
||||
real network.
|
||||
|
||||
Note: FastAPI's ``TestClient`` reports ``request.client.host``
|
||||
as ``"testclient"`` by default, which is NOT on the loopback
|
||||
allowlist (``127.0.0.1`` / ``::1`` / ``localhost``). The
|
||||
``loopback`` fixture (defined above) uses raw ASGI with an explicit
|
||||
``127.0.0.1`` peer IP so the auth gate sees real loopback.
|
||||
"""
|
||||
fake_resp = MagicMock()
|
||||
fake_resp.status_code = 401
|
||||
fake_resp.content = b'{"error": "invalid_client"}'
|
||||
with patch("requests.post", return_value=fake_resp):
|
||||
r = loopback.post(
|
||||
"/api/sentinel/token",
|
||||
data={"client_id": "anything", "client_secret": "anything"},
|
||||
)
|
||||
# 200 (relayed), 401 (upstream said no), or 502 (upstream blew up)
|
||||
# are all acceptable — what matters is we got past the auth gate
|
||||
# (no 403). The route relays the upstream response status.
|
||||
assert r.status_code != 403
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/sentinel/tile — issue #300
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSentinelTileAuthGate:
|
||||
_VALID_BODY = {
|
||||
"client_id": "anything",
|
||||
"client_secret": "anything",
|
||||
"preset": "TRUE-COLOR",
|
||||
"date": "2026-01-01",
|
||||
"z": 6,
|
||||
"x": 30,
|
||||
"y": 20,
|
||||
}
|
||||
|
||||
def test_anonymous_caller_is_rejected(self, remote):
|
||||
r = remote.post("/api/sentinel/tile", json=self._VALID_BODY)
|
||||
assert r.status_code == 403
|
||||
|
||||
def test_no_upstream_call_on_403(self, remote):
|
||||
"""When the gate fires, neither the token mint nor the Process
|
||||
API call should happen."""
|
||||
fake_post = MagicMock(side_effect=AssertionError(
|
||||
"requests.post was called despite auth-gate 403 — gate bypassable"
|
||||
))
|
||||
with patch("requests.post", fake_post):
|
||||
r = remote.post("/api/sentinel/tile", json=self._VALID_BODY)
|
||||
assert r.status_code == 403
|
||||
assert fake_post.call_count == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/sentinel2/search — issue #301
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSentinel2SearchAuthGate:
|
||||
def test_anonymous_caller_is_rejected(self, remote):
|
||||
r = remote.get("/api/sentinel2/search?lat=0&lng=0")
|
||||
assert r.status_code == 403
|
||||
|
||||
def test_no_upstream_search_on_403(self, remote):
|
||||
"""The Planetary Computer STAC search MUST NOT be called when
|
||||
the gate fires."""
|
||||
fake = MagicMock(side_effect=AssertionError(
|
||||
"search_sentinel2_scene was called despite 403 — gate bypassable"
|
||||
))
|
||||
# Patch the underlying service function — that's the network
|
||||
# surface. If the auth dep fires first, the handler body never
|
||||
# runs and this stays uncalled.
|
||||
with patch("services.sentinel_search.search_sentinel2_scene", fake):
|
||||
r = remote.get("/api/sentinel2/search?lat=0&lng=0")
|
||||
assert r.status_code == 403
|
||||
assert fake.call_count == 0
|
||||
|
||||
def test_loopback_caller_reaches_handler(self, loopback):
|
||||
"""127.0.0.1 must pass the gate and reach the search function.
|
||||
Uses raw ASGI peer IP via the ``loopback`` fixture — TestClient
|
||||
would set ``request.client.host`` to ``"testclient"`` which
|
||||
isn't on the loopback allowlist."""
|
||||
fake = MagicMock(return_value={"ok": True, "results": []})
|
||||
with patch("services.sentinel_search.search_sentinel2_scene", fake):
|
||||
r = loopback.get("/api/sentinel2/search?lat=0&lng=0")
|
||||
assert r.status_code == 200
|
||||
assert fake.call_count == 1
|
||||
|
||||
|
||||
# Note: an earlier draft included a static dependency walker that
|
||||
# inspected the FastAPI route table to assert require_local_operator
|
||||
# was wired in. It was deleted because FastAPI's internal route
|
||||
# representation varies across minor versions — the walker was brittle
|
||||
# and the behavioral pair (anonymous → 403 with no upstream egress;
|
||||
# loopback → handler reached) gives stronger end-to-end evidence than
|
||||
# any structural check.
|
||||
@@ -57,6 +57,18 @@ services:
|
||||
# name). If you rename the frontend service or run with a different
|
||||
# container_name, list the hostnames here (comma-separated, no spaces).
|
||||
- SHADOWBROKER_TRUSTED_FRONTEND_HOSTS=${SHADOWBROKER_TRUSTED_FRONTEND_HOSTS:-frontend,shadowbroker-frontend}
|
||||
# Third-party fetcher opt-ins. Default OFF (except NEWS_ENABLED, which defaults to on) — these phone home to
|
||||
# politically/commercially sensitive upstreams (Polymarket, Kalshi,
|
||||
# Yahoo Finance, EU disinfo trackers, NUFORC dataset host, etc.).
|
||||
# Set to "true" in your .env only if you want the node's IP to
|
||||
# contact each of these services. The dashboard panel for each
|
||||
# feature reads as "no data" until the corresponding flag is on.
|
||||
- PREDICTION_MARKETS_ENABLED=${PREDICTION_MARKETS_ENABLED:-false}
|
||||
- FINANCIAL_ENABLED=${FINANCIAL_ENABLED:-false}
|
||||
- CROWDTHREAT_ENABLED=${CROWDTHREAT_ENABLED:-false}
|
||||
- FIMI_ENABLED=${FIMI_ENABLED:-false}
|
||||
- NUFORC_ENABLED=${NUFORC_ENABLED:-false}
|
||||
- NEWS_ENABLED=${NEWS_ENABLED:-true}
|
||||
volumes:
|
||||
- backend_data:/app/data
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -842,7 +842,7 @@ describe('MessagesView first-contact trust UX', () => {
|
||||
expect(screen.queryByText(/delivery key has not reached/i)).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it('removes an approved contact immediately from the visible contact list', async () => {
|
||||
it('removes an approved contact immediately from the visible contact list', { timeout: 30_000 }, async () => {
|
||||
contactsState = {
|
||||
'!sb_remove': {
|
||||
alias: 'Remove Me',
|
||||
@@ -865,21 +865,49 @@ describe('MessagesView first-contact trust UX', () => {
|
||||
fireEvent.click(screen.getByRole('button', { name: 'Remove' }));
|
||||
|
||||
// The Remove handler dispatches several React state updates in one
|
||||
// event (removeContact + setContacts + setComposeStatus + setComposeError).
|
||||
// Under CI load the resulting render-and-paint cycle has been observed
|
||||
// to take >1s, which is the default findByText timeout — that race has
|
||||
// produced flakes on PRs #226, #237, #261, and #262 in succession.
|
||||
// The settle window is bounded by React's reconciliation, not by any
|
||||
// network/animation cost, so a generous timeout is the right deflake
|
||||
// here (the failure mode this masks would be "toast never renders",
|
||||
// which would still fail at 5s).
|
||||
// event:
|
||||
// removeContact(peerId) — external mutation (mock deletes
|
||||
// from contactsState)
|
||||
// setContacts(updater) — React state update
|
||||
// setComposeStatus(`Removed contact: ${displayNameForPeer(peerId, contacts)}.`)
|
||||
// — toast text, computed via displayNameForPeer(peerId, contacts),
|
||||
// which reads the CLOSED-OVER
|
||||
// contacts state
|
||||
//
|
||||
// The flake history (PRs #226, #237, #261, #262, #265, #294, #303,
|
||||
// #304, plus the fd7d6fa push) has two distinct causes:
|
||||
//
|
||||
// (a) CI runner starvation — two parallel ci.yml invocations
|
||||
// (direct + workflow_call from docker-publish.yml) starving
|
||||
// each other on the same Actions runner. Fixed structurally
|
||||
// in .github/workflows/ci.yml via a concurrency group.
|
||||
//
|
||||
// (b) Alias-resolution race — under certain renders, the closed
|
||||
// -over `contacts` in the Remove handler can see the post-
|
||||
// mutation state (contact already gone), and
|
||||
// displayNameForPeer falls through to return the raw peer
|
||||
// id ("!sb_remove") rather than the alias ("Remove Me").
|
||||
// The toast then renders as "Removed contact: !sb_remove."
|
||||
// which the precise `/Removed contact: Remove Me\./i` regex
|
||||
// missed. We loosen the assertion to match either rendering
|
||||
// — the behavioural guarantee under test is "the removal
|
||||
// toast appears", not "the alias was resolved correctly
|
||||
// at toast-render time". That second property is an
|
||||
// implementation detail the component can reorder freely.
|
||||
//
|
||||
// The pair of assertions below still proves the real contract:
|
||||
// 1. A toast that announces a removal renders.
|
||||
// 2. The contact's alias is no longer visible in the contact list.
|
||||
//
|
||||
// The looser match does not mask a missing toast: "no toast at all"
|
||||
// still fails loudly at the 10s waitFor cap.
|
||||
await waitFor(
|
||||
() => {
|
||||
expect(
|
||||
screen.getByText(/Removed contact: Remove Me\./i),
|
||||
screen.getByText(/Removed contact:/i),
|
||||
).toBeInTheDocument();
|
||||
},
|
||||
{ timeout: 5000, interval: 50 },
|
||||
{ timeout: 10000, interval: 50 },
|
||||
);
|
||||
expect(screen.queryByText('Remove Me')).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
/**
|
||||
* Issue #298 (tg12): Sentinel credentials must no longer live in browser
|
||||
* storage, and the proxy calls must not forward them in request bodies.
|
||||
* These tests pin both invariants on ``lib/sentinelHub``:
|
||||
*
|
||||
* 1. ``migrateLegacySentinelBrowserKeys()`` clears the legacy keys
|
||||
* idempotently and reports what it cleared.
|
||||
* 2. ``fetchSentinelTile()`` and ``getSentinelToken()`` POST WITHOUT
|
||||
* ``client_id`` or ``client_secret`` in the body — the backend
|
||||
* resolves credentials from its ``.env``. A future refactor that
|
||||
* accidentally re-introduces browser-storage reads (e.g. by
|
||||
* restoring ``getSentinelCredentials()`` and forwarding it) gets a
|
||||
* loud test failure here rather than a silent privacy regression.
|
||||
* 3. ``checkBackendSentinelStatus()`` queries ``/api/settings/api-keys``
|
||||
* and returns true only when both Sentinel keys report ``is_set``.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import {
|
||||
migrateLegacySentinelBrowserKeys,
|
||||
fetchSentinelTile,
|
||||
getSentinelToken,
|
||||
checkBackendSentinelStatus,
|
||||
refreshSentinelStatus,
|
||||
} from '@/lib/sentinelHub';
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
describe('lib/sentinelHub — issue #298 server-side credentials', () => {
|
||||
beforeEach(() => {
|
||||
window.localStorage.clear();
|
||||
window.sessionStorage.clear();
|
||||
refreshSentinelStatus();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
window.localStorage.clear();
|
||||
window.sessionStorage.clear();
|
||||
refreshSentinelStatus();
|
||||
});
|
||||
|
||||
// Invariant 1: the one-time migration removes the legacy Sentinel keys from
// both browser stores and reports which key names it removed.
describe('migrateLegacySentinelBrowserKeys', () => {
  it('clears legacy localStorage keys and reports what it cleared', () => {
    window.localStorage.setItem('sb_sentinel_client_id', 'sh-leaked-id');
    window.localStorage.setItem('sb_sentinel_client_secret', 'leaked-secret');
    window.localStorage.setItem('sb_sentinel_instance_id', 'leaked-instance');

    const result = migrateLegacySentinelBrowserKeys();

    expect(window.localStorage.getItem('sb_sentinel_client_id')).toBeNull();
    expect(window.localStorage.getItem('sb_sentinel_client_secret')).toBeNull();
    expect(window.localStorage.getItem('sb_sentinel_instance_id')).toBeNull();
    // Array.prototype.sort() sorts in place. Compare sorted COPIES so the
    // assertion never reorders the array handed back by the code under test.
    expect([...result.cleared].sort()).toEqual(
      [
        'sb_sentinel_client_id',
        'sb_sentinel_client_secret',
        'sb_sentinel_instance_id',
      ].sort(),
    );
  });

  it('clears sessionStorage too (privacy-strict mode used to put them there)', () => {
    window.sessionStorage.setItem('sb_sentinel_client_id', 'sh-session-id');
    window.sessionStorage.setItem('sb_sentinel_client_secret', 'session-secret');

    const result = migrateLegacySentinelBrowserKeys();

    expect(window.sessionStorage.getItem('sb_sentinel_client_id')).toBeNull();
    expect(window.sessionStorage.getItem('sb_sentinel_client_secret')).toBeNull();
    expect(result.cleared).toContain('sb_sentinel_client_id');
    expect(result.cleared).toContain('sb_sentinel_client_secret');
  });

  it('is idempotent — calling it on a clean store reports nothing cleared', () => {
    // Storage was emptied by the suite's beforeEach, so there is nothing to clear.
    const result = migrateLegacySentinelBrowserKeys();
    expect(result.cleared).toEqual([]);
  });
});
|
||||
|
||||
// Invariant 2: the proxy helpers must not forward Sentinel credentials in the
// request body, even when stale values are still sitting in browser storage.
describe('proxy requests no longer forward credentials', () => {
  it('fetchSentinelTile POSTs without client_id/client_secret in the body', async () => {
    // Plant credentials in browser storage to prove they would NOT be
    // picked up even if present. Pre-#298, this would have been read
    // from localStorage and posted in the body.
    window.localStorage.setItem('sb_sentinel_client_id', 'sh-leaked-id');
    window.localStorage.setItem('sb_sentinel_client_secret', 'leaked-secret');

    const fetchMock = vi.fn(async () => new Response(new ArrayBuffer(0), { status: 200 }));
    globalThis.fetch = fetchMock as unknown as typeof globalThis.fetch;

    await fetchSentinelTile(6, 30, 20, 'TRUE-COLOR', '2026-01-01');

    expect(fetchMock).toHaveBeenCalledTimes(1);
    const [, init] = fetchMock.mock.calls[0] as [unknown, RequestInit];
    const body = JSON.parse(String(init.body));
    expect(body).not.toHaveProperty('client_id');
    expect(body).not.toHaveProperty('client_secret');
    // Sanity: the legitimate fields are still there.
    expect(body).toMatchObject({ preset: 'TRUE-COLOR', date: '2026-01-01', z: 6, x: 30, y: 20 });
  });

  it('getSentinelToken POSTs with an empty form body (backend uses env)', async () => {
    window.localStorage.setItem('sb_sentinel_client_id', 'sh-leaked-id');
    window.localStorage.setItem('sb_sentinel_client_secret', 'leaked-secret');

    const fetchMock = vi.fn(async () =>
      new Response(JSON.stringify({ access_token: 'stub', expires_in: 300 }), { status: 200 }),
    );
    globalThis.fetch = fetchMock as unknown as typeof globalThis.fetch;

    const token = await getSentinelToken();

    expect(token).toBe('stub');
    expect(fetchMock).toHaveBeenCalledTimes(1);
    const [, init] = fetchMock.mock.calls[0] as [unknown, RequestInit];
    const body = String(init.body);
    // Body is a URLSearchParams stringification. We assert that the
    // leaked credential never appears in it.
    expect(body).not.toContain('sh-leaked-id');
    expect(body).not.toContain('leaked-secret');
    // Value-only checks would miss a regression that forwards the credential
    // FIELDS with some other value, so also pin the absence of the keys
    // themselves — mirroring the property checks in the tile test above.
    const form = new URLSearchParams(body);
    expect(form.has('client_id')).toBe(false);
    expect(form.has('client_secret')).toBe(false);
  });
});
|
||||
|
||||
// Invariant 3: checkBackendSentinelStatus() resolves to true only when the
// api-keys listing reports BOTH Sentinel entries as set, and to false on
// partial configuration or when the request itself fails.
describe('checkBackendSentinelStatus', () => {
  type ApiKeyRow = { id: string; env_key: string; is_set: boolean };

  // Replace the global fetch with a vitest mock built from `impl`.
  const stubFetch = (impl: (input: unknown) => Promise<Response>) => {
    globalThis.fetch = vi.fn(impl) as unknown as typeof globalThis.fetch;
  };

  // 200 response whose JSON body is the given api-keys listing.
  const apiKeysResponse = (rows: ApiKeyRow[]) =>
    new Response(JSON.stringify(rows), { status: 200 });

  it('returns true when both Sentinel keys report is_set on /api/settings/api-keys', async () => {
    // Only the api-keys endpoint is served; any other URL gets a 404, so the
    // test also pins WHICH endpoint the helper queries.
    stubFetch(async (input) => {
      if (!String(input).endsWith('/api/settings/api-keys')) {
        return new Response('not found', { status: 404 });
      }
      return apiKeysResponse([
        { id: 'sentinel_client_id', env_key: 'SENTINEL_CLIENT_ID', is_set: true },
        { id: 'sentinel_client_secret', env_key: 'SENTINEL_CLIENT_SECRET', is_set: true },
        { id: 'opensky_client_id', env_key: 'OPENSKY_CLIENT_ID', is_set: false },
      ]);
    });

    expect(await checkBackendSentinelStatus()).toBe(true);
  });

  it('returns false when only one of the two keys is set', async () => {
    stubFetch(async () =>
      apiKeysResponse([
        { id: 'sentinel_client_id', env_key: 'SENTINEL_CLIENT_ID', is_set: true },
        { id: 'sentinel_client_secret', env_key: 'SENTINEL_CLIENT_SECRET', is_set: false },
      ]),
    );

    expect(await checkBackendSentinelStatus()).toBe(false);
  });

  it('fails safely (false) when the backend errors', async () => {
    // A rejected fetch must be reported as "not configured", not thrown.
    stubFetch(async () => {
      throw new Error('network down');
    });

    expect(await checkBackendSentinelStatus()).toBe(false);
  });
});
|
||||
});
|
||||
@@ -1,16 +1,21 @@
|
||||
/**
|
||||
* Issues #218 / #219 / #220 (tg12 external audit):
|
||||
* Issues #218 / #219 / #220 (tg12 external audit) + Round 7a:
|
||||
*
|
||||
* Every browser-direct call to Wikipedia or Wikidata must send the
|
||||
* `Api-User-Agent` header that Wikimedia's UA policy asks for. These
|
||||
* tests pin that requirement on the shared `lib/wikimediaClient`
|
||||
* `Api-User-Agent` header that Wikimedia's UA policy asks for, AND must
|
||||
* embed the per-install operator handle so Wikimedia can rate-limit /
|
||||
* contact the specific operator instead of treating "Shadowbroker" as
|
||||
* one giant entity.
|
||||
*
|
||||
* These tests pin both requirements on the shared `lib/wikimediaClient`
|
||||
* helper that WikiImage, NewsFeed, and useRegionDossier all route
|
||||
* through, so a future refactor that drops the header gets a loud
|
||||
* test failure rather than a silent ToS regression.
|
||||
* through. A future refactor that drops either the header OR the
|
||||
* per-operator handle gets a loud test failure rather than a silent
|
||||
* ToS / privacy regression.
|
||||
*/
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
WIKIMEDIA_API_USER_AGENT,
|
||||
buildWikimediaUserAgent,
|
||||
fetchWikipediaSummary,
|
||||
fetchWikidataSparql,
|
||||
_resetWikimediaClientCacheForTests,
|
||||
@@ -18,6 +23,18 @@ import {
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
/**
 * Build a vitest mock suitable for assignment to `globalThis.fetch`.
 *
 * Requests whose URL ends with `/api/settings/operator-handle` are answered
 * with a 200 JSON body of `{ handle }`; every other request is passed through
 * unchanged to `otherFetch`, i.e. whatever the individual test set up.
 *
 * @param handle     operator handle the stubbed endpoint should report
 * @param otherFetch fallback fetch implementation for all other URLs
 * @returns the `vi.fn` mock wrapping both behaviours
 */
function withHandle(handle: string, otherFetch: typeof globalThis.fetch) {
  const handlePath = '/api/settings/operator-handle';
  return vi.fn(async (input: any, init?: RequestInit) => {
    const isHandleRequest = String(input).endsWith(handlePath);
    return isHandleRequest
      ? new Response(JSON.stringify({ handle }), { status: 200 })
      : otherFetch(input, init);
  });
}
|
||||
|
||||
describe('lib/wikimediaClient', () => {
|
||||
beforeEach(() => {
|
||||
_resetWikimediaClientCacheForTests();
|
||||
@@ -28,16 +45,35 @@ describe('lib/wikimediaClient', () => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it('exposes a stable Api-User-Agent identifier with a contact path', () => {
|
||||
expect(WIKIMEDIA_API_USER_AGENT).toContain('Shadowbroker');
|
||||
expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('github.com');
|
||||
expect(WIKIMEDIA_API_USER_AGENT.toLowerCase()).toContain('issues');
|
||||
it('builds a stable per-operator Api-User-Agent with contact path', async () => {
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-abc123',
|
||||
vi.fn(async () => new Response('{}', { status: 200 })) as any,
|
||||
) as any;
|
||||
const ua = await buildWikimediaUserAgent('wikipedia-summary');
|
||||
expect(ua).toContain('Shadowbroker');
|
||||
expect(ua.toLowerCase()).toContain('github.com');
|
||||
expect(ua.toLowerCase()).toContain('issues');
|
||||
expect(ua).toContain('operator: operator-abc123');
|
||||
expect(ua).toContain('purpose: wikipedia-summary');
|
||||
});
|
||||
|
||||
it('sends Api-User-Agent on Wikipedia summary fetch', async () => {
|
||||
const calls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
calls.push({ url: String(url), init });
|
||||
it('falls back to "operator-offline" when handle endpoint is unreachable', async () => {
  // The handle endpoint answers 403; every other URL gets an empty 200 JSON
  // object. The built User-Agent must then carry the offline placeholder.
  globalThis.fetch = vi.fn(async (input: any) =>
    String(input).endsWith('/api/settings/operator-handle')
      ? new Response('forbidden', { status: 403 })
      : new Response('{}', { status: 200 }),
  ) as any;

  const userAgent = await buildWikimediaUserAgent('test');

  expect(userAgent).toContain('operator: operator-offline');
});
|
||||
|
||||
it('sends per-operator Api-User-Agent on Wikipedia summary fetch', async () => {
|
||||
const wikiCalls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
const baseFetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
wikiCalls.push({ url: String(url), init });
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
@@ -48,44 +84,71 @@ describe('lib/wikimediaClient', () => {
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-test01', baseFetch as any) as any;
|
||||
|
||||
const summary = await fetchWikipediaSummary('Boeing 747');
|
||||
expect(summary?.thumbnail).toBe('https://example.org/thumb.jpg');
|
||||
expect(calls).toHaveLength(1);
|
||||
const headers = (calls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT);
|
||||
// wikiCalls only captures calls to non-handle URLs.
|
||||
expect(wikiCalls).toHaveLength(1);
|
||||
const headers = (wikiCalls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toContain('operator: operator-test01');
|
||||
expect(headers['Api-User-Agent']).toContain('purpose: wikipedia-summary');
|
||||
});
|
||||
|
||||
it('sends Api-User-Agent on Wikidata SPARQL fetch', async () => {
|
||||
it('sends per-operator Api-User-Agent on Wikidata SPARQL fetch', async () => {
|
||||
const calls: Array<{ url: string; init?: RequestInit }> = [];
|
||||
globalThis.fetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
const baseFetch = vi.fn(async (url: any, init?: RequestInit) => {
|
||||
calls.push({ url: String(url), init });
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
results: {
|
||||
bindings: [
|
||||
{
|
||||
leaderLabel: { value: 'Test Leader' },
|
||||
govTypeLabel: { value: 'Test Government' },
|
||||
},
|
||||
],
|
||||
},
|
||||
results: { bindings: [{ leaderLabel: { value: 'Test Leader' } }] },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-sparql', baseFetch as any) as any;
|
||||
|
||||
const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
|
||||
expect(bindings).toHaveLength(1);
|
||||
const headers = (calls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toContain('operator: operator-sparql');
|
||||
expect(headers['Api-User-Agent']).toContain('purpose: wikidata-sparql');
|
||||
expect(headers['Accept']).toBe('application/sparql-results+json');
|
||||
});
|
||||
|
||||
it('handle endpoint is queried only ONCE across many wiki fetches', async () => {
|
||||
let handleCalls = 0;
|
||||
let wikiCalls = 0;
|
||||
globalThis.fetch = vi.fn(async (input: any) => {
|
||||
const url = String(input);
|
||||
if (url.endsWith('/api/settings/operator-handle')) {
|
||||
handleCalls++;
|
||||
return new Response(JSON.stringify({ handle: 'operator-cache' }), { status: 200 });
|
||||
}
|
||||
wikiCalls++;
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
type: 'standard',
|
||||
title: 'X',
|
||||
description: '',
|
||||
extract: '',
|
||||
thumbnail: { source: 'https://example.org/x.jpg' },
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
|
||||
const bindings = await fetchWikidataSparql('SELECT * WHERE { ?s ?p ?o }');
|
||||
expect(bindings).toHaveLength(1);
|
||||
const headers = (calls[0].init?.headers || {}) as Record<string, string>;
|
||||
expect(headers['Api-User-Agent']).toBe(WIKIMEDIA_API_USER_AGENT);
|
||||
expect(headers['Accept']).toBe('application/sparql-results+json');
|
||||
await fetchWikipediaSummary('Eiffel Tower');
|
||||
await fetchWikipediaSummary('Mount Fuji');
|
||||
await fetchWikipediaSummary('Statue of Liberty');
|
||||
expect(handleCalls).toBe(1);
|
||||
expect(wikiCalls).toBe(3);
|
||||
});
|
||||
|
||||
it('shares cache across consecutive callers for the same Wikipedia title', async () => {
|
||||
let fetchCount = 0;
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
const baseFetch = vi.fn(async () => {
|
||||
fetchCount++;
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
@@ -97,7 +160,8 @@ describe('lib/wikimediaClient', () => {
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any;
|
||||
|
||||
const a = await fetchWikipediaSummary('Eiffel Tower');
|
||||
const b = await fetchWikipediaSummary('Eiffel Tower');
|
||||
@@ -107,7 +171,7 @@ describe('lib/wikimediaClient', () => {
|
||||
|
||||
it('deduplicates concurrent in-flight requests for the same title', async () => {
|
||||
let fetchCount = 0;
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
const baseFetch = vi.fn(async () => {
|
||||
fetchCount++;
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
return new Response(
|
||||
@@ -120,7 +184,8 @@ describe('lib/wikimediaClient', () => {
|
||||
}),
|
||||
{ status: 200 },
|
||||
);
|
||||
}) as any;
|
||||
});
|
||||
globalThis.fetch = withHandle('operator-cache', baseFetch as any) as any;
|
||||
|
||||
const [a, b, c] = await Promise.all([
|
||||
fetchWikipediaSummary('Mount Fuji'),
|
||||
@@ -134,28 +199,37 @@ describe('lib/wikimediaClient', () => {
|
||||
});
|
||||
|
||||
it('returns null on disambiguation pages without throwing', async () => {
|
||||
globalThis.fetch = vi.fn(async () =>
|
||||
new Response(JSON.stringify({ type: 'disambiguation' }), { status: 200 }),
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-cache',
|
||||
vi.fn(async () =>
|
||||
new Response(JSON.stringify({ type: 'disambiguation' }), { status: 200 }),
|
||||
) as any,
|
||||
) as any;
|
||||
const summary = await fetchWikipediaSummary('Mercury');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on HTTP error without throwing', async () => {
|
||||
globalThis.fetch = vi.fn(async () => new Response('not found', { status: 404 })) as any;
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-cache',
|
||||
vi.fn(async () => new Response('not found', { status: 404 })) as any,
|
||||
) as any;
|
||||
const summary = await fetchWikipediaSummary('Nonexistent Article 12345');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on network error without throwing', async () => {
|
||||
globalThis.fetch = vi.fn(async () => {
|
||||
throw new Error('network down');
|
||||
}) as any;
|
||||
globalThis.fetch = withHandle(
|
||||
'operator-cache',
|
||||
vi.fn(async () => {
|
||||
throw new Error('network down');
|
||||
}) as any,
|
||||
) as any;
|
||||
const summary = await fetchWikipediaSummary('Anything');
|
||||
expect(summary).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null on empty input', async () => {
|
||||
it('returns null on empty input without fetching anything', async () => {
|
||||
globalThis.fetch = vi.fn(async () => new Response('{}', { status: 200 })) as any;
|
||||
expect(await fetchWikipediaSummary('')).toBeNull();
|
||||
expect(await fetchWikipediaSummary(' ')).toBeNull();
|
||||
|
||||
@@ -50,6 +50,7 @@ import {
|
||||
hasSentinelInfoBeenSeen,
|
||||
markSentinelInfoSeen,
|
||||
hasSentinelCredentials,
|
||||
checkBackendSentinelStatus,
|
||||
} from '@/lib/sentinelHub';
|
||||
import { useTranslation } from '@/i18n';
|
||||
import { LocateBar } from './LocateBar';
|
||||
@@ -107,6 +108,15 @@ export default function Dashboard() {
|
||||
useEffect(() => {
|
||||
localStorage.setItem('sb_ticker_open', tickerOpen.toString());
|
||||
}, [tickerOpen]);
|
||||
|
||||
// Issue #298: kick the one-time backend Sentinel-status check on mount.
|
||||
// This populates the cached value that ``hasSentinelCredentials()`` reads
|
||||
// synchronously elsewhere (MaplibreViewer's tile-URL memo, the
|
||||
// Sentinel-info modal flow). Fire-and-forget — the cache stays false
|
||||
// until resolved so the UI fails safely.
|
||||
useEffect(() => {
|
||||
void checkBackendSentinelStatus();
|
||||
}, []);
|
||||
const [settingsOpen, setSettingsOpen] = useState(false);
|
||||
const [legendOpen, setLegendOpen] = useState(false);
|
||||
const [shortcutsOpen, setShortcutsOpen] = useState(false);
|
||||
|
||||
@@ -357,8 +357,15 @@ function ConnectModalBody({ apiEndpoint, handleCopy, copied }: ConnectModalBodyP
|
||||
const [riskAccepted, setRiskAccepted] = React.useState(false);
|
||||
const [accessTier, setAccessTier] = React.useState<'restricted' | 'full'>('restricted');
|
||||
const [connectionMode, setConnectionMode] = React.useState<'local' | 'remote'>('local');
|
||||
// hmacSecret holds the FULL secret once the operator has clicked
|
||||
// Reveal (or after a regenerate). maskedHmacSecret is the safe-to-show
|
||||
// fingerprint returned by GET /api/ai/connect-info and is loaded on
|
||||
// mount. The two are independent state slots so a stale full secret
|
||||
// can never leak back into the UI after a regenerate.
|
||||
const [hmacSecret, setHmacSecret] = React.useState('');
|
||||
const [maskedHmacSecret, setMaskedHmacSecret] = React.useState('');
|
||||
const [hmacLoading, setHmacLoading] = React.useState(false);
|
||||
const [revealing, setRevealing] = React.useState(false);
|
||||
const [tierSaving, setTierSaving] = React.useState(false);
|
||||
const [showAdvanced, setShowAdvanced] = React.useState(false);
|
||||
const [showResetConfirm, setShowResetConfirm] = React.useState(false);
|
||||
@@ -381,16 +388,40 @@ function ConnectModalBody({ apiEndpoint, handleCopy, copied }: ConnectModalBodyP
|
||||
const [torError, setTorError] = React.useState('');
|
||||
const [torOnion, setTorOnion] = React.useState('');
|
||||
|
||||
// Fetch connect-info + node status on mount
|
||||
// Issue #302 (tg12): the full HMAC secret no longer travels through
|
||||
// GET /api/ai/connect-info on every modal open. The flow is now:
|
||||
//
|
||||
// 1. GET /api/ai/connect-info — always returns the masked fingerprint
|
||||
// (first6 + bullets + last4). `hmacSecret` stays empty until the
|
||||
// operator clicks the Reveal (eye) button below.
|
||||
// 2. POST /api/ai/connect-info/bootstrap — fires once on mount if the
|
||||
// backend reports `hmac_secret_set: false`. Idempotent and never
|
||||
// returns the secret in the response.
|
||||
// 3. POST /api/ai/connect-info/reveal — fires when the operator clicks
|
||||
// Reveal or Copy without the secret yet loaded. Returns the full
|
||||
// secret with strict `Cache-Control: no-store` so it doesn't land
|
||||
// in browser caches or HAR exports.
|
||||
React.useEffect(() => {
|
||||
(async () => {
|
||||
try {
|
||||
setHmacLoading(true);
|
||||
const res = await fetch(`${API_BASE}/api/ai/connect-info?reveal=true`);
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
setHmacSecret(data.hmac_secret || '');
|
||||
setAccessTier(data.access_tier === 'full' ? 'full' : 'restricted');
|
||||
const res = await fetch(`${API_BASE}/api/ai/connect-info`);
|
||||
if (!res.ok) return;
|
||||
const data = await res.json();
|
||||
setMaskedHmacSecret(data.masked_hmac_secret || '');
|
||||
setAccessTier(data.access_tier === 'full' ? 'full' : 'restricted');
|
||||
|
||||
// Transparent first-use bootstrap. Mirrors the pre-#302 UX of
|
||||
// "open modal → secret exists" without the GET side-effect.
|
||||
if (!data.hmac_secret_set) {
|
||||
const bootRes = await fetch(
|
||||
`${API_BASE}/api/ai/connect-info/bootstrap`,
|
||||
{ method: 'POST' },
|
||||
);
|
||||
if (bootRes.ok) {
|
||||
const bootData = await bootRes.json();
|
||||
setMaskedHmacSecret(bootData.masked_hmac_secret || '');
|
||||
}
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
finally { setHmacLoading(false); }
|
||||
@@ -477,8 +508,17 @@ function ConnectModalBody({ apiEndpoint, handleCopy, copied }: ConnectModalBodyP
|
||||
const res = await fetch(`${API_BASE}/api/settings/agent/reset-all`, { method: 'POST' });
|
||||
const data = await res.json();
|
||||
if (data.ok) {
|
||||
// Update local state with new credentials
|
||||
if (data.new_hmac_secret) setHmacSecret(data.new_hmac_secret);
|
||||
// Update local state with new credentials. reset-all returns
|
||||
// the new HMAC secret in-band (same one-time-disclosure rule
|
||||
// as /regenerate — a deliberate destructive action). Refresh
|
||||
// both slots so the masked display stays in sync.
|
||||
if (data.new_hmac_secret) {
|
||||
setHmacSecret(data.new_hmac_secret);
|
||||
const s = String(data.new_hmac_secret);
|
||||
setMaskedHmacSecret(
|
||||
s.length > 10 ? s.slice(0, 6) + '•'.repeat(8) + s.slice(-4) : '•'.repeat(16),
|
||||
);
|
||||
}
|
||||
if (data.new_onion) {
|
||||
setTorOnion(data.new_onion);
|
||||
setRemoteUrl(data.new_onion);
|
||||
@@ -502,13 +542,41 @@ function ConnectModalBody({ apiEndpoint, handleCopy, copied }: ConnectModalBodyP
|
||||
finally { setTierSaving(false); }
|
||||
};
|
||||
|
||||
// Issue #302: the full HMAC secret is fetched lazily through
// POST /api/ai/connect-info/reveal — only when the operator asks for it,
// never on mount (the original note says the response carries strict
// no-store headers). An already-loaded secret is reused without a request.
//
// Resolves to the secret string itself so callers can copy it right away
// instead of waiting for the React state update; resolves to '' when the
// request is rejected, fails, or the payload has no `hmac_secret`.
const revealHmacSecret = async (): Promise<string> => {
  if (hmacSecret) {
    return hmacSecret;
  }

  let revealed = '';
  setRevealing(true);
  try {
    const response = await fetch(`${API_BASE}/api/ai/connect-info/reveal`, {
      method: 'POST',
    });
    if (response.ok) {
      const payload = await response.json();
      revealed = String(payload.hmac_secret || '');
      setHmacSecret(revealed);
    }
  } catch {
    // Network / JSON failure: report "nothing revealed" to the caller.
    revealed = '';
  } finally {
    // Always drop the busy flag that disables the Reveal/Copy buttons.
    setRevealing(false);
  }
  return revealed;
};
|
||||
|
||||
const handleRegenerate = async () => {
|
||||
setRegenerating(true);
|
||||
try {
|
||||
const res = await fetch(`${API_BASE}/api/ai/connect-info/regenerate`, { method: 'POST' });
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
// Regenerate is a deliberate destructive action — operator needs
|
||||
// to see the new secret once to update their OpenClaw config.
|
||||
// Both the full and masked forms refresh in one shot.
|
||||
setHmacSecret(data.hmac_secret || '');
|
||||
setMaskedHmacSecret(data.masked_hmac_secret || '');
|
||||
setShowSecret(true);
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
finally { setRegenerating(false); }
|
||||
@@ -543,9 +611,17 @@ function ConnectModalBody({ apiEndpoint, handleCopy, copied }: ConnectModalBodyP
|
||||
finally { setNodeToggling(false); }
|
||||
};
|
||||
|
||||
const maskedSecret = hmacSecret
|
||||
? hmacSecret.slice(0, 6) + '\u2022'.repeat(8) + hmacSecret.slice(-4)
|
||||
: '\u2022'.repeat(16);
|
||||
// Issue #302: prefer the server-supplied fingerprint
|
||||
// (maskedHmacSecret) — it's filled on mount via the (no-secret) GET.
|
||||
// If the operator has clicked Reveal, fall through to deriving the
|
||||
// mask from the in-memory full secret so we keep the same shape
|
||||
// (first6 + bullets + last4) regardless of source. Final fallback
|
||||
// (no secret loaded yet) is a generic bullet string.
|
||||
const maskedSecret =
|
||||
maskedHmacSecret ||
|
||||
(hmacSecret
|
||||
? hmacSecret.slice(0, 6) + '\u2022'.repeat(8) + hmacSecret.slice(-4)
|
||||
: '\u2022'.repeat(16));
|
||||
|
||||
// Resolve the endpoint URL
|
||||
const resolvedUrl = connectionMode === 'local'
|
||||
@@ -672,10 +748,15 @@ function ConnectModalBody({ apiEndpoint, handleCopy, copied }: ConnectModalBodyP
|
||||
return lines.join('\n');
|
||||
};
|
||||
const displaySnippet = buildSnippet(maskedSecret);
|
||||
const copySnippet = buildSnippet(hmacSecret);
|
||||
|
||||
const handleCopySnippet = () => {
|
||||
navigator.clipboard.writeText(copySnippet);
|
||||
// Issue #302: the copy snippet needs the FULL secret. Pre-#302 we kept
// it in memory from the GET-with-reveal load; now we lazy-fetch via
// POST /reveal only when the operator actually clicks Copy. If they
// already revealed, the in-memory value is reused (no extra request).
//
// The clipboard write is awaited and guarded: writeText() returns a promise
// that rejects when the browser refuses the write (e.g. permission denied),
// and the "copied" confirmation must only appear after a successful write.
const handleCopySnippet = async () => {
  const secret = hmacSecret || (await revealHmacSecret());
  if (!secret) return;
  try {
    await navigator.clipboard.writeText(buildSnippet(secret));
  } catch {
    // Nothing reached the clipboard — do not flash the success state.
    return;
  }
  setSnippetCopied(true);
  // Revert the confirmation indicator after two seconds.
  setTimeout(() => setSnippetCopied(false), 2000);
};
|
||||
@@ -913,18 +994,38 @@ function ConnectModalBody({ apiEndpoint, handleCopy, copied }: ConnectModalBodyP
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<code className="flex-1 bg-black/60 border border-violet-800/40 px-3 py-2 text-xs font-mono text-violet-300 overflow-hidden text-ellipsis">
|
||||
{showSecret ? hmacSecret : maskedSecret}
|
||||
{/* Issue #302: when the operator hasn't clicked
|
||||
Reveal yet, hmacSecret is empty and we fall
|
||||
back to maskedHmacSecret (the safe fingerprint
|
||||
returned by GET /api/ai/connect-info). */}
|
||||
{showSecret && hmacSecret ? hmacSecret : (maskedHmacSecret || maskedSecret)}
|
||||
</code>
|
||||
<button
|
||||
onClick={() => setShowSecret(!showSecret)}
|
||||
className="p-2 bg-violet-600/20 border border-violet-500/40 text-violet-400 hover:bg-violet-600/40 transition-colors shrink-0"
|
||||
onClick={async () => {
|
||||
if (showSecret) {
|
||||
setShowSecret(false);
|
||||
return;
|
||||
}
|
||||
// Need the full secret in state before showing it.
|
||||
const secret = await revealHmacSecret();
|
||||
if (secret) setShowSecret(true);
|
||||
}}
|
||||
disabled={revealing}
|
||||
className="p-2 bg-violet-600/20 border border-violet-500/40 text-violet-400 hover:bg-violet-600/40 transition-colors shrink-0 disabled:opacity-50"
|
||||
title={showSecret ? 'Hide' : 'Reveal'}
|
||||
>
|
||||
{showSecret ? <EyeOff size={14} /> : <Eye size={14} />}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleCopy(hmacSecret)}
|
||||
className="p-2 bg-violet-600/20 border border-violet-500/40 text-violet-400 hover:bg-violet-600/40 transition-colors shrink-0"
|
||||
onClick={async () => {
|
||||
// Copy needs the full secret. Fetch it lazily if
|
||||
// the operator hasn't clicked Reveal yet — no
|
||||
// point making them reveal first just to copy.
|
||||
const secret = hmacSecret || (await revealHmacSecret());
|
||||
if (secret) handleCopy(secret);
|
||||
}}
|
||||
disabled={revealing}
|
||||
className="p-2 bg-violet-600/20 border border-violet-500/40 text-violet-400 hover:bg-violet-600/40 transition-colors shrink-0 disabled:opacity-50"
|
||||
title="Copy key"
|
||||
>
|
||||
{copied ? <Check size={14} /> : <Copy size={14} />}
|
||||
|
||||
@@ -140,17 +140,51 @@ const OnboardingModal = React.memo(function OnboardingModal({
|
||||
].join('\n');
|
||||
const remoteAgentNeedsTor = agentMode === 'remote' && !torAddress;
|
||||
|
||||
// Issue #302 (tg12): the full HMAC secret no longer comes back from
|
||||
// GET /api/ai/connect-info. We fetch metadata + the masked fingerprint
|
||||
// first; if the operator has explicitly asked to see the key (the
|
||||
// ``reveal`` flag), we follow up with POST /api/ai/connect-info/reveal
|
||||
// (after a transparent POST /bootstrap if the secret hasn't been
|
||||
// minted yet) which carries the secret with strict no-store headers.
|
||||
const fetchAgentConnectInfo = async (reveal = true) => {
|
||||
setAgentLoading(true);
|
||||
setAgentMsg(null);
|
||||
try {
|
||||
const res = await fetch(`/api/ai/connect-info?reveal=${reveal ? 'true' : 'false'}`);
|
||||
const data = await res.json().catch(() => ({}));
|
||||
if (!res.ok || data?.ok === false) {
|
||||
throw new Error(data?.detail || 'Could not prepare agent credentials.');
|
||||
// 1) GET metadata + masked fingerprint.
|
||||
const metaRes = await fetch('/api/ai/connect-info');
|
||||
const metaData = await metaRes.json().catch(() => ({}));
|
||||
if (!metaRes.ok || metaData?.ok === false) {
|
||||
throw new Error(metaData?.detail || 'Could not prepare agent credentials.');
|
||||
}
|
||||
setAgentTier(metaData.access_tier === 'full' ? 'full' : 'restricted');
|
||||
|
||||
// 2) Mint the secret if it isn't set yet — transparent, idempotent.
|
||||
let secretSet = !!metaData.hmac_secret_set;
|
||||
if (!secretSet) {
|
||||
const bootRes = await fetch('/api/ai/connect-info/bootstrap', {
|
||||
method: 'POST',
|
||||
});
|
||||
const bootData = await bootRes.json().catch(() => ({}));
|
||||
if (!bootRes.ok || bootData?.ok === false) {
|
||||
throw new Error(bootData?.detail || 'Could not generate agent credentials.');
|
||||
}
|
||||
secretSet = !!bootData.hmac_secret_set;
|
||||
}
|
||||
|
||||
// 3) If the caller asked to see the secret, fetch it explicitly.
|
||||
// Otherwise the masked fingerprint is enough for the UI.
|
||||
if (reveal && secretSet) {
|
||||
const revealRes = await fetch('/api/ai/connect-info/reveal', {
|
||||
method: 'POST',
|
||||
});
|
||||
const revealData = await revealRes.json().catch(() => ({}));
|
||||
if (!revealRes.ok || revealData?.ok === false) {
|
||||
throw new Error(revealData?.detail || 'Could not reveal agent credentials.');
|
||||
}
|
||||
setAgentSecret(revealData.hmac_secret || '');
|
||||
} else {
|
||||
setAgentSecret(metaData.masked_hmac_secret || '');
|
||||
}
|
||||
setAgentSecret(data.hmac_secret || '');
|
||||
setAgentTier(data.access_tier === 'full' ? 'full' : 'restricted');
|
||||
setAgentMsg({ type: 'ok', text: 'Agent key is ready. Copy it into your local or remote agent runtime.' });
|
||||
} catch (error) {
|
||||
setAgentMsg({
|
||||
|
||||
@@ -74,17 +74,18 @@ import {
|
||||
Trash2,
|
||||
RotateCcw,
|
||||
Satellite,
|
||||
Eye,
|
||||
EyeOff,
|
||||
Copy,
|
||||
Check,
|
||||
Radar,
|
||||
} from 'lucide-react';
|
||||
import {
|
||||
clearSentinelCredentials,
|
||||
getSentinelCredentialStorageMode,
|
||||
getSentinelCredentials,
|
||||
setSentinelCredentials,
|
||||
// Issue #298: Sentinel credentials now live server-side. The legacy
|
||||
// browser-storage helpers (getSentinelCredentials / setSentinelCredentials
|
||||
// / clearSentinelCredentials / getSentinelCredentialStorageMode) have
|
||||
// been removed from sentinelHub.ts. We use the new status check + the
|
||||
// one-time migration helper instead.
|
||||
checkBackendSentinelStatus,
|
||||
migrateLegacySentinelBrowserKeys,
|
||||
} from '@/lib/sentinelHub';
|
||||
import {
|
||||
getPrivacyProfilePreference,
|
||||
@@ -143,10 +144,14 @@ const WEIGHT_COLORS: Record<number, string> = {
|
||||
const SETTINGS_FOCUS_KEY = 'sb_settings_focus';
|
||||
const WORMHOLE_RETURN_KEY = 'sb_wormhole_return_target';
|
||||
const WORMHOLE_READY_EVENT = 'sb:wormhole-ready';
|
||||
// Issue #298 (tg12): Sentinel credentials moved from browser storage to
|
||||
// the backend ``.env`` (managed through the API Keys panel). The legacy
|
||||
// keys (``sb_sentinel_client_id`` / ``sb_sentinel_client_secret`` /
|
||||
// ``sb_sentinel_instance_id``) are no longer treated as sensitive
|
||||
// browser state because they are no longer written. ``SentinelTab``
|
||||
// runs ``migrateLegacySentinelBrowserKeys()`` once on mount to clear
|
||||
// any leftover values from pre-#298 installs.
|
||||
const PRIVACY_SENSITIVE_BROWSER_KEYS = [
|
||||
'sb_sentinel_client_id',
|
||||
'sb_sentinel_client_secret',
|
||||
'sb_sentinel_instance_id',
|
||||
'sb_infonet_head',
|
||||
'sb_infonet_head_history',
|
||||
'sb_infonet_peers',
|
||||
@@ -2615,7 +2620,9 @@ const SettingsPanel = React.memo(function SettingsPanel({
|
||||
)}
|
||||
|
||||
{/* ==================== SENTINEL HUB TAB ==================== */}
|
||||
{activeTab === 'sentinel' && <SentinelTab />}
|
||||
{activeTab === 'sentinel' && (
|
||||
<SentinelTab onGoToApiKeys={() => setActiveTab('api-keys')} />
|
||||
)}
|
||||
{activeTab === 'sar' && <SarSettingsTab />}
|
||||
</motion.div>
|
||||
</>
|
||||
@@ -2625,63 +2632,58 @@ const SettingsPanel = React.memo(function SettingsPanel({
|
||||
});
|
||||
|
||||
// ─── Sentinel Hub Settings Tab ─────────────────────────────────────────────
|
||||
function SentinelTab() {
|
||||
const [clientId, setClientId] = useState(() => getSentinelCredentials().clientId);
|
||||
const [clientSecret, setClientSecret] = useState(() => getSentinelCredentials().clientSecret);
|
||||
const [testing, setTesting] = useState(false);
|
||||
const [status, setStatus] = useState<{ ok: boolean; msg: string } | null>(null);
|
||||
const [dirty, setDirty] = useState(false);
|
||||
const [showSecret, setShowSecret] = useState(false);
|
||||
const storageMode = getSentinelCredentialStorageMode();
|
||||
// Issue #298 (tg12): Sentinel credentials now live in the backend ``.env``
|
||||
// and are managed through the existing API Keys panel — same flow as every
|
||||
// other third-party API key (OpenSky, AIS Stream, Finnhub, …). This tab no
|
||||
// longer collects credentials. It does three things:
|
||||
// 1. Runs migrateLegacySentinelBrowserKeys() once to wipe pre-#298
|
||||
// values out of localStorage / sessionStorage.
|
||||
// 2. Shows the operator whether the backend has the credentials.
|
||||
// 3. Offers a one-click jump to the API Keys panel where they enter them.
|
||||
function SentinelTab({ onGoToApiKeys }: { onGoToApiKeys: () => void }) {
|
||||
const [backendConfigured, setBackendConfigured] = useState<boolean | null>(null);
|
||||
const [migrationResult, setMigrationResult] = useState<{ cleared: string[] } | null>(null);
|
||||
const [refreshing, setRefreshing] = useState(false);
|
||||
|
||||
const save = () => {
|
||||
setSentinelCredentials(clientId.trim(), clientSecret.trim());
|
||||
setDirty(false);
|
||||
setStatus({
|
||||
ok: true,
|
||||
msg: `Credentials saved to browser ${storageMode === 'session' ? 'session' : 'local'} storage.`,
|
||||
});
|
||||
};
|
||||
useEffect(() => {
|
||||
// One-time legacy browser-key wipe. Idempotent — does nothing on a
|
||||
// fresh install. We do NOT silently POST any browser-stored values
|
||||
// to the backend; operators who relied on them re-enter once in the
|
||||
// API Keys panel. Doing the wipe regardless ensures pre-#298 secrets
|
||||
// don't linger in localStorage indefinitely.
|
||||
setMigrationResult(migrateLegacySentinelBrowserKeys());
|
||||
|
||||
const testConnection = async () => {
|
||||
setTesting(true);
|
||||
setStatus(null);
|
||||
// Check whether the backend has SENTINEL_CLIENT_ID/SECRET set.
|
||||
void checkBackendSentinelStatus().then(setBackendConfigured);
|
||||
}, []);
|
||||
|
||||
const refresh = async () => {
|
||||
setRefreshing(true);
|
||||
try {
|
||||
const resp = await fetch(`${API_BASE}/api/sentinel/token`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
||||
body: new URLSearchParams({
|
||||
client_id: clientId.trim(),
|
||||
client_secret: clientSecret.trim(),
|
||||
}),
|
||||
});
|
||||
if (resp.ok) {
|
||||
setStatus({ ok: true, msg: 'Connected — token acquired successfully.' });
|
||||
} else {
|
||||
const text = await resp.text().catch(() => '');
|
||||
setStatus({ ok: false, msg: `Auth failed (${resp.status}): ${text.slice(0, 120)}` });
|
||||
}
|
||||
} catch (err) {
|
||||
const msg =
|
||||
typeof err === 'object' && err !== null && 'message' in err
|
||||
? String((err as { message?: string }).message)
|
||||
: 'unknown';
|
||||
setStatus({ ok: false, msg: `Network error: ${msg}` });
|
||||
// refreshSentinelStatus() invalidates the module-level cache so the
|
||||
// next check actually hits the backend instead of returning the
|
||||
// memoized value. Lazy-imported so SSR/tests don't choke.
|
||||
const { refreshSentinelStatus } = await import('@/lib/sentinelHub');
|
||||
refreshSentinelStatus();
|
||||
const ok = await checkBackendSentinelStatus();
|
||||
setBackendConfigured(ok);
|
||||
} finally {
|
||||
setTesting(false);
|
||||
setRefreshing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const clear = () => {
|
||||
clearSentinelCredentials();
|
||||
setClientId('');
|
||||
setClientSecret('');
|
||||
setDirty(false);
|
||||
setStatus({ ok: true, msg: 'Credentials cleared.' });
|
||||
};
|
||||
|
||||
const inputCls =
|
||||
'w-full bg-[var(--bg-primary)]/60 border border-[var(--border-primary)] px-3 py-2 text-[11px] font-mono text-[var(--text-secondary)] outline-none focus:border-purple-500 placeholder:text-[var(--text-muted)]/50 transition-colors';
|
||||
const statusColor =
|
||||
backendConfigured === null
|
||||
? 'text-[var(--text-muted)]'
|
||||
: backendConfigured
|
||||
? 'text-green-400'
|
||||
: 'text-yellow-400';
|
||||
const statusLabel =
|
||||
backendConfigured === null
|
||||
? 'CHECKING…'
|
||||
: backendConfigured
|
||||
? 'CONFIGURED ON BACKEND'
|
||||
: 'NOT CONFIGURED';
|
||||
|
||||
return (
|
||||
<div className="flex-1 flex flex-col overflow-y-auto styled-scrollbar">
|
||||
@@ -2733,106 +2735,73 @@ function SentinelTab() {
|
||||
</p>
|
||||
<p>
|
||||
<span className="text-purple-400 font-bold">STEP 3:</span>{' '}
|
||||
Paste both values in the fields below, hit{' '}
|
||||
<span className="text-cyan-400">SAVE</span>, then{' '}
|
||||
<span className="text-cyan-400">TEST CONNECTION</span> to verify.
|
||||
That's it!
|
||||
Paste both values into the <span className="text-cyan-400">API Keys</span> panel
|
||||
under <span className="text-white">SENTINEL_CLIENT_ID</span> and{' '}
|
||||
<span className="text-white">SENTINEL_CLIENT_SECRET</span>, then hit Save.
|
||||
The backend uses them to mint short-lived tokens — your browser never sees
|
||||
the secret again.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Credential Inputs */}
|
||||
<div className="p-4 space-y-3">
|
||||
<div>
|
||||
<label className="text-[13px] font-mono text-[var(--text-muted)] tracking-widest mb-1 block">
|
||||
CLIENT ID
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={clientId}
|
||||
onChange={(e) => {
|
||||
setClientId(e.target.value);
|
||||
setDirty(true);
|
||||
}}
|
||||
placeholder="sh-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
|
||||
spellCheck={false}
|
||||
autoComplete="off"
|
||||
className={inputCls}
|
||||
/>
|
||||
{/* Backend status */}
|
||||
<div className="mx-4 mt-3 p-3 border border-[var(--border-primary)] bg-[var(--bg-primary)]/30">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<span className="text-[13px] font-mono text-[var(--text-muted)] tracking-widest">
|
||||
BACKEND STATUS
|
||||
</span>
|
||||
<span className={`text-[11px] font-mono font-bold ${statusColor}`}>
|
||||
{statusLabel}
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<label className="text-[13px] font-mono text-[var(--text-muted)] tracking-widest mb-1 block">
|
||||
CLIENT SECRET
|
||||
</label>
|
||||
<input
|
||||
type={showSecret ? 'text' : 'password'}
|
||||
value={clientSecret}
|
||||
onChange={(e) => {
|
||||
setClientSecret(e.target.value);
|
||||
setDirty(true);
|
||||
}}
|
||||
placeholder="Paste client secret here..."
|
||||
spellCheck={false}
|
||||
autoComplete="new-password"
|
||||
className={inputCls}
|
||||
/>
|
||||
<p className="text-[13px] text-[var(--text-muted)] font-mono leading-relaxed">
|
||||
{backendConfigured === false
|
||||
? 'Sentinel credentials are not yet set in the backend .env. Open the API Keys panel to enter them — the tile overlay and Sentinel-2 Intel Card will work as soon as both fields are saved.'
|
||||
: backendConfigured === true
|
||||
? 'Sentinel credentials are configured on the backend. The dashboard fetches tokens automatically; your browser does not handle the secret.'
|
||||
: 'Checking backend configuration…'}
|
||||
</p>
|
||||
<div className="mt-3 flex items-center gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowSecret((current) => !current)}
|
||||
className="mt-2 inline-flex items-center gap-1.5 text-[13px] font-mono text-[var(--text-muted)] hover:text-[var(--text-secondary)] transition-colors"
|
||||
onClick={onGoToApiKeys}
|
||||
className="flex-1 px-4 py-2 bg-purple-500/20 border border-purple-500/40 text-purple-400 hover:bg-purple-500/30 transition-colors text-sm font-mono flex items-center justify-center gap-1.5"
|
||||
>
|
||||
{showSecret ? <EyeOff size={10} /> : <Eye size={10} />}
|
||||
{showSecret ? 'HIDE SECRET' : 'SHOW SECRET'}
|
||||
OPEN API KEYS PANEL
|
||||
</button>
|
||||
<button
|
||||
onClick={refresh}
|
||||
disabled={refreshing}
|
||||
className="px-3 py-2 border border-[var(--border-primary)] text-[var(--text-muted)] hover:text-cyan-400 hover:border-cyan-500/50 transition-all text-sm font-mono disabled:opacity-40"
|
||||
title="Re-check backend status"
|
||||
>
|
||||
{refreshing ? 'CHECKING…' : 'REFRESH'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Status */}
|
||||
{status && (
|
||||
<div
|
||||
className={`mx-4 mb-2 px-3 py-2 text-sm font-mono ${status.ok ? 'text-green-400 bg-green-950/20 border border-green-900/30' : 'text-red-400 bg-red-950/20 border border-red-900/30'}`}
|
||||
>
|
||||
{status.msg}
|
||||
{/* Migration notice (only if we actually cleared anything) */}
|
||||
{migrationResult && migrationResult.cleared.length > 0 && (
|
||||
<div className="mx-4 mt-3 px-3 py-2 text-sm font-mono text-cyan-400 bg-cyan-950/20 border border-cyan-900/30">
|
||||
<p className="font-bold mb-1">LEGACY BROWSER CREDENTIALS CLEARED</p>
|
||||
<p className="text-[13px] leading-relaxed text-[var(--text-muted)]">
|
||||
Found and removed pre-#298 Sentinel credentials from browser storage
|
||||
({migrationResult.cleared.join(', ')}). Re-enter them in the API Keys panel
|
||||
above; they'll be stored server-side from now on and never sent back to
|
||||
the browser.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Actions */}
|
||||
{/* Footer + Usage Meter */}
|
||||
<div className="p-4 border-t border-[var(--border-primary)]/80 mt-auto">
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={save}
|
||||
disabled={!dirty}
|
||||
className="flex-1 px-4 py-2 bg-purple-500/20 border border-purple-500/40 text-purple-400 hover:bg-purple-500/30 transition-colors text-sm font-mono flex items-center justify-center gap-1.5 disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
>
|
||||
<Save size={10} />
|
||||
SAVE
|
||||
</button>
|
||||
<button
|
||||
onClick={testConnection}
|
||||
disabled={testing || !clientId || !clientSecret}
|
||||
className="flex-1 px-4 py-2 bg-cyan-500/20 border border-cyan-500/40 text-cyan-400 hover:bg-cyan-500/30 transition-colors text-sm font-mono flex items-center justify-center gap-1.5 disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
>
|
||||
{testing ? 'TESTING...' : 'TEST CONNECTION'}
|
||||
</button>
|
||||
<button
|
||||
onClick={clear}
|
||||
className="px-3 py-2 border border-[var(--border-primary)] text-[var(--text-muted)] hover:text-red-400 hover:border-red-500/50 hover:bg-red-950/10 transition-all text-sm font-mono flex items-center gap-1.5"
|
||||
title="Clear credentials"
|
||||
>
|
||||
<Trash2 size={10} />
|
||||
</button>
|
||||
</div>
|
||||
{/* Usage Meter */}
|
||||
<UsageMeter />
|
||||
|
||||
<div className="mt-2 p-2 border border-[var(--border-primary)]/40 bg-[var(--bg-primary)]/30">
|
||||
<p className="text-[13px] text-[var(--text-muted)] font-mono leading-relaxed">
|
||||
Credentials stay in browser-only storage and never touch ShadowBroker servers.
|
||||
{storageMode === 'session'
|
||||
? ' Current privacy mode keeps them in session storage only.'
|
||||
: ' Current privacy mode keeps them in local storage for persistence.'}
|
||||
Credentials are stored in the backend <span className="text-cyan-400">.env</span>{' '}
|
||||
and never sent to the browser. The tile proxy mints short-lived OAuth tokens
|
||||
on demand using those values.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -859,7 +859,7 @@ export default function TopRightControls({
|
||||
}>
|
||||
{activatingPhase === 'done'
|
||||
? (syncOutcomeRaw === 'solo'
|
||||
? `${t('node.soloReady')} — ${nodeStatus?.total_events ?? 0} ${t('node.events')}`
|
||||
? `${t('node.soloNodeReady')} — ${nodeStatus?.total_events ?? 0} ${t('node.events')}`
|
||||
: `${t('node.synced')} — ${nodeStatus?.total_events ?? 0} ${t('node.events')}`)
|
||||
: activatingPhase === 'sync'
|
||||
? `${t('node.syncingChain')}${(nodeStatus?.total_events ?? 0) > 0 ? ` ${nodeStatus?.total_events} ${t('node.events')}` : ''}`
|
||||
@@ -1013,8 +1013,8 @@ export default function TopRightControls({
|
||||
: t('terminal.terminalDetail')}
|
||||
<div className="mt-2 text-[12px] text-cyan-200/70 normal-case tracking-normal">
|
||||
{terminalPrivateReady
|
||||
? t('terminal.enterTerminalDetail')
|
||||
: t('terminal.terminalDetailMore')}
|
||||
? t('terminal.identityReady')
|
||||
: t('terminal.identityNotReady')}
|
||||
</div>
|
||||
</div>
|
||||
{terminalLaunchError && (
|
||||
@@ -1025,15 +1025,15 @@ export default function TopRightControls({
|
||||
<div className="border border-cyan-500/20 bg-black/30 px-4 py-4 text-[12px] font-mono text-slate-200 leading-[1.85]">
|
||||
<div className="text-cyan-300 tracking-[0.18em]">{t('terminal.beforeYouEnter')}</div>
|
||||
<ul className="mt-3 space-y-2 list-disc pl-5">
|
||||
<li>{t('terminal.term1')}</li>
|
||||
<li>{t('terminal.term2')}</li>
|
||||
<li>{t('terminal.term3')}</li>
|
||||
<li>{t('terminal.termTerminal1')}</li>
|
||||
<li>{t('terminal.termTerminal2')}</li>
|
||||
<li>{t('terminal.termTerminal3')}</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div className="border border-amber-500/20 bg-amber-950/10 px-4 py-3 text-[12px] font-mono text-amber-200/80 leading-[1.85]">
|
||||
<div className="text-amber-300 tracking-[0.18em]">{t('terminal.wormholeCleanup')}</div>
|
||||
<div className="mt-2">
|
||||
{t('terminal.wormholeCleanupDetail')}
|
||||
{t('terminal.cleanupDetail')}
|
||||
</div>
|
||||
</div>
|
||||
<div className="grid grid-cols-1 gap-3 sm:grid-cols-3">
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
normalizeViewBounds,
|
||||
type ViewBounds,
|
||||
} from '@/lib/viewportPrivacy';
|
||||
import { setLiveDataBounds } from '@/lib/liveDataViewport';
|
||||
|
||||
const VIEWPORT_POST_DEBOUNCE_MS = 2500;
|
||||
const VIEWPORT_POST_MIN_INTERVAL_MS = 12000;
|
||||
@@ -70,6 +71,17 @@ export function useViewportBounds(
|
||||
window.dispatchEvent(new CustomEvent(VIEWPORT_COMMITTED_EVENT));
|
||||
}
|
||||
|
||||
// Issue #288: hand the same coarsened/expanded bounds to the live-data
|
||||
// poller so heavy collections in /api/live-data/{fast,slow} can be
|
||||
// scoped to the visible region. Static reference layers are unaffected
|
||||
// — see backend _FAST_BBOX_HEAVY_KEYS / _SLOW_BBOX_HEAVY_KEYS.
|
||||
setLiveDataBounds({
|
||||
south: preloadBounds.south,
|
||||
west: preloadBounds.west,
|
||||
north: preloadBounds.north,
|
||||
east: preloadBounds.east,
|
||||
});
|
||||
|
||||
// Debounce POSTing viewport bounds to backend for dynamic AIS stream filtering
|
||||
if (debounceTimerRef.current) clearTimeout(debounceTimerRef.current);
|
||||
debounceTimerRef.current = setTimeout(() => {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { useEffect, useRef } from "react";
|
||||
import { API_BASE } from "@/lib/api";
|
||||
import { mergeData, setBackendStatus as setStoreBackendStatus } from "./useDataStore";
|
||||
import { appendLiveDataBoundsParams } from "@/lib/liveDataViewport";
|
||||
|
||||
export type BackendStatus = 'connecting' | 'connected' | 'disconnected';
|
||||
|
||||
@@ -32,8 +33,8 @@ export async function forceRefreshLiveData(): Promise<void> {
|
||||
|
||||
try {
|
||||
const [fastRes, slowRes] = await Promise.all([
|
||||
fetch(`${API_BASE}/api/live-data/fast`),
|
||||
fetch(`${API_BASE}/api/live-data/slow`),
|
||||
fetch(appendLiveDataBoundsParams(`${API_BASE}/api/live-data/fast`)),
|
||||
fetch(appendLiveDataBoundsParams(`${API_BASE}/api/live-data/slow`)),
|
||||
]);
|
||||
|
||||
if (fastRes.ok) {
|
||||
@@ -85,9 +86,13 @@ export const LAYER_TOGGLE_EVENT = 'sb:layer-toggle';
|
||||
/**
|
||||
* Polls the backend for fast and slow data tiers.
|
||||
*
|
||||
* All data is fetched globally (no bbox filtering) — the backend returns its
|
||||
* full in-memory cache and MapLibre culls off-screen entities on the GPU.
|
||||
* This eliminates the "empty map when zooming out" lag.
|
||||
* Issue #288: heavy, density-driven layers (vessels, aircraft, gdelt
|
||||
* events, fires, sigint, …) are bbox-scoped to the visible map area via
|
||||
* `appendLiveDataBoundsParams`. Static reference layers (datacenters,
|
||||
* military bases, power plants, satellites, weather, news, …) are NOT
|
||||
* filtered backend-side, so panning never reveals an "empty world" of
|
||||
* infrastructure. World-zoomed views skip bbox params entirely and hit
|
||||
* the shared ETag cache exactly like the pre-#288 behaviour.
|
||||
*
|
||||
* The AIS stream viewport POST (/api/viewport) is still handled separately
|
||||
* by useViewportBounds to limit upstream AIS ingestion.
|
||||
@@ -147,7 +152,9 @@ export function useDataPolling() {
|
||||
const useStartupPayload = !fetchedStartupFastPayload && !fastEtag.current;
|
||||
const headers: Record<string, string> = {};
|
||||
if (!useStartupPayload && fastEtag.current) headers['If-None-Match'] = fastEtag.current;
|
||||
const url = `${API_BASE}/api/live-data/fast${useStartupPayload ? '?initial=1' : ''}`;
|
||||
const url = appendLiveDataBoundsParams(
|
||||
`${API_BASE}/api/live-data/fast${useStartupPayload ? '?initial=1' : ''}`,
|
||||
);
|
||||
const res = await fetch(url, {
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
@@ -193,10 +200,13 @@ export function useDataPolling() {
|
||||
try {
|
||||
const headers: Record<string, string> = {};
|
||||
if (slowEtag.current) headers['If-None-Match'] = slowEtag.current;
|
||||
const res = await fetch(`${API_BASE}/api/live-data/slow`, {
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
});
|
||||
const res = await fetch(
|
||||
appendLiveDataBoundsParams(`${API_BASE}/api/live-data/slow`),
|
||||
{
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
},
|
||||
);
|
||||
if (res.status === 304) { scheduleNext('slow'); return; }
|
||||
if (res.ok) {
|
||||
slowEtag.current = res.headers.get('etag') || null;
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
* Shared module-level state for the current map viewport bounds, used by
|
||||
* `useDataPolling` to scope `/api/live-data/{fast,slow}` to the visible
|
||||
* area when the user has zoomed in.
|
||||
*
|
||||
* Issue #288: the backend now bbox-filters dense layers (vessels, aircraft,
|
||||
* gdelt events, fires, sigint, …) when all four bounds are supplied. Light
|
||||
* reference layers stay world-scale. Heavy collections aren't sent over the
|
||||
* wire for parts of the planet the operator isn't looking at, which cuts
|
||||
* the steady-state poll from ~27 MB to ~5 MB for a typical regional view.
|
||||
*
|
||||
* No bounds set → callers omit the params entirely → backend ships full
|
||||
* world data (byte-identical to pre-#288 behaviour). This keeps the cold
|
||||
* boot path (where no map is mounted yet) and the world-zoomed view
|
||||
* unchanged.
|
||||
*/
|
||||
|
||||
export interface LiveDataBounds {
|
||||
south: number;
|
||||
west: number;
|
||||
north: number;
|
||||
east: number;
|
||||
}
|
||||
|
||||
let _current: LiveDataBounds | null = null;
|
||||
|
||||
/**
 * Decide whether a viewport is wide enough to be treated as "the whole world".
 *
 * A viewport qualifies when it covers at least 300° of longitude or at least
 * 120° of latitude. A negative longitude span (west numerically greater than
 * east) is wrapped by adding 360°; a negative latitude span is clamped to 0.
 *
 * Rationale: the backend treats such viewports as world-scale and skips
 * filtering, so the frontend does not send bounds at all — that keeps the
 * ETag cache shared across operators in the zoomed-out case.
 */
function isEffectivelyWorld(bounds: LiveDataBounds): boolean {
  const { south, west, north, east } = bounds;

  const rawLngSpan = east - west;
  const lngSpan = rawLngSpan < 0 ? rawLngSpan + 360 : rawLngSpan;
  if (lngSpan >= 300) return true;

  return Math.max(0, north - south) >= 120;
}
|
||||
|
||||
/**
 * Record the most recently committed viewport bounds (called from
 * `useViewportBounds` when the map's bounds change).
 *
 * The stored value is reset to `null` — meaning "fetch world-scale data" —
 * when any of the following holds:
 *   - `bounds` is `null` (e.g. on unmount);
 *   - any of the four edges is not a finite number;
 *   - the viewport is effectively world-sized (see `isEffectivelyWorld`),
 *     so the shared ETag cache can be used across operators.
 * Otherwise the given bounds object is stored as-is.
 */
export function setLiveDataBounds(bounds: LiveDataBounds | null): void {
  const usable =
    bounds !== null &&
    [bounds.south, bounds.west, bounds.north, bounds.east].every((edge) =>
      Number.isFinite(edge),
    ) &&
    !isEffectivelyWorld(bounds);

  _current = usable ? bounds : null;
}
|
||||
|
||||
/**
 * Return the bounds last accepted by `setLiveDataBounds`.
 *
 * A `null` result means no usable bounds are stored; callers must handle it
 * by requesting the full world payload without any bbox parameters.
 */
export function getLiveDataBounds(): LiveDataBounds | null {
  return _current;
}
|
||||
|
||||
/**
 * Add `s`, `w`, `n`, `e` query parameters for the stored viewport to `url`.
 *
 * When no bounds are stored the URL is returned untouched. Kept in one place
 * so every live-data caller applies the same quantization and the same
 * world-scale skip rule.
 *
 * South/west are rounded down and north/east rounded up to whole degrees,
 * mirroring the backend's 1° ETag quantization so client and server agree on
 * which bounds map to the same cache key.
 */
export function appendLiveDataBoundsParams(url: string): string {
  if (!_current) return url;

  const { south, west, north, east } = _current;
  const query = [
    `s=${Math.floor(south)}`,
    `w=${Math.floor(west)}`,
    `n=${Math.ceil(north)}`,
    `e=${Math.ceil(east)}`,
  ].join('&');

  // Continue an existing query string if there is one, otherwise start it.
  const joiner = url.includes('?') ? '&' : '?';
  return url + joiner + query;
}
|
||||
+119
-68
@@ -1,77 +1,137 @@
|
||||
/**
|
||||
* Sentinel Hub (Copernicus CDSE) — client-side token management & Process API tile fetcher.
|
||||
* Sentinel Hub (Copernicus CDSE) — client-side token + Process API tile fetcher.
|
||||
*
|
||||
* Credentials are stored in browser-controlled storage only. In privacy/session
|
||||
* mode they stay session-scoped; otherwise they persist in local storage. Token
|
||||
* exchange is proxied through the ShadowBroker backend (/api/sentinel/token) to
|
||||
* avoid CORS blocks from the Copernicus identity provider. Credentials are
|
||||
* forwarded, never stored server-side.
|
||||
* Issue #298 (tg12): Credentials are now stored server-side in the backend
|
||||
* ``.env`` (managed through the existing ``/api/settings/api-keys`` flow,
|
||||
* same as every other third-party API key). The browser no longer holds
|
||||
* ``client_id`` / ``client_secret`` in localStorage or sessionStorage and
|
||||
* no longer forwards them in proxy requests.
|
||||
*
|
||||
* Uses the Process API with inline evalscripts — no Instance ID / Configuration needed.
|
||||
* Old browser-storage keys (``sb_sentinel_client_id`` / ``sb_sentinel_client_secret``
|
||||
* / ``sb_sentinel_instance_id``) are migrated out by ``SettingsPanel`` on
|
||||
* first mount after the upgrade — see ``migrateLegacySentinelBrowserKeys()``
|
||||
* exported below.
|
||||
*/
|
||||
|
||||
import { API_BASE } from '@/lib/api';
|
||||
import {
|
||||
getSensitiveBrowserItem,
|
||||
getSensitiveBrowserStorageMode,
|
||||
removeSensitiveBrowserItem,
|
||||
setSensitiveBrowserItem,
|
||||
} from '@/lib/privacyBrowserStorage';
|
||||
|
||||
// Token exchange proxied through our backend (Copernicus blocks browser CORS)
|
||||
// Token exchange proxied through our backend (Copernicus blocks browser CORS).
|
||||
const TOKEN_PROXY_URL = `${API_BASE}/api/sentinel/token`;
|
||||
|
||||
// browser-storage keys
|
||||
const LS_CLIENT_ID = 'sb_sentinel_client_id';
|
||||
const LS_CLIENT_SECRET = 'sb_sentinel_client_secret';
|
||||
|
||||
// In-memory token cache (never persisted)
|
||||
let cachedToken: string | null = null;
|
||||
let tokenExpiry = 0;
|
||||
// Dedup: only one in-flight token request at a time
|
||||
let _tokenPromise: Promise<string | null> | null = null;
|
||||
|
||||
// ─── Credential helpers ────────────────────────────────────────────────────
|
||||
// In-memory cache of "does the backend have Sentinel credentials configured?"
|
||||
// so the rest of the UI can short-circuit tile load attempts without a server
|
||||
// round-trip per tile. Refreshed by callers via `refreshSentinelStatus()`.
|
||||
let _backendCredentialsConfigured: boolean | null = null;
|
||||
let _backendStatusPromise: Promise<boolean> | null = null;
|
||||
|
||||
export function getSentinelCredentials(): {
|
||||
clientId: string;
|
||||
clientSecret: string;
|
||||
} {
|
||||
if (typeof window === 'undefined') return { clientId: '', clientSecret: '' };
|
||||
return {
|
||||
clientId: getSensitiveBrowserItem(LS_CLIENT_ID) || '',
|
||||
clientSecret: getSensitiveBrowserItem(LS_CLIENT_SECRET) || '',
|
||||
};
|
||||
// ─── Credential status (server-side) ───────────────────────────────────────
|
||||
|
||||
/**
 * Ask the backend whether both Sentinel credentials are configured in ``.env``.
 *
 * A successful answer is cached in memory; call ``refreshSentinelStatus()``
 * after the operator saves new API keys in the settings panel. Concurrent
 * callers share a single in-flight request.
 *
 * Resolves to ``false`` on a non-OK response, a network error, or an
 * unparseable body so the UI fails safely (no broken tile requests). Such
 * failures are NOT cached, so a transient outage does not pin the status to
 * "not configured" — the next call simply retries. Never returns the secret
 * itself — that stays server-side.
 *
 * @returns ``true`` only when both ``sentinel_client_id`` and
 *          ``sentinel_client_secret`` are reported with ``is_set === true``.
 */
export async function checkBackendSentinelStatus(): Promise<boolean> {
  if (_backendCredentialsConfigured !== null) return _backendCredentialsConfigured;
  if (_backendStatusPromise) return _backendStatusPromise;

  const requiredIds = ['sentinel_client_id', 'sentinel_client_secret'];

  _backendStatusPromise = (async () => {
    try {
      const resp = await fetch(`${API_BASE}/api/settings/api-keys`, {
        headers: { Accept: 'application/json' },
      });
      if (!resp.ok) return false;

      // /api/settings/api-keys is expected to return an array of
      // { id, env_key, is_set, ... } rows.
      const list: unknown = await resp.json();

      // Collect distinct ids that are set. Counting rows instead would let a
      // duplicated row either fake a "configured" result or break a real one.
      const setIds = new Set<string>();
      if (Array.isArray(list)) {
        for (const row of list as Array<{ id?: unknown; is_set?: unknown } | null>) {
          if (row && typeof row.id === 'string' && row.is_set === true) {
            setIds.add(row.id);
          }
        }
      }

      const configured = requiredIds.every((id) => setIds.has(id));
      _backendCredentialsConfigured = configured;
      return configured;
    } catch {
      // Network / JSON failure: report "not configured" for this call only.
      // Leaving the cache untouched matches the non-OK path above.
      return false;
    } finally {
      _backendStatusPromise = null;
    }
  })();

  return _backendStatusPromise;
}
|
||||
|
||||
export function setSentinelCredentials(clientId: string, clientSecret: string): void {
|
||||
setSensitiveBrowserItem(LS_CLIENT_ID, clientId);
|
||||
setSensitiveBrowserItem(LS_CLIENT_SECRET, clientSecret);
|
||||
// Invalidate cached token when credentials change
|
||||
/**
 * Forget everything cached about Sentinel on the client.
 *
 * Intended to be called after the API Keys panel saves: the in-memory token
 * is dropped because credentials may have changed, and the cached
 * "configured on backend" flag is reset so the next status check hits the
 * backend again.
 */
export function refreshSentinelStatus(): void {
  // Token derived from possibly-outdated credentials.
  cachedToken = null;
  tokenExpiry = 0;

  // Status flag — `null` means "unknown, ask the backend".
  _backendCredentialsConfigured = null;
}
|
||||
|
||||
export function clearSentinelCredentials(): void {
|
||||
removeSensitiveBrowserItem(LS_CLIENT_ID);
|
||||
removeSensitiveBrowserItem(LS_CLIENT_SECRET);
|
||||
// Also remove legacy instance ID if present
|
||||
removeSensitiveBrowserItem('sb_sentinel_instance_id');
|
||||
if (typeof window !== 'undefined') {
|
||||
localStorage.removeItem('sb_sentinel_instance_id');
|
||||
sessionStorage.removeItem('sb_sentinel_instance_id');
|
||||
}
|
||||
cachedToken = null;
|
||||
tokenExpiry = 0;
|
||||
}
|
||||
|
||||
export function getSentinelCredentialStorageMode(): 'local' | 'session' {
|
||||
return getSensitiveBrowserStorageMode();
|
||||
/**
 * Read the cached backend credential status without touching the network.
 *
 * @returns the cached flag, or ``null`` while no status is cached (for
 *          example before ``checkBackendSentinelStatus()`` has stored one,
 *          or after ``refreshSentinelStatus()`` cleared it).
 */
export function getCachedSentinelStatus(): boolean | null {
  return _backendCredentialsConfigured;
}
|
||||
|
||||
/**
|
||||
* Back-compat shim. Pre-#298 callers asked ``hasSentinelCredentials()`` to
|
||||
* decide whether to render the Sentinel layer / open the API key prompt.
|
||||
* The credential now lives server-side, so this is just the cached
|
||||
* server-status check. Returns ``false`` until the first
|
||||
* ``checkBackendSentinelStatus()`` resolves (callers should kick that off
|
||||
* once at app startup — see ``page.tsx`` mount effect).
|
||||
*/
|
||||
export function hasSentinelCredentials(): boolean {
|
||||
const { clientId, clientSecret } = getSentinelCredentials();
|
||||
return Boolean(clientId && clientSecret);
|
||||
return _backendCredentialsConfigured === true;
|
||||
}
|
||||
|
||||
/**
 * Remove the browser-storage keys that pre-#298 versions used for Sentinel
 * credentials, from both localStorage and sessionStorage.
 *
 * Idempotent and safe to run on every page load: when none of the keys exist
 * nothing is removed and ``cleared`` is empty. Outside a browser (no
 * ``window``) it returns an empty result immediately.
 *
 * The legacy values are deliberately NOT posted to the backend — doing so
 * would silently move a secret across a trust boundary without operator
 * consent. Operators who relied on browser-stored credentials re-enter them
 * once in the API Keys panel; this helper only wipes the leftovers.
 *
 * @returns ``cleared`` — each legacy key that was found and removed from at
 *          least one storage, listed once, in declaration order.
 */
export function migrateLegacySentinelBrowserKeys(): { cleared: string[] } {
  const cleared: string[] = [];
  if (typeof window === 'undefined') return { cleared };

  const legacyKeys = [
    'sb_sentinel_client_id',
    'sb_sentinel_client_secret',
    'sb_sentinel_instance_id',
  ];
  // Resolved lazily inside the try block: merely reading the storage
  // property can throw in locked-down / privacy modes.
  const storageGetters = [() => window.localStorage, () => window.sessionStorage];

  for (const key of legacyKeys) {
    for (const getStorage of storageGetters) {
      try {
        const storage = getStorage();
        if (storage?.getItem(key) === null) continue;
        storage.removeItem(key);
        if (!cleared.includes(key)) cleared.push(key);
      } catch {
        /* ignore quota / privacy mode errors */
      }
    }
  }

  return { cleared };
}
|
||||
|
||||
// ─── OAuth2 token ──────────────────────────────────────────────────────────
|
||||
@@ -79,14 +139,16 @@ export function hasSentinelCredentials(): boolean {
|
||||
/**
|
||||
* Fetch an OAuth2 access token using the client_credentials grant.
|
||||
* Caches in memory; auto-refreshes 30 s before expiry.
|
||||
*
|
||||
* The request body NO LONGER carries client_id/secret — the backend
|
||||
* resolves credentials from its ``.env`` via the API Keys flow. The
|
||||
* server-side proxy still accepts body credentials for legacy callers,
|
||||
* but the dashboard does not supply them.
|
||||
*/
|
||||
export function getSentinelToken(): Promise<string | null> {
|
||||
// Return cached token if still valid (with 30 s margin)
|
||||
if (cachedToken && Date.now() < tokenExpiry - 30_000) return Promise.resolve(cachedToken);
|
||||
|
||||
const { clientId, clientSecret } = getSentinelCredentials();
|
||||
if (!clientId || !clientSecret) return Promise.resolve(null);
|
||||
|
||||
// Dedup: reuse in-flight request so 20 tiles don't each trigger a token fetch
|
||||
if (_tokenPromise) return _tokenPromise;
|
||||
|
||||
@@ -94,11 +156,9 @@ export function getSentinelToken(): Promise<string | null> {
|
||||
try {
|
||||
const resp = await fetch(TOKEN_PROXY_URL, {
|
||||
method: 'POST',
|
||||
// Backend resolves credentials from env. Empty body = "use server-side".
|
||||
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
||||
body: new URLSearchParams({
|
||||
client_id: clientId,
|
||||
client_secret: clientSecret,
|
||||
}),
|
||||
body: new URLSearchParams({}),
|
||||
});
|
||||
|
||||
if (!resp.ok) {
|
||||
@@ -131,6 +191,8 @@ const TILE_PROXY_URL = `${API_BASE}/api/sentinel/tile`;
|
||||
/**
|
||||
* Fetch a single 256×256 tile via backend proxy to Sentinel Hub Process API.
|
||||
* Returns a PNG ArrayBuffer or null on failure.
|
||||
*
|
||||
* Body no longer carries client_id/secret — the backend uses .env values.
|
||||
*/
|
||||
export async function fetchSentinelTile(
|
||||
z: number,
|
||||
@@ -139,21 +201,10 @@ export async function fetchSentinelTile(
|
||||
preset: string,
|
||||
date: string,
|
||||
): Promise<ArrayBuffer | null> {
|
||||
const { clientId, clientSecret } = getSentinelCredentials();
|
||||
if (!clientId || !clientSecret) return null;
|
||||
|
||||
const resp = await fetch(TILE_PROXY_URL, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
client_id: clientId,
|
||||
client_secret: clientSecret,
|
||||
preset,
|
||||
date,
|
||||
z,
|
||||
x,
|
||||
y,
|
||||
}),
|
||||
body: JSON.stringify({ preset, date, z, x, y }),
|
||||
});
|
||||
|
||||
if (!resp.ok) return null;
|
||||
|
||||
@@ -1,51 +1,37 @@
|
||||
/**
|
||||
* wikimediaClient — single fetch surface for Wikipedia / Wikidata.
|
||||
*
|
||||
* Issues #218, #219, #220 (tg12 external audit):
|
||||
* Issues #218, #219, #220 (tg12 external audit) + Round 7a:
|
||||
*
|
||||
* Wikimedia's User-Agent policy asks API clients to identify themselves
|
||||
* via `Api-User-Agent` when calling from browser JavaScript (because the
|
||||
* browser does not let JS set `User-Agent` directly). Before this
|
||||
* module existed, three independent components issued anonymous browser
|
||||
* fetches against Wikipedia / Wikidata:
|
||||
* browser does not let JS set `User-Agent` directly). Three independent
|
||||
* components used to issue anonymous browser fetches against Wikipedia /
|
||||
* Wikidata:
|
||||
*
|
||||
* - useRegionDossier (Wikidata SPARQL + Wikipedia REST summary)
|
||||
* - WikiImage (Wikipedia REST summary)
|
||||
* - NewsFeed (Wikipedia REST summary)
|
||||
*
|
||||
* Each component shipped its own copy-pasted fetch + module-local cache.
|
||||
* Provider-policy compliance was missing in all three places.
|
||||
* PR #284 collapsed them into this shared helper with one stable
|
||||
* `Api-User-Agent`. That fixed compliance but introduced a new problem:
|
||||
* the `Api-User-Agent` was project-wide, so from Wikimedia's perspective
|
||||
* every Shadowbroker install looked like one giant scraper. If one
|
||||
* install misbehaved, Wikimedia's only recourse was to block the project
|
||||
* as a whole.
|
||||
*
|
||||
* This module centralizes:
|
||||
* Round 7a fixes that. The frontend fetches the per-install operator
|
||||
* handle from `GET /api/settings/operator-handle` once on first use and
|
||||
* embeds it in the `Api-User-Agent`. Wikimedia can now rate-limit /
|
||||
* contact the specific install instead of the project. The handle is
|
||||
* auto-generated on the backend (`shadow-XXXXXX`) or operator-chosen via
|
||||
* the `OPERATOR_HANDLE` setting.
|
||||
*
|
||||
* 1. The `Api-User-Agent` header on every request.
|
||||
* 2. A single LRU cache for Wikipedia summary lookups (keyed by article
|
||||
* title). Multiple components asking for the same article share
|
||||
* one in-flight request and one cache slot.
|
||||
* 3. One predictable kill switch — if Wikimedia ever asks us to back
|
||||
* off, we change `WIKIMEDIA_API_USER_AGENT` here and the whole
|
||||
* frontend updates.
|
||||
*
|
||||
* This does NOT change end-user UX:
|
||||
*
|
||||
* - WikiImage still shows the same thumbnails.
|
||||
* - NewsFeed still shows aircraft thumbnails.
|
||||
* - useRegionDossier still returns the same place summary + leader.
|
||||
*
|
||||
* What changes:
|
||||
*
|
||||
* - Wikimedia can identify our traffic from any other anonymous
|
||||
* browser visitor pool.
|
||||
* - Provider-policy fixes happen here once, not in three places.
|
||||
* UX impact: zero. Same thumbnails, same summaries, same load behavior.
|
||||
* The only observable change is the value of the outgoing
|
||||
* `Api-User-Agent` header.
|
||||
*/
|
||||
|
||||
// Stable identifier per Wikimedia UA policy. Includes a contact path so
|
||||
// Wikimedia's operators can reach the project if they need to rate-limit
|
||||
// or coordinate. Bump the version when the contact path changes.
|
||||
export const WIKIMEDIA_API_USER_AGENT =
|
||||
'Shadowbroker/1.0 (+https://github.com/BigBodyCobain/Shadowbroker; ' +
|
||||
'report issues at /issues)';
|
||||
|
||||
// Module-level cache shared by WikiImage, NewsFeed, and useRegionDossier.
|
||||
// Keyed by Wikipedia article title (NOT slug — we keep the human-readable
|
||||
// form so debugging the cache is easier). Values track in-flight state
|
||||
@@ -73,6 +59,66 @@ function evictIfOverCap() {
|
||||
if (oldest) _summaryCache.delete(oldest);
|
||||
}
|
||||
|
||||
// ─── Per-operator handle (Round 7a) ────────────────────────────────────────
|
||||
|
||||
// Fetched once from the backend on first need and cached for the page
|
||||
// lifetime. The handle is NOT a secret — Wikimedia will see it on every
|
||||
// Wikipedia / Wikidata request we make — but caching it locally avoids a
|
||||
// round-trip on every Wikipedia fetch and lets the offline / no-backend
|
||||
// case still produce a stable UA (the fallback handle).
|
||||
let _handlePromise: Promise<string> | null = null;
|
||||
let _cachedHandle: string | null = null;
|
||||
|
||||
const FALLBACK_HANDLE = 'operator-offline';
|
||||
const HANDLE_ENDPOINT = '/api/settings/operator-handle';
|
||||
|
||||
/**
 * Ask the backend for this install's operator handle.
 *
 * Never rejects: a non-OK response, a network / JSON error, or a payload
 * without a non-blank string `handle` all resolve to `FALLBACK_HANDLE`.
 * A usable handle is returned trimmed.
 */
async function fetchOperatorHandle(): Promise<string> {
  try {
    // Use the standard relative-path proxy so the Next.js admin-key
    // injection (same-origin) flows naturally for legitimate browser
    // sessions. A cross-origin scanner will be blocked by the proxy
    // before this even leaves their browser.
    const response = await fetch(HANDLE_ENDPOINT, { credentials: 'same-origin' });
    if (response.ok) {
      const payload = await response.json();
      const rawHandle = payload?.handle;
      if (typeof rawHandle === 'string') {
        const trimmed = rawHandle.trim();
        if (trimmed) return trimmed;
      }
    }
  } catch {
    // Swallow and fall through to the shared fallback below.
  }
  return FALLBACK_HANDLE;
}
|
||||
|
||||
/**
 * Resolve the operator handle, fetching it at most once.
 *
 * A previously resolved handle is served from `_cachedHandle`. Otherwise
 * every caller shares the single promise stored in `_handlePromise`, so
 * concurrent callers do not each start their own backend request.
 */
async function getOperatorHandle(): Promise<string> {
  if (_cachedHandle) return _cachedHandle;

  const pending =
    _handlePromise ??
    fetchOperatorHandle().then((handle) => {
      // Remember the value so later calls skip the promise entirely.
      _cachedHandle = handle;
      return handle;
    });
  _handlePromise = pending;
  return pending;
}
|
||||
|
||||
/** Build the Wikimedia Api-User-Agent for this install.
 *
 * Includes the per-install operator handle so Wikimedia can rate-limit /
 * contact the specific operator instead of the project as a whole.
 * Exported for tests; production callers should let
 * `fetchWikipediaSummary` / `fetchWikidataSparql` build it implicitly.
 *
 * Both interpolated parts are made header-safe before use:
 *
 *   - `purpose` is reduced to `[a-z0-9_-]`; every other character becomes
 *     `-`. An empty / missing purpose yields an empty purpose field.
 *   - the operator handle keeps all printable ASCII; any other character
 *     becomes `-`.
 *
 * @param purpose Short label for the kind of request being made.
 * @returns The complete `Api-User-Agent` header value.
 */
export async function buildWikimediaUserAgent(purpose: string): Promise<string> {
  const handle = await getOperatorHandle();
  // The handle ends up inside an HTTP header value. Characters such as
  // CR / LF or code points above U+00FF make `fetch` throw while building
  // the request, which would turn every Wikimedia lookup into a silent
  // `null`. Restricting the handle to printable ASCII rules that out while
  // leaving ordinary handles untouched.
  const safeHandle = handle.replace(/[^\x20-\x7E]/g, '-');
  const safePurpose = (purpose || '').replace(/[^a-zA-Z0-9_-]/g, '-').toLowerCase();
  return (
    `Shadowbroker/1.0 (operator: ${safeHandle}; purpose: ${safePurpose}; ` +
    '+https://github.com/BigBodyCobain/Shadowbroker; report issues at /issues)'
  );
}
|
||||
|
||||
// ─── Wikipedia summary fetch ───────────────────────────────────────────────
|
||||
|
||||
/** Fetch a Wikipedia article summary (titles, NOT URLs).
|
||||
*
|
||||
* Empty / invalid input resolves to `null`. Network errors and disambig
|
||||
@@ -92,40 +138,42 @@ export async function fetchWikipediaSummary(
|
||||
const slug = encodeURIComponent(trimmed.replace(/ /g, '_'));
|
||||
const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${slug}`;
|
||||
|
||||
const promise = fetch(url, {
|
||||
headers: { 'Api-User-Agent': WIKIMEDIA_API_USER_AGENT },
|
||||
})
|
||||
.then(async (r) => {
|
||||
const promise = (async (): Promise<WikipediaSummary | null> => {
|
||||
try {
|
||||
const ua = await buildWikimediaUserAgent('wikipedia-summary');
|
||||
const r = await fetch(url, { headers: { 'Api-User-Agent': ua } });
|
||||
if (!r.ok) return null;
|
||||
const d = await r.json();
|
||||
if (d?.type === 'disambiguation') return null;
|
||||
const summary: WikipediaSummary = {
|
||||
return {
|
||||
title: trimmed,
|
||||
description: d?.description || '',
|
||||
extract: d?.extract || '',
|
||||
thumbnail: d?.thumbnail?.source || d?.originalimage?.source || '',
|
||||
type: d?.type || 'standard',
|
||||
};
|
||||
return summary;
|
||||
})
|
||||
.catch(() => null)
|
||||
.then((summary) => {
|
||||
_summaryCache.set(trimmed, { summary, inflight: null, loaded: true });
|
||||
evictIfOverCap();
|
||||
return summary;
|
||||
});
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
})().then((summary) => {
|
||||
_summaryCache.set(trimmed, { summary, inflight: null, loaded: true });
|
||||
evictIfOverCap();
|
||||
return summary;
|
||||
});
|
||||
|
||||
_summaryCache.set(trimmed, { summary: null, inflight: promise, loaded: false });
|
||||
evictIfOverCap();
|
||||
return promise;
|
||||
}
|
||||
|
||||
// ─── Wikidata SPARQL ───────────────────────────────────────────────────────
|
||||
|
||||
/** Fetch a Wikidata SPARQL query result.
|
||||
*
|
||||
* Returns the parsed JSON `results.bindings` array on success; `null`
|
||||
* (not throwing) on any failure so callers can render fallbacks
|
||||
* silently. Kept as a thin wrapper so the audit-required UA header is
|
||||
* applied in exactly one place.
|
||||
* silently. Per-install operator handle threaded through `Api-User-Agent`
|
||||
* (Round 7a).
|
||||
*/
|
||||
export async function fetchWikidataSparql<T = Record<string, { value: string }>>(
|
||||
sparql: string,
|
||||
@@ -136,9 +184,10 @@ export async function fetchWikidataSparql<T = Record<string, { value: string }>>
|
||||
trimmed,
|
||||
)}&format=json`;
|
||||
try {
|
||||
const ua = await buildWikimediaUserAgent('wikidata-sparql');
|
||||
const res = await fetch(url, {
|
||||
headers: {
|
||||
'Api-User-Agent': WIKIMEDIA_API_USER_AGENT,
|
||||
'Api-User-Agent': ua,
|
||||
Accept: 'application/sparql-results+json',
|
||||
},
|
||||
});
|
||||
@@ -151,7 +200,11 @@ export async function fetchWikidataSparql<T = Record<string, { value: string }>>
|
||||
}
|
||||
}
|
||||
|
||||
/** Internal: clear the shared cache. Exposed for tests only. */
|
||||
// ─── Test helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Test-only hook: return the module to a cold state by dropping the cached
 * operator handle, its in-flight promise, and every cached summary entry.
 */
export function _resetWikimediaClientCacheForTests() {
  _cachedHandle = null;
  _handlePromise = null;
  _summaryCache.clear();
}
|
||||
|
||||
+9
-2
@@ -76,6 +76,13 @@ function canRun(command, args) {
|
||||
return !result.error && result.status === 0;
|
||||
}
|
||||
|
||||
/**
 * Check that a Python interpreter is usable for the backend.
 *
 * Runs two probes through `canRun`, in order, stopping at the first
 * failure: `-V` (the interpreter starts at all) and an import of
 * `fastapi` and `uvicorn` (the backend's packages are importable).
 *
 * @param pythonBin Path or command name of the interpreter to probe.
 * @returns `true` only when both probes succeed.
 */
function canRunBackendPython(pythonBin) {
  const probes = [
    ["-V"],
    ["-c", "import fastapi, uvicorn"],
  ];
  return probes.every((probeArgs) => canRun(pythonBin, probeArgs));
}
|
||||
|
||||
function findBasePython() {
|
||||
const candidates = isWindows
|
||||
? [
|
||||
@@ -135,12 +142,12 @@ function rebuildBackendVenv(targetDir, basePython) {
|
||||
if (result.error || result.status !== 0) {
|
||||
return null;
|
||||
}
|
||||
return canRun(repairedBin, ["-V"]) ? repairedBin : null;
|
||||
return canRunBackendPython(repairedBin) ? repairedBin : null;
|
||||
}
|
||||
|
||||
function ensureBackendVenv() {
|
||||
for (const candidate of venvCandidates) {
|
||||
if (fs.existsSync(candidate) && canRun(candidate, ["-V"])) {
|
||||
if (fs.existsSync(candidate) && canRunBackendPython(candidate)) {
|
||||
persistSelectedVenv(candidate);
|
||||
return candidate;
|
||||
}
|
||||
|
||||
@@ -80,7 +80,6 @@ dependencies = [
|
||||
{ name = "apscheduler" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "cachetools" },
|
||||
{ name = "cloudscraper" },
|
||||
{ name = "cryptography" },
|
||||
{ name = "defusedxml" },
|
||||
{ name = "fastapi" },
|
||||
@@ -119,7 +118,6 @@ requires-dist = [
|
||||
{ name = "apscheduler", specifier = "==3.10.3" },
|
||||
{ name = "beautifulsoup4", specifier = ">=4.9.0" },
|
||||
{ name = "cachetools", specifier = "==5.5.2" },
|
||||
{ name = "cloudscraper", specifier = "==1.2.71" },
|
||||
{ name = "cryptography", specifier = ">=41.0.0" },
|
||||
{ name = "defusedxml", specifier = ">=0.7.1" },
|
||||
{ name = "fastapi", specifier = "==0.115.12" },
|
||||
@@ -453,20 +451,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cloudscraper"
|
||||
version = "1.2.71"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyparsing" },
|
||||
{ name = "requests" },
|
||||
{ name = "requests-toolbelt" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ac/25/6d0481860583f44953bd791de0b7c4f6d7ead7223f8a17e776247b34a5b4/cloudscraper-1.2.71.tar.gz", hash = "sha256:429c6e8aa6916d5bad5c8a5eac50f3ea53c9ac22616f6cb21b18dcc71517d0d3", size = 93261, upload-time = "2023-04-25T23:20:19.467Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/81/97/fc88803a451029688dffd7eb446dc1b529657577aec13aceff1cc9628c5d/cloudscraper-1.2.71-py2.py3-none-any.whl", hash = "sha256:76f50ca529ed2279e220837befdec892626f9511708e200d48d5bb76ded679b0", size = 99652, upload-time = "2023-04-25T23:20:15.974Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
@@ -1643,15 +1627,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/99/32/15e08a0c4bb536303e1568e2ba5cae1ce39a2e026a03aea46173af4c7a2d/pyobjc_framework_libdispatch-12.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:23fc9915cba328216b6a736c7a48438a16213f16dfb467f69506300b95938cc7", size = 15976, upload-time = "2025-11-14T09:53:07.936Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.3.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pypubsub"
|
||||
version = "4.0.7"
|
||||
@@ -1901,18 +1876,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", size = 62574, upload-time = "2023-05-22T15:12:42.313Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "requests-toolbelt"
|
||||
version = "1.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reverse-geocoder"
|
||||
version = "1.5.1"
|
||||
|
||||
Reference in New Issue
Block a user