Merge pull request #232 from BigBodyCobain/security/post-pr227-gap-fixes-v2

[security] Close post-#227 control-surface and fetcher gaps
This commit is contained in:
Shadowbroker
2026-05-18 14:03:47 -06:00
committed by GitHub
16 changed files with 289 additions and 18 deletions
+33 -1
View File
@@ -24,8 +24,40 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
# ALLOW_INSECURE_ADMIN=false
# Default outbound User-Agent for all third-party HTTP fetchers.
# Project-generic by default — does NOT include any personal contact info or
# operator-specific identifier. Override only if you run a public relay and
# want upstreams to be able to reach you (e.g. Nominatim/OSM usage policy).
# SHADOWBROKER_USER_AGENT=ShadowBroker-OSINT/0.9 (contact: ops@example.com)
# User-Agent for Nominatim geocoding requests (per OSM usage policy).
# NOMINATIM_USER_AGENT=ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)
# NOMINATIM_USER_AGENT=ShadowBroker/1.0
# ── Third-party fetcher opt-ins ────────────────────────────────
# These data sources phone home to politically/commercially sensitive
# upstreams. Disabled by default; set to "true" only if the operator
# explicitly wants the node's IP to contact these services.
#
# CrowdThreat — backend.crowdthreat.world (paid threat-intel aggregator).
# CROWDTHREAT_ENABLED=false
#
# EUvsDisinfo FIMI — euvsdisinfo.eu (EU disinformation tracker).
# FIMI_ENABLED=false
#
# Polymarket + Kalshi — US political/election prediction markets.
# PREDICTION_MARKETS_ENABLED=false
#
# Finnhub / yfinance fallback — financial market data.
# Set FINNHUB_API_KEY to enable Finnhub, or set FINANCIAL_ENABLED=true to allow
# the unauthenticated yfinance fallback to call Yahoo Finance.
# FINANCIAL_ENABLED=false
#
# NUFORC UAP sightings — huggingface.co dataset download.
# NUFORC_ENABLED=false
#
# News RSS aggregator — defaults ON. Set to "false" (or "0", "no", "off",
# or an empty value) to disable all configured news feeds (kill switch for
# the news layer).
# NEWS_ENABLED=true
# LTA Singapore traffic cameras — leave blank to skip this data source.
# LTA_ACCOUNT_KEY=
+1 -1
View File
@@ -35,7 +35,7 @@ async def thermal_verify(
return result
@router.post("/api/sigint/transmit")
@router.post("/api/sigint/transmit", dependencies=[Depends(require_local_operator)])
@limiter.limit("5/minute")
async def sigint_transmit(request: Request):
"""Send an APRS-IS message to a specific callsign. Requires ham radio credentials."""
+1 -1
View File
@@ -589,7 +589,7 @@ async def api_get_wormhole_status(request: Request):
)
@router.post("/api/wormhole/join")
@router.post("/api/wormhole/join", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def api_wormhole_join(request: Request):
from services.config import get_settings
@@ -31,11 +31,7 @@ _S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
_REFRESH_INTERVAL_S = 5 * 24 * 3600
_LIST_TIMEOUT_S = 30
_DOWNLOAD_TIMEOUT_S = 600
_USER_AGENT = (
"ShadowBroker-OSINT/0.9.79 "
"(+https://github.com/BigBodyCobain/Shadowbroker; "
"contact: bigbodycobain@gmail.com)"
)
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
_lock = threading.RLock()
_aircraft_by_hex: dict[str, dict[str, str]] = {}
@@ -279,9 +279,13 @@ def fetch_weather_alerts():
return
alerts = []
try:
# weather.gov requires a User-Agent per their API policy, but it
# need not identify the operator. Use a project-generic string and
# let the user override via SHADOWBROKER_USER_AGENT if needed.
from services.network_utils import DEFAULT_USER_AGENT
url = "https://api.weather.gov/alerts/active?status=actual"
headers = {
"User-Agent": "(ShadowBroker OSINT Dashboard, github.com/BigBodyCobain/Shadowbroker)",
"User-Agent": DEFAULT_USER_AGENT,
"Accept": "application/geo+json",
}
response = fetch_with_curl(url, timeout=15, headers=headers)
+17
View File
@@ -5,6 +5,7 @@ debunked claims, threat actor mentions, and target country references.
Refreshes every 12 hours (FIMI data updates weekly).
"""
import os
import re
import logging
from datetime import datetime, timezone
@@ -18,6 +19,16 @@ logger = logging.getLogger("services.data_fetcher")
_FIMI_FEED_URL = "https://euvsdisinfo.eu/feed/"
def fimi_fetch_enabled() -> bool:
    """Report whether EUvsDisinfo FIMI pulls have been switched on.

    Reads ``FIMI_ENABLED`` from the environment. After trimming whitespace
    and lower-casing, only ``1``, ``true``, ``yes`` and ``on`` enable the
    fetcher; an unset variable or any other value leaves it off.
    """
    flag = str(os.environ.get("FIMI_ENABLED", "")).strip().lower()
    return flag in ("1", "true", "yes", "on")
# ── Threat actor keywords ──────────────────────────────────────────────────
# Map of keyword → canonical actor name. Checked case-insensitively.
_THREAT_ACTORS: dict[str, str] = {
@@ -173,6 +184,12 @@ def _is_major_wave(narratives: list[dict], targets: dict[str, int]) -> bool:
@with_retry(max_retries=1, base_delay=5)
def fetch_fimi():
"""Fetch and parse the EUvsDisinfo RSS feed."""
if not fimi_fetch_enabled():
logger.debug("FIMI fetch skipped; set FIMI_ENABLED=true to opt in")
with _data_lock:
latest_data["fimi"] = []
_mark_fresh("fimi")
return
try:
resp = fetch_with_curl(_FIMI_FEED_URL, timeout=15)
feed = feedparser.parse(resp.text)
+28 -1
View File
@@ -82,10 +82,37 @@ def _fetch_yfinance_single(symbol: str, period: str = "2d"):
def financial_fetch_enabled() -> bool:
    """Return True only when the operator explicitly opts into financial pulls.

    Either ``FINANCIAL_ENABLED=true`` or the presence of ``FINNHUB_API_KEY``
    counts as an explicit opt-in. Without either, the default yfinance path
    is disabled to avoid silent outbound calls to finance.yahoo.com.

    Returns:
        True when a non-blank ``FINNHUB_API_KEY`` is set, or when
        ``FINANCIAL_ENABLED`` is one of ``1``/``true``/``yes``/``on``
        (case-insensitive, surrounding whitespace ignored); False otherwise.
    """
    # NOTE(review): a ``@with_retry(max_retries=1, base_delay=1)`` decorator
    # used to sit directly above this predicate. An environment lookup has
    # nothing to retry, so it is dropped here; it looks like it was meant
    # for ``fetch_financial_markets`` — confirm and restore it there.
    if os.environ.get("FINNHUB_API_KEY", "").strip():
        return True
    flag = os.environ.get("FINANCIAL_ENABLED", "").strip().lower()
    return flag in {"1", "true", "yes", "on"}
def fetch_financial_markets():
"""Fetches full market list with smart throttling (3s for Finnhub, 60s for yfinance)."""
global _last_fetch_time, _last_fetch_results, _rotating_index
if not financial_fetch_enabled():
logger.debug(
"Financial fetch skipped; set FINANCIAL_ENABLED=true or supply "
"FINNHUB_API_KEY to opt in"
)
with _data_lock:
latest_data["financial"] = {}
_mark_fresh("financial")
return
finnhub_key = os.getenv("FINNHUB_API_KEY", "").strip()
use_finnhub = bool(finnhub_key)
+2 -1
View File
@@ -182,7 +182,8 @@ def fetch_meshtastic_nodes():
callsign = str(getattr(get_settings(), "MESHTASTIC_OPERATOR_CALLSIGN", "") or "").strip()
except Exception:
callsign = ""
ua_base = "ShadowBroker-OSINT/0.9.79 (+https://github.com/BigBodyCobain/Shadowbroker; contact: bigbodycobain@gmail.com; 24h polling)"
from services.network_utils import DEFAULT_USER_AGENT
ua_base = f"{DEFAULT_USER_AGENT}; 24h polling"
user_agent = f"{ua_base}; node={callsign}" if callsign else ua_base
try:
+23
View File
@@ -1,4 +1,5 @@
"""News fetching, geocoding, clustering, and risk assessment."""
import os
import re
import time
import logging
@@ -11,6 +12,22 @@ from services.fetchers._store import latest_data, _data_lock, _mark_fresh
from services.fetchers.retry import with_retry
from services.oracle_service import enrich_news_items, compute_global_threat_level, detect_breaking_events
def news_fetch_enabled() -> bool:
    """Report whether outbound news RSS pulls are allowed.

    Unlike the opt-in fetchers, news is **on** unless switched off: an unset
    ``NEWS_ENABLED`` counts as ``true``. After trimming whitespace and
    lower-casing, the values ``0``, ``false``, ``no``, ``off`` and the empty
    string disable all RSS feed traffic; every other value keeps it enabled.
    """
    setting = str(os.environ.get("NEWS_ENABLED", "true")).strip().lower()
    if not setting:
        # A blank value is treated as an explicit "off".
        return False
    return setting not in ("0", "false", "no", "off")
logger = logging.getLogger("services.data_fetcher")
# Maximum article age in seconds. Anything older than this is dropped
@@ -160,6 +177,12 @@ def _resolve_coords(text: str) -> tuple[float, float] | None:
@with_retry(max_retries=1, base_delay=2)
def fetch_news():
if not news_fetch_enabled():
logger.debug("News fetch skipped; unset NEWS_ENABLED=false to re-enable")
with _data_lock:
latest_data["news"] = []
_mark_fresh("news")
return
from services.news_feed_config import get_feeds
feed_config = get_feeds()
feeds = {f["name"]: f["url"] for f in feed_config}
@@ -49,6 +49,16 @@ _HF_CSV_URL = (
"https://huggingface.co/datasets/kcimc/NUFORC/resolve/main/nuforc_str.csv"
)
def nuforc_fetch_enabled() -> bool:
    """Report whether the NUFORC dataset download has been opted into.

    ``NUFORC_ENABLED`` must be ``1``, ``true``, ``yes`` or ``on`` (matched
    case-insensitively after trimming whitespace). Unset or anything else
    keeps the download disabled.
    """
    truthy = frozenset({"1", "true", "yes", "on"})
    raw_value = str(os.environ.get("NUFORC_ENABLED", ""))
    normalized = raw_value.strip().lower()
    return normalized in truthy
# Only keep sightings from the last N years for the enrichment index
_KEEP_YEARS = 5
@@ -160,6 +170,12 @@ def _download_and_build() -> dict | None:
Returns the index dict or None on failure.
"""
if not nuforc_fetch_enabled():
logger.debug(
"NUFORC enrichment skipped; set NUFORC_ENABLED=true to opt in"
)
return None
cutoff = datetime.utcnow() - timedelta(days=_KEEP_YEARS * 365)
cutoff_str = cutoff.strftime("%Y-%m-%d")
@@ -25,6 +25,16 @@ _provider_pace_lock = threading.Lock()
_provider_last_request_at: dict[str, float] = {}
def prediction_markets_fetch_enabled() -> bool:
    """Report whether Polymarket/Kalshi pulls have been opted into.

    The ``PREDICTION_MARKETS_ENABLED`` environment variable is trimmed and
    lower-cased; only ``1``, ``true``, ``yes`` and ``on`` turn the fetcher
    on. Unset or any other value leaves it off.
    """
    choice = str(os.environ.get("PREDICTION_MARKETS_ENABLED", "")).strip().lower()
    if choice in ("1", "true", "yes", "on"):
        return True
    return False
def _pace_provider(provider: str, min_interval_s: float) -> None:
if min_interval_s <= 0:
return
@@ -755,6 +765,16 @@ def fetch_prediction_markets():
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
global _prev_probabilities
if not prediction_markets_fetch_enabled():
logger.debug(
"Prediction markets fetch skipped; set "
"PREDICTION_MARKETS_ENABLED=true to opt in"
)
with _data_lock:
latest_data["prediction_markets"] = []
_mark_fresh("prediction_markets")
return
markets = fetch_prediction_markets_raw()
# Compute probability deltas vs previous fetch
+1 -5
View File
@@ -24,11 +24,7 @@ _AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz"
_REFRESH_INTERVAL_S = 5 * 24 * 3600
_HTTP_TIMEOUT_S = 60
_USER_AGENT = (
"ShadowBroker-OSINT/0.9.79 "
"(+https://github.com/BigBodyCobain/Shadowbroker; "
"contact: bigbodycobain@gmail.com)"
)
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
_lock = threading.RLock()
_routes_by_callsign: dict[str, dict[str, Any]] = {}
@@ -228,11 +228,15 @@ def _fetch_dm_prekey_bundle_from_public_lookup(lookup_token: str) -> dict[str, A
normalized_peer_url = str(peer_url or "").strip().rstrip("/")
if not normalized_peer_url:
continue
# Generic UA: any peer-facing crypto request should not carry a
# fork-specific identifier — that turns prekey lookups into a
# software-fingerprinting beacon.
from services.network_utils import DEFAULT_USER_AGENT
request = urllib.request.Request(
f"{normalized_peer_url}/api/mesh/dm/prekey-bundle?{encoded}",
headers={
"Accept": "application/json",
"User-Agent": "ShadowBroker-Infonet/0.9 (+https://github.com/BigBodyCobain/Shadowbroker)",
"User-Agent": DEFAULT_USER_AGENT,
},
method="GET",
)
+12 -1
View File
@@ -19,6 +19,17 @@ _retry = Retry(total=1, backoff_factor=0.3, status_forcelist=[502, 503, 504])
_session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
# Default outbound User-Agent. Generic by design — does NOT include any
# personal contact info or a fork-specific repo URL. Operators who run a
# public-facing relay and want to identify themselves to upstreams (e.g.
# for Nominatim / weather.gov usage-policy compliance) can override this
# via the SHADOWBROKER_USER_AGENT env var.
#
# A blank or whitespace-only override (e.g. ``SHADOWBROKER_USER_AGENT=`` left
# empty in .env) falls back to the generic default, so requests never go out
# with an empty User-Agent header.
DEFAULT_USER_AGENT = (
    os.environ.get("SHADOWBROKER_USER_AGENT", "").strip()
    or "ShadowBroker-OSINT/0.9"
)
# Find bash for curl fallback — Git bash's curl has the TLS features
# needed to pass CDN fingerprint checks (brotli, zstd, libpsl)
@@ -73,7 +84,7 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
both Python requests and the barebones Windows system curl.
"""
default_headers = {
"User-Agent": "ShadowBroker-OSINT/0.9.79 (+https://github.com/BigBodyCobain/Shadowbroker; contact: bigbodycobain@gmail.com)",
"User-Agent": DEFAULT_USER_AGENT,
}
if headers:
default_headers.update(headers)
@@ -52,6 +52,24 @@ import pytest
("post", "/api/wormhole/connect", {}),
("post", "/api/layers", {"layers": {"viirs_nightlights": True}}),
("post", "/api/ais/feed", {"msgs": []}),
# Added in post-#227 gap audit:
# /api/wormhole/join also calls bootstrap_wormhole_identity() — same
# identity-takeover surface as /identity/bootstrap. PR #227 hardened
# the latter but missed the former.
("post", "/api/wormhole/join", {}),
# /api/sigint/transmit relays APRS-IS packets over radio using
# operator-supplied credentials. Any caller who reaches this endpoint
# could transmit on the operator's authority. Must be local-only.
(
"post",
"/api/sigint/transmit",
{
"callsign": "N0CALL",
"passcode": "12345",
"target": "NOCALL",
"message": "test",
},
),
],
)
def test_remote_control_surface_rejects_without_local_operator_or_admin(
@@ -0,0 +1,106 @@
"""Third-party fetchers that phone home to politically/commercially
sensitive upstreams must be operator opt-in only.
Companion to ``test_crowdthreat_opt_in.py`` — extends the same default-off
posture to:
* EUvsDisinfo FIMI (``FIMI_ENABLED``)
* Polymarket + Kalshi (``PREDICTION_MARKETS_ENABLED``)
* Finnhub / yfinance financial data (``FINANCIAL_ENABLED`` /
``FINNHUB_API_KEY``)
* NUFORC HuggingFace dataset (``NUFORC_ENABLED``)
Most tests assert that with the env var unset (or set to a falsy value)
the fetcher's network entry point is NOT called; the remainder cover the
``FINNHUB_API_KEY`` opt-in and the ``NEWS_ENABLED`` kill switch.
"""
def _explode(*_args, **_kwargs):
    """Stand-in for a network entry point; fails the test if ever invoked."""
    reason = "upstream called while fetcher was meant to be disabled"
    raise AssertionError(reason)
def test_fimi_disabled_by_default_does_not_call_upstream(monkeypatch):
    """With ``FIMI_ENABLED`` unset, ``fetch_fimi`` empties the store offline."""
    from services.fetchers import _store
    from services.fetchers import fimi

    stale_rows = [{"id": "old"}]
    monkeypatch.setattr(fimi, "fetch_with_curl", _explode)
    monkeypatch.setitem(_store.latest_data, "fimi", stale_rows)
    monkeypatch.delenv("FIMI_ENABLED", raising=False)

    fimi.fetch_fimi()

    assert _store.latest_data["fimi"] == []
def test_fimi_falsy_value_does_not_call_upstream(monkeypatch):
    """An explicit ``FIMI_ENABLED=false`` also keeps ``fetch_fimi`` offline."""
    from services.fetchers import _store
    from services.fetchers import fimi

    stale_rows = [{"id": "old"}]
    monkeypatch.setattr(fimi, "fetch_with_curl", _explode)
    monkeypatch.setitem(_store.latest_data, "fimi", stale_rows)
    monkeypatch.setenv("FIMI_ENABLED", "false")

    fimi.fetch_fimi()

    assert _store.latest_data["fimi"] == []
def test_prediction_markets_disabled_by_default(monkeypatch):
    """With the opt-in unset, the raw market fetch is never reached."""
    from services.fetchers import _store
    from services.fetchers import prediction_markets

    stale_rows = [{"id": "old"}]
    monkeypatch.setattr(prediction_markets, "fetch_prediction_markets_raw", _explode)
    monkeypatch.setitem(_store.latest_data, "prediction_markets", stale_rows)
    monkeypatch.delenv("PREDICTION_MARKETS_ENABLED", raising=False)

    prediction_markets.fetch_prediction_markets()

    assert _store.latest_data["prediction_markets"] == []
def test_financial_disabled_when_no_optin_or_api_key(monkeypatch):
    """Neither Finnhub nor yfinance may be called without an explicit opt-in."""
    from services.fetchers import _store
    from services.fetchers import financial

    # Strip both opt-in signals so the fetcher must take its disabled path.
    for variable in ("FINANCIAL_ENABLED", "FINNHUB_API_KEY"):
        monkeypatch.delenv(variable, raising=False)

    stale_quotes = {"BTC": {"price": 1}}
    monkeypatch.setitem(_store.latest_data, "financial", stale_quotes)

    # Any attempt to hit either upstream trips the assertion in _explode.
    for entry_point in ("_fetch_finnhub_quote", "_fetch_yfinance_single"):
        monkeypatch.setattr(financial, entry_point, _explode)

    financial.fetch_financial_markets()

    assert _store.latest_data["financial"] == {}
def test_financial_enabled_via_finnhub_api_key(monkeypatch):
    """A configured ``FINNHUB_API_KEY`` alone is enough to enable the fetcher."""
    from services.fetchers import financial

    monkeypatch.setenv("FINNHUB_API_KEY", "test-key")
    monkeypatch.delenv("FINANCIAL_ENABLED", raising=False)

    enabled = financial.financial_fetch_enabled()

    assert enabled is True
def test_nuforc_disabled_by_default_skips_download(monkeypatch):
    """With ``NUFORC_ENABLED`` unset, the index build returns None offline."""
    from services.fetchers import nuforc_enrichment

    monkeypatch.setattr(nuforc_enrichment, "fetch_with_curl", _explode)
    monkeypatch.delenv("NUFORC_ENABLED", raising=False)

    index = nuforc_enrichment._download_and_build()

    assert index is None
def test_news_default_on_but_killable(monkeypatch):
    """News defaults on (kill switch only), but NEWS_ENABLED=false must disable it.

    Covers both halves of the name: the enable check reports True when the
    variable is absent, and ``fetch_news`` empties the store without touching
    the network once the kill switch is set.
    """
    from services.fetchers import _store, news

    # Default-on half: no env var means the news layer is enabled.
    monkeypatch.delenv("NEWS_ENABLED", raising=False)
    assert news.news_fetch_enabled() is True

    # Kill-switch half: an explicit "false" disables the fetcher.
    monkeypatch.setenv("NEWS_ENABLED", "false")
    assert news.news_fetch_enabled() is False

    monkeypatch.setitem(_store.latest_data, "news", [{"id": "old"}])
    monkeypatch.setattr(news, "fetch_with_curl", _explode)
    news.fetch_news()
    assert _store.latest_data["news"] == []