mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-05-27 01:22:27 +02:00
Merge pull request #232 from BigBodyCobain/security/post-pr227-gap-fixes-v2
[security] Close post-#227 control-surface and fetcher gaps
This commit is contained in:
+33
-1
@@ -24,8 +24,40 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
||||
# ALLOW_INSECURE_ADMIN=false
|
||||
|
||||
# Default outbound User-Agent for all third-party HTTP fetchers.
|
||||
# Project-generic by default — does NOT include any personal contact info or
|
||||
# operator-specific identifier. Override only if you run a public relay and
|
||||
# want upstreams to be able to reach you (e.g. Nominatim/OSM usage policy).
|
||||
# SHADOWBROKER_USER_AGENT=ShadowBroker-OSINT/0.9 (contact: ops@example.com)
|
||||
|
||||
# User-Agent for Nominatim geocoding requests (per OSM usage policy).
|
||||
# NOMINATIM_USER_AGENT=ShadowBroker/1.0 (https://github.com/BigBodyCobain/Shadowbroker)
|
||||
# NOMINATIM_USER_AGENT=ShadowBroker/1.0
|
||||
|
||||
# ── Third-party fetcher opt-ins ────────────────────────────────
|
||||
# These data sources phone home to politically/commercially sensitive
|
||||
# upstreams. Disabled by default; set to "true" only if the operator
|
||||
# explicitly wants the node's IP to contact these services.
|
||||
#
|
||||
# CrowdThreat — backend.crowdthreat.world (paid threat-intel aggregator).
|
||||
# CROWDTHREAT_ENABLED=false
|
||||
#
|
||||
# EUvsDisinfo FIMI — euvsdisinfo.eu (EU disinformation tracker).
|
||||
# FIMI_ENABLED=false
|
||||
#
|
||||
# Polymarket + Kalshi — US political/election prediction markets.
|
||||
# PREDICTION_MARKETS_ENABLED=false
|
||||
#
|
||||
# Finnhub / yfinance fallback — financial market data.
|
||||
# Set FINNHUB_API_KEY to enable Finnhub, or set FINANCIAL_ENABLED=true to allow
|
||||
# the unauthenticated yfinance fallback to call Yahoo Finance.
|
||||
# FINANCIAL_ENABLED=false
|
||||
#
|
||||
# NUFORC UAP sightings — huggingface.co dataset download.
|
||||
# NUFORC_ENABLED=false
|
||||
#
|
||||
# News RSS aggregator — defaults ON. Set to "false" to disable all
|
||||
# configured news feeds (kill switch for the news layer). "0", "no", "off",
# or a blank value are treated the same as "false".
|
||||
# NEWS_ENABLED=true
|
||||
|
||||
# LTA Singapore traffic cameras — leave blank to skip this data source.
|
||||
# LTA_ACCOUNT_KEY=
|
||||
|
||||
@@ -35,7 +35,7 @@ async def thermal_verify(
|
||||
return result
|
||||
|
||||
|
||||
@router.post("/api/sigint/transmit")
|
||||
@router.post("/api/sigint/transmit", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("5/minute")
|
||||
async def sigint_transmit(request: Request):
|
||||
"""Send an APRS-IS message to a specific callsign. Requires ham radio credentials."""
|
||||
|
||||
@@ -589,7 +589,7 @@ async def api_get_wormhole_status(request: Request):
|
||||
)
|
||||
|
||||
|
||||
@router.post("/api/wormhole/join")
|
||||
@router.post("/api/wormhole/join", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("10/minute")
|
||||
async def api_wormhole_join(request: Request):
|
||||
from services.config import get_settings
|
||||
|
||||
@@ -31,11 +31,7 @@ _S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_LIST_TIMEOUT_S = 30
|
||||
_DOWNLOAD_TIMEOUT_S = 600
|
||||
_USER_AGENT = (
|
||||
"ShadowBroker-OSINT/0.9.79 "
|
||||
"(+https://github.com/BigBodyCobain/Shadowbroker; "
|
||||
"contact: bigbodycobain@gmail.com)"
|
||||
)
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_aircraft_by_hex: dict[str, dict[str, str]] = {}
|
||||
|
||||
@@ -279,9 +279,13 @@ def fetch_weather_alerts():
|
||||
return
|
||||
alerts = []
|
||||
try:
|
||||
# weather.gov requires a User-Agent per their API policy, but it
|
||||
# need not identify the operator. Use a project-generic string and
|
||||
# let the user override via SHADOWBROKER_USER_AGENT if needed.
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
url = "https://api.weather.gov/alerts/active?status=actual"
|
||||
headers = {
|
||||
"User-Agent": "(ShadowBroker OSINT Dashboard, github.com/BigBodyCobain/Shadowbroker)",
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Accept": "application/geo+json",
|
||||
}
|
||||
response = fetch_with_curl(url, timeout=15, headers=headers)
|
||||
|
||||
@@ -5,6 +5,7 @@ debunked claims, threat actor mentions, and target country references.
|
||||
Refreshes every 12 hours (FIMI data updates weekly).
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
@@ -18,6 +19,16 @@ logger = logging.getLogger("services.data_fetcher")
|
||||
|
||||
_FIMI_FEED_URL = "https://euvsdisinfo.eu/feed/"
|
||||
|
||||
|
||||
def fimi_fetch_enabled() -> bool:
    """Report whether EUvsDisinfo FIMI fetching has been switched on.

    The fetcher is opt-in: it is enabled only when the ``FIMI_ENABLED``
    environment variable holds ``1``, ``true``, ``yes`` or ``on`` (compared
    case-insensitively, ignoring surrounding whitespace). An unset variable
    or any other value leaves it disabled.
    """
    raw_flag = os.environ.get("FIMI_ENABLED", "")
    normalized = str(raw_flag).strip().lower()
    return normalized in ("1", "true", "yes", "on")
|
||||
|
||||
# ── Threat actor keywords ──────────────────────────────────────────────────
|
||||
# Map of keyword → canonical actor name. Checked case-insensitively.
|
||||
_THREAT_ACTORS: dict[str, str] = {
|
||||
@@ -173,6 +184,12 @@ def _is_major_wave(narratives: list[dict], targets: dict[str, int]) -> bool:
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_fimi():
|
||||
"""Fetch and parse the EUvsDisinfo RSS feed."""
|
||||
if not fimi_fetch_enabled():
|
||||
logger.debug("FIMI fetch skipped; set FIMI_ENABLED=true to opt in")
|
||||
with _data_lock:
|
||||
latest_data["fimi"] = []
|
||||
_mark_fresh("fimi")
|
||||
return
|
||||
try:
|
||||
resp = fetch_with_curl(_FIMI_FEED_URL, timeout=15)
|
||||
feed = feedparser.parse(resp.text)
|
||||
|
||||
@@ -82,10 +82,37 @@ def _fetch_yfinance_single(symbol: str, period: str = "2d"):
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=1)
|
||||
def financial_fetch_enabled() -> bool:
|
||||
"""Return True only when the operator explicitly opts into financial pulls.
|
||||
|
||||
Either ``FINANCIAL_ENABLED=true`` or the presence of ``FINNHUB_API_KEY``
|
||||
counts as an explicit opt-in. Without either, the default yfinance path
|
||||
is disabled to avoid silent outbound calls to finance.yahoo.com.
|
||||
"""
|
||||
if os.getenv("FINNHUB_API_KEY", "").strip():
|
||||
return True
|
||||
return str(os.environ.get("FINANCIAL_ENABLED", "")).strip().lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
|
||||
|
||||
def fetch_financial_markets():
|
||||
"""Fetches full market list with smart throttling (3s for Finnhub, 60s for yfinance)."""
|
||||
global _last_fetch_time, _last_fetch_results, _rotating_index
|
||||
|
||||
|
||||
if not financial_fetch_enabled():
|
||||
logger.debug(
|
||||
"Financial fetch skipped; set FINANCIAL_ENABLED=true or supply "
|
||||
"FINNHUB_API_KEY to opt in"
|
||||
)
|
||||
with _data_lock:
|
||||
latest_data["financial"] = {}
|
||||
_mark_fresh("financial")
|
||||
return
|
||||
|
||||
finnhub_key = os.getenv("FINNHUB_API_KEY", "").strip()
|
||||
use_finnhub = bool(finnhub_key)
|
||||
|
||||
|
||||
@@ -182,7 +182,8 @@ def fetch_meshtastic_nodes():
|
||||
callsign = str(getattr(get_settings(), "MESHTASTIC_OPERATOR_CALLSIGN", "") or "").strip()
|
||||
except Exception:
|
||||
callsign = ""
|
||||
ua_base = "ShadowBroker-OSINT/0.9.79 (+https://github.com/BigBodyCobain/Shadowbroker; contact: bigbodycobain@gmail.com; 24h polling)"
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
ua_base = f"{DEFAULT_USER_AGENT}; 24h polling"
|
||||
user_agent = f"{ua_base}; node={callsign}" if callsign else ua_base
|
||||
|
||||
try:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""News fetching, geocoding, clustering, and risk assessment."""
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
@@ -11,6 +12,22 @@ from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
from services.fetchers.retry import with_retry
|
||||
from services.oracle_service import enrich_news_items, compute_global_threat_level, detect_breaking_events
|
||||
|
||||
|
||||
def news_fetch_enabled() -> bool:
    """Return True unless the operator has switched the news RSS layer off.

    Unlike the opt-in fetchers, news defaults to **on** for backward
    compatibility (this is the only fetcher where opting out is the new
    behavior, not the old one). An unset ``NEWS_ENABLED`` therefore leaves
    fetching enabled.

    Setting ``NEWS_ENABLED`` to ``0``, ``false``, ``no`` or ``off``
    (case-insensitive, surrounding whitespace ignored), or to an empty /
    whitespace-only value, disables all outbound RSS feed traffic. Any other
    value keeps the fetcher enabled.
    """
    return str(os.environ.get("NEWS_ENABLED", "true")).strip().lower() not in {
        "0",
        "false",
        "no",
        "off",
        # A variable that is present but blank is treated as "disabled".
        "",
    }
|
||||
|
||||
logger = logging.getLogger("services.data_fetcher")
|
||||
|
||||
# Maximum article age in seconds. Anything older than this is dropped
|
||||
@@ -160,6 +177,12 @@ def _resolve_coords(text: str) -> tuple[float, float] | None:
|
||||
|
||||
@with_retry(max_retries=1, base_delay=2)
|
||||
def fetch_news():
|
||||
if not news_fetch_enabled():
|
||||
logger.debug("News fetch skipped; unset NEWS_ENABLED=false to re-enable")
|
||||
with _data_lock:
|
||||
latest_data["news"] = []
|
||||
_mark_fresh("news")
|
||||
return
|
||||
from services.news_feed_config import get_feeds
|
||||
feed_config = get_feeds()
|
||||
feeds = {f["name"]: f["url"] for f in feed_config}
|
||||
|
||||
@@ -49,6 +49,16 @@ _HF_CSV_URL = (
|
||||
"https://huggingface.co/datasets/kcimc/NUFORC/resolve/main/nuforc_str.csv"
|
||||
)
|
||||
|
||||
|
||||
def nuforc_fetch_enabled() -> bool:
    """Tell whether the NUFORC dataset download has been opted into.

    Enabled only when ``NUFORC_ENABLED`` is set to ``1``, ``true``, ``yes``
    or ``on`` (any letter case, surrounding whitespace ignored). Unset or
    any other value means disabled.
    """
    setting = str(os.environ.get("NUFORC_ENABLED", ""))
    return setting.strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
# Only keep sightings from the last N years for the enrichment index
|
||||
_KEEP_YEARS = 5
|
||||
|
||||
@@ -160,6 +170,12 @@ def _download_and_build() -> dict | None:
|
||||
|
||||
Returns the index dict or None on failure.
|
||||
"""
|
||||
if not nuforc_fetch_enabled():
|
||||
logger.debug(
|
||||
"NUFORC enrichment skipped; set NUFORC_ENABLED=true to opt in"
|
||||
)
|
||||
return None
|
||||
|
||||
cutoff = datetime.utcnow() - timedelta(days=_KEEP_YEARS * 365)
|
||||
cutoff_str = cutoff.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
@@ -25,6 +25,16 @@ _provider_pace_lock = threading.Lock()
|
||||
_provider_last_request_at: dict[str, float] = {}
|
||||
|
||||
|
||||
def prediction_markets_fetch_enabled() -> bool:
    """Tell whether Polymarket/Kalshi fetching has been opted into.

    The gate opens only when ``PREDICTION_MARKETS_ENABLED`` is ``1``,
    ``true``, ``yes`` or ``on`` (case-insensitive, whitespace-trimmed);
    it stays closed when the variable is unset or holds anything else.
    """
    flag = str(os.environ.get("PREDICTION_MARKETS_ENABLED", "")).strip().lower()
    if flag in {"1", "true", "yes", "on"}:
        return True
    return False
|
||||
|
||||
|
||||
def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
if min_interval_s <= 0:
|
||||
return
|
||||
@@ -755,6 +765,16 @@ def fetch_prediction_markets():
|
||||
from services.fetchers._store import latest_data, _data_lock, _mark_fresh
|
||||
global _prev_probabilities
|
||||
|
||||
if not prediction_markets_fetch_enabled():
|
||||
logger.debug(
|
||||
"Prediction markets fetch skipped; set "
|
||||
"PREDICTION_MARKETS_ENABLED=true to opt in"
|
||||
)
|
||||
with _data_lock:
|
||||
latest_data["prediction_markets"] = []
|
||||
_mark_fresh("prediction_markets")
|
||||
return
|
||||
|
||||
markets = fetch_prediction_markets_raw()
|
||||
|
||||
# Compute probability deltas vs previous fetch
|
||||
|
||||
@@ -24,11 +24,7 @@ _AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_HTTP_TIMEOUT_S = 60
|
||||
|
||||
_USER_AGENT = (
|
||||
"ShadowBroker-OSINT/0.9.79 "
|
||||
"(+https://github.com/BigBodyCobain/Shadowbroker; "
|
||||
"contact: bigbodycobain@gmail.com)"
|
||||
)
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_routes_by_callsign: dict[str, dict[str, Any]] = {}
|
||||
|
||||
@@ -228,11 +228,15 @@ def _fetch_dm_prekey_bundle_from_public_lookup(lookup_token: str) -> dict[str, A
|
||||
normalized_peer_url = str(peer_url or "").strip().rstrip("/")
|
||||
if not normalized_peer_url:
|
||||
continue
|
||||
# Generic UA: any peer-facing crypto request should not carry a
|
||||
# fork-specific identifier — that turns prekey lookups into a
|
||||
# software-fingerprinting beacon.
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
request = urllib.request.Request(
|
||||
f"{normalized_peer_url}/api/mesh/dm/prekey-bundle?{encoded}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "ShadowBroker-Infonet/0.9 (+https://github.com/BigBodyCobain/Shadowbroker)",
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
},
|
||||
method="GET",
|
||||
)
|
||||
|
||||
@@ -19,6 +19,17 @@ _retry = Retry(total=1, backoff_factor=0.3, status_forcelist=[502, 503, 504])
|
||||
_session.mount("https://", HTTPAdapter(max_retries=_retry, pool_maxsize=20))
|
||||
_session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
||||
|
||||
|
||||
# Default outbound User-Agent. Generic by design — does NOT include any
|
||||
# personal contact info or a fork-specific repo URL. Operators who run a
|
||||
# public-facing relay and want to identify themselves to upstreams (e.g.
|
||||
# for Nominatim / weather.gov usage-policy compliance) can override this
|
||||
# via the SHADOWBROKER_USER_AGENT env var.
|
||||
# Fall back to the generic default when the override is unset *or* blank
# (e.g. ``SHADOWBROKER_USER_AGENT=`` left empty in .env), so requests never
# go out with an empty User-Agent header. Surrounding whitespace in the
# override is trimmed.
DEFAULT_USER_AGENT = (
    os.environ.get("SHADOWBROKER_USER_AGENT", "").strip()
    or "ShadowBroker-OSINT/0.9"
)
|
||||
|
||||
# Find bash for curl fallback — Git bash's curl has the TLS features
|
||||
# needed to pass CDN fingerprint checks (brotli, zstd, libpsl)
|
||||
|
||||
@@ -73,7 +84,7 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
|
||||
both Python requests and the barebones Windows system curl.
|
||||
"""
|
||||
default_headers = {
|
||||
"User-Agent": "ShadowBroker-OSINT/0.9.79 (+https://github.com/BigBodyCobain/Shadowbroker; contact: bigbodycobain@gmail.com)",
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
}
|
||||
if headers:
|
||||
default_headers.update(headers)
|
||||
|
||||
@@ -52,6 +52,24 @@ import pytest
|
||||
("post", "/api/wormhole/connect", {}),
|
||||
("post", "/api/layers", {"layers": {"viirs_nightlights": True}}),
|
||||
("post", "/api/ais/feed", {"msgs": []}),
|
||||
# Added in post-#227 gap audit:
|
||||
# /api/wormhole/join also calls bootstrap_wormhole_identity() — same
|
||||
# identity-takeover surface as /identity/bootstrap. PR #227 hardened
|
||||
# the latter but missed the former.
|
||||
("post", "/api/wormhole/join", {}),
|
||||
# /api/sigint/transmit relays APRS-IS packets over radio using
|
||||
# operator-supplied credentials. Any caller who reaches this endpoint
|
||||
# could transmit on the operator's authority. Must be local-only.
|
||||
(
|
||||
"post",
|
||||
"/api/sigint/transmit",
|
||||
{
|
||||
"callsign": "N0CALL",
|
||||
"passcode": "12345",
|
||||
"target": "NOCALL",
|
||||
"message": "test",
|
||||
},
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_remote_control_surface_rejects_without_local_operator_or_admin(
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
"""Third-party fetchers that phone home to politically/commercially
|
||||
sensitive upstreams must be operator opt-in only.
|
||||
|
||||
Companion to ``test_crowdthreat_opt_in.py`` — extends the same default-off
|
||||
posture to:
|
||||
|
||||
* EUvsDisinfo FIMI (``FIMI_ENABLED``)
|
||||
* Polymarket + Kalshi (``PREDICTION_MARKETS_ENABLED``)
|
||||
* Finnhub / yfinance financial data (``FINANCIAL_ENABLED`` /
|
||||
``FINNHUB_API_KEY``)
|
||||
* NUFORC HuggingFace dataset (``NUFORC_ENABLED``)
|
||||
|
||||
Each test asserts that with the env var unset (or set to a falsy value)
|
||||
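the fetcher's network entry point is NOT called. The news RSS fetcher
(``NEWS_ENABLED``) is the exception: it defaults on, so only its kill
switch is checked here.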
|
||||
"""
|
||||
|
||||
|
||||
def _explode(*_args, **_kwargs):
    """Stand-in for a network entry point; fails the test if it is ever invoked."""
    message = "upstream called while fetcher was meant to be disabled"
    raise AssertionError(message)
|
||||
|
||||
|
||||
def test_fimi_disabled_by_default_does_not_call_upstream(monkeypatch):
    """With FIMI_ENABLED unset, fetch_fimi() empties the stored data without fetching."""
    from services.fetchers import _store
    from services.fetchers import fimi

    # Any attempt to reach the upstream feed fails the test immediately.
    monkeypatch.setattr(fimi, "fetch_with_curl", _explode)
    # Seed stale data so the assertion proves it was replaced.
    monkeypatch.setitem(_store.latest_data, "fimi", [{"id": "old"}])
    monkeypatch.delenv("FIMI_ENABLED", raising=False)

    fimi.fetch_fimi()

    assert _store.latest_data["fimi"] == []
|
||||
|
||||
|
||||
def test_fimi_falsy_value_does_not_call_upstream(monkeypatch):
    """An explicit FIMI_ENABLED=false behaves like the unset default: no fetch, empty data."""
    from services.fetchers import _store
    from services.fetchers import fimi

    # Any attempt to reach the upstream feed fails the test immediately.
    monkeypatch.setattr(fimi, "fetch_with_curl", _explode)
    # Seed stale data so the assertion proves it was replaced.
    monkeypatch.setitem(_store.latest_data, "fimi", [{"id": "old"}])
    monkeypatch.setenv("FIMI_ENABLED", "false")

    fimi.fetch_fimi()

    assert _store.latest_data["fimi"] == []
|
||||
|
||||
|
||||
def test_prediction_markets_disabled_by_default(monkeypatch):
    """With PREDICTION_MARKETS_ENABLED unset, the raw fetch is never invoked."""
    from services.fetchers import _store
    from services.fetchers import prediction_markets

    # The raw fetch is the network entry point; calling it fails the test.
    monkeypatch.setattr(prediction_markets, "fetch_prediction_markets_raw", _explode)
    # Seed stale data so the assertion proves it was replaced.
    monkeypatch.setitem(_store.latest_data, "prediction_markets", [{"id": "old"}])
    monkeypatch.delenv("PREDICTION_MARKETS_ENABLED", raising=False)

    prediction_markets.fetch_prediction_markets()

    assert _store.latest_data["prediction_markets"] == []
|
||||
|
||||
|
||||
def test_financial_disabled_when_no_optin_or_api_key(monkeypatch):
    """Neither Finnhub nor yfinance may be called without FINANCIAL_ENABLED or an API key."""
    from services.fetchers import _store
    from services.fetchers import financial

    # Both provider entry points fail the test if they are reached.
    for entry_point in ("_fetch_finnhub_quote", "_fetch_yfinance_single"):
        monkeypatch.setattr(financial, entry_point, _explode)
    # Seed stale data so the assertion proves it was replaced.
    monkeypatch.setitem(_store.latest_data, "financial", {"BTC": {"price": 1}})
    # Remove both opt-in signals.
    for variable in ("FINANCIAL_ENABLED", "FINNHUB_API_KEY"):
        monkeypatch.delenv(variable, raising=False)

    financial.fetch_financial_markets()

    assert _store.latest_data["financial"] == {}
|
||||
|
||||
|
||||
def test_financial_enabled_via_finnhub_api_key(monkeypatch):
    """A configured FINNHUB_API_KEY alone is enough to open the financial gate."""
    from services.fetchers import financial

    # Only the API key is present; the explicit flag stays unset.
    monkeypatch.setenv("FINNHUB_API_KEY", "test-key")
    monkeypatch.delenv("FINANCIAL_ENABLED", raising=False)

    enabled = financial.financial_fetch_enabled()

    assert enabled is True
|
||||
|
||||
|
||||
def test_nuforc_disabled_by_default_skips_download(monkeypatch):
    """With NUFORC_ENABLED unset, _download_and_build() returns None without downloading."""
    from services.fetchers import nuforc_enrichment as nuforc

    # Any download attempt fails the test immediately.
    monkeypatch.setattr(nuforc, "fetch_with_curl", _explode)
    monkeypatch.delenv("NUFORC_ENABLED", raising=False)

    index = nuforc._download_and_build()

    assert index is None
|
||||
|
||||
|
||||
def test_news_default_on_but_killable(monkeypatch):
    """News defaults on (kill switch only), but NEWS_ENABLED=false must disable it.

    Covers both halves of the contract named in the test: the gate is open
    when ``NEWS_ENABLED`` is unset, and ``NEWS_ENABLED=false`` stops
    ``fetch_news()`` from touching the network and empties the stored data.
    """
    from services.fetchers import _store, news

    # Default-on half: with the variable unset, the gate must be open.
    monkeypatch.delenv("NEWS_ENABLED", raising=False)
    assert news.news_fetch_enabled() is True

    # Kill-switch half: an explicit "false" closes the gate...
    monkeypatch.setenv("NEWS_ENABLED", "false")
    assert news.news_fetch_enabled() is False

    # ...and fetch_news() must not reach any upstream while it is closed.
    monkeypatch.setitem(_store.latest_data, "news", [{"id": "old"}])
    monkeypatch.setattr(news, "fetch_with_curl", _explode)

    news.fetch_news()

    assert _store.latest_data["news"] == []
|
||||
Reference in New Issue
Block a user