mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-09 15:53:56 +02:00
feat: Telegram OSINT map layer, Osiris intel ports, and maritime settings
Add Telegram OSINT with hourly incremental t.me scraping, metro geocoding separate from news centroids, threat-intercept popup UI with inline media, and HTML markers above alert boxes so pins stay clickable. Expose GFW_API_TOKEN in onboarding and Settings Maritime; harden GFW/CCTV/geo fetchers. Port Osiris-derived recon, SCM, entity graph, malware/cyber feeds, sanctions, and submarine cable layers with tests and documentation. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -100,6 +100,19 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# configured news feeds (kill switch for the news layer).
|
||||
# NEWS_ENABLED=true
|
||||
|
||||
# Global Fishing Watch — fishing vessel activity events (Fishing Activity map layer).
|
||||
# Free API token from https://globalfishingwatch.org/our-apis/tokens
|
||||
# Without this the fishing_activity layer stays empty.
|
||||
# GFW_API_TOKEN=
|
||||
# Optional tuning — GFW can return 40k+ global events; defaults cap fetch for map paint.
|
||||
# GFW_EVENTS_PAGE_SIZE=500
|
||||
# GFW_EVENTS_MAX_PAGES=10
|
||||
# GFW_EVENTS_LOOKBACK_DAYS=7
|
||||
# GFW_EVENTS_TIMEOUT_S=90
|
||||
|
||||
# Windy Webcams global CCTV layer — free key from https://api.windy.com/webcams/docs
|
||||
# WINDY_API_KEY=
|
||||
|
||||
# LTA Singapore traffic cameras — leave blank to skip this data source.
|
||||
# LTA_ACCOUNT_KEY=
|
||||
|
||||
|
||||
+96
-2
@@ -366,6 +366,10 @@ ai_intel_router = _load_optional_router("routers.ai_intel")
|
||||
sar_router = _load_optional_router("routers.sar")
|
||||
infonet_router = _load_optional_router("routers.infonet")
|
||||
road_corridors_router = _load_optional_router("routers.road_corridors")
|
||||
osint_router = _load_optional_router("routers.osint")
|
||||
scm_router = _load_optional_router("routers.scm")
|
||||
entity_graph_router = _load_optional_router("routers.entity_graph")
|
||||
intel_feeds_router = _load_optional_router("routers.intel_feeds")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -3643,6 +3647,10 @@ app.include_router(ai_intel_router)
|
||||
app.include_router(sar_router)
|
||||
app.include_router(infonet_router)
|
||||
app.include_router(road_corridors_router)
|
||||
app.include_router(osint_router)
|
||||
app.include_router(scm_router)
|
||||
app.include_router(entity_graph_router)
|
||||
app.include_router(intel_feeds_router)
|
||||
|
||||
from services.data_fetcher import update_all_data
|
||||
|
||||
@@ -3774,6 +3782,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
old_mesh = is_any_active("sigint_meshtastic")
|
||||
old_aprs = is_any_active("sigint_aprs")
|
||||
old_viirs = is_any_active("viirs_nightlights")
|
||||
old_datacenters = is_any_active("datacenters")
|
||||
old_fishing = is_any_active("fishing_activity")
|
||||
|
||||
# Update only known keys
|
||||
changed = False
|
||||
@@ -3792,6 +3802,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
new_mesh = is_any_active("sigint_meshtastic")
|
||||
new_aprs = is_any_active("sigint_aprs")
|
||||
new_viirs = is_any_active("viirs_nightlights")
|
||||
new_datacenters = is_any_active("datacenters")
|
||||
new_fishing = is_any_active("fishing_activity")
|
||||
|
||||
# Start/stop AIS stream on transition
|
||||
if old_ships and not new_ships:
|
||||
@@ -3847,6 +3859,18 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
_queue_viirs_change_refresh()
|
||||
logger.info("VIIRS change refresh queued (layer enabled)")
|
||||
|
||||
if not old_datacenters and new_datacenters:
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
fetch_datacenters()
|
||||
logger.info("Datacenters loaded (layer enabled)")
|
||||
|
||||
if not old_fishing and new_fishing:
|
||||
from services.fetchers.geo import fetch_fishing_activity
|
||||
|
||||
fetch_fishing_activity()
|
||||
logger.info("Fishing activity refresh queued (layer enabled)")
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@@ -7834,6 +7858,8 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.tripcheck.com",
|
||||
"infocar.dgt.es", # Spain DGT
|
||||
"informo.madrid.es", # Madrid
|
||||
"webcams2.asfinag.at", # Austria ASFINAG motorway cameras
|
||||
"odo.asfinag.at", # ASFINAG catalog API host
|
||||
"www.windy.com",
|
||||
"imgproxy.windy.com", # Windy preview image CDN
|
||||
"www.lakecountypassage.com", # Illinois Lake County PASSAGE snapshots
|
||||
@@ -7842,6 +7868,14 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.nps.gov", # WSDOT-linked Mount Rainier camera
|
||||
"home.lewiscounty.com", # WSDOT partner public camera
|
||||
"www.seattle.gov", # Seattle traffic camera media linked from WSDOT
|
||||
"511on.ca", # Ontario 511 cameras
|
||||
"511.alberta.ca", # Alberta 511 cameras
|
||||
"fl511.com", # Florida 511 cameras
|
||||
"www.fl511.com",
|
||||
"webcams.transport.nsw.gov.au", # NSW Live Traffic camera snapshots
|
||||
"www.livetraffic.com",
|
||||
"livetraffic.com",
|
||||
"opendata.ndw.nu", # Netherlands RWS legacy open-data host
|
||||
}
|
||||
|
||||
|
||||
@@ -7937,7 +7971,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
cache_seconds=15,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/",
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/",
|
||||
},
|
||||
)
|
||||
if host == "511ga.org":
|
||||
@@ -7957,7 +7991,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
cache_seconds=10,
|
||||
headers={
|
||||
"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/",
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/",
|
||||
},
|
||||
)
|
||||
if host in {"gettingaroundillinois.com", "cctv.travelmidwest.com"}:
|
||||
@@ -8039,6 +8073,16 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
"Referer": "https://informo.madrid.es/",
|
||||
},
|
||||
)
|
||||
if host in {"webcams2.asfinag.at", "odo.asfinag.at"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="asfinag-austria",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=60,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.asfinag.at/",
|
||||
},
|
||||
)
|
||||
if host in {"www.windy.com", "imgproxy.windy.com"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="windy-webcams",
|
||||
@@ -8049,6 +8093,56 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
"Referer": "https://www.windy.com/",
|
||||
},
|
||||
)
|
||||
if host == "511on.ca":
|
||||
return _CCTVProxyProfile(
|
||||
name="ontario-511",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=30,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511on.ca/",
|
||||
},
|
||||
)
|
||||
if host == "511.alberta.ca":
|
||||
return _CCTVProxyProfile(
|
||||
name="alberta-511",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=30,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511.alberta.ca/",
|
||||
},
|
||||
)
|
||||
if host in {"fl511.com", "www.fl511.com"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="florida-511",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=30,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://fl511.com/",
|
||||
},
|
||||
)
|
||||
if host == "webcams.transport.nsw.gov.au":
|
||||
return _CCTVProxyProfile(
|
||||
name="nsw-live-traffic",
|
||||
timeout=(5.0, 12.0),
|
||||
cache_seconds=60,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.livetraffic.com/",
|
||||
},
|
||||
)
|
||||
if host in {"opendata.ndw.nu", "www.ndw.nu"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="ndw-netherlands",
|
||||
timeout=(5.0, 12.0),
|
||||
cache_seconds=120,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.ndw.nu/",
|
||||
},
|
||||
)
|
||||
if host in {
|
||||
"webcam.forkswa.com",
|
||||
"webcam.sunmountainlodge.com",
|
||||
|
||||
+36
-2
@@ -47,6 +47,8 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.tripcheck.com",
|
||||
"infocar.dgt.es",
|
||||
"informo.madrid.es",
|
||||
"webcams2.asfinag.at",
|
||||
"odo.asfinag.at",
|
||||
"www.windy.com",
|
||||
"imgproxy.windy.com",
|
||||
"www.lakecountypassage.com",
|
||||
@@ -55,6 +57,14 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.nps.gov",
|
||||
"home.lewiscounty.com",
|
||||
"www.seattle.gov",
|
||||
"511on.ca",
|
||||
"511.alberta.ca",
|
||||
"fl511.com",
|
||||
"www.fl511.com",
|
||||
"webcams.transport.nsw.gov.au",
|
||||
"www.livetraffic.com",
|
||||
"livetraffic.com",
|
||||
"opendata.ndw.nu",
|
||||
}
|
||||
|
||||
|
||||
@@ -120,7 +130,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
read_timeout = 18.0 if "/snapshots/" in path else 12.0
|
||||
return _CCTVProxyProfile(name="gdot-snapshot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, read_timeout), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/"})
|
||||
if host == "511ga.org":
|
||||
return _CCTVProxyProfile(name="gdot-511ga-image", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
@@ -128,7 +138,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
if host.startswith("vss") and host.endswith("dot.ga.gov"):
|
||||
return _CCTVProxyProfile(name="gdot-hls", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 20.0), cache_seconds=10,
|
||||
headers={"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/"})
|
||||
if host in {"gettingaroundillinois.com", "cctv.travelmidwest.com"}:
|
||||
return _CCTVProxyProfile(name="illinois-dot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
@@ -156,10 +166,34 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
return _CCTVProxyProfile(name="madrid-city", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://informo.madrid.es/"})
|
||||
if host in {"webcams2.asfinag.at", "odo.asfinag.at"}:
|
||||
return _CCTVProxyProfile(name="asfinag-austria", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.asfinag.at/"})
|
||||
if host in {"www.windy.com", "imgproxy.windy.com"}:
|
||||
return _CCTVProxyProfile(name="windy-webcams", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.windy.com/"})
|
||||
if host == "511on.ca":
|
||||
return _CCTVProxyProfile(name="ontario-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511on.ca/"})
|
||||
if host == "511.alberta.ca":
|
||||
return _CCTVProxyProfile(name="alberta-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511.alberta.ca/"})
|
||||
if host in {"fl511.com", "www.fl511.com"}:
|
||||
return _CCTVProxyProfile(name="florida-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://fl511.com/"})
|
||||
if host == "webcams.transport.nsw.gov.au":
|
||||
return _CCTVProxyProfile(name="nsw-live-traffic", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.livetraffic.com/"})
|
||||
if host in {"opendata.ndw.nu", "www.ndw.nu"}:
|
||||
return _CCTVProxyProfile(name="ndw-netherlands", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=120,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.ndw.nu/"})
|
||||
return _CCTVProxyProfile(name="generic-cctv", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 8.0), cache_seconds=30,
|
||||
headers={"Accept": "*/*"})
|
||||
|
||||
|
||||
@@ -502,6 +502,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
old_mesh = is_any_active("sigint_meshtastic")
|
||||
old_aprs = is_any_active("sigint_aprs")
|
||||
old_viirs = is_any_active("viirs_nightlights")
|
||||
old_datacenters = is_any_active("datacenters")
|
||||
old_fishing = is_any_active("fishing_activity")
|
||||
changed = False
|
||||
for key, value in update.layers.items():
|
||||
if key in active_layers:
|
||||
@@ -514,6 +516,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
new_mesh = is_any_active("sigint_meshtastic")
|
||||
new_aprs = is_any_active("sigint_aprs")
|
||||
new_viirs = is_any_active("viirs_nightlights")
|
||||
new_datacenters = is_any_active("datacenters")
|
||||
new_fishing = is_any_active("fishing_activity")
|
||||
if old_ships and not new_ships:
|
||||
from services.ais_stream import stop_ais_stream
|
||||
stop_ais_stream()
|
||||
@@ -557,6 +561,16 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
if not old_viirs and new_viirs:
|
||||
_queue_viirs_change_refresh()
|
||||
logger.info("VIIRS change refresh queued (layer enabled)")
|
||||
if not old_datacenters and new_datacenters:
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
fetch_datacenters()
|
||||
logger.info("Datacenters loaded (layer enabled)")
|
||||
if not old_fishing and new_fishing:
|
||||
from services.fetchers.geo import fetch_fishing_activity
|
||||
|
||||
fetch_fishing_activity()
|
||||
logger.info("Fishing activity refresh queued (layer enabled)")
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@@ -759,6 +773,7 @@ async def live_data_slow(
|
||||
"scanners", "weather_alerts", "ukraine_alerts", "air_quality", "volcanoes",
|
||||
"fishing_activity", "psk_reporter", "correlations", "uap_sightings", "wastewater",
|
||||
"crowdthreat", "threat_level", "trending_markets", "road_corridor_trends",
|
||||
"malware_threats", "cyber_threats", "scm_suppliers", "telegram_osint",
|
||||
)
|
||||
freshness = get_source_timestamps_snapshot()
|
||||
payload = {
|
||||
@@ -804,6 +819,26 @@ async def live_data_slow(
|
||||
)
|
||||
if active_layers.get("road_corridor_trends", False)
|
||||
else {"updated_at": None, "corridors": []},
|
||||
"malware_threats": (
|
||||
d.get("malware_threats") or {"threats": [], "total": 0}
|
||||
)
|
||||
if active_layers.get("malware_c2", False)
|
||||
else {"threats": [], "total": 0},
|
||||
"cyber_threats": (
|
||||
d.get("cyber_threats") or {"threats": [], "stats": {}}
|
||||
)
|
||||
if active_layers.get("cyber_threats", False)
|
||||
else {"threats": [], "stats": {}},
|
||||
"scm_suppliers": (
|
||||
d.get("scm_suppliers") or {"suppliers": [], "total": 0, "critical_count": 0}
|
||||
)
|
||||
if active_layers.get("scm_suppliers", False)
|
||||
else {"suppliers": [], "total": 0, "critical_count": 0},
|
||||
"telegram_osint": (
|
||||
d.get("telegram_osint") or {"posts": [], "total": 0, "geolocated": 0}
|
||||
)
|
||||
if active_layers.get("telegram_osint", True)
|
||||
else {"posts": [], "total": 0, "geolocated": 0},
|
||||
"freshness": freshness,
|
||||
}
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
"""Entity graph expansion (intel layer)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.osint_intel.resolve import resolve_entity
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/entity/expand")
@limiter.limit("30/minute")
async def entity_expand(
    request: Request,
    _: None = Depends(require_local_operator),
    type: str = Query(..., min_length=3, max_length=32),
    id: str = Query(..., min_length=2, max_length=200),
    registration: str | None = Query(default=None, max_length=32),
    model: str | None = Query(default=None, max_length=64),
    icao24: str | None = Query(default=None, max_length=16),
) -> dict:
    """Expand one entity through ``resolve_entity`` (local operator only).

    The required ``type`` and ``id`` query parameters are passed to the
    resolver as-is.  ``id`` is also forwarded as the ``label`` property,
    alongside the optional ``registration``, ``model`` and ``icao24`` hints.

    Raises:
        HTTPException: 400 when the resolver raises ``ValueError``; 502 for
            any other exception raised by the resolver.
    """
    hints = dict(label=id, registration=registration, model=model, icao24=icao24)
    try:
        expansion = resolve_entity(type, id, hints)
    except ValueError as exc:
        # The resolver rejected the input; surface its message to the caller.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        # Anything else is reported with a generic message.
        raise HTTPException(status_code=502, detail="Intelligence layer unavailable") from exc
    return expansion
|
||||
@@ -0,0 +1,122 @@
|
||||
"""Malware, cyber threats, and country risk feeds."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from starlette.background import BackgroundTask
|
||||
|
||||
from limiter import limiter
|
||||
from services.fetchers._store import get_latest_data_subset_refs
|
||||
from services.fetchers.telegram_osint import telegram_media_host_allowed
|
||||
from services.intel_feeds.country_risk import build_country_risk_payload
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/malware")
@limiter.limit("60/minute")
async def malware_feed(request: Request) -> dict:
    """Return the stored ``malware_threats`` payload, or an empty placeholder.

    The stored value is returned unchanged when it is a dict whose
    ``threats`` entry is not ``None``; otherwise an empty feed is returned.
    """
    stored = get_latest_data_subset_refs("malware_threats").get("malware_threats")
    usable = isinstance(stored, dict) and stored.get("threats") is not None
    if not usable:
        return {"threats": [], "total": 0, "timestamp": None, "source": "abuse.ch"}
    return stored
|
||||
|
||||
|
||||
@router.get("/api/cyber-threats")
@limiter.limit("60/minute")
async def cyber_threats(request: Request) -> dict:
    """Return the stored ``cyber_threats`` payload, or an empty placeholder.

    Any falsy stored value (missing, ``None``, empty dict) yields the
    placeholder with an empty ``threats`` list and empty ``stats``.
    """
    stored = get_latest_data_subset_refs("cyber_threats").get("cyber_threats")
    if stored:
        return stored
    return {"threats": [], "stats": {}}
|
||||
|
||||
|
||||
@router.get("/api/country-risk")
@limiter.limit("30/minute")
async def country_risk(request: Request) -> dict:
    """Return the payload produced by ``build_country_risk_payload``."""
    risk_payload = build_country_risk_payload()
    return risk_payload
|
||||
|
||||
|
||||
@router.get("/api/telegram-feed")
@limiter.limit("30/minute")
async def telegram_feed(request: Request) -> dict:
    """Return the stored ``telegram_osint`` payload, or an empty placeholder.

    The stored value is returned unchanged when it is a dict whose ``posts``
    entry is not ``None``; otherwise an empty feed is returned.
    """
    stored = get_latest_data_subset_refs("telegram_osint").get("telegram_osint")
    usable = isinstance(stored, dict) and stored.get("posts") is not None
    if not usable:
        return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}
    return stored
|
||||
|
||||
|
||||
def _infer_telegram_media_type(target_url: str, content_type: str) -> str:
|
||||
clean_type = str(content_type or "").split(";", 1)[0].strip().lower()
|
||||
if clean_type and clean_type not in {"application/octet-stream", "binary/octet-stream"}:
|
||||
return content_type
|
||||
path = str(urlparse(target_url).path or "").lower()
|
||||
if path.endswith((".jpg", ".jpeg")):
|
||||
return "image/jpeg"
|
||||
if path.endswith(".png"):
|
||||
return "image/png"
|
||||
if path.endswith(".webp"):
|
||||
return "image/webp"
|
||||
if path.endswith(".gif"):
|
||||
return "image/gif"
|
||||
if path.endswith(".mp4"):
|
||||
return "video/mp4"
|
||||
if path.endswith(".webm"):
|
||||
return "video/webm"
|
||||
return content_type or "application/octet-stream"
|
||||
|
||||
|
||||
def _open_telegram_media_upstream(
    url: str,
    headers: dict[str, str],
    max_redirects: int = 5,
) -> requests.Response:
    """Open a streaming GET to *url*, re-checking the allowlist on every redirect.

    Automatic redirect handling is disabled so that an allowlisted host cannot
    bounce the proxy to an arbitrary destination; each ``Location`` target is
    validated with the same scheme/host rules as the initial URL.

    Raises:
        HTTPException: 403 if a redirect targets a disallowed scheme or host;
            502 if more than *max_redirects* redirects are encountered.
        requests.RequestException: propagated from the underlying request.
    """
    from urllib.parse import urljoin

    current = url
    for _ in range(max_redirects + 1):
        resp = requests.get(
            current,
            stream=True,
            timeout=(3, 45),
            headers=headers,
            allow_redirects=False,
        )
        if not resp.is_redirect:
            return resp
        # Resolve a possibly relative Location against the URL just requested.
        target = urljoin(current, resp.headers.get("Location", ""))
        resp.close()
        hop = urlparse(target)
        if hop.scheme not in ("http", "https") or not telegram_media_host_allowed(hop.hostname):
            raise HTTPException(status_code=403, detail="Host not allowed")
        current = target
    raise HTTPException(status_code=502, detail="Upstream fetch failed")


@router.get("/api/telegram/media")
@limiter.limit("60/minute")
async def telegram_media_proxy(request: Request, url: str = Query(...)) -> StreamingResponse:
    """Stream Telegram CDN media for in-app playback (host allowlist only).

    The client's ``Range`` header, if any, is forwarded upstream, and the
    upstream ``Content-Length`` / ``Content-Range`` headers are relayed back.

    Raises:
        HTTPException: 400 for a non-http(s) scheme; 403 when the host (or a
            redirect target) is not allowlisted; 502 when the upstream request
            fails; the upstream status code when it is >= 400.
    """
    # Local import keeps this block self-contained; starlette is already a
    # dependency of this module.
    from starlette.concurrency import run_in_threadpool

    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="Invalid scheme")
    if not telegram_media_host_allowed(parsed.hostname):
        raise HTTPException(status_code=403, detail="Host not allowed")

    headers = {
        "User-Agent": (
            f"Mozilla/5.0 (compatible; {outbound_user_agent('telegram-media')}) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "*/*",
    }
    if range_header := request.headers.get("range"):
        headers["Range"] = range_header

    try:
        # requests is synchronous; run it off the event loop so a slow CDN
        # (up to the 45 s read timeout) does not stall every other request.
        resp = await run_in_threadpool(_open_telegram_media_upstream, url, headers)
    except requests.RequestException as exc:
        logger.warning("Telegram media upstream failure %s: %s", url, exc)
        raise HTTPException(status_code=502, detail="Upstream fetch failed") from exc

    if resp.status_code >= 400:
        resp.close()
        raise HTTPException(status_code=int(resp.status_code), detail=f"Upstream returned {resp.status_code}")

    media_type = _infer_telegram_media_type(url, resp.headers.get("Content-Type", "application/octet-stream"))
    response_headers = {
        "Cache-Control": "private, max-age=300",
        "Accept-Ranges": resp.headers.get("Accept-Ranges", "bytes"),
    }
    if content_length := resp.headers.get("Content-Length"):
        response_headers["Content-Length"] = content_length
    if content_range := resp.headers.get("Content-Range"):
        response_headers["Content-Range"] = content_range

    return StreamingResponse(
        resp.iter_content(chunk_size=65536),
        status_code=resp.status_code,
        media_type=media_type,
        headers=response_headers,
        # Release the upstream connection once the response has been sent.
        background=BackgroundTask(resp.close),
    )
|
||||
@@ -0,0 +1,151 @@
|
||||
"""Operator OSINT recon routes (server-side proxies, SSRF guarded)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.osint import lookups
|
||||
|
||||
router = APIRouter(dependencies=[Depends(require_local_operator)])
|
||||
|
||||
_ALLOWED_SCHEMAS = {
|
||||
"Person",
|
||||
"Organization",
|
||||
"Company",
|
||||
"Vessel",
|
||||
"Airplane",
|
||||
"LegalEntity",
|
||||
}
|
||||
|
||||
|
||||
class SweepScanRequest(BaseModel):
    """JSON body for ``POST /api/osint/sweep/scan``."""

    # Address as text, 7 to 45 characters long.
    ip: str = Field(
        min_length=7,
        max_length=45,
    )
    # Prefix length, limited to /24 through /32.
    cidr: int = Field(
        default=24,
        ge=24,
        le=32,
    )
|
||||
|
||||
|
||||
def _bad_request(exc: ValueError) -> HTTPException:
    """Build (not raise) a 400 ``HTTPException`` carrying *exc*'s message."""
    message = str(exc)
    return HTTPException(status_code=400, detail=message)
|
||||
|
||||
|
||||
@router.get("/api/osint/ip")
@limiter.limit("20/minute")
async def osint_ip(request: Request, ip: str = Query(..., min_length=7, max_length=45)) -> dict:
    """Return ``lookups.lookup_ip`` for *ip*; a ``ValueError`` becomes HTTP 400."""
    try:
        result = lookups.lookup_ip(ip)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    else:
        return result
|
||||
|
||||
|
||||
@router.get("/api/osint/dns")
@limiter.limit("20/minute")
async def osint_dns(request: Request, domain: str = Query(..., min_length=4, max_length=253)) -> dict:
    """Return ``lookups.lookup_dns`` for *domain*; a ``ValueError`` becomes HTTP 400."""
    try:
        result = lookups.lookup_dns(domain)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    else:
        return result
|
||||
|
||||
|
||||
@router.get("/api/osint/whois")
@limiter.limit("20/minute")
async def osint_whois(request: Request, domain: str = Query(..., min_length=4, max_length=253)) -> dict:
    """Return ``lookups.lookup_whois`` for *domain*; a ``ValueError`` becomes HTTP 400."""
    try:
        result = lookups.lookup_whois(domain)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    else:
        return result
|
||||
|
||||
|
||||
@router.get("/api/osint/certs")
@limiter.limit("20/minute")
async def osint_certs(request: Request, domain: str = Query(..., min_length=4, max_length=253)) -> dict:
    """Return ``lookups.lookup_certs`` for *domain*; a ``ValueError`` becomes HTTP 400."""
    try:
        result = lookups.lookup_certs(domain)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    else:
        return result
|
||||
|
||||
|
||||
@router.get("/api/osint/threats")
@limiter.limit("20/minute")
async def osint_threats(request: Request, query: str | None = Query(default=None, max_length=253)) -> dict:
    """Return ``lookups.lookup_threats`` for the optional *query* (may be ``None``)."""
    result = lookups.lookup_threats(query)
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/bgp")
@limiter.limit("20/minute")
async def osint_bgp(request: Request, query: str = Query(..., min_length=2, max_length=64)) -> dict:
    """Return ``lookups.lookup_bgp`` for *query*; a ``ValueError`` becomes HTTP 400."""
    try:
        result = lookups.lookup_bgp(query)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    else:
        return result
|
||||
|
||||
|
||||
@router.get("/api/osint/sanctions")
@limiter.limit("20/minute")
async def osint_sanctions(
    request: Request,
    query: str = Query(..., min_length=4, max_length=200),
    schema: str | None = Query(default=None),
    limit: int = Query(default=25, ge=1, le=100),
) -> dict:
    """Return ``lookups.lookup_sanctions`` for *query*.

    A non-empty *schema* must be one of ``_ALLOWED_SCHEMAS``; anything else is
    rejected with HTTP 400 before the lookup runs.  *limit* is passed through.
    """
    schema_supplied = bool(schema)
    if schema_supplied and schema not in _ALLOWED_SCHEMAS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid schema. Allowed: {', '.join(sorted(_ALLOWED_SCHEMAS))}",
        )
    matches = lookups.lookup_sanctions(query, schema=schema, limit=limit)
    return matches
|
||||
|
||||
|
||||
@router.get("/api/osint/cve")
@limiter.limit("30/minute")
async def osint_cve(request: Request, cve: str = Query(..., min_length=10, max_length=32)) -> dict:
    """Return ``lookups.lookup_cve`` for *cve*.

    A ``ValueError`` whose message contains "not found" (case-insensitive)
    becomes HTTP 404; any other ``ValueError`` becomes HTTP 400.
    """
    try:
        record = lookups.lookup_cve(cve)
    except ValueError as exc:
        message = str(exc)
        if "not found" in message.lower():
            code = 404
        else:
            code = 400
        raise HTTPException(status_code=code, detail=message) from exc
    return record
|
||||
|
||||
|
||||
@router.get("/api/osint/mac")
@limiter.limit("20/minute")
async def osint_mac(request: Request, mac: str = Query(..., min_length=5, max_length=32)) -> dict:
    """Return ``lookups.lookup_mac`` for *mac*; no error translation is applied here."""
    result = lookups.lookup_mac(mac)
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/github")
@limiter.limit("20/minute")
async def osint_github(request: Request, username: str = Query(..., min_length=1, max_length=64)) -> dict:
    """Return ``lookups.lookup_github`` for *username*; a ``ValueError`` becomes HTTP 404."""
    try:
        profile = lookups.lookup_github(username)
    except ValueError as exc:
        message = str(exc)
        raise HTTPException(status_code=404, detail=message) from exc
    else:
        return profile
|
||||
|
||||
|
||||
@router.get("/api/osint/leaks")
@limiter.limit("10/minute")
async def osint_leaks(request: Request, email: str = Query(..., min_length=5, max_length=254)) -> dict:
    """Return ``lookups.lookup_leaks`` for *email*; a ``ValueError`` becomes HTTP 400."""
    try:
        result = lookups.lookup_leaks(email)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    else:
        return result
|
||||
|
||||
|
||||
@router.get("/api/osint/sweep")
@limiter.limit("5/minute")
async def osint_sweep_init(
    request: Request,
    ip: str = Query(..., min_length=7, max_length=45),
    cidr: int = Query(default=24, ge=24, le=32),
) -> dict:
    """Return ``lookups.sweep_init`` for *ip*/*cidr*; a ``ValueError`` becomes HTTP 400."""
    try:
        plan = lookups.sweep_init(ip, cidr)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    else:
        return plan
|
||||
|
||||
|
||||
@router.post("/api/osint/sweep/scan")
@limiter.limit("3/minute")
async def osint_sweep_scan(request: Request, payload: SweepScanRequest) -> dict:
    """Run a subnet sweep and merge the init and scan results.

    Calls ``subnet_start_for``, ``sweep_scan`` and ``sweep_init`` in that
    order.  Keys from the scan result override same-named init keys, and
    ``subnet`` is always set to ``"<start>/<cidr>"``.  A ``ValueError`` from
    any step becomes HTTP 400.
    """
    prefix_len = payload.cidr
    try:
        subnet = lookups.subnet_start_for(payload.ip, prefix_len)
        scan = lookups.sweep_scan(subnet, prefix_len)
        init = lookups.sweep_init(payload.ip, prefix_len)
        merged = dict(init)
        merged.update(scan)
        merged["subnet"] = f"{subnet}/{payload.cidr}"
        return merged
    except ValueError as exc:
        raise _bad_request(exc) from exc
|
||||
@@ -0,0 +1,16 @@
|
||||
"""Supply-chain risk overlay."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.scm.suppliers import build_scm_payload
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/scm-suppliers")
@limiter.limit("30/minute")
async def scm_suppliers(
    request: Request,
    _: None = Depends(require_local_operator),
) -> dict:
    """Return the payload produced by ``build_scm_payload`` (local operator only)."""
    supplier_payload = build_scm_payload()
    return supplier_payload
|
||||
@@ -51,6 +51,15 @@ API_REGISTRY = [
|
||||
"url": "https://aisstream.io/",
|
||||
"required": True,
|
||||
},
|
||||
{
|
||||
"id": "gfw_api_token",
|
||||
"env_key": "GFW_API_TOKEN",
|
||||
"name": "Global Fishing Watch",
|
||||
"description": "Bearer token for Global Fishing Watch fishing-vessel activity events (Fishing Activity map layer). Free registration at globalfishingwatch.org.",
|
||||
"category": "Maritime",
|
||||
"url": "https://globalfishingwatch.org/our-apis/",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"id": "adsb_lol",
|
||||
"env_key": None,
|
||||
|
||||
+416
-109
@@ -17,6 +17,9 @@ _KNOWN_CCTV_MEDIA_HOST_ALIASES = {
|
||||
# Trusted upstream occasionally publishes a typo for this Georgia camera
|
||||
# host. Normalize it at ingest so the proxy and client stay consistent.
|
||||
"navigatos-c2c.dot.ga.gov": "navigator-c2c.dot.ga.gov",
|
||||
# TravelIQ staging hosts occasionally appear in 511 catalog metadata.
|
||||
"on.stage.traveliq.co": "511on.ca",
|
||||
"ab.stage.traveliq.co": "511.alberta.ca",
|
||||
}
|
||||
|
||||
_POINT_WKT_RE = re.compile(
|
||||
@@ -40,6 +43,17 @@ def _normalize_cctv_media_url(raw_url: str) -> str:
|
||||
return urlunparse(parsed._replace(netloc=netloc))
|
||||
|
||||
|
||||
def _ensure_https_url(raw_url: str) -> str:
    """Upgrade http:// media/catalog URLs to https:// at ingest time.

    The input is first passed through ``_normalize_cctv_media_url``.  An empty
    result yields ``""``; URLs whose scheme is not ``http`` are returned as
    normalized, without further changes.

    When upgrading, an explicit ``:80`` port is dropped: keeping it would
    produce an ``https://host:80/...`` URL that attempts TLS on the plain-HTTP
    port.  Other explicit ports are left untouched.
    """
    candidate = _normalize_cctv_media_url(str(raw_url or "").strip())
    if not candidate:
        return ""
    parsed = urlparse(candidate)
    if parsed.scheme.lower() != "http":
        return candidate
    netloc = parsed.netloc
    if netloc.endswith(":80"):
        # Strip the default HTTP port so https falls back to its own default.
        netloc = netloc[: -len(":80")]
    return urlunparse(parsed._replace(scheme="https", netloc=netloc))
|
||||
|
||||
|
||||
def _looks_like_direct_cctv_media_url(url: str) -> bool:
|
||||
candidate = str(url or "").strip().lower()
|
||||
if not candidate.startswith(("http://", "https://")):
|
||||
@@ -93,6 +107,165 @@ def _parse_wkt_point(raw_point: str) -> tuple[float | None, float | None]:
|
||||
return lat, lon
|
||||
|
||||
|
||||
def _fetch_traveliq_v2_cameras(
    *,
    api_url: str,
    base_url: str,
    id_prefix: str,
    source_agency: str,
) -> List[Dict[str, Any]]:
    """Parse TravelIQ-style GET /api/v2/get/cameras feeds (Ontario, Alberta).

    Args:
        api_url: Endpoint returning a JSON list of camera sites.
        base_url: Base against which each view's ``Url`` is resolved.
        id_prefix: Prefix of the generated ``"<prefix>-<site>-<view>"`` ids.
        source_agency: Value stored as ``source_agency`` and used in log lines.

    Returns:
        One camera dict per usable view.  Returns ``[]`` when the request does
        not yield HTTP 200 or the decoded body is not a list.  Sites without
        numeric coordinates, an ``Id`` or any views are skipped, as are views
        that are not enabled/active or that have no ``Url``.
    """
    resp = fetch_with_curl(
        api_url,
        timeout=30,
        headers={"Accept": "application/json"},
    )
    if not resp or resp.status_code != 200:
        logger.error(
            "%s CCTV fetch failed: HTTP %s",
            source_agency,
            resp.status_code if resp else "no response",
        )
        return []

    data = resp.json()
    if not isinstance(data, list):
        return []

    cameras: List[Dict[str, Any]] = []
    for cam in data:
        if not isinstance(cam, dict):
            continue
        try:
            lat = float(cam.get("Latitude"))
            lon = float(cam.get("Longitude"))
        except (TypeError, ValueError):
            continue

        site_id = cam.get("Id")
        if site_id is None:
            # No view of an id-less site can be emitted, so skip it up front.
            continue
        location = str(cam.get("Location") or cam.get("Roadway") or "Camera")[:120]
        views = cam.get("Views") or []
        if not views:
            continue

        for view in views:
            if not isinstance(view, dict):
                continue
            status = str(view.get("Status") or "enabled").strip().lower()
            if status and status not in {"enabled", "active"}:
                continue
            # Check the raw value first: urljoin(base_url, "") returns base_url,
            # which would otherwise turn a missing Url into a bogus media link.
            view_url = str(view.get("Url") or "").strip()
            if not view_url:
                continue
            media_url = _ensure_https_url(urljoin(base_url, view_url))
            if not media_url:
                continue
            view_id = view.get("Id") or site_id
            label = str(view.get("Description") or location or "Camera")[:120]
            cameras.append(
                {
                    "id": f"{id_prefix}-{site_id}-{view_id}",
                    "source_agency": source_agency,
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": label,
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 60,
                }
            )
    return cameras
|
||||
|
||||
|
||||
def _fetch_511_datatables_cameras(
    *,
    list_url: str,
    base_url: str,
    id_prefix: str,
    source_agency: str,
    referer: str,
    page_size: int = 500,
) -> List[Dict[str, Any]]:
    """Parse 511 DataTables POST /List/GetData/Cameras feeds (Georgia, Florida).

    The feed is paged with DataTables-style ``draw`` / ``start`` / ``length``
    parameters. Paging stops on the first failed or malformed response, on an
    empty page, once ``recordsTotal`` rows have been consumed, or (when no
    total is reported) on the first short page. Cameras collected before a
    failure are still returned.

    Args:
        list_url: Endpoint that receives the POST requests.
        base_url: Site root; used for the ``Origin`` header and to resolve
            relative image URLs.
        id_prefix: Prefix for the generated camera ``id`` values.
        source_agency: Value stored in ``source_agency`` and used in logs; also
            the label fallback when a row has no location/roadway.
        referer: Value sent as the ``Referer`` header.
        page_size: Number of rows requested per page.

    Returns:
        A list of camera dicts (id, source_agency, lat, lon, direction_facing,
        media_url, media_type, refresh_rate_seconds).
    """
    # Hard stop so a server that ignores ``start`` and never reports
    # ``recordsTotal`` cannot keep this loop running forever.
    max_pages = 1000
    cameras: List[Dict[str, Any]] = []
    start = 0
    for draw in range(1, max_pages + 1):
        resp = fetch_with_curl(
            list_url,
            method="POST",
            json_data={"draw": draw, "start": start, "length": page_size},
            timeout=30,
            headers={
                "Accept": "application/json",
                "Referer": referer,
                "Origin": base_url.rstrip("/"),
            },
        )
        if not resp or resp.status_code != 200:
            logger.error(
                "%s CCTV fetch failed: HTTP %s",
                source_agency,
                resp.status_code if resp else "no response",
            )
            break

        try:
            data = resp.json()
        except ValueError as exc:
            # A 200 with a non-JSON body (e.g. an HTML error page) is treated
            # like any other failed page instead of aborting the whole ingest.
            logger.error("%s CCTV fetch returned invalid JSON: %s", source_agency, exc)
            break
        if not isinstance(data, dict):
            break
        rows = data.get("data") or []
        if not isinstance(rows, list) or not rows:
            break

        for row in rows:
            if not isinstance(row, dict):
                continue
            site_id = row.get("id") or row.get("DT_RowId")
            location = row.get("location") or row.get("roadway") or source_agency
            lat_lng = row.get("latLng") or {}
            geography = lat_lng.get("geography") if isinstance(lat_lng, dict) else {}
            lat, lon = _parse_wkt_point(
                geography.get("wellKnownText") if isinstance(geography, dict) else ""
            )
            images = row.get("images") or []
            if not isinstance(images, list):
                images = []
            # First image that has a URL and is not flagged as blocked.
            image = next(
                (
                    candidate
                    for candidate in images
                    if isinstance(candidate, dict)
                    and str(candidate.get("imageUrl") or "").strip()
                    and not bool(candidate.get("blocked"))
                ),
                None,
            )
            if not (site_id and image and lat is not None and lon is not None):
                continue
            media_url = _ensure_https_url(
                urljoin(base_url, str(image.get("imageUrl") or "").strip())
            )
            if not media_url:
                continue
            cameras.append(
                {
                    "id": f"{id_prefix}-{site_id}",
                    "source_agency": source_agency,
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": str(location)[:120],
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 60,
                }
            )

        start += len(rows)
        try:
            total = int(data.get("recordsTotal") or 0)
        except (TypeError, ValueError):
            # Unparseable total: fall back to the short-page stop condition.
            total = 0
        if total and start >= total:
            break
        if not total and len(rows) < page_size:
            break
    else:
        logger.warning(
            "%s CCTV pagination stopped after %s pages", source_agency, max_pages
        )
    return cameras
|
||||
|
||||
|
||||
def init_db():
|
||||
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
@@ -169,7 +342,7 @@ class BaseCCTVIngestor(ABC):
|
||||
cam.get("lat"),
|
||||
cam.get("lon"),
|
||||
cam.get("direction_facing", "Unknown"),
|
||||
cam.get("media_url"),
|
||||
_ensure_https_url(cam.get("media_url", "")),
|
||||
cam.get("media_type", _detect_media_type(cam.get("media_url", ""))),
|
||||
cam.get("refresh_rate_seconds", 60),
|
||||
),
|
||||
@@ -454,77 +627,14 @@ class WSDOTIngestor(BaseCCTVIngestor):
|
||||
class GeorgiaDOTIngestor(BaseCCTVIngestor):
|
||||
"""Georgia cameras via the public 511GA list feed."""
|
||||
|
||||
URL = "https://511ga.org/List/GetData/Cameras"
|
||||
BASE_URL = "https://511ga.org"
|
||||
PAGE_SIZE = 500
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
cameras = []
|
||||
start = 0
|
||||
draw = 1
|
||||
while True:
|
||||
resp = fetch_with_curl(
|
||||
self.URL,
|
||||
method="POST",
|
||||
json_data={"draw": draw, "start": start, "length": self.PAGE_SIZE},
|
||||
timeout=30,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Referer": "https://511ga.org/cctv",
|
||||
"Origin": "https://511ga.org",
|
||||
},
|
||||
)
|
||||
if not resp or resp.status_code != 200:
|
||||
logger.error(
|
||||
"Georgia CCTV fetch failed: HTTP %s",
|
||||
resp.status_code if resp else "no response",
|
||||
)
|
||||
break
|
||||
data = resp.json()
|
||||
rows = data.get("data") or []
|
||||
if not rows:
|
||||
break
|
||||
for row in rows:
|
||||
site_id = row.get("id") or row.get("DT_RowId")
|
||||
location = row.get("location") or row.get("roadway") or "GA Camera"
|
||||
lat_lng = row.get("latLng") or {}
|
||||
geography = lat_lng.get("geography") if isinstance(lat_lng, dict) else {}
|
||||
lat, lon = _parse_wkt_point(geography.get("wellKnownText") if isinstance(geography, dict) else "")
|
||||
images = row.get("images") or []
|
||||
image = next(
|
||||
(
|
||||
candidate
|
||||
for candidate in images
|
||||
if str(candidate.get("imageUrl") or "").strip()
|
||||
and not bool(candidate.get("blocked"))
|
||||
),
|
||||
None,
|
||||
)
|
||||
if not (site_id and image and lat is not None and lon is not None):
|
||||
continue
|
||||
media_url = _normalize_cctv_media_url(
|
||||
urljoin(self.BASE_URL, str(image.get("imageUrl") or "").strip())
|
||||
)
|
||||
cameras.append(
|
||||
{
|
||||
"id": f"GDOT-{site_id}",
|
||||
"source_agency": "Georgia DOT",
|
||||
"lat": lat,
|
||||
"lon": lon,
|
||||
"direction_facing": str(location)[:120],
|
||||
"media_url": media_url,
|
||||
"media_type": "image",
|
||||
"refresh_rate_seconds": 60,
|
||||
}
|
||||
)
|
||||
start += len(rows)
|
||||
draw += 1
|
||||
total = int(data.get("recordsTotal") or 0)
|
||||
if total and start >= total:
|
||||
break
|
||||
if not total and len(rows) < self.PAGE_SIZE:
|
||||
break
|
||||
return cameras
|
||||
return _fetch_511_datatables_cameras(
|
||||
list_url="https://511ga.org/List/GetData/Cameras",
|
||||
base_url="https://511ga.org",
|
||||
id_prefix="GDOT",
|
||||
source_agency="Georgia DOT",
|
||||
referer="https://511ga.org/cctv",
|
||||
)
|
||||
|
||||
|
||||
class IllinoisDOTIngestor(BaseCCTVIngestor):
|
||||
@@ -1009,30 +1119,66 @@ def _extract_img_src(html_fragment: str):
|
||||
return None
|
||||
|
||||
|
||||
class AsfinagIngestor(BaseCCTVIngestor):
    """Austria ASFINAG motorway webcams (Osiris port)."""

    API_URL = "https://odo.asfinag.at/odo/rest/sec/resource/001/json/webcams?language=atDE"
    # Static headers sent with every request.
    # NOTE(review): the Basic credential is hard-coded; presumably it is the
    # public map-widget credential — confirm before rotating or removing it.
    HEADERS = {
        "User-Agent": "Shadowbroker-CCTV/1.0",
        "Accept": "application/json",
        "Referer": "https://www.asfinag.at/",
        "Authorization": "Basic bWFwX3dpZGdldDp0ZWdkaXc=",
    }

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the ASFINAG webcam list and convert it to camera dicts.

        Returns an empty list when the request fails or the payload is not a
        JSON list. Individual entries that are malformed (not a dict, missing
        id/coordinates/image, non-numeric coordinates, or without a usable
        HTTPS URL) are skipped instead of aborting the whole ingest.
        """
        try:
            response = fetch_with_curl(self.API_URL, timeout=15, headers=self.HEADERS)
            response.raise_for_status()
            payload = response.json()
        except Exception as exc:
            logger.error("AsfinagIngestor: fetch failed: %s", exc)
            return []
        if not isinstance(payload, list):
            return []

        cameras: List[Dict[str, Any]] = []
        for cam in payload:
            if not isinstance(cam, dict):
                continue
            cam_id = cam.get("wcs_id")
            lat = cam.get("wgs84_lat")
            lon = cam.get("wgs84_lon")
            image_url = cam.get("url_campic")
            if not cam_id or lat is None or lon is None or not image_url:
                continue
            # Entries whose id starts with "Utinform" are excluded from the layer.
            if str(cam_id).startswith("Utinform"):
                continue
            try:
                lat_f, lon_f = float(lat), float(lon)
            except (TypeError, ValueError):
                # One bad coordinate must not discard every other camera.
                continue
            label = str(
                cam.get("position_txt") or cam.get("direction_txt") or "ASFINAG Webcam"
            )[:120]
            secure_url = _ensure_https_url(str(image_url).strip())
            if not secure_url:
                continue
            cameras.append(
                {
                    "id": f"ASFINAG-{cam_id}",
                    "source_agency": "ASFINAG Austria",
                    "lat": lat_f,
                    "lon": lon_f,
                    "direction_facing": label,
                    "media_url": secure_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 300,
                }
            )
        logger.info("AsfinagIngestor: parsed %s cameras", len(cameras))
        return cameras
|
||||
|
||||
|
||||
class MadridCityIngestor(BaseCCTVIngestor):
|
||||
"""Madrid City Hall traffic cameras from datos.madrid.es KML feed."""
|
||||
|
||||
KML_URL_HTTPS = "https://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
KML_URL_HTTP = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
KML_URL = "https://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def _fetch_kml(self):
|
||||
"""Prefer HTTPS; fall back to legacy HTTP if the catalog is HTTP-only (#363)."""
|
||||
last_error: Exception | None = None
|
||||
for url in (self.KML_URL_HTTPS, self.KML_URL_HTTP):
|
||||
try:
|
||||
response = fetch_with_curl(url, timeout=20)
|
||||
response.raise_for_status()
|
||||
if url == self.KML_URL_HTTP:
|
||||
logger.warning(
|
||||
"MadridCityIngestor: HTTPS KML unavailable, using HTTP catalog feed"
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
logger.debug("MadridCityIngestor: KML fetch failed for %s: %s", url, e)
|
||||
if last_error is not None:
|
||||
raise last_error
|
||||
raise RuntimeError("Madrid KML fetch failed")
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
import defusedxml.ElementTree as ET
|
||||
@@ -1074,6 +1220,9 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
if desc_el is not None and desc_el.text:
|
||||
image_url = _extract_img_src(desc_el.text)
|
||||
|
||||
if not image_url:
|
||||
continue
|
||||
image_url = _ensure_https_url(image_url)
|
||||
if not image_url:
|
||||
continue
|
||||
|
||||
@@ -1095,6 +1244,153 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
return cameras
|
||||
|
||||
|
||||
class Ontario511Ingestor(BaseCCTVIngestor):
    """Ingest Ontario highway cameras from the 511on.ca TravelIQ API."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Delegate to the shared TravelIQ v2 parser with the Ontario feed settings."""
        feed_settings = {
            "api_url": "https://511on.ca/api/v2/get/cameras",
            "base_url": "https://511on.ca",
            "id_prefix": "ON511",
            "source_agency": "511 Ontario",
        }
        return _fetch_traveliq_v2_cameras(**feed_settings)
|
||||
|
||||
|
||||
class Alberta511Ingestor(BaseCCTVIngestor):
    """Ingest Alberta highway cameras from the 511 Alberta TravelIQ API."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Delegate to the shared TravelIQ v2 parser with the Alberta feed settings."""
        feed_settings = {
            "api_url": "https://511.alberta.ca/api/v2/get/cameras",
            "base_url": "https://511.alberta.ca",
            "id_prefix": "AB511",
            "source_agency": "511 Alberta",
        }
        return _fetch_traveliq_v2_cameras(**feed_settings)
|
||||
|
||||
|
||||
class Florida511Ingestor(BaseCCTVIngestor):
    """Ingest Florida cameras from the FL511 DataTables feed (~4,800 sites)."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Delegate to the shared 511 DataTables parser with the FL511 feed settings."""
        feed_settings = {
            "list_url": "https://fl511.com/List/GetData/Cameras",
            "base_url": "https://fl511.com",
            "id_prefix": "FL511",
            "source_agency": "Florida 511",
            "referer": "https://fl511.com/",
        }
        return _fetch_511_datatables_cameras(**feed_settings)
|
||||
|
||||
|
||||
class AustraliaLiveTrafficIngestor(BaseCCTVIngestor):
    """NSW / Australia live traffic cameras via Transport for NSW JSON feed."""

    URL = "https://www.livetraffic.com/datajson/all-feeds-web.json"

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Download the combined feed and keep only its ``liveCams`` entries.

        Returns an empty list when the request fails or the payload is not a
        list. Entries without two numeric coordinates or without a usable
        HTTPS image URL are skipped.
        """
        resp = fetch_with_curl(self.URL, timeout=35, headers={"Accept": "application/json"})
        if not resp or resp.status_code != 200:
            http_status = resp.status_code if resp else "no response"
            logger.error("Australia Live Traffic CCTV fetch failed: HTTP %s", http_status)
            return []

        feed = resp.json()
        if not isinstance(feed, list):
            return []

        found: List[Dict[str, Any]] = []
        for entry in feed:
            if not isinstance(entry, dict):
                continue
            if entry.get("eventType") != "liveCams":
                continue

            geometry = entry.get("geometry")
            position = geometry.get("coordinates") if isinstance(geometry, dict) else None
            if not isinstance(position, list) or len(position) < 2:
                continue
            # The feed stores coordinates as [lon, lat].
            try:
                longitude = float(position[0])
                latitude = float(position[1])
            except (TypeError, ValueError):
                continue

            details = entry.get("properties")
            if not isinstance(details, dict):
                details = {}
            image_url = _ensure_https_url(str(details.get("href") or "").strip())
            if not image_url:
                continue

            # Fall back to the running count when the entry carries no path/id.
            raw_id = entry.get("path") or details.get("id") or len(found)
            camera_key = str(raw_id).strip("/")
            title = details.get("title") or details.get("headline") or "Australia Camera"
            found.append(
                {
                    "id": f"AUS-{camera_key}",
                    "source_agency": "NSW Live Traffic",
                    "lat": latitude,
                    "lon": longitude,
                    "direction_facing": str(title)[:120],
                    "media_url": image_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 120,
                }
            )
        logger.info("AustraliaLiveTrafficIngestor: parsed %s cameras", len(found))
        return found
|
||||
|
||||
|
||||
class NetherlandsRWSIngestor(BaseCCTVIngestor):
    """Netherlands Rijkswaterstaat cameras from legacy NDW open-data JSON.

    The opendata.ndw.nu/cameras.json feed Osiris used is often offline; when
    it cannot be fetched this ingestor logs a warning and returns no cameras.
    """

    URL = "https://opendata.ndw.nu/cameras.json"
    # Only the first MAX_CAMERAS feed entries are considered.
    MAX_CAMERAS = 1200

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the NDW camera list and convert usable entries to camera dicts.

        Accepts ``lat``/``latitude`` and ``lng``/``longitude`` keys, and takes
        the image URL from ``imageUrl``, ``feed_url`` or ``url`` (first
        non-empty). Entries missing any of these are skipped.
        """
        resp = fetch_with_curl(self.URL, timeout=25, headers={"Accept": "application/json"})
        if not resp or resp.status_code != 200:
            http_status = resp.status_code if resp else "no response"
            logger.warning(
                "Netherlands RWS cameras.json unavailable (HTTP %s) — "
                "NDW retired this open-data endpoint; no cameras ingested",
                http_status,
            )
            return []

        payload = resp.json()
        if not isinstance(payload, list):
            return []

        found: List[Dict[str, Any]] = []
        for index, entry in enumerate(payload[: self.MAX_CAMERAS]):
            if not isinstance(entry, dict):
                continue

            raw_lat = entry.get("lat")
            if raw_lat is None:
                raw_lat = entry.get("latitude")
            raw_lon = entry.get("lng")
            if raw_lon is None:
                raw_lon = entry.get("longitude")

            raw_url = entry.get("imageUrl") or entry.get("feed_url") or entry.get("url") or ""
            media_url = _ensure_https_url(str(raw_url).strip())
            if raw_lat is None or raw_lon is None or not media_url:
                continue

            try:
                latitude = float(raw_lat)
                longitude = float(raw_lon)
            except (TypeError, ValueError):
                continue

            # Use the feed position as the id when the entry has none.
            camera_key = entry.get("id") or index
            found.append(
                {
                    "id": f"NLRWS-{camera_key}",
                    "source_agency": "Rijkswaterstaat",
                    "lat": latitude,
                    "lon": longitude,
                    "direction_facing": str(entry.get("name") or "Netherlands Camera")[:120],
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 120,
                }
            )
        logger.info("NetherlandsRWSIngestor: parsed %s cameras", len(found))
        return found
|
||||
|
||||
|
||||
def _detect_media_type(url: str) -> str:
|
||||
"""Detect the media type from a camera URL for proper frontend rendering."""
|
||||
if not url:
|
||||
@@ -1113,29 +1409,40 @@ def _detect_media_type(url: str) -> str:
|
||||
return "image"
|
||||
|
||||
|
||||
def scheduled_cctv_ingestors() -> List[tuple["BaseCCTVIngestor", str]]:
    """Return the canonical (ingestor, job name) pairs.

    Shared by startup, the scheduler, and DB seeding. Each call builds fresh
    ingestor instances, in the order listed below.
    """
    registry = (
        (TFLJamCamIngestor, "cctv_tfl"),
        (LTASingaporeIngestor, "cctv_lta"),
        (AustinTXIngestor, "cctv_atx"),
        (NYCDOTIngestor, "cctv_nyc"),
        (CaltransIngestor, "cctv_caltrans"),
        (ColoradoDOTIngestor, "cctv_codot"),
        (WSDOTIngestor, "cctv_wsdot"),
        (GeorgiaDOTIngestor, "cctv_gdot"),
        (IllinoisDOTIngestor, "cctv_idot"),
        (MichiganDOTIngestor, "cctv_mdot"),
        (WindyWebcamsIngestor, "cctv_windy"),
        (DGTNationalIngestor, "cctv_dgt"),
        (MadridCityIngestor, "cctv_madrid"),
        (OSMTrafficCameraIngestor, "cctv_osm"),
        (AsfinagIngestor, "cctv_asfinag"),
        (OSMALPRCameraIngestor, "cctv_osm_alpr"),
        (Ontario511Ingestor, "cctv_on511"),
        (Alberta511Ingestor, "cctv_ab511"),
        (Florida511Ingestor, "cctv_fl511"),
        (AustraliaLiveTrafficIngestor, "cctv_australia"),
        (NetherlandsRWSIngestor, "cctv_nl_rws"),
    )
    return [(ingestor_cls(), job_name) for ingestor_cls, job_name in registry]
|
||||
|
||||
|
||||
def run_all_ingestors():
|
||||
"""Run all CCTV ingestors synchronously. Used for first-run DB seeding."""
|
||||
ingestors = [
|
||||
TFLJamCamIngestor(),
|
||||
LTASingaporeIngestor(),
|
||||
AustinTXIngestor(),
|
||||
NYCDOTIngestor(),
|
||||
CaltransIngestor(),
|
||||
ColoradoDOTIngestor(),
|
||||
WSDOTIngestor(),
|
||||
GeorgiaDOTIngestor(),
|
||||
IllinoisDOTIngestor(),
|
||||
MichiganDOTIngestor(),
|
||||
WindyWebcamsIngestor(),
|
||||
OSMTrafficCameraIngestor(),
|
||||
DGTNationalIngestor(),
|
||||
MadridCityIngestor(),
|
||||
]
|
||||
for ing in ingestors:
|
||||
for ingestor, _name in scheduled_cctv_ingestors():
|
||||
try:
|
||||
ing.ingest()
|
||||
ingestor.ingest()
|
||||
except Exception as e:
|
||||
logger.warning(f"Ingestor {ing.__class__.__name__} failed during seed: {e}")
|
||||
logger.warning(f"Ingestor {ingestor.__class__.__name__} failed during seed: {e}")
|
||||
|
||||
|
||||
def get_all_cameras() -> List[Dict[str, Any]]:
|
||||
|
||||
@@ -101,6 +101,10 @@ from services.fetchers.crowdthreat import fetch_crowdthreat # noqa: F401
|
||||
from services.fetchers.wastewater import fetch_wastewater # noqa: F401
|
||||
from services.fetchers.sar_catalog import fetch_sar_catalog # noqa: F401
|
||||
from services.fetchers.sar_products import fetch_sar_products # noqa: F401
|
||||
from services.fetchers.malware import fetch_malware_threats # noqa: F401
|
||||
from services.fetchers.telegram_osint import fetch_telegram_osint # noqa: F401
|
||||
from services.fetchers.cyber_status import fetch_cyber_threats # noqa: F401
|
||||
from services.scm.suppliers import fetch_scm_suppliers # noqa: F401
|
||||
from services.ais_stream import prune_stale_vessels # noqa: F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -480,6 +484,9 @@ def update_slow_data():
|
||||
fetch_fishing_activity,
|
||||
fetch_power_plants,
|
||||
fetch_ukraine_air_raid_alerts,
|
||||
fetch_malware_threats,
|
||||
fetch_cyber_threats,
|
||||
fetch_scm_suppliers,
|
||||
]
|
||||
_run_tasks("slow-tier", slow_funcs)
|
||||
# Run correlation engine after all data is fresh
|
||||
@@ -523,6 +530,15 @@ def _load_cctv_cache_for_startup() -> None:
|
||||
logger.warning("Startup CCTV cache load failed (non-fatal): %s", e)
|
||||
|
||||
|
||||
def _load_static_infrastructure_for_startup() -> None:
    """Load the static reference layers (datacenters, military bases, power plants).

    Each loader runs in its own try block so one failing layer only produces
    a warning and does not stop the remaining loaders.
    """
    loaders = (fetch_datacenters, fetch_military_bases, fetch_power_plants)
    for loader in loaders:
        try:
            loader()
        except Exception as err:
            logger.warning(
                "Startup static infrastructure load failed for %s: %s",
                loader.__name__,
                err,
            )
|
||||
|
||||
|
||||
def _run_delayed_startup_heavy_refresh() -> None:
|
||||
if _STARTUP_HEAVY_REFRESH_DELAY_S > 0:
|
||||
logger.info(
|
||||
@@ -535,6 +551,7 @@ def _run_delayed_startup_heavy_refresh() -> None:
|
||||
"startup-heavy",
|
||||
[
|
||||
update_slow_data,
|
||||
fetch_telegram_osint,
|
||||
fetch_volcanoes,
|
||||
fetch_viirs_change_nodes,
|
||||
fetch_unusual_whales,
|
||||
@@ -573,6 +590,7 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
logger.info("Full data update starting (parallel)...")
|
||||
# Preload Meshtastic map cache immediately (instant, from disk)
|
||||
seed_startup_caches()
|
||||
_load_static_infrastructure_for_startup()
|
||||
with _data_lock:
|
||||
meshtastic_seeded = bool(latest_data.get("meshtastic_map_nodes"))
|
||||
if startup_mode:
|
||||
@@ -649,22 +667,9 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
# (the scheduled job also runs every 10 min for ongoing refresh).
|
||||
if startup_mode:
|
||||
try:
|
||||
from services.cctv_pipeline import (
|
||||
TFLJamCamIngestor, LTASingaporeIngestor, AustinTXIngestor,
|
||||
NYCDOTIngestor, CaltransIngestor, ColoradoDOTIngestor,
|
||||
WSDOTIngestor, GeorgiaDOTIngestor, IllinoisDOTIngestor,
|
||||
MichiganDOTIngestor, WindyWebcamsIngestor, DGTNationalIngestor,
|
||||
MadridCityIngestor, OSMTrafficCameraIngestor, get_all_cameras,
|
||||
)
|
||||
from services.cctv_pipeline import OSMALPRCameraIngestor
|
||||
_startup_ingestors = [
|
||||
TFLJamCamIngestor(), LTASingaporeIngestor(), AustinTXIngestor(),
|
||||
NYCDOTIngestor(), CaltransIngestor(), ColoradoDOTIngestor(),
|
||||
WSDOTIngestor(), GeorgiaDOTIngestor(), IllinoisDOTIngestor(),
|
||||
MichiganDOTIngestor(), WindyWebcamsIngestor(), DGTNationalIngestor(),
|
||||
MadridCityIngestor(), OSMTrafficCameraIngestor(),
|
||||
OSMALPRCameraIngestor(),
|
||||
]
|
||||
from services.cctv_pipeline import get_all_cameras, scheduled_cctv_ingestors
|
||||
|
||||
_startup_ingestors = [ing for ing, _name in scheduled_cctv_ingestors()]
|
||||
logger.info("Running CCTV ingest at startup (%d ingestors)...", len(_startup_ingestors))
|
||||
ingest_futures = {
|
||||
_SHARED_EXECUTOR.submit(ing.ingest): ing.__class__.__name__
|
||||
@@ -800,6 +805,18 @@ def start_scheduler():
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# Telegram OSINT — hourly t.me/s channel scrape (kept off the 5-minute slow tier).
|
||||
_telegram_interval_m = max(15, int(os.environ.get("TELEGRAM_OSINT_INTERVAL_MINUTES", "60")))
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_telegram_osint, "fetch_telegram_osint"),
|
||||
"interval",
|
||||
minutes=_telegram_interval_m,
|
||||
next_run_time=datetime.utcnow() + timedelta(seconds=45),
|
||||
id="telegram_osint",
|
||||
max_instances=1,
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# Prediction markets — own jittered cadence (Polymarket/Kalshi clearnet egress).
|
||||
# Kept off the fixed 5-minute slow tier so poll timing is less fingerprintable.
|
||||
from services.fetchers.prediction_markets import fetch_prediction_markets
|
||||
@@ -938,39 +955,9 @@ def start_scheduler():
|
||||
|
||||
# CCTV pipeline refresh — runs all ingestors, then refreshes in-memory data.
|
||||
# Delay the first run slightly so startup serves cached/DB-backed data first.
|
||||
from services.cctv_pipeline import (
|
||||
TFLJamCamIngestor,
|
||||
LTASingaporeIngestor,
|
||||
AustinTXIngestor,
|
||||
NYCDOTIngestor,
|
||||
CaltransIngestor,
|
||||
ColoradoDOTIngestor,
|
||||
WSDOTIngestor,
|
||||
GeorgiaDOTIngestor,
|
||||
IllinoisDOTIngestor,
|
||||
MichiganDOTIngestor,
|
||||
WindyWebcamsIngestor,
|
||||
DGTNationalIngestor,
|
||||
MadridCityIngestor,
|
||||
OSMTrafficCameraIngestor,
|
||||
)
|
||||
from services.cctv_pipeline import scheduled_cctv_ingestors
|
||||
|
||||
_cctv_ingestors = [
|
||||
(TFLJamCamIngestor(), "cctv_tfl"),
|
||||
(LTASingaporeIngestor(), "cctv_lta"),
|
||||
(AustinTXIngestor(), "cctv_atx"),
|
||||
(NYCDOTIngestor(), "cctv_nyc"),
|
||||
(CaltransIngestor(), "cctv_caltrans"),
|
||||
(ColoradoDOTIngestor(), "cctv_codot"),
|
||||
(WSDOTIngestor(), "cctv_wsdot"),
|
||||
(GeorgiaDOTIngestor(), "cctv_gdot"),
|
||||
(IllinoisDOTIngestor(), "cctv_idot"),
|
||||
(MichiganDOTIngestor(), "cctv_mdot"),
|
||||
(WindyWebcamsIngestor(), "cctv_windy"),
|
||||
(DGTNationalIngestor(), "cctv_dgt"),
|
||||
(MadridCityIngestor(), "cctv_madrid"),
|
||||
(OSMTrafficCameraIngestor(), "cctv_osm"),
|
||||
]
|
||||
_cctv_ingestors = scheduled_cctv_ingestors()
|
||||
|
||||
def _run_cctv_ingest_cycle():
|
||||
from services.fetchers._store import is_any_active
|
||||
|
||||
@@ -46,6 +46,7 @@ _CRITICAL_WARN = {
|
||||
|
||||
_OPTIONAL = {
|
||||
"AIS_API_KEY": "AIS vessel streaming (ships layer will be empty without it)",
|
||||
"GFW_API_TOKEN": "Global Fishing Watch fishing-vessel activity (fishing_activity layer)",
|
||||
"LTA_ACCOUNT_KEY": "Singapore LTA traffic cameras (CCTV layer)",
|
||||
"PUBLIC_API_KEY": "Optional client auth for public endpoints (recommended for exposed deployments)",
|
||||
}
|
||||
|
||||
@@ -70,6 +70,10 @@ class DashboardData(TypedDict, total=False):
|
||||
sar_anomalies: List[Dict[str, Any]]
|
||||
sar_aoi_coverage: List[Dict[str, Any]]
|
||||
road_corridor_trends: Dict[str, Any]
|
||||
malware_threats: Dict[str, Any]
|
||||
cyber_threats: Dict[str, Any]
|
||||
scm_suppliers: Dict[str, Any]
|
||||
telegram_osint: Dict[str, Any]
|
||||
|
||||
|
||||
# In-memory store
|
||||
@@ -121,6 +125,10 @@ latest_data: DashboardData = {
|
||||
"sar_anomalies": [],
|
||||
"sar_aoi_coverage": [],
|
||||
"road_corridor_trends": {"updated_at": None, "corridors": []},
|
||||
"malware_threats": {"threats": [], "total": 0, "timestamp": None},
|
||||
"cyber_threats": {"threats": [], "stats": {}},
|
||||
"scm_suppliers": {"suppliers": [], "total": 0, "critical_count": 0},
|
||||
"telegram_osint": {"posts": [], "total": 0, "geolocated": 0, "timestamp": None},
|
||||
}
|
||||
|
||||
# Per-source freshness timestamps
|
||||
@@ -331,6 +339,11 @@ active_layers: dict[str, bool] = {
|
||||
"crowdthreat": False,
|
||||
"sar": True,
|
||||
"road_corridor_trends": False,
|
||||
"malware_c2": False,
|
||||
"submarine_cables": False,
|
||||
"scm_suppliers": False,
|
||||
"cyber_threats": False,
|
||||
"telegram_osint": True,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
"""CISA KEV + cyber threat stats (Osiris port)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fetch_cyber_threats() -> dict[str, Any]:
    """Refresh ``latest_data["cyber_threats"]`` from the CISA KEV catalog.

    When the ``cyber_threats`` layer is inactive, the cached value (or an
    empty payload) is returned without any network access. Otherwise the KEV
    feed is downloaded and the ten most recently added vulnerabilities from
    the last 30 days are published, together with summary stats. A failed
    download is logged and results in an empty threat list.

    Returns:
        The payload stored under ``cyber_threats``: ``threats``, ``stats``
        and ``timestamp``.
    """
    if not is_any_active("cyber_threats"):
        return latest_data.get("cyber_threats") or {"threats": [], "stats": {}}

    results: dict[str, Any] = {
        "threats": [],
        "stats": {},
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    try:
        resp = fetch_with_curl(
            "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json",
            timeout=15,
        )
        if resp.status_code == 200:
            data = resp.json()
            vulns = data.get("vulnerabilities") or []
            results["stats"]["cisa_total"] = len(vulns)
            now = datetime.now(timezone.utc)
            recent: list[tuple[datetime, dict[str, Any]]] = []
            for v in vulns:
                if not isinstance(v, dict):
                    continue
                raw_added = str(v.get("dateAdded") or "")
                try:
                    added = datetime.fromisoformat(raw_added.replace("Z", "+00:00"))
                except ValueError:
                    continue
                # Date-only values parse as naive datetimes; subtracting a
                # naive value from the aware ``now`` raises TypeError, which
                # previously caused every entry to be skipped. Treat naive
                # values as UTC.
                if added.tzinfo is None:
                    added = added.replace(tzinfo=timezone.utc)
                days = (now - added).total_seconds() / 86400
                if days <= 30:
                    recent.append((added, v))
            # Newest first, so the cut to ten keeps the most recent entries
            # regardless of feed order (sort is stable for equal dates).
            recent.sort(key=lambda pair: pair[0], reverse=True)
            results["threats"] = [
                {
                    "id": v.get("cveID"),
                    "name": v.get("vulnerabilityName"),
                    "vendor": v.get("vendorProject"),
                    "product": v.get("product"),
                    "severity": "CRITICAL",
                    "date": v.get("dateAdded"),
                    "due": v.get("dueDate"),
                    "source": "CISA KEV",
                }
                for _added, v in recent[:10]
            ]
    except Exception as exc:
        logger.warning("CISA KEV fetch failed: %s", exc)

    count = len(results["threats"])
    results["stats"]["active_cves"] = count
    results["stats"]["threat_level"] = (
        "CRITICAL" if count >= 8 else "HIGH" if count >= 4 else "ELEVATED"
    )

    with _data_lock:
        latest_data["cyber_threats"] = results
    _mark_fresh("cyber_threats")
    return results
|
||||
@@ -278,6 +278,16 @@ _FISHING_FETCH_INTERVAL_S = 3600 # once per hour — GFW data has ~5 day lag
|
||||
_last_fishing_fetch_ts: float = 0.0
|
||||
|
||||
|
||||
def _gfw_int_env(name: str, default: int, *, minimum: int = 1, maximum: int | None = None) -> int:
|
||||
try:
|
||||
value = int(os.environ.get(name, str(default)) or default)
|
||||
except (TypeError, ValueError):
|
||||
value = default
|
||||
if maximum is not None:
|
||||
value = min(maximum, value)
|
||||
return max(minimum, value)
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_fishing_activity():
|
||||
"""Fetch recent fishing events from Global Fishing Watch (~5 day lag)."""
|
||||
@@ -300,10 +310,16 @@ def fetch_fishing_activity():
|
||||
try:
|
||||
import datetime as _dt
|
||||
|
||||
# GFW publishes with ~5 day lag; windows shorter than ~7 days often return 0 events.
|
||||
lookback_days = _gfw_int_env("GFW_EVENTS_LOOKBACK_DAYS", 7, minimum=1, maximum=14)
|
||||
max_pages = _gfw_int_env("GFW_EVENTS_MAX_PAGES", 10, minimum=1, maximum=100)
|
||||
timeout_s = _gfw_int_env("GFW_EVENTS_TIMEOUT_S", 90, minimum=30, maximum=180)
|
||||
_end = _dt.date.today().isoformat()
|
||||
_start = (_dt.date.today() - _dt.timedelta(days=7)).isoformat()
|
||||
page_size = max(1, int(os.environ.get("GFW_EVENTS_PAGE_SIZE", "500") or "500"))
|
||||
_start = (_dt.date.today() - _dt.timedelta(days=lookback_days)).isoformat()
|
||||
page_size = _gfw_int_env("GFW_EVENTS_PAGE_SIZE", 500, minimum=1, maximum=1000)
|
||||
offset = 0
|
||||
pages_fetched = 0
|
||||
total_available: int | None = None
|
||||
seen_offsets: set[int] = set()
|
||||
seen_ids: set[str] = set()
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
@@ -324,7 +340,7 @@ def fetch_fishing_activity():
|
||||
}
|
||||
)
|
||||
url = f"https://gateway.api.globalfishingwatch.org/v3/events?{query}"
|
||||
response = fetch_with_curl(url, timeout=30, headers=headers)
|
||||
response = fetch_with_curl(url, timeout=timeout_s, headers=headers)
|
||||
if response.status_code != 200:
|
||||
logger.warning(
|
||||
"Fishing activity fetch failed at offset=%s: HTTP %s",
|
||||
@@ -334,10 +350,16 @@ def fetch_fishing_activity():
|
||||
break
|
||||
|
||||
payload = response.json() or {}
|
||||
if total_available is None:
|
||||
try:
|
||||
total_available = int(payload.get("total")) if payload.get("total") is not None else None
|
||||
except (TypeError, ValueError):
|
||||
total_available = None
|
||||
entries = payload.get("entries", [])
|
||||
if not entries:
|
||||
break
|
||||
|
||||
pages_fetched += 1
|
||||
added_this_page = 0
|
||||
for e in entries:
|
||||
pos = e.get("position", {})
|
||||
@@ -372,6 +394,15 @@ def fetch_fishing_activity():
|
||||
if len(entries) < page_size:
|
||||
break
|
||||
|
||||
if pages_fetched >= max_pages:
|
||||
logger.info(
|
||||
"Fishing activity: capped at %s pages (%s events fetched; GFW total=%s)",
|
||||
max_pages,
|
||||
len(events),
|
||||
total_available if total_available is not None else "unknown",
|
||||
)
|
||||
break
|
||||
|
||||
next_offset = payload.get("nextOffset")
|
||||
if next_offset is None:
|
||||
next_offset = (payload.get("pagination") or {}).get("nextOffset")
|
||||
|
||||
@@ -235,11 +235,11 @@ _DC_GEOCODED_PATH = Path(__file__).parent.parent.parent / "data" / "datacenters_
|
||||
|
||||
|
||||
def fetch_datacenters():
|
||||
"""Load geocoded data centers (5K+ street-level precise locations)."""
|
||||
from services.fetchers._store import is_any_active
|
||||
"""Load geocoded data centers (5K+ street-level precise locations).
|
||||
|
||||
if not is_any_active("datacenters"):
|
||||
return
|
||||
Always loads from disk; /api/live-data/slow gates the payload on the
|
||||
datacenters layer toggle so enabling the layer can render immediately.
|
||||
"""
|
||||
dcs = []
|
||||
try:
|
||||
if not _DC_GEOCODED_PATH.exists():
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Malware C2 / URLhaus feed (abuse.ch, Osiris port)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
COUNTRY_CENTROIDS: dict[str, tuple[float, float]] = {
|
||||
"AF": (65, 33), "AL": (20, 41), "DZ": (3, 28), "AR": (-64, -34), "AU": (134, -25),
|
||||
"AT": (14, 47.5), "BE": (4, 50.8), "BR": (-51, -10), "CA": (-96, 62), "CN": (105, 35),
|
||||
"DE": (10, 51), "FR": (2, 46), "GB": (-2, 54), "IN": (79, 22), "IR": (53, 32),
|
||||
"IT": (12.5, 42.8), "JP": (138, 36), "KR": (128, 36), "MX": (-102, 23.5), "NL": (5.5, 52.5),
|
||||
"PL": (19.5, 52), "RU": (100, 60), "SG": (103.8, 1.35), "TW": (121, 23.7), "UA": (32, 49),
|
||||
"US": (-97, 38), "VN": (106, 16),
|
||||
}
|
||||
|
||||
|
||||
def fetch_malware_threats() -> list[dict[str, Any]]:
    """Collect abuse.ch botnet-C2 and malware-URL indicators as map pins.

    Reads the Feodo Tracker IP blocklist (first 200 entries) and the 100 most
    recent URLhaus URLs. Each indicator is placed at its country centroid plus
    a deterministic jitter derived from a running counter, so pins from the
    same country do not stack. The combined payload is stored under
    ``latest_data["malware_threats"]`` and marked fresh.

    Indicators whose country code is missing or not in ``COUNTRY_CENTROIDS``
    are skipped in both feeds: inventing a location would put false
    intelligence on the map.

    Returns:
        The list of threat dicts. When the ``malware_c2`` layer is inactive no
        request is made and the previously stored threat list (or ``[]``) is
        returned.
    """
    if not is_any_active("malware_c2"):
        # The store holds the whole payload dict; return only its threat list
        # so both code paths honour the declared return type.
        cached = latest_data.get("malware_threats")
        if isinstance(cached, dict):
            return cached.get("threats") or []
        return cached or []

    threats: list[dict[str, Any]] = []
    threat_id = 0  # shared across both feeds; also seeds the jitter

    try:
        resp = fetch_with_curl(
            "https://feodotracker.abuse.ch/downloads/ipblocklist.json",
            timeout=10,
            headers={"User-Agent": "Shadowbroker/1.0", "Accept": "application/json"},
        )
        if resp.status_code == 200:
            entries = resp.json()
            if not isinstance(entries, list):
                entries = []
            for entry in entries[:200]:
                cc = entry.get("country")
                if not cc or cc not in COUNTRY_CENTROIDS:
                    continue
                lng, lat = COUNTRY_CENTROIDS[cc]
                # Deterministic offset in [-4, 4) degrees.
                j_lng = ((threat_id * 173.7) % 200 - 100) / 100 * 4
                j_lat = ((threat_id * 293.1) % 200 - 100) / 100 * 4
                threats.append(
                    {
                        "id": f"feodo-{threat_id}",
                        "lat": lat + j_lat,
                        "lng": lng + j_lng,
                        "ip": entry.get("ip_address") or "unknown",
                        "port": entry.get("dst_port") or 0,
                        "malware": entry.get("malware") or "unknown",
                        "status": entry.get("status") or "active",
                        "first_seen": entry.get("first_seen"),
                        "last_online": entry.get("last_online"),
                        "country": cc,
                        "threat_type": "botnet_c2",
                    }
                )
                threat_id += 1
    except Exception as exc:
        logger.warning("Feodo fetch failed: %s", exc)

    try:
        resp = fetch_with_curl(
            "https://urlhaus-api.abuse.ch/v1/urls/recent/limit/100/",
            timeout=8,
        )
        if resp.status_code == 200:
            urls = (resp.json() or {}).get("urls") or []
            for u in urls:
                cc = u.get("country")
                if not cc or cc not in COUNTRY_CENTROIDS:
                    # Previously these fell back to the first centroid key,
                    # which pinned every unlocated URL to one country.
                    continue
                lng, lat = COUNTRY_CENTROIDS[cc]
                # Deterministic offset in [-5, 5) degrees.
                j_lng = ((threat_id * 137.3) % 200 - 100) / 100 * 5
                j_lat = ((threat_id * 211.7) % 200 - 100) / 100 * 5
                threats.append(
                    {
                        "id": f"urlhaus-{threat_id}",
                        "lat": lat + j_lat,
                        "lng": lng + j_lng,
                        "ip": u.get("host") or "unknown",
                        "port": 0,
                        "malware": ", ".join(u.get("tags") or []) or u.get("threat") or "malware",
                        "status": u.get("url_status") or "online",
                        # NOTE(review): the URLhaus JSON API appears to name this
                        # field "date_added"; both spellings are accepted.
                        "first_seen": u.get("date_added") or u.get("dateadded"),
                        "country": cc,
                        "threat_type": "malware_url",
                    }
                )
                threat_id += 1
    except Exception as exc:
        logger.debug("URLhaus supplement failed: %s", exc)

    payload = {
        "threats": threats,
        "total": len(threats),
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "source": "abuse.ch Feodo Tracker + URLhaus",
    }
    with _data_lock:
        latest_data["malware_threats"] = payload
    _mark_fresh("malware_threats")
    return threats
|
||||
@@ -158,21 +158,26 @@ _KEYWORD_COORDS = {
|
||||
_SORTED_KEYWORDS = sorted(_KEYWORD_COORDS.items(), key=lambda x: len(x[0]), reverse=True)
|
||||
|
||||
|
||||
def resolve_coords_match(text: str) -> tuple[tuple[float, float], str] | None:
    """Find the first keyword in ``_SORTED_KEYWORDS`` that occurs in *text*.

    Keywords that begin or end with a space are matched as plain substrings of
    the space-padded text; every other keyword must match on word boundaries.

    Returns:
        ``((lat, lng), keyword)`` for the first hit, or ``None`` if nothing matches.
    """
    haystack = f" {text} "
    for keyword, point in _SORTED_KEYWORDS:
        space_delimited = keyword.startswith(" ") or keyword.endswith(" ")
        if space_delimited:
            hit = keyword in haystack
        else:
            hit = re.search(r"\b" + re.escape(keyword) + r"\b", text) is not None
        if hit:
            return point, keyword
    return None
|
||||
|
||||
|
||||
def _resolve_coords(text: str) -> tuple[float, float] | None:
    """Return (lat, lng) for the most specific keyword match, or None.

    Delegates to ``resolve_coords_match`` and discards the matched keyword, so
    the matching rules live in one place: longer keywords are tried first,
    space-padded keywords (" us ", " uk ") use substring matching on padded
    text, and all others use a word-boundary regex.
    """
    match = resolve_coords_match(text)
    return match[0] if match else None
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=2)
|
||||
|
||||
@@ -0,0 +1,381 @@
|
||||
"""Telegram OSINT — public channel web previews (t.me/s) with keyword geoparsing."""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.fetchers.news import resolve_coords_match
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_CHANNELS = (
|
||||
"osintdefender",
|
||||
"insiderpaper",
|
||||
"aljazeeraenglish",
|
||||
"nexta_live",
|
||||
"war_monitor",
|
||||
"OSINTtechnical",
|
||||
"Liveuamap",
|
||||
)
|
||||
|
||||
# One message wrapper from a t.me/s preview page: from the opening
# "tgme_widget_message_wrap" div to the first run of three closing </div> tags
# (non-greedy).
_MESSAGE_BLOCK_RE = re.compile(
    r'<div class="tgme_widget_message_wrap js-widget_message_wrap"[\s\S]*?</div>\s*</div>\s*</div>',
    re.IGNORECASE,
)
# Group 1: inner HTML of the message text div, up to its first closing </div>.
_TEXT_RE = re.compile(
    r'<div class="tgme_widget_message_text[^>]*>([\s\S]*?)</div>',
    re.IGNORECASE,
)
# Group 1: permalink (https://t.me/...); group 2: the <time datetime="..."> value.
# No DOTALL flag, so the anchor and the <time> tag must be on the same line.
_DATE_RE = re.compile(
    r'<a class="tgme_widget_message_date" href="(https://t\.me/[^"]+)".*?<time datetime="([^"]+)"',
    re.IGNORECASE,
)
# Markers used to decide that a message block carries a video.
_HAS_VIDEO_RE = re.compile(
    r'tgme_widget_message_video|js-message_video|<video\s',
    re.IGNORECASE,
)
# Marker used to decide that a message block carries a photo.
_HAS_PHOTO_RE = re.compile(r'tgme_widget_message_photo_wrap', re.IGNORECASE)
# Group 1: the src attribute of the first <video> tag.
_VIDEO_SRC_RE = re.compile(r'<video[^>]+src="([^"]+)"', re.IGNORECASE)
# Group 1: the URL inside an inline style of the form background-image:url('...').
_BG_IMAGE_RE = re.compile(r"background-image:url\('([^']+)'\)", re.IGNORECASE)

# Host suffixes accepted by telegram_media_host_allowed. The leading dot means
# only subdomains match, never the bare domain or a look-alike such as
# "eviltelesco.pe".
_TELEGRAM_MEDIA_HOST_SUFFIXES = (".telesco.pe", ".telegram-cdn.org")
|
||||
|
||||
# Cyrillic / Arabic / Hebrew (plus a few Latin-script) place aliases for war-reporting channels; consulted only when the English resolver finds no match.
|
||||
_EXTRA_PLACE_KEYWORDS: dict[str, tuple[float, float]] = {
|
||||
"киев": (50.450, 30.523),
|
||||
"київ": (50.450, 30.523),
|
||||
"харьков": (49.993, 36.231),
|
||||
"харків": (49.993, 36.231),
|
||||
"одесса": (46.482, 30.724),
|
||||
"одеса": (46.482, 30.724),
|
||||
"донецк": (48.015, 37.803),
|
||||
"донецьк": (48.015, 37.803),
|
||||
"луганск": (48.574, 39.307),
|
||||
"луганськ": (48.574, 39.307),
|
||||
"москва": (55.755, 37.617),
|
||||
"крым": (45.000, 34.000),
|
||||
"крим": (45.000, 34.000),
|
||||
"бахмут": (48.595, 38.000),
|
||||
"запорожье": (47.838, 35.139),
|
||||
"запоріжжя": (47.838, 35.139),
|
||||
"غزة": (31.416, 34.333),
|
||||
"دمشق": (33.513, 36.276),
|
||||
"بيروت": (33.893, 35.501),
|
||||
"tel aviv": (32.085, 34.781),
|
||||
"תל אביב": (32.085, 34.781),
|
||||
}
|
||||
|
||||
# Country-level news geocodes sit on national centroids that stack with threat alerts.
|
||||
# Telegram uses major metro anchors so pins land on a different map cell than news.
|
||||
_TELEGRAM_ANCHOR_OVERRIDES: dict[str, tuple[float, float]] = {
|
||||
"israel": (32.085, 34.781), # Tel Aviv (news uses central Israel ~Jerusalem corridor)
|
||||
"middle east": (32.085, 34.781),
|
||||
"china": (39.904, 116.407), # Beijing (news uses country centroid)
|
||||
"united states": (40.712, -74.006), # New York (news uses Washington DC)
|
||||
"usa": (40.712, -74.006),
|
||||
"us": (40.712, -74.006),
|
||||
"america": (40.712, -74.006),
|
||||
"uk": (51.507, -0.127), # London
|
||||
"iran": (35.689, 51.389), # Tehran
|
||||
"russia": (55.755, 37.617), # Moscow
|
||||
"ukraine": (50.450, 30.523), # Kyiv
|
||||
"france": (48.856, 2.352), # Paris
|
||||
"germany": (52.520, 13.405), # Berlin
|
||||
"lebanon": (34.433, 35.844), # Tripoli (news uses Beirut corridor)
|
||||
}
|
||||
|
||||
_RISK_KEYWORDS = (
|
||||
"war",
|
||||
"missile",
|
||||
"strike",
|
||||
"attack",
|
||||
"crisis",
|
||||
"tension",
|
||||
"military",
|
||||
"conflict",
|
||||
"defense",
|
||||
"clash",
|
||||
"nuclear",
|
||||
"invasion",
|
||||
"bomb",
|
||||
"drone",
|
||||
"weapon",
|
||||
"sanctions",
|
||||
"ceasefire",
|
||||
"escalation",
|
||||
"killed",
|
||||
"destroyed",
|
||||
"operation",
|
||||
"casualty",
|
||||
"frontline",
|
||||
"threat",
|
||||
"explosion",
|
||||
"shelling",
|
||||
)
|
||||
|
||||
|
||||
def telegram_osint_enabled() -> bool:
    """Report whether the Telegram OSINT fetcher is switched on.

    Reads ``TELEGRAM_OSINT_ENABLED`` (default ``"true"``). The values ``0``,
    ``false``, ``no``, ``off`` and the empty string disable it, ignoring case
    and surrounding whitespace; anything else enables it.
    """
    flag = str(os.environ.get("TELEGRAM_OSINT_ENABLED", "true")).strip().lower()
    disabled_values = {"0", "false", "no", "off", ""}
    return flag not in disabled_values
|
||||
|
||||
|
||||
def _configured_channels() -> list[str]:
    """Return the channel names to scrape.

    ``TELEGRAM_OSINT_CHANNELS`` is a comma-separated list; blank items are
    dropped and leading ``@`` characters are removed. When the variable is
    unset or blank, a copy of ``_DEFAULT_CHANNELS`` is returned.
    """
    raw = str(os.environ.get("TELEGRAM_OSINT_CHANNELS", "")).strip()
    if not raw:
        return list(_DEFAULT_CHANNELS)
    channels: list[str] = []
    for item in raw.split(","):
        name = item.strip()
        if name:
            channels.append(name.lstrip("@"))
    return channels
|
||||
|
||||
|
||||
def telegram_media_host_allowed(hostname: str | None) -> bool:
    """Return True when *hostname* ends with an allowed Telegram media suffix.

    The hostname is trimmed and lower-cased first; ``None`` or a blank value is
    never allowed.
    """
    host = str(hostname or "").strip().lower()
    if host == "":
        return False
    for suffix in _TELEGRAM_MEDIA_HOST_SUFFIXES:
        if host.endswith(suffix):
            return True
    return False
|
||||
|
||||
|
||||
def _extract_media(block: str, link: str) -> dict[str, Any]:
    """Describe the media attached to one message block.

    A video marker takes precedence over a photo marker. ``media_url`` is the
    first URL found for that media kind, or ``None`` if there is none.
    ``embed_url`` is ``"<link>?embed=1"`` whenever media was detected and
    *link* is non-empty.

    Returns:
        A dict with the keys ``media_type``, ``media_url`` and ``embed_url``.
    """
    if _HAS_VIDEO_RE.search(block):
        media_type, locator = "video", _VIDEO_SRC_RE
    elif _HAS_PHOTO_RE.search(block):
        media_type, locator = "photo", _BG_IMAGE_RE
    else:
        media_type, locator = None, None

    media_url: str | None = None
    if locator is not None:
        found = locator.search(block)
        if found:
            media_url = found.group(1).strip()

    embed_url = f"{link}?embed=1" if media_type and link else None

    return {
        "media_type": media_type,
        "media_url": media_url,
        "embed_url": embed_url,
    }
|
||||
|
||||
|
||||
def _strip_html(text: str) -> str:
    """Convert a fragment of message HTML to plain text.

    ``<br>`` tags become newlines, all remaining tags are removed, HTML
    entities are decoded, and surrounding whitespace is stripped.

    Entities are decoded in a single pass with ``html.unescape``. That covers
    numeric references such as ``&#39;`` and never decodes twice, so
    ``&amp;lt;`` yields the literal text ``&lt;`` rather than ``<``.
    """
    # Function-scope import: other functions in this module use ``html`` as a
    # parameter name, so the module namespace is left untouched.
    import html as _html

    with_breaks = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
    without_tags = re.sub(r"<[^>]+>", "", with_breaks)
    return _html.unescape(without_tags).strip()
|
||||
|
||||
|
||||
def _score_risk(text: str) -> int:
    """Score *text* from 1 to 10 by counting risk keywords.

    The score starts at 1 and gains 2 for every entry of ``_RISK_KEYWORDS``
    that appears as a substring of the lower-cased text, capped at 10.
    """
    lowered = text.lower()
    keyword_hits = sum(1 for keyword in _RISK_KEYWORDS if keyword in lowered)
    return min(10, 1 + 2 * keyword_hits)
|
||||
|
||||
|
||||
def _refresh_post_coords(post: dict[str, Any]) -> dict[str, Any]:
    """Re-run geoparsing on a stored post.

    The non-blank ``title`` and ``description`` are joined with a newline and
    resolved again. If there is no text, or nothing resolves, the original
    dict is returned unchanged; otherwise a shallow copy with fresh ``coords``
    is returned.
    """
    fragments: list[str] = []
    for field in ("title", "description"):
        value = post.get(field)
        if value and str(value).strip():
            fragments.append(str(value).strip())
    text = "\n".join(fragments)
    if not text:
        return post

    coords = _resolve_telegram_coords(text)
    if not coords:
        return post
    return {**post, "coords": [coords[0], coords[1]]}
|
||||
|
||||
|
||||
def _resolve_telegram_coords(text: str) -> tuple[float, float] | None:
    """Geoparse *text* for the Telegram layer.

    The lower-cased text goes through ``resolve_coords_match`` first. On a hit,
    the entry for that keyword in ``_TELEGRAM_ANCHOR_OVERRIDES`` is returned if
    one exists, otherwise the resolver's own coordinates. With no hit, the
    aliases in ``_EXTRA_PLACE_KEYWORDS`` are tried as substrings, longest
    first. Returns ``None`` when nothing matches.
    """
    lowered = text.lower()
    hit = resolve_coords_match(lowered)
    if hit:
        news_coords, matched_keyword = hit
        override = _TELEGRAM_ANCHOR_OVERRIDES.get(matched_keyword.strip().lower())
        return override if override else news_coords

    longest_first = sorted(
        _EXTRA_PLACE_KEYWORDS.items(),
        key=lambda item: len(item[0]),
        reverse=True,
    )
    return next((coords for alias, coords in longest_first if alias in lowered), None)
|
||||
|
||||
|
||||
def _post_link(post: dict[str, Any]) -> str:
    """Return the post's ``link`` as a trimmed string ("" when missing or falsy)."""
    raw_link = post.get("link") or ""
    return str(raw_link).strip()
|
||||
|
||||
|
||||
def _extract_new_channel_posts(
    html: str,
    channel: str,
    known_links: set[str],
    *,
    bootstrap_limit: int = 12,
) -> list[dict[str, Any]]:
    """Return the posts on a channel page that have not been stored yet.

    With no known links at all (first run) the last ``bootstrap_limit`` parsed
    posts are returned. Otherwise the page is walked from its last post
    backwards, collecting posts until one with a known link is met. Posts
    without a link are ignored. The result keeps page order.
    """
    parsed = parse_telegram_channel_html(html, channel)
    if not parsed:
        return []
    if not known_links:
        return parsed[-bootstrap_limit:]

    collected_backwards: list[dict[str, Any]] = []
    for candidate in reversed(parsed):
        candidate_link = _post_link(candidate)
        if not candidate_link:
            continue
        if candidate_link in known_links:
            break
        collected_backwards.append(candidate)
    return collected_backwards[::-1]
|
||||
|
||||
|
||||
def _merge_telegram_posts(
    existing: list[dict[str, Any]],
    incoming: list[dict[str, Any]],
    *,
    max_posts: int = 120,
) -> tuple[list[dict[str, Any]], int]:
    """Merge newly scraped posts into the stored ones, de-duplicated by link.

    Args:
        existing: Posts already stored. This list is not modified.
        incoming: Candidate posts; those without a link, or whose link is
            already present, are dropped.
        max_posts: Maximum number of posts kept after sorting.

    Returns:
        ``(posts, added)``. ``posts`` is a new list sorted by the ``published``
        string, newest first, and cut to ``max_posts``. ``added`` counts the
        incoming posts accepted before that cut.
    """
    # Work on a copy so the caller's list is neither extended nor re-sorted.
    merged = list(existing)
    known_links = {link for link in map(_post_link, merged) if link}
    added = 0
    for post in incoming:
        link = _post_link(post)
        if not link or link in known_links:
            continue
        known_links.add(link)
        merged.append(post)
        added += 1
    merged.sort(key=lambda p: str(p.get("published") or ""), reverse=True)
    return merged[:max_posts], added
|
||||
|
||||
|
||||
def parse_telegram_channel_html(html: str, channel: str) -> list[dict[str, Any]]:
    """Turn the HTML of a public t.me/s channel preview into post dicts.

    Message blocks without a text div, or with fewer than 10 characters of
    plain text, are skipped. When a block has no date anchor, the link falls
    back to the channel URL and ``published`` to the current UTC time.

    Each post carries: ``id`` (first 16 hex characters of SHA-1 over
    ``"<link>|<published>"``), ``title`` (first line, at most 160 characters),
    ``description`` (at most 1200 characters), ``link``, ``published``,
    ``source``, ``channel``, ``risk_score``, ``coords`` (``[a, b]`` or
    ``None``) and the media fields from ``_extract_media``.
    """
    posts: list[dict[str, Any]] = []
    for block in _MESSAGE_BLOCK_RE.findall(html or ""):
        body = _TEXT_RE.search(block)
        if body is None:
            continue
        text = _strip_html(body.group(1))
        if len(text) < 10:
            continue

        stamp = _DATE_RE.search(block)
        if stamp:
            link, published = stamp.group(1), stamp.group(2)
        else:
            link = f"https://t.me/{channel}"
            published = datetime.now(timezone.utc).isoformat()

        first_line = text.split("\n", 1)[0]
        risk_score = _score_risk(text)
        coords = _resolve_telegram_coords(text)
        digest = hashlib.sha1(f"{link}|{published}".encode("utf-8")).hexdigest()

        post: dict[str, Any] = {
            "id": digest[:16],
            "title": first_line[:160],
            "description": text[:1200],
            "link": link,
            "published": published,
            "source": f"t.me/{channel}",
            "channel": channel,
            "risk_score": risk_score,
            "coords": [coords[0], coords[1]] if coords else None,
        }
        post.update(_extract_media(block, link))
        posts.append(post)
    return posts
|
||||
|
||||
|
||||
def fetch_telegram_osint() -> dict[str, Any]:
    """Scrape the configured public Telegram channels and update the store.

    Flow:
      1. If the ``telegram_osint`` layer is inactive, return the stored payload
         (or an empty one) without any network access.
      2. If the feature is disabled through the environment, store and return
         an empty payload flagged ``"disabled": True``.
      3. Otherwise fetch ``https://t.me/s/<channel>`` for every configured
         channel, keep only posts whose link is not already known, merge them
         with the stored posts, re-run geoparsing on every retained post, and
         write the new payload to ``latest_data["telegram_osint"]``.

    A failure on one channel is logged and does not stop the others.

    Returns:
        The payload dict: ``posts``, ``total``, ``geolocated``, ``timestamp``,
        ``channels`` and ``last_fetch_new`` on a normal run.
    """
    if not is_any_active("telegram_osint"):
        return latest_data.get("telegram_osint") or {"posts": [], "total": 0, "timestamp": None}

    if not telegram_osint_enabled():
        with _data_lock:
            latest_data["telegram_osint"] = {"posts": [], "total": 0, "timestamp": None, "disabled": True}
        _mark_fresh("telegram_osint")
        return latest_data["telegram_osint"]

    # Browser-like User-Agent that still embeds the project's own identifier.
    headers = {
        "User-Agent": (
            f"Mozilla/5.0 (compatible; {outbound_user_agent('telegram-osint')}) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml",
    }

    # Snapshot the stored posts under the lock; the network calls below run
    # without holding it.
    with _data_lock:
        prior = latest_data.get("telegram_osint") or {}
        existing_posts = list(prior.get("posts") or [])

    known_links = {_post_link(post) for post in existing_posts if _post_link(post)}
    incoming: list[dict[str, Any]] = []

    for channel in _configured_channels():
        url = f"https://t.me/s/{channel}"
        try:
            resp = fetch_with_curl(url, timeout=15, headers=headers)
            if not resp or resp.status_code != 200:
                logger.warning(
                    "Telegram channel %s fetch failed: HTTP %s",
                    channel,
                    resp.status_code if resp else "no response",
                )
                continue
            channel_new = _extract_new_channel_posts(resp.text, channel, known_links)
            for post in channel_new:
                link = _post_link(post)
                # known_links grows as we go, so a link seen on an earlier
                # channel in this run is not queued twice.
                if not link or link in known_links:
                    continue
                known_links.add(link)
                incoming.append(post)
        except Exception as exc:
            logger.warning("Telegram channel %s parse failed: %s", channel, exc)

    merged_posts, added = _merge_telegram_posts(existing_posts, incoming)
    # Re-geoparse everything retained so older posts pick up changes to the
    # keyword and anchor tables.
    merged_posts = [_refresh_post_coords(post) for post in merged_posts]
    geolocated = sum(1 for p in merged_posts if p.get("coords"))

    payload = {
        "posts": merged_posts,
        "total": len(merged_posts),
        "geolocated": geolocated,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "channels": _configured_channels(),
        "last_fetch_new": added,
    }

    with _data_lock:
        latest_data["telegram_osint"] = payload
    _mark_fresh("telegram_osint")
    logger.info(
        "Telegram OSINT: +%s new, %s retained (%s geolocated)",
        added,
        len(merged_posts),
        geolocated,
    )
    return payload
|
||||
@@ -0,0 +1,94 @@
|
||||
"""Country risk index (static scores + USGS quake enrichment)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
RISK_FACTORS: dict[str, dict[str, Any]] = {
|
||||
"UA": {"base": 85, "tags": ["active_conflict", "infrastructure_damage"]},
|
||||
"RU": {"base": 72, "tags": ["sanctions", "military_mobilization"]},
|
||||
"IL": {"base": 78, "tags": ["active_conflict", "regional_instability"]},
|
||||
"PS": {"base": 90, "tags": ["active_conflict", "humanitarian_crisis"]},
|
||||
"SY": {"base": 82, "tags": ["post_conflict", "infrastructure_damage"]},
|
||||
"YE": {"base": 88, "tags": ["active_conflict", "humanitarian_crisis"]},
|
||||
"MM": {"base": 76, "tags": ["civil_unrest", "military_junta"]},
|
||||
"SD": {"base": 84, "tags": ["active_conflict", "humanitarian_crisis"]},
|
||||
"AF": {"base": 80, "tags": ["post_conflict", "governance_collapse"]},
|
||||
"KP": {"base": 70, "tags": ["nuclear_risk", "isolation"]},
|
||||
"IR": {"base": 68, "tags": ["sanctions", "nuclear_program", "regional_proxy"]},
|
||||
"CN": {"base": 35, "tags": ["strategic_competition", "taiwan_tensions"]},
|
||||
"TW": {"base": 45, "tags": ["invasion_risk", "semiconductor_dependency"]},
|
||||
"VE": {"base": 60, "tags": ["economic_collapse", "political_instability"]},
|
||||
"HT": {"base": 85, "tags": ["gang_violence", "governance_collapse"]},
|
||||
"LB": {"base": 65, "tags": ["economic_crisis", "political_deadlock"]},
|
||||
"PK": {"base": 55, "tags": ["terrorism", "political_instability"]},
|
||||
"SO": {"base": 82, "tags": ["terrorism", "state_fragility"]},
|
||||
"LY": {"base": 72, "tags": ["divided_government", "militia_control"]},
|
||||
"ET": {"base": 62, "tags": ["ethnic_tensions", "regional_conflicts"]},
|
||||
}
|
||||
|
||||
# Stock exchanges shown in the country-risk payload.
# "tz" is an IANA zone name; "open" and "close" are local wall-clock hours as
# decimals (9.5 means 09:30). _exchange_open treats the window as
# [open, close) on weekdays only; public holidays are not modelled.
EXCHANGES = [
    {"name": "NYSE", "tz": "America/New_York", "open": 9.5, "close": 16, "country": "US"},
    {"name": "NASDAQ", "tz": "America/New_York", "open": 9.5, "close": 16, "country": "US"},
    {"name": "LSE", "tz": "Europe/London", "open": 8, "close": 16.5, "country": "GB"},
    {"name": "TSE", "tz": "Asia/Tokyo", "open": 9, "close": 15, "country": "JP"},
    {"name": "SSE", "tz": "Asia/Shanghai", "open": 9.5, "close": 15, "country": "CN"},
    {"name": "HKEX", "tz": "Asia/Hong_Kong", "open": 9.5, "close": 16, "country": "HK"},
    {"name": "FRA", "tz": "Europe/Berlin", "open": 8, "close": 20, "country": "DE"},
    {"name": "TSX", "tz": "America/Toronto", "open": 9.5, "close": 16, "country": "CA"},
    {"name": "MOEX", "tz": "Europe/Moscow", "open": 10, "close": 18.5, "country": "RU"},
]
|
||||
|
||||
|
||||
def _exchange_open(ex: dict[str, Any]) -> bool:
    """Tell whether exchange *ex* is inside its trading window right now.

    Uses the current time in ``ex["tz"]``. Saturdays and Sundays are always
    closed; on other days the exchange is open while
    ``ex["open"] <= hour < ex["close"]``, with the hour expressed as a decimal.
    Any error (unknown zone, missing key, ...) is reported as closed.
    """
    try:
        local_now = datetime.now(ZoneInfo(ex["tz"]))
        is_weekend = local_now.weekday() >= 5
        if is_weekend:
            return False
        hour_of_day = local_now.hour + local_now.minute / 60
        opens_at, closes_at = ex["open"], ex["close"]
        return opens_at <= hour_of_day < closes_at
    except Exception:
        return False
|
||||
|
||||
|
||||
# English country names as they can appear in the USGS ``place`` text, keyed by
# the two-letter codes used in RISK_FACTORS. Names are matched as whole words.
_QUAKE_PLACE_NAMES: dict[str, tuple[str, ...]] = {
    "UA": ("ukraine",),
    "RU": ("russia",),
    "IL": ("israel",),
    "PS": ("palestine", "palestinian", "gaza", "west bank"),
    "SY": ("syria",),
    "YE": ("yemen",),
    "MM": ("myanmar", "burma"),
    "SD": ("sudan",),
    "AF": ("afghanistan",),
    "KP": ("north korea",),
    "IR": ("iran",),
    "CN": ("china",),
    "TW": ("taiwan",),
    "VE": ("venezuela",),
    "HT": ("haiti",),
    "LB": ("lebanon",),
    "PK": ("pakistan",),
    "SO": ("somalia",),
    "LY": ("libya",),
    "ET": ("ethiopia",),
}


def _quake_country_codes(place: str) -> set[str]:
    """Return the country codes whose name occurs as whole words in *place*."""
    # Replace every non-letter with a space so names only match whole words
    # ("iran" must not match "Tirana").
    words = "".join(ch if ch.isalpha() else " " for ch in place.lower()).split()
    padded = f" {' '.join(words)} "
    # South Sudan is a different country from Sudan (SD).
    padded = padded.replace(" south sudan ", " ")
    return {
        code
        for code, names in _QUAKE_PLACE_NAMES.items()
        if any(f" {name} " in padded for name in names)
    }


def build_country_risk_payload() -> dict[str, Any]:
    """Build the country-risk payload: static scores plus quake enrichment.

    For every M4.5+ earthquake in the USGS past-day feed, the magnitude is
    added to each tracked country named in the event's ``place`` text.
    Countries are matched by name. The previous substring test on two-letter
    codes matched ordinary words: "so" in "south of", "ru" in "Peru".

    The feed is best-effort: on any failure the static base scores are used.

    Returns:
        A dict with ``countries`` (sorted by ``risk_score``, highest first),
        ``exchanges``, ``open_exchanges``, ``total_exchanges`` and
        ``timestamp``.
    """
    quake_risks: dict[str, float] = {}
    try:
        resp = fetch_with_curl(
            "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_day.geojson",
            timeout=5,
        )
        if resp.status_code == 200:
            for f in resp.json().get("features") or []:
                props = f.get("properties") or {}
                place = props.get("place") or ""
                mag = props.get("mag") or 0
                for code in _quake_country_codes(place):
                    if code in RISK_FACTORS:
                        quake_risks[code] = quake_risks.get(code, 0) + mag
    except Exception as exc:
        # Enrichment is optional, but leave a trace instead of failing silently.
        import logging

        logging.getLogger(__name__).debug("USGS quake enrichment skipped: %s", exc)

    countries = []
    for code, data in RISK_FACTORS.items():
        base = data["base"]
        score = min(100, base + quake_risks.get(code, 0))
        countries.append(
            {
                "code": code,
                "risk_score": score,
                # NOTE(review): the level is derived from the static base, not
                # the quake-adjusted score -- confirm this is intended.
                "risk_level": "CRITICAL" if base >= 80 else "HIGH" if base >= 60 else "ELEVATED" if base >= 40 else "LOW",
                "tags": data["tags"],
            }
        )
    countries.sort(key=lambda c: c["risk_score"], reverse=True)
    exchanges = [{"name": e["name"], "country": e["country"], "open": _exchange_open(e)} for e in EXCHANGES]
    return {
        "countries": countries,
        "exchanges": exchanges,
        "open_exchanges": sum(1 for e in exchanges if e["open"]),
        "total_exchanges": len(exchanges),
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
|
||||
@@ -0,0 +1 @@
|
||||
"""Operator-initiated OSINT lookups (server-side proxies)."""
|
||||
@@ -0,0 +1,492 @@
|
||||
"""Server-side OSINT lookups (Osiris port, HTTPS outbound only)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.sanctions.ofac import match_exact, search_sanctions
|
||||
from services.ssrf_guard import safe_get, validate_domain, validate_host
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Shape checks only: these patterns do not prove a value is a valid address.
# Dotted quad of 1-3 digit groups; octet range is not checked ("999.1.1.1" passes).
_IPV4_RE = re.compile(r"^(\d{1,3}\.){3}\d{1,3}$")
# Any run of hex digits and colons; plain hex words such as "dead" also pass.
_IPV6_RE = re.compile(r"^[0-9a-fA-F:]+$")
# CVE identifier: "CVE-", a 4-digit year, then at least 4 digits (case-insensitive).
_CVE_RE = re.compile(r"^CVE-\d{4}-\d{4,}$", re.I)
# AS number, with or without the "AS" prefix (case-insensitive).
_ASN_RE = re.compile(r"^(AS)?\d+$", re.I)
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string with a +00:00 offset."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
|
||||
|
||||
|
||||
def _json_get(url: str, *, timeout: float = 8.0, headers: dict[str, str] | None = None) -> Any:
    """GET *url* and return the decoded JSON body.

    When *headers* is empty or ``None``, ``Accept: application/json`` is sent.
    Returns ``None`` for any status other than 200 and for a body that cannot
    be decoded as JSON.
    """
    request_headers = headers if headers else {"Accept": "application/json"}
    resp = fetch_with_curl(url, timeout=timeout, headers=request_headers)
    if resp.status_code == 200:
        try:
            return resp.json()
        except Exception:
            return None
    return None
|
||||
|
||||
|
||||
def _sanctions_hits(*values: str) -> list[dict[str, Any]] | None:
    """Look each distinct non-empty value up with ``match_exact``.

    Returns:
        One ``{"matched_value": ..., "entries": ...}`` dict per value that
        produced entries, in argument order, or ``None`` when nothing matched.
    """
    already_checked: set[str] = set()
    found: list[dict[str, Any]] = []
    for candidate in values:
        if candidate and candidate not in already_checked:
            already_checked.add(candidate)
            entries = match_exact(candidate)
            if entries:
                found.append({"matched_value": candidate, "entries": entries})
    return found if found else None
|
||||
|
||||
|
||||
def lookup_ip(ip: str) -> dict[str, Any]:
    """Look up geolocation and a simple reputation summary for an IP address.

    Args:
        ip: An IPv4 or IPv6 address literal, without brackets.

    Returns:
        A dict with ``ip`` and ``timestamp``. When the geolocation service
        answers successfully it also holds ``geo`` and ``reputation``, plus
        ``sanctions_match`` if the org, ISP or AS name matched exactly.

    Raises:
        ValueError: If *ip* is not a valid address, or the host check rejects it.
    """
    if not _IPV4_RE.match(ip) and not _IPV6_RE.match(ip):
        raise ValueError("Invalid IP format")
    # The regexes only check the overall shape: "999.1.1.1", "dead" and
    # "1.2.3.4\n" all pass them. Parse the value to be sure it is an address.
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        raise ValueError("Invalid IP format") from None

    check = validate_host(ip)
    if not check.get("ok"):
        raise ValueError(check.get("reason", "blocked IP"))

    results: dict[str, Any] = {"ip": ip, "timestamp": _now_iso()}
    fields = (
        "status,message,continent,country,countryCode,region,regionName,city,zip,"
        "lat,lon,timezone,isp,org,as,asname,mobile,proxy,hosting,query"
    )
    # NOTE(review): ip-api.com's free tier is believed to be HTTP-only --
    # confirm that this HTTPS endpoint actually answers.
    geo = _json_get(f"https://ip-api.com/json/{quote(ip)}?fields={fields}", timeout=5)
    if isinstance(geo, dict) and geo.get("status") == "success":
        results["geo"] = {
            "country": geo.get("country"),
            "country_code": geo.get("countryCode"),
            "region": geo.get("regionName"),
            "city": geo.get("city"),
            "lat": geo.get("lat"),
            "lon": geo.get("lon"),
            "timezone": geo.get("timezone"),
            "isp": geo.get("isp"),
            "org": geo.get("org"),
            "as_number": geo.get("as"),
            "as_name": geo.get("asname"),
            "is_mobile": geo.get("mobile"),
            "is_proxy": geo.get("proxy"),
            "is_hosting": geo.get("hosting"),
        }
        results["reputation"] = {
            "is_proxy": bool(geo.get("proxy")),
            "is_hosting": bool(geo.get("hosting")),
            "is_mobile": bool(geo.get("mobile")),
            # A proxy flag outranks a hosting flag.
            "risk_level": "HIGH" if geo.get("proxy") else "MEDIUM" if geo.get("hosting") else "LOW",
        }
        sm = _sanctions_hits(geo.get("org") or "", geo.get("isp") or "", geo.get("asname") or "")
        if sm:
            results["sanctions_match"] = {"source": "OFAC SDN", "hits": sm}
    return results
|
||||
|
||||
|
||||
def lookup_dns(domain: str) -> dict[str, Any]:
    """Resolve the common DNS record types for *domain* over DNS-over-HTTPS.

    Queries dns.google for A, AAAA, MX, NS, TXT, CNAME and SOA records. A type
    whose lookup fails or has no answers is reported as an empty list.

    Returns:
        A dict with ``domain``, ``records`` (record type -> list of answers),
        ``timestamp`` and ``summary`` (IP addresses, mail servers,
        nameservers, total record count).

    Raises:
        ValueError: If *domain* does not pass ``validate_domain``.
    """
    if not validate_domain(domain):
        raise ValueError("Invalid domain format")

    results: dict[str, Any] = {"domain": domain, "records": {}, "timestamp": _now_iso()}
    records = results["records"]

    for rtype in ("A", "AAAA", "MX", "NS", "TXT", "CNAME", "SOA"):
        data = _json_get(
            f"https://dns.google/resolve?name={quote(domain)}&type={rtype}",
            timeout=5,
        )
        raw_answers = (data.get("Answer") or []) if isinstance(data, dict) else []
        records[rtype] = [
            {
                "name": ans.get("name"),
                "type": ans.get("type"),
                "ttl": ans.get("TTL"),
                "data": ans.get("data"),
            }
            for ans in raw_answers
        ]

    def _values(rtype: str) -> list[Any]:
        # Non-empty "data" values of one record type.
        return [rec["data"] for rec in (records.get(rtype) or []) if rec.get("data")]

    results["summary"] = {
        "ip_addresses": _values("A"),
        "mail_servers": _values("MX"),
        "nameservers": _values("NS"),
        "total_records": sum(len(answers) for answers in records.values()),
    }
    return results
|
||||
|
||||
|
||||
def lookup_whois(domain: str) -> dict[str, Any]:
    """Collect RDAP registration data and a security-header score for *domain*.

    Two independent steps:
      * RDAP (rdap.org): handle, status, events, nameservers and contact
        entities, plus an exact sanctions check on entity names and orgs.
      * An HTTPS GET to the domain through ``safe_get`` to read selected
        response headers and derive a header score. A failure here is logged
        at debug level and leaves the RDAP result intact.

    Returns:
        A dict with ``domain`` and ``timestamp``, plus whichever of ``rdap``,
        ``registration``, ``expiration``, ``last_changed``,
        ``sanctions_match``, ``http`` and ``security_score`` could be obtained.

    Raises:
        ValueError: If *domain* does not pass ``validate_domain``.
    """
    if not validate_domain(domain):
        raise ValueError("Invalid domain format")
    results: dict[str, Any] = {"domain": domain, "timestamp": _now_iso()}

    rdap = _json_get(f"https://rdap.org/domain/{quote(domain)}", timeout=8)
    if isinstance(rdap, dict):
        entities = []
        for ent in rdap.get("entities") or []:
            if not isinstance(ent, dict):
                continue
            vcard = ent.get("vcardArray")
            name = org = None
            if isinstance(vcard, list) and len(vcard) > 1 and isinstance(vcard[1], list):
                for row in vcard[1]:
                    # jCard properties are [name, params, type, value]; skip
                    # anything shorter instead of raising IndexError.
                    if not isinstance(row, list) or len(row) < 4:
                        continue
                    value = row[3]
                    # Structured values (e.g. a multi-part org) arrive as
                    # lists; flatten them so they stay hashable downstream.
                    if isinstance(value, list):
                        value = " ".join(str(part) for part in value if part)
                    if row[0] == "fn":
                        name = value
                    elif row[0] == "org":
                        org = value
            if name or org:
                entities.append({"handle": ent.get("handle"), "roles": ent.get("roles"), "name": name, "org": org})
        events = [
            {"action": e.get("eventAction"), "date": e.get("eventDate")}
            for e in (rdap.get("events") or [])
        ]
        results["rdap"] = {
            "handle": rdap.get("handle"),
            "name": rdap.get("ldhName"),
            "status": rdap.get("status"),
            "events": events,
            "nameservers": [ns.get("ldhName") for ns in (rdap.get("nameservers") or [])],
            "entities": entities,
        }
        results["registration"] = next((e["date"] for e in events if e["action"] == "registration"), None)
        results["expiration"] = next((e["date"] for e in events if e["action"] == "expiration"), None)
        results["last_changed"] = next((e["date"] for e in events if e["action"] == "last changed"), None)
        sm = _sanctions_hits(*(e.get("name") or "" for e in entities), *(e.get("org") or "" for e in entities))
        if sm:
            results["sanctions_match"] = {"source": "OFAC SDN", "hits": sm}

    try:
        res = safe_get(f"https://{domain}", timeout=5, headers={"User-Agent": "Shadowbroker-OSINT/1.0"})
        headers = {}
        for h in (
            "server",
            "x-powered-by",
            "x-frame-options",
            "strict-transport-security",
            "content-security-policy",
            "x-content-type-options",
            "x-xss-protection",
            "referrer-policy",
            "permissions-policy",
        ):
            val = res.headers.get(h)
            if val:
                headers[h] = val
        # Same points as before (1 per header, +2 extra for HSTS and CSP), now
        # in one table so the reported maximum is derived from it. The old
        # hard-coded maximum of 7 was lower than the reachable score of 9.
        weights = {
            "strict-transport-security": 3,
            "content-security-policy": 3,
            "x-frame-options": 1,
            "x-content-type-options": 1,
            "referrer-policy": 1,
        }
        score = sum(points for header, points in weights.items() if header in headers)
        results["http"] = {"status": res.status_code, "headers": headers, "final_url": res.url}
        results["security_score"] = {
            "score": score,
            "max": sum(weights.values()),
            "grade": "A" if score >= 5 else "B" if score >= 3 else "C" if score >= 1 else "F",
        }
    except Exception as exc:
        logger.debug("WHOIS header probe failed for %s: %s", domain, exc)
    return results
|
||||
|
||||
|
||||
def lookup_certs(domain: str) -> dict[str, Any]:
    """List certificate-transparency entries and subdomains for *domain*.

    Queries crt.sh for ``%.<domain>`` and inspects the first 200 rows,
    de-duplicated by common name and serial number.

    A name is kept as a subdomain only if it equals *domain* or ends with
    ``"." + domain`` (case-insensitive), so a look-alike such as
    ``evilexample.com`` on a shared certificate is not reported for
    ``example.com``. Names are lower-cased and a leading ``*.`` is removed.

    Returns:
        A dict with ``domain``, ``certificates`` (at most 50), ``subdomains``
        (sorted, at most 100), ``total_found`` and ``timestamp``. If crt.sh
        does not answer with HTTP 200: ``domain``, an empty ``certificates``
        list and ``error``.

    Raises:
        ValueError: If *domain* does not pass ``validate_domain``.
    """
    if not validate_domain(domain):
        raise ValueError("Invalid domain format")
    resp = fetch_with_curl(
        f"https://crt.sh/?q=%25.{quote(domain)}&output=json",
        timeout=10,
        headers={"User-Agent": "Shadowbroker-OSINT/1.0"},
    )
    if resp.status_code != 200:
        return {"domain": domain, "certificates": [], "error": "crt.sh unavailable"}
    try:
        certs = resp.json()
    except Exception:
        certs = []
    # An error object or null instead of the expected array must not break
    # the slicing below.
    if not isinstance(certs, list):
        certs = []

    apex = domain.lower().rstrip(".")
    dotted_apex = "." + apex

    seen: set[str] = set()
    subdomains: set[str] = set()
    unique: list[dict[str, Any]] = []
    for cert in certs[:200]:
        if not isinstance(cert, dict):
            continue
        key = f"{cert.get('common_name')}-{cert.get('serial_number')}"
        if key in seen:
            continue
        seen.add(key)
        for name in (cert.get("name_value") or "").split("\n"):
            clean = name.strip().lower().replace("*.", "")
            if clean == apex or clean.endswith(dotted_apex):
                subdomains.add(clean)
        unique.append(
            {
                "id": cert.get("id"),
                "issuer": cert.get("issuer_name"),
                "common_name": cert.get("common_name"),
                "not_before": cert.get("not_before"),
                "not_after": cert.get("not_after"),
            }
        )
    return {
        "domain": domain,
        "certificates": unique[:50],
        "subdomains": sorted(subdomains)[:100],
        "total_found": len(certs),
        "timestamp": _now_iso(),
    }
|
||||
|
||||
|
||||
def lookup_threats(query: str | None = None) -> dict[str, Any]:
    """Return recent OTX pulses and, optionally, indicator context for *query*.

    Without a query only the latest pulses are fetched. With an IPv4-shaped
    query the Tor bulk exit list and the OTX IPv4 indicator are consulted;
    with a valid domain, the OTX domain indicator. Any other query adds
    nothing beyond the pulses.

    Returns:
        A dict with ``timestamp`` and ``threat_level`` (HIGH above 5 pulses,
        MEDIUM above 0, else LOW -- so LOW when there is no OTX data), plus
        ``pulses``, ``tor_exit_node`` and ``otx`` when available.
    """
    results: dict[str, Any] = {"timestamp": _now_iso()}
    pulses = _json_get("https://otx.alienvault.com/api/v1/pulses/activity?limit=10", timeout=8)
    if isinstance(pulses, dict):
        results["pulses"] = [
            {
                "name": p.get("name"),
                "description": (p.get("description") or "")[:200],
                "created": p.get("created"),
                "tags": (p.get("tags") or [])[:5],
                "adversary": p.get("adversary"),
                "indicators_count": p.get("indicator_count"),
            }
            for p in (pulses.get("results") or [])[:10]
        ]
    if query:
        if _IPV4_RE.match(query):
            try:
                tor_resp = fetch_with_curl("https://check.torproject.org/torbulkexitlist", timeout=5)
                # True/False when the list was fetched; None means "unknown".
                results["tor_exit_node"] = query in (tor_resp.text or "").splitlines() if tor_resp.status_code == 200 else None
            except Exception:
                results["tor_exit_node"] = None
            otx = _json_get(f"https://otx.alienvault.com/api/v1/indicators/IPv4/{quote(query)}/general", timeout=5)
            if isinstance(otx, dict):
                results["otx"] = {
                    "reputation": otx.get("reputation"),
                    "pulse_count": (otx.get("pulse_info") or {}).get("count", 0),
                    "country": otx.get("country_name"),
                    "asn": otx.get("asn"),
                }
        elif validate_domain(query):
            otx = _json_get(f"https://otx.alienvault.com/api/v1/indicators/domain/{quote(query)}/general", timeout=5)
            if isinstance(otx, dict):
                results["otx"] = {"pulse_count": (otx.get("pulse_info") or {}).get("count", 0)}
    # Computed on every call, including when no query was given.
    pulse_count = (results.get("otx") or {}).get("pulse_count", 0)
    results["threat_level"] = "HIGH" if pulse_count > 5 else "MEDIUM" if pulse_count > 0 else "LOW"
    return results
|
||||
|
||||
|
||||
def lookup_bgp(query: str) -> dict[str, Any]:
    """Look up BGP routing data for an IPv4 address or an AS number via bgpview.io.

    Args:
        query: An IPv4 address, or an AS number accepted by ``_ASN_RE``.

    Returns:
        A dict with ``query``, ``timestamp`` and ``type`` (``"ip"`` or
        ``"asn"``), plus whichever of ``ip``, ``asn``, ``prefixes`` and
        ``peers`` the API answered with ``status == "ok"``.

    Raises:
        ValueError: If ``query`` is neither an IPv4 address nor an AS number.
    """

    def _ok_body(url: str) -> dict[str, Any] | None:
        # Only JSON objects that report status "ok" are considered usable.
        body = _json_get(url, timeout=8)
        if isinstance(body, dict) and body.get("status") == "ok":
            return body
        return None

    results: dict[str, Any] = {"query": query, "timestamp": _now_iso()}

    if _IPV4_RE.match(query):
        ip_body = _ok_body(f"https://api.bgpview.io/ip/{quote(query)}")
        if ip_body is not None:
            results["ip"] = ip_body.get("data")
        results["type"] = "ip"
        return results

    if not _ASN_RE.match(query):
        raise ValueError("Unrecognized query format. Use IP address or AS number.")

    asn_num = re.sub(r"^AS", "", query, flags=re.I)
    asn_body = _ok_body(f"https://api.bgpview.io/asn/{asn_num}")
    prefix_body = _ok_body(f"https://api.bgpview.io/asn/{asn_num}/prefixes")
    peer_body = _ok_body(f"https://api.bgpview.io/asn/{asn_num}/peers")

    if asn_body is not None:
        results["asn"] = asn_body.get("data")
    if prefix_body is not None:
        prefix_data = prefix_body.get("data") or {}
        v4_prefixes = prefix_data.get("ipv4_prefixes") or []
        v6_prefixes = prefix_data.get("ipv6_prefixes") or []
        # Lists are truncated for the response; totals report the full size.
        results["prefixes"] = {
            "ipv4": v4_prefixes[:20],
            "ipv6": v6_prefixes[:10],
            "total_v4": len(v4_prefixes),
            "total_v6": len(v6_prefixes),
        }
    if peer_body is not None:
        v4_peers = (peer_body.get("data") or {}).get("ipv4_peers") or []
        results["peers"] = {"upstream": v4_peers[:10], "total": len(v4_peers)}
    results["type"] = "asn"
    return results
|
||||
|
||||
|
||||
def lookup_sanctions(query: str, *, schema: str | None = None, limit: int = 25) -> dict[str, Any]:
    """Search the sanctions index and wrap the hits in a response envelope.

    Args:
        query: Name to search for.
        schema: Optional schema filter forwarded to ``search_sanctions``.
        limit: Maximum number of matches forwarded to ``search_sanctions``.

    Returns:
        A dict echoing ``query`` and ``schema`` with ``total``, ``matches``,
        a fixed ``source`` label and a ``timestamp``.
    """
    hits = search_sanctions(query, schema=schema, limit=limit)
    envelope: dict[str, Any] = {"query": query, "schema": schema}
    envelope["total"] = len(hits)
    envelope["matches"] = hits
    envelope["source"] = "OpenSanctions / US OFAC SDN"
    envelope["timestamp"] = _now_iso()
    return envelope
|
||||
|
||||
|
||||
def lookup_cve(cve: str) -> dict[str, Any]:
    """Fetch a CVE summary from the MITRE CVE API, falling back to CIRCL.

    Args:
        cve: CVE identifier; it must match ``_CVE_RE`` and is upper-cased
            before being used in the request.

    Returns:
        A dict with ``id``, ``description`` and ``timestamp``. The CIRCL
        fallback additionally carries ``cvss`` and up to five ``references``.

    Raises:
        ValueError: If the identifier is malformed or neither source returns
            a usable record.
    """
    if not _CVE_RE.match(cve):
        raise ValueError("Invalid CVE format")
    cve_id = cve.upper()

    primary = _json_get(f"https://cveawg.mitre.org/api/cve/{quote(cve_id)}", timeout=8)
    if isinstance(primary, dict) and primary.get("cveMetadata"):
        meta = primary["cveMetadata"]
        blocks = (primary.get("containers") or {}).get("cna", {}).get("descriptions") or []
        english = [block.get("value") for block in blocks if block.get("lang") == "en"]
        # The last non-empty English description wins.
        populated = [text for text in english if text]
        desc = populated[-1] if populated else ""
        return {"id": meta.get("cveId", cve_id), "description": desc or "No description.", "timestamp": _now_iso()}

    fallback = _json_get(f"https://cve.circl.lu/api/cve/{quote(cve_id)}", timeout=8)
    if not isinstance(fallback, dict):
        raise ValueError("CVE not found")
    return {
        "id": fallback.get("id", cve_id),
        "description": fallback.get("summary") or "No description.",
        "cvss": fallback.get("cvss"),
        "references": (fallback.get("references") or [])[:5],
        "timestamp": _now_iso(),
    }
|
||||
|
||||
|
||||
def lookup_mac(mac: str) -> dict[str, Any]:
    """Resolve the vendor of a MAC address via api.macvendors.com.

    The input is trimmed, upper-cased and stripped of every character other
    than hex digits, ``:`` and ``-`` before it is sent.

    Returns:
        ``{"mac": <cleaned input>, "vendor": <name or "Not Found">}``.
    """
    clean = re.sub(r"[^A-F0-9:-]", "", mac.strip().upper())
    data = _json_get(f"https://api.macvendors.com/{quote(clean)}", timeout=8)

    vendor: Any = "Not Found"
    if isinstance(data, dict):
        vendor = data.get("company") or data.get("organization") or "Not Found"
    elif isinstance(data, str) and data:
        # A plain-string response is treated as the vendor name itself.
        vendor = data
    return {"mac": clean, "vendor": vendor}
|
||||
|
||||
|
||||
def lookup_github(username: str) -> dict[str, Any]:
    """Fetch a GitHub user's public profile and their most recently updated repos.

    Args:
        username: GitHub login. It is percent-encoded with no safe characters
            so it can only ever occupy a single URL path segment.

    Returns:
        A dict with ``username``, a ``profile`` sub-dict, up to ten ``repos``
        (name, language, stars) and a ``timestamp``. ``repos`` is empty when
        the repository request does not return a JSON list.

    Raises:
        ValueError: If the user lookup does not return a profile object.
    """
    # safe="" also escapes "/", which quote() leaves untouched by default and
    # which would otherwise let the input redirect the request to another path.
    login = quote(username, safe="")
    user = _json_get(f"https://api.github.com/users/{login}", timeout=8)
    if not isinstance(user, dict) or user.get("message") == "Not Found":
        raise ValueError("GitHub user not found")
    repos = _json_get(f"https://api.github.com/users/{login}/repos?per_page=10&sort=updated", timeout=8)
    # Error payloads (e.g. rate limiting) are JSON objects, not lists; slicing
    # one would raise, so anything that is not a list counts as "no repos".
    repo_list = repos if isinstance(repos, list) else []
    return {
        "username": username,
        "profile": {
            "name": user.get("name"),
            "bio": user.get("bio"),
            "company": user.get("company"),
            "location": user.get("location"),
            "public_repos": user.get("public_repos"),
            "followers": user.get("followers"),
            "created_at": user.get("created_at"),
            "html_url": user.get("html_url"),
        },
        "repos": [
            {"name": r.get("name"), "language": r.get("language"), "stars": r.get("stargazers_count")}
            for r in repo_list[:10]
            if isinstance(r, dict)
        ],
        "timestamp": _now_iso(),
    }
|
||||
|
||||
|
||||
def lookup_leaks(email: str) -> dict[str, Any]:
    """Check an e-mail address against the public leakcheck.io endpoint.

    Returns:
        A dict with ``email``, ``found``, ``sources`` and ``timestamp``. When
        the service does not return a JSON object the address is reported as
        not found with no sources.

    Raises:
        ValueError: If ``email`` has no ``@`` or is shorter than five characters.
    """
    if "@" not in email or len(email) < 5:
        raise ValueError("Invalid email")

    # HIBP requires API key for v3; use public breach directory style via leak-lookup (rate limited)
    data = _json_get(f"https://leakcheck.io/api/public?check={quote(email)}", timeout=8)

    found = False
    sources: Any = []
    if isinstance(data, dict):
        found = bool(data.get("found"))
        sources = data.get("sources") or []
    return {"email": email, "found": found, "sources": sources, "timestamp": _now_iso()}
|
||||
|
||||
|
||||
def sweep_init(ip: str, cidr: int = 24) -> dict[str, Any]:
    """Validate a sweep target and geolocate it through ip-api.com.

    Args:
        ip: Public IPv4 address to centre the sweep on.
        cidr: Prefix length of the subnet to sweep (24-32).

    Returns:
        A dict with a ``center`` block (coordinates, place names, ISP, ASN,
        org) plus the echoed ``target_ip`` and ``cidr``.

    Raises:
        ValueError: For a malformed address, a private/loopback/link-local/
            reserved address, an out-of-range ``cidr``, or a failed
            geolocation lookup.
    """
    try:
        addr = ipaddress.IPv4Address(ip)
    except ValueError as exc:
        raise ValueError("Invalid IPv4 address format") from exc

    if any((addr.is_private, addr.is_loopback, addr.is_link_local, addr.is_reserved)):
        raise ValueError("Private and reserved IP ranges are not allowed")
    if cidr > 32 or cidr < 24:
        raise ValueError("CIDR must be between 24 and 32")

    fields = "status,message,country,countryCode,region,regionName,city,lat,lon,isp,org,as,proxy,hosting"
    geo = _json_get(f"https://ip-api.com/json/{quote(ip)}?fields={fields}", timeout=5)
    geo_ok = isinstance(geo, dict) and geo.get("status") == "success"
    if not geo_ok:
        raise ValueError(f"Geolocation failed: {(geo or {}).get('message', 'unknown')}")

    center = {
        "lat": geo.get("lat"),
        "lng": geo.get("lon"),
        "city": geo.get("city"),
        "region": geo.get("regionName"),
        "country": geo.get("country"),
        "countryCode": geo.get("countryCode"),
        "isp": geo.get("isp"),
        "asn": geo.get("as") or "",
        "org": geo.get("org") or "",
    }
    return {"center": center, "target_ip": ip, "cidr": cidr}
|
||||
|
||||
|
||||
def _internetdb_lookup(ip: str) -> dict[str, Any] | None:
    """Query Shodan InternetDB for one address.

    Returns:
        The decoded JSON body on HTTP 200; ``None`` for any other status
        (including 404) and for any exception raised along the way.
    """
    try:
        response = fetch_with_curl(
            f"https://internetdb.shodan.io/{quote(ip)}",
            timeout=4,
            headers={"Accept": "application/json"},
        )
        # Every non-200 status, 404 included, yields None.
        return response.json() if response.status_code == 200 else None
    except Exception:
        return None
|
||||
|
||||
|
||||
def sweep_scan(subnet_start: str, cidr: int, *, max_workers: int = 12) -> dict[str, Any]:
    """Scan a /24-/32 via Shodan InternetDB (server-side proxy).

    Args:
        subnet_start: First IPv4 address of the range to scan.
        cidr: Prefix length; determines how many consecutive addresses
            (``2 ** (32 - cidr)``, at most 256) are looked up.
        max_workers: Size of the thread pool used for the lookups.

    Returns:
        A dict with ``devices`` (one entry per address InternetDB knows about,
        in ascending address order), a ``summary`` with ``total_hosts`` and
        ``total_responsive``, and ``sweep_time_ms``.

    Raises:
        ValueError: If ``subnet_start`` is not a valid IPv4 address, ``cidr``
            exceeds 32, or the range would cover more than 256 hosts.
    """
    base = int(ipaddress.IPv4Address(subnet_start))
    if cidr > 32:
        # 2 ** (32 - cidr) would be fractional and make range() raise TypeError.
        raise ValueError("CIDR must be between 24 and 32")
    host_count = 2 ** (32 - cidr)
    if host_count > 256:
        raise ValueError("Subnet too large")
    ips = [str(ipaddress.IPv4Address(base + i)) for i in range(host_count)]
    devices: list[dict[str, Any]] = []
    t0 = time.time()
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # map() yields results in submission order, so the device list is
        # deterministic instead of following thread completion order.
        for ip, data in zip(ips, pool.map(_internetdb_lookup, ips)):
            if not data:
                continue
            devices.append(
                {
                    "ip": data.get("ip") or ip,
                    "ports": data.get("ports") or [],
                    "hostnames": data.get("hostnames") or [],
                    "cpes": data.get("cpes") or [],
                    "vulns": data.get("vulns") or [],
                    "tags": data.get("tags") or [],
                }
            )
    return {
        "devices": devices,
        "summary": {"total_hosts": host_count, "total_responsive": len(devices)},
        "sweep_time_ms": int((time.time() - t0) * 1000),
    }
|
||||
|
||||
|
||||
def subnet_start_for(ip: str, cidr: int) -> str:
    """Return the network address of the ``/cidr`` subnet that contains ``ip``."""
    # strict=False lets a host address stand in for its network.
    network = ipaddress.IPv4Network("{}/{}".format(ip, cidr), strict=False)
    first_address = network.network_address
    return str(first_address)
|
||||
@@ -0,0 +1 @@
|
||||
"""Entity graph resolution (Osiris intel layer port)."""
|
||||
@@ -0,0 +1,268 @@
|
||||
"""Entity graph resolver (Python port of Osiris intel/server.js)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.sanctions.ofac import match_exact, search_sanctions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Entity kinds the resolver entry point accepts.
ALLOWED_TYPES = frozenset(("aircraft", "vessel", "company", "person", "ip", "country"))

# Resolved graphs keyed by "<type>:<id>", stored as (stored_at_epoch_seconds, graph).
_WD_CACHE: dict[str, tuple[float, dict[str, Any]]] = {}
# Guards every read and write of _WD_CACHE.
_WD_LOCK = threading.Lock()
# Cache entry lifetime in seconds (24 hours).
_WD_TTL = 24 * 60 * 60
# User-Agent sent with Wikidata requests.
_WD_UA = "Shadowbroker-Intel/1.0 (ontology engine)"
|
||||
|
||||
|
||||
def _dedup(nodes: list[dict], links: list[dict]) -> dict[str, Any]:
    """Collapse duplicate nodes and links into a single graph payload.

    Nodes are keyed by ``id``: the first occurrence fixes the position and the
    last occurrence supplies the content. Links are unique per
    (source, target, label); the first occurrence is kept.

    Returns:
        ``{"nodes": [...], "links": [...]}``.
    """
    unique_nodes: dict[str, dict] = {node["id"]: node for node in nodes}

    emitted: set[str] = set()
    unique_links: list[dict] = []
    for edge in links:
        signature = f"{edge['source']}→{edge['target']}→{edge['label']}"
        if signature not in emitted:
            emitted.add(signature)
            unique_links.append(edge)

    return {"nodes": list(unique_nodes.values()), "links": unique_links}
|
||||
|
||||
|
||||
def _wd_cache_get(key: str) -> dict[str, Any] | None:
    """Return the cached graph for ``key``, or ``None`` if absent or expired.

    An entry older than ``_WD_TTL`` seconds is removed from the cache when it
    is encountered.
    """
    with _WD_LOCK:
        entry = _WD_CACHE.get(key)
        if not entry:
            return None
        stored_at, payload = entry
        age = time.time() - stored_at
        if age > _WD_TTL:
            _WD_CACHE.pop(key, None)
            return None
        return payload
|
||||
|
||||
|
||||
def _wd_cache_set(key: str, data: dict[str, Any]) -> None:
    """Store ``data`` under ``key`` with the current time as its timestamp.

    When the cache already holds more than 5000 entries, the entry that was
    inserted first is dropped before the new one is written.
    """
    with _WD_LOCK:
        if len(_WD_CACHE) > 5000:
            # Dicts iterate in insertion order, so the first key is the earliest insert.
            first_key = next(iter(_WD_CACHE))
            _WD_CACHE.pop(first_key, None)
        stamped = (time.time(), data)
        _WD_CACHE[key] = stamped
|
||||
|
||||
|
||||
def _add_sanctions(id_label: str, root_id: str, nodes: list, links: list) -> None:
    """Append up to three sanctions hits for ``id_label`` to the graph in place.

    Each hit becomes a ``sanction`` node linked from ``root_id`` with the
    label ``SANCTIONS MATCH``. ``nodes`` and ``links`` are mutated.
    """
    hits = search_sanctions(id_label, limit=3)
    for hit in hits:
        node_id = f"sanction:{hit['id']}"
        sanction_node = {
            "id": node_id,
            "label": hit["name"],
            "type": "sanction",
            "properties": {"programs": hit.get("programs"), "source": "OFAC SDN"},
        }
        nodes.append(sanction_node)
        links.append({"source": root_id, "target": node_id, "label": "SANCTIONS MATCH"})
|
||||
|
||||
|
||||
def _sparql(query: str) -> list[dict[str, Any]]:
    """Run a SPARQL query against the Wikidata endpoint and return its bindings.

    The call is best-effort: a transport error, a non-200 status, an
    undecodable body or an unexpected JSON shape all yield an empty list.

    Args:
        query: SPARQL text; it is URL-encoded before being sent.

    Returns:
        The ``results.bindings`` list of the response, or ``[]``.
    """
    url = f"https://query.wikidata.org/sparql?query={quote(query)}&format=json"
    try:
        resp = fetch_with_curl(url, timeout=10, headers={"User-Agent": _WD_UA, "Accept": "application/sparql-results+json"})
    except Exception as exc:
        # Treat a failed request like a non-200 answer instead of aborting the resolve.
        logger.warning("Wikidata SPARQL request failed: %s", exc)
        return []
    if resp.status_code != 200:
        return []
    try:
        data = resp.json()
    except Exception:
        return []
    if not isinstance(data, dict):
        return []
    results = data.get("results")
    bindings = results.get("bindings") if isinstance(results, dict) else None
    return bindings if isinstance(bindings, list) else []
|
||||
|
||||
|
||||
def _wd_search(label: str) -> str | None:
    """Return the id of the top Wikidata entity-search hit for ``label``.

    The call is best-effort: a transport error, a non-200 status, an
    undecodable body, an empty result list or a hit without an ``id`` all
    yield ``None``.
    """
    url = (
        "https://www.wikidata.org/w/api.php?action=wbsearchentities"
        f"&search={quote(label)}&language=en&limit=1&format=json"
    )
    try:
        resp = fetch_with_curl(url, timeout=5, headers={"User-Agent": _WD_UA})
    except Exception as exc:
        # Treat a failed request like a non-200 answer instead of aborting the resolve.
        logger.warning("Wikidata entity search failed: %s", exc)
        return None
    if resp.status_code != 200:
        return None
    try:
        hits = resp.json().get("search") or []
    except Exception:
        return None
    if not isinstance(hits, list) or not hits:
        return None
    top = hits[0]
    return top.get("id") if isinstance(top, dict) else None
|
||||
|
||||
|
||||
def _resolve_ip(id_value: str) -> dict[str, Any]:
    """Build the entity graph for an IP address.

    Sources, in order: ip-api.com geolocation (flags on the root node, ISP
    company node, country node), exact sanctions matches on the ISP / org /
    AS name, and RIPEstat whois ``netname`` records. The deduplicated graph
    is cached under ``ip:<id_value>``; a cached graph is returned without any
    network access.

    Returns:
        ``{"nodes": [...], "links": [...]}`` with the IP as the first node.
    """
    cache_key = f"ip:{id_value}"
    hit = _wd_cache_get(cache_key)
    if hit:
        return hit

    root_id = f"ip:{id_value}"
    root: dict[str, Any] = {"id": root_id, "label": id_value, "type": "ip", "properties": {}}
    nodes: list[dict] = [root]
    links: list[dict] = []

    def _attach(node: dict[str, Any], relation: str) -> None:
        # Add a node and the edge that ties it to the root IP.
        nodes.append(node)
        links.append({"source": root_id, "target": node["id"], "label": relation})

    geo = fetch_with_curl(
        f"https://ip-api.com/json/{quote(id_value)}"
        "?fields=status,country,countryCode,city,lat,lon,isp,org,as,asname,proxy,hosting,mobile",
        timeout=8,
    )
    if geo.status_code == 200:
        try:
            data = geo.json()
        except Exception:
            data = {}
        if data.get("status") == "success":
            root["properties"] = {
                "proxy": bool(data.get("proxy")),
                "hosting": bool(data.get("hosting")),
                "mobile": bool(data.get("mobile")),
                "source": "ip-api.com",
            }
            if data.get("isp"):
                isp_name = data["isp"]
                _attach(
                    {"id": f"company:{isp_name}", "label": isp_name, "type": "company", "properties": {"role": "ISP"}},
                    "HOSTED_BY",
                )
            if data.get("country"):
                country_name = data["country"]
                _attach(
                    {
                        "id": f"country:{country_name}",
                        "label": country_name,
                        "type": "country",
                        "properties": {"code": data.get("countryCode")},
                    },
                    "LOCATED_IN",
                )
            # Screen every operator name the geolocation record exposes.
            for operator in (data.get("isp"), data.get("org"), data.get("asname")):
                if not operator:
                    continue
                for entry in match_exact(operator):
                    _attach(
                        {"id": f"sanction:{entry['id']}", "label": entry["name"], "type": "sanction", "properties": {}},
                        "SANCTIONS MATCH",
                    )

    whois = fetch_with_curl(
        f"https://stat.ripe.net/data/whois/data.json?resource={quote(id_value)}",
        timeout=8,
    )
    if whois.status_code == 200:
        try:
            records = whois.json().get("data", {}).get("records") or []
        except Exception:
            records = []
        for record in records:
            for attr in record:
                if attr.get("key") not in ("netname", "NetName"):
                    continue
                net_id = f"company:{attr['value']}"
                _attach(
                    {"id": net_id, "label": attr["value"], "type": "company", "properties": {"role": "Network"}},
                    "HOSTED_BY",
                )

    graph = _dedup(nodes, links)
    _wd_cache_set(cache_key, graph)
    return graph
|
||||
|
||||
|
||||
def _resolve_company(id_value: str) -> dict[str, Any]:
    """Build the entity graph for a company from Wikidata and the sanctions index.

    The name is reduced to letters, digits, spaces and ``-._`` before it is
    used in the Wikidata search and in the SPARQL query. Country, parent
    organisation and CEO bindings become nodes linked from the company, then
    sanctions hits are appended. The deduplicated graph is cached under
    ``company:<id_value>``.

    Returns:
        ``{"nodes": [...], "links": [...]}`` with the company as the first node.
    """
    cache_key = f"company:{id_value}"
    hit = _wd_cache_get(cache_key)
    if hit:
        return hit

    root_id = f"company:{id_value}"
    nodes = [{"id": root_id, "label": id_value, "type": "company", "properties": {}}]
    links: list[dict] = []

    safe = re.sub(r'[^a-zA-Z0-9 \-._]', '', id_value).strip()
    qid = _wd_search(safe)
    if qid:
        filt = f"VALUES ?item {{ wd:{qid} }}"
    else:
        # No search hit: fall back to an exact English label on a business entity.
        filt = f'?item rdfs:label "{safe}"@en . ?item wdt:P31/wdt:P279* wd:Q4830453 .'

    rows = _sparql(
        f"""
        SELECT ?countryLabel ?parentLabel ?ceoLabel WHERE {{
          {filt}
          OPTIONAL {{ ?item wdt:P17 ?country . }}
          OPTIONAL {{ ?item wdt:P749 ?parent . }}
          OPTIONAL {{ ?item wdt:P169 ?ceo . }}
          SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
        }} LIMIT 10
        """
    )

    # (SPARQL binding, node type, node properties, link label), in emission order.
    relations = (
        ("countryLabel", "country", {}, "HEADQUARTERED"),
        ("parentLabel", "company", {}, "PARENT ORG"),
        ("ceoLabel", "person", {"role": "CEO"}, "CEO"),
    )
    for row in rows:
        for binding, kind, extra, relation in relations:
            value = row.get(binding, {}).get("value")
            if not value:
                continue
            node_id = f"{kind}:{value}"
            # dict(extra) gives each node its own properties dict.
            nodes.append({"id": node_id, "label": value, "type": kind, "properties": dict(extra)})
            links.append({"source": root_id, "target": node_id, "label": relation})

    _add_sanctions(id_value, root_id, nodes, links)
    graph = _dedup(nodes, links)
    _wd_cache_set(cache_key, graph)
    return graph
|
||||
|
||||
|
||||
def _resolve_from_store(entity_type: str, id_value: str, props: dict[str, Any]) -> dict[str, Any]:
    """Build the entity graph for an aircraft or vessel from the in-memory data store.

    Args:
        entity_type: ``"aircraft"`` or ``"vessel"``; other values only get the
            root node plus sanctions hits.
        id_value: Entity identifier. Used as the ICAO24 / MMSI when ``props``
            does not carry ``icao24`` / ``mmsi``.
        props: Caller-supplied properties for the root node. The dict is
            copied and never modified.

    Returns:
        ``{"nodes": [...], "links": [...]}`` with the entity as the first node.
    """
    from services.fetchers._store import get_latest_data_subset_refs

    root_id = f"{entity_type}:{id_value}"
    # Copy: the registration enrichment below must not leak into the caller's dict.
    root_props: dict[str, Any] = dict(props)
    nodes = [{"id": root_id, "label": props.get("label") or id_value, "type": entity_type, "properties": root_props}]
    links: list[dict] = []
    data = get_latest_data_subset_refs("flights", "ships", "military_flights", "tracked_flights")

    if entity_type == "aircraft":
        icao = (props.get("icao24") or id_value).lower()
        for bucket in ("military_flights", "tracked_flights", "flights"):
            for f in data.get(bucket) or []:
                if str(f.get("icao24", "")).lower() != icao:
                    continue
                if f.get("country"):
                    cid = f"country:{f['country']}"
                    nodes.append({"id": cid, "label": f["country"], "type": "country", "properties": {}})
                    links.append({"source": root_id, "target": cid, "label": "REGISTERED_IN"})
                if f.get("registration"):
                    root_props["registration"] = f["registration"]
                # Only the first match per bucket is used.
                break
    elif entity_type == "vessel":
        mmsi = str(props.get("mmsi") or id_value)
        for ship in data.get("ships") or []:
            if str(ship.get("mmsi")) != mmsi:
                continue
            if ship.get("country"):
                cid = f"country:{ship['country']}"
                nodes.append({"id": cid, "label": ship["country"], "type": "country", "properties": {}})
                links.append({"source": root_id, "target": cid, "label": "FLAG"})
            break
    _add_sanctions(id_value, root_id, nodes, links)
    return _dedup(nodes, links)
|
||||
|
||||
|
||||
def resolve_entity(entity_type: str, id_value: str, properties: dict[str, Any] | None = None) -> dict[str, Any]:
    """Resolve an entity into a graph of related nodes and links.

    Args:
        entity_type: One of ``ALLOWED_TYPES`` (case-insensitive, whitespace ignored).
        id_value: Entity identifier, 2-200 characters after trimming.
        properties: Optional extra properties; only used for aircraft and vessels.

    Returns:
        ``{"nodes": [...], "links": [...]}``.

    Raises:
        ValueError: If the type is not allowed or the id length is out of range.
    """
    etype = (entity_type or "").lower().strip()
    eid = (id_value or "").strip()
    if etype not in ALLOWED_TYPES:
        raise ValueError(f"Invalid type. Allowed: {', '.join(sorted(ALLOWED_TYPES))}")
    if not 2 <= len(eid) <= 200:
        raise ValueError("Invalid id (2-200 chars)")
    props = properties or {}

    if etype == "ip":
        return _resolve_ip(eid)
    if etype == "company":
        return _resolve_company(eid)
    if etype in ("person", "country"):
        # People and countries are only screened against the sanctions index.
        root_id = f"{etype}:{eid}"
        nodes = [{"id": root_id, "label": eid, "type": etype, "properties": {}}]
        links: list[dict] = []
        _add_sanctions(eid, root_id, nodes, links)
        return _dedup(nodes, links)
    return _resolve_from_store(etype, eid, props)
|
||||
@@ -0,0 +1 @@
|
||||
"""Sanctions screening (OpenSanctions OFAC SDN)."""
|
||||
@@ -0,0 +1,154 @@
|
||||
"""OFAC SDN index via OpenSanctions (adapted from Osiris sanctions.ts)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SDN_CSV_URL = "https://data.opensanctions.org/datasets/latest/us_ofac_sdn/targets.simple.csv"
|
||||
TTL_S = 24 * 60 * 60
|
||||
|
||||
_lock = threading.Lock()
|
||||
_cache: dict[str, Any] | None = None
|
||||
_cache_at: float = 0.0
|
||||
_inflight: threading.Event | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class SanctionEntry:
|
||||
id: str
|
||||
schema: str
|
||||
name: str
|
||||
aliases: list[str] = field(default_factory=list)
|
||||
countries: list[str] = field(default_factory=list)
|
||||
programs: list[str] = field(default_factory=list)
|
||||
sanctions: str = ""
|
||||
first_seen: str | None = None
|
||||
last_seen: str | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"id": self.id,
|
||||
"schema": self.schema,
|
||||
"name": self.name,
|
||||
"aliases": self.aliases,
|
||||
"countries": self.countries,
|
||||
"programs": self.programs,
|
||||
"sanctions": self.sanctions,
|
||||
"first_seen": self.first_seen,
|
||||
"last_seen": self.last_seen,
|
||||
}
|
||||
|
||||
|
||||
def norm_name(s: str) -> str:
    """Normalize a name for comparison.

    Lower-cases the text, turns each run of characters that are neither word
    characters nor whitespace into a single space, collapses whitespace runs
    and trims both ends.
    """
    lowered = s.lower()
    without_punct = re.sub(r"[^\w\s]+", " ", lowered, flags=re.UNICODE)
    single_spaced = re.sub(r"\s+", " ", without_punct)
    return single_spaced.strip()
|
||||
|
||||
|
||||
def _split_semi(val: str) -> list[str]:
    """Split a ``;``-separated field into trimmed, non-empty parts.

    A falsy ``val`` (empty string or ``None``) yields an empty list.
    """
    parts: list[str] = []
    for chunk in (val or "").split(";"):
        trimmed = chunk.strip()
        if trimmed:
            parts.append(trimmed)
    return parts
|
||||
|
||||
|
||||
def _index_sdn_csv(text: str) -> tuple[list[SanctionEntry], dict[str, list[SanctionEntry]]]:
    """Parse the SDN CSV text into entries plus a normalized-name lookup table."""
    entries: list[SanctionEntry] = []
    by_norm: dict[str, list[SanctionEntry]] = {}
    for row in csv.DictReader(io.StringIO(text)):
        name = (row.get("name") or "").strip()
        if not name:
            continue
        entry = SanctionEntry(
            id=row.get("id") or "",
            schema=row.get("schema") or "LegalEntity",
            name=name,
            aliases=_split_semi(row.get("aliases") or ""),
            countries=_split_semi(row.get("countries") or ""),
            programs=_split_semi(row.get("program_ids") or ""),
            sanctions=row.get("sanctions") or "",
            first_seen=row.get("first_seen") or None,
            last_seen=row.get("last_seen") or None,
        )
        entries.append(entry)
        # A set keeps an entry from being listed twice under the same key.
        keys = {norm_name(name)}
        keys.update(norm_name(alias) for alias in entry.aliases)
        keys.discard("")
        for key in keys:
            by_norm.setdefault(key, []).append(entry)
    return entries, by_norm


def _load_list() -> dict[str, Any]:
    """Return the sanctions index, downloading it when the cached copy is stale.

    Returns:
        A dict with ``entries`` (all rows), ``by_norm`` (normalized name or
        alias -> entries) and ``fetched_at``.

    Raises:
        Exception: Whatever the download or parse raised, when it fails and
            no earlier index is cached. With a cached index, the stale copy
            is returned instead.
    """
    global _cache, _cache_at
    with _lock:
        if _cache and (time.time() - _cache_at) < TTL_S:
            return _cache

    try:
        resp = fetch_with_curl(SDN_CSV_URL, timeout=45, headers={"Accept": "text/csv"})
        if resp.status_code != 200:
            raise RuntimeError(f"OpenSanctions HTTP {resp.status_code}")
        entries, by_norm = _index_sdn_csv(resp.text)
        loaded = {"entries": entries, "by_norm": by_norm, "fetched_at": time.time()}
        with _lock:
            _cache = loaded
            _cache_at = time.time()
        logger.info("OFAC SDN index loaded: %s entries", len(entries))
        return loaded
    except Exception as exc:
        logger.error("OFAC SDN load failed: %s", exc)
        with _lock:
            stale = _cache
            if stale:
                return stale
        raise
|
||||
|
||||
|
||||
def match_exact(query: str) -> list[dict[str, Any]]:
    """Return entries whose normalized name or alias equals the normalized query.

    Queries that are empty or shorter than three characters return ``[]``
    without touching the index.
    """
    if not query or len(query) < 3:
        return []
    index = _load_list()["by_norm"]
    matched = index.get(norm_name(query), [])
    return [entry.to_dict() for entry in matched]
|
||||
|
||||
|
||||
def search_sanctions(query: str, *, schema: str | None = None, limit: int = 50) -> list[dict[str, Any]]:
    """Search the sanctions index by name, ranking exact matches before partial ones.

    Result order: exact name matches, exact alias matches, names containing
    the query, aliases containing the query. Comparison is done on
    ``norm_name``-normalized text.

    Args:
        query: Name to search for. Queries shorter than four characters, or
            that normalize to an empty string, return ``[]``.
        schema: When given, only entries with exactly this schema are returned.
        limit: Maximum number of results.

    Returns:
        Up to ``limit`` entries as dicts (see ``SanctionEntry.to_dict``).
    """
    if not query or len(query) < 4:
        return []
    q = norm_name(query)
    if not q:
        # Punctuation-only input normalizes to "", which is a substring of
        # every name and would otherwise match the whole list.
        return []
    data = _load_list()
    exact_name: list[SanctionEntry] = []
    exact_alias: list[SanctionEntry] = []
    sub_name: list[SanctionEntry] = []
    sub_alias: list[SanctionEntry] = []
    seen: set[str] = set()

    def push(bucket: list[SanctionEntry], entry: SanctionEntry) -> None:
        if entry.id in seen:
            return
        if schema and entry.schema != schema:
            return
        seen.add(entry.id)
        bucket.append(entry)

    # Exact matches come from the normalized-name index, so they are always
    # found no matter where they sit in the list or how many partial matches
    # precede them.
    for entry in data["by_norm"].get(q, []):
        push(exact_name if norm_name(entry.name) == q else exact_alias, entry)

    # Partial matches: scan in list order with a bounded budget.
    budget = limit * 4
    for entry in data["entries"]:
        if len(seen) >= budget:
            break
        if entry.id in seen:
            continue
        if q in norm_name(entry.name):
            push(sub_name, entry)
        elif any(q in norm_name(a) for a in entry.aliases):
            push(sub_alias, entry)

    ordered = exact_name + exact_alias + sub_name + sub_alias
    return [e.to_dict() for e in ordered[:limit]]
|
||||
|
||||
|
||||
def index_size() -> int:
    """Return the number of entries in the sanctions index (loading it if needed)."""
    index = _load_list()
    return len(index["entries"])
|
||||
@@ -0,0 +1 @@
|
||||
"""Supply-chain risk overlay."""
|
||||
@@ -0,0 +1,154 @@
|
||||
"""SCM supplier risk overlay (Osiris port, uses in-memory dashboard data)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, get_latest_data_subset_refs, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
# Column order of each row in _SUPPLIER_ROWS; also the key order of every SUPPLIERS dict.
_SUPPLIER_FIELDS = ("id", "name", "city", "country", "lat", "lng", "category")

_SUPPLIER_ROWS = (
    ("sup-tsmc-hsinchu", "TSMC Fab 12 (Tier 1)", "Hsinchu", "Taiwan", 24.774, 120.992, "Semiconductor"),
    ("sup-tsmc-tainan", "TSMC Fab 14 (Tier 1)", "Tainan", "Taiwan", 23.111, 120.273, "Semiconductor"),
    ("sup-sec-giheung", "Samsung Electronics (Tier 1)", "Giheung", "South Korea", 37.221, 127.098, "Semiconductor"),
    ("sup-sk-icheon", "SK Hynix (Tier 1)", "Icheon", "South Korea", 37.256, 127.483, "Semiconductor"),
    ("sup-sony-kumamoto", "Sony Semiconductor (Tier 2)", "Kikuyo", "Japan", 32.883, 130.825, "Electronics"),
    ("sup-mlcc-murata", "Murata MLCC (Tier 2)", "Izumo", "Japan", 35.361, 132.756, "Electronics"),
    ("sup-bosch-stuttgart", "Bosch Auto Parts (Tier 1)", "Stuttgart", "Germany", 48.815, 9.176, "Automotive"),
    ("sup-zf-bavaria", "ZF Friedrichshafen (Tier 1)", "Friedrichshafen", "Germany", 47.662, 9.489, "Automotive"),
    ("sup-valeo-paris", "Valeo R&D (Tier 2)", "Paris", "France", 48.878, 2.308, "Automotive"),
    ("sup-magna-celaya", "Magna Assembly (Tier 2)", "Celaya", "Mexico", 20.525, -100.814, "Automotive"),
    ("sup-denso-monterrey", "Denso Corp (Tier 1)", "Monterrey", "Mexico", 25.772, -100.174, "Automotive"),
    ("sup-catl-ningde", "CATL Battery HQ (Tier 1)", "Ningde", "China", 26.666, 119.544, "Battery"),
    ("sup-byd-shenzhen", "BYD Gigafactory (Tier 1)", "Shenzhen", "China", 22.684, 114.341, "Battery"),
    ("sup-panasonic-nevada", "Panasonic Giga (Tier 1)", "Sparks", "US", 39.539, -119.439, "Battery"),
)

# Static list of monitored supplier sites; coordinates are decimal degrees.
SUPPLIERS: list[dict[str, Any]] = [dict(zip(_SUPPLIER_FIELDS, row)) for row in _SUPPLIER_ROWS]
|
||||
|
||||
|
||||
def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
    """Approximate the distance in kilometres between two lat/lng points.

    Uses a flat-earth approximation: the longitude difference is scaled by
    the cosine of the mean latitude, and one degree is taken as 111.32 km.
    """
    mean_lat_rad = math.radians((lat1 + lat2) / 2)
    east_west = (lng1 - lng2) * math.cos(mean_lat_rad)
    north_south = lat1 - lat2
    degrees = math.sqrt(east_west * east_west + north_south * north_south)
    return degrees * 111.32
|
||||
|
||||
|
||||
def _seismic_risk_level(distance_km: float, magnitude: float) -> str | None:
|
||||
"""Meaningful fab impact only — ignore routine micro-quakes (e.g. Taiwan M3.x)."""
|
||||
if magnitude < 4.5:
|
||||
return None
|
||||
if magnitude >= 6.0 and distance_km <= 200:
|
||||
return "CRITICAL"
|
||||
if magnitude >= 5.5 and distance_km <= 75:
|
||||
return "CRITICAL"
|
||||
if magnitude >= 5.0 and distance_km <= 100:
|
||||
return "HIGH"
|
||||
if magnitude >= 4.5 and distance_km <= 40:
|
||||
return "HIGH"
|
||||
return None
|
||||
|
||||
|
||||
def _apply_seismic_threats(suppliers: list[dict[str, Any]], earthquakes: list[dict[str, Any]]) -> None:
    """Raise supplier risk levels for nearby significant earthquakes, in place.

    For each supplier the single worst qualifying quake is selected (highest
    level, then highest magnitude). Its level replaces a ``NORMAL`` risk
    level, ``CRITICAL`` also replaces ``HIGH``, and one
    ``SEISMIC PROXIMITY (M<mag>)`` threat is appended.

    Args:
        suppliers: Supplier dicts with ``lat``, ``lng``, ``risk_level`` and
            ``active_threats``; mutated in place.
        earthquakes: Quake dicts with ``lat``, ``lng`` or ``lon``, and ``mag``
            or ``magnitude``. Entries without coordinates or below M4.5 are
            ignored.
    """
    severity = {"HIGH": 1, "CRITICAL": 2}
    for sup in suppliers:
        best: tuple[str, float] | None = None
        for eq in earthquakes:
            lat = eq.get("lat")
            lng = eq.get("lng")
            if lng is None:
                # Fall back on the alternate key only when "lng" is absent;
                # a longitude of exactly 0.0 is valid and must not be skipped.
                lng = eq.get("lon")
            mag = float(eq.get("mag") or eq.get("magnitude") or 0)
            if lat is None or lng is None or mag < 4.5:
                continue
            dist = _distance_km(sup["lat"], sup["lng"], float(lat), float(lng))
            level = _seismic_risk_level(dist, mag)
            if not level:
                continue
            # Rank by level first, magnitude second.
            if best is None or (severity[level], mag) > (severity[best[0]], best[1]):
                best = (level, mag)
        if best is None:
            continue
        level, mag = best
        current = sup["risk_level"]
        if current == "NORMAL" or (level == "CRITICAL" and current != "CRITICAL"):
            sup["risk_level"] = level
        sup["active_threats"].append(f"SEISMIC PROXIMITY (M{mag:.1f})")
|
||||
|
||||
|
||||
def build_scm_payload() -> dict[str, Any]:
    """Build the supplier risk overlay from the latest in-memory hazard data.

    Every supplier starts at ``NORMAL`` and is escalated by, in order:
    earthquakes (see ``_apply_seismic_threats``), fire hotspots within 50 km
    (``HIGH``), and conflict events within 100 km (``CRITICAL``). When the
    store has no earthquakes, the USGS M4.5+ daily feed is tried as a
    best-effort fallback.

    Returns:
        A dict with ``suppliers``, ``total``, ``critical_count`` and a UTC
        ``timestamp``.
    """

    def _coord(record: dict[str, Any], *keys: str) -> Any:
        # First key holding a real value. 0.0 is a valid coordinate, so only
        # None and "" count as missing.
        for key in keys:
            value = record.get(key)
            if value is not None and value != "":
                return value
        return None

    suppliers = [{**s, "risk_level": "NORMAL", "active_threats": []} for s in SUPPLIERS]
    refs = get_latest_data_subset_refs("earthquakes", "firms_fires", "gdelt")

    earthquakes = refs.get("earthquakes") or []
    _apply_seismic_threats(suppliers, earthquakes)

    fires = refs.get("firms_fires") or []
    for sup in suppliers:
        count = 0
        for fire in fires:
            lat = _coord(fire, "lat", "latitude")
            lng = _coord(fire, "lng", "lon", "longitude")
            if lat is None or lng is None:
                continue
            if _distance_km(sup["lat"], sup["lng"], float(lat), float(lng)) < 50:
                count += 1
        if count:
            if sup["risk_level"] == "NORMAL":
                sup["risk_level"] = "HIGH"
            sup["active_threats"].append(f"WILDFIRE PROXIMITY ({count} hotspots)")

    conflicts = refs.get("gdelt") or []
    for sup in suppliers:
        for event in conflicts:
            lat = _coord(event, "lat")
            lng = _coord(event, "lng", "lon")
            if lat is None or lng is None:
                continue
            if _distance_km(sup["lat"], sup["lng"], float(lat), float(lng)) < 100:
                sup["risk_level"] = "CRITICAL"
                sup["active_threats"].append("ARMED CONFLICT / RIOT")
                # One conflict threat per supplier is enough.
                break

    # USGS fallback if earthquakes empty
    if not earthquakes:
        try:
            resp = fetch_with_curl(
                "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_day.geojson",
                timeout=5,
            )
            if resp.status_code == 200:
                features = resp.json().get("features") or []
                usgs_quakes = []
                for f in features:
                    # `or` guards against explicit nulls so one odd feature
                    # does not discard the whole feed.
                    coords = (f.get("geometry") or {}).get("coordinates") or []
                    if len(coords) < 2:
                        continue
                    usgs_quakes.append(
                        {
                            # GeoJSON order is [longitude, latitude, ...].
                            "lat": coords[1],
                            "lng": coords[0],
                            "mag": (f.get("properties") or {}).get("mag") or 0,
                        }
                    )
                _apply_seismic_threats(suppliers, usgs_quakes)
        except Exception as exc:
            # Best-effort: the overlay is still useful without the fallback feed.
            import logging

            logging.getLogger(__name__).debug("USGS earthquake fallback failed: %s", exc)

    critical = sum(1 for s in suppliers if s["risk_level"] == "CRITICAL")
    return {
        "suppliers": suppliers,
        "total": len(suppliers),
        "critical_count": critical,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
|
||||
|
||||
|
||||
def fetch_scm_suppliers() -> dict[str, Any]:
    """Refresh and return the ``scm_suppliers`` payload.

    When ``is_any_active("scm_suppliers")`` is false the previously stored
    payload (or an empty dict) is returned and nothing is rebuilt. Otherwise
    the payload is rebuilt, stored in ``latest_data`` under the data lock and
    marked fresh.
    """
    if is_any_active("scm_suppliers"):
        payload = build_scm_payload()
        with _data_lock:
            latest_data["scm_suppliers"] = payload
        _mark_fresh("scm_suppliers")
        return payload
    return latest_data.get("scm_suppliers") or {}
|
||||
@@ -0,0 +1,141 @@
|
||||
"""SSRF guard for operator-initiated recon (ported from Osiris ssrf-guard.ts)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import re
|
||||
import socket
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import requests
|
||||
|
||||
# IPv4 networks that recon requests must never reach: "this network", RFC 1918
# private space, shared (carrier-grade NAT) space, loopback, link-local, IETF
# protocol assignments, the documentation/benchmarking nets, multicast and the
# reserved top block.
_IPV4_BLOCKS = [
    ipaddress.ip_network(cidr)
    for cidr in (
        "0.0.0.0/8",
        "10.0.0.0/8",
        "100.64.0.0/10",
        "127.0.0.0/8",
        "169.254.0.0/16",
        "172.16.0.0/12",
        "192.0.0.0/24",
        "192.0.2.0/24",
        "192.168.0.0/16",
        "198.18.0.0/15",
        "198.51.100.0/24",
        "203.0.113.0/24",
        "224.0.0.0/4",
        "240.0.0.0/4",
    )
]
|
||||
|
||||
# Case-insensitive hostname patterns that are refused before any DNS lookup.
_NAME_BLOCKLIST = tuple(
    re.compile(pattern, re.I)
    for pattern in (
        r"^localhost$",
        r"\.localhost$",
        r"^host\.docker\.internal$",
        r"\.local$",
        r"\.internal$",
        r"^metadata\.google\.internal$",
    )
)
|
||||
|
||||
# One DNS label: alphanumeric at both ends, hyphens allowed in between.
_HOSTNAME_LABEL = r"[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?"

# A hostname is one or more such labels joined by single dots.
_HOSTNAME_RE = re.compile(rf"^{_HOSTNAME_LABEL}(\.{_HOSTNAME_LABEL})*$")
|
||||
|
||||
|
||||
def _ipv4_blocked(ip: str) -> bool:
    """Return True when ``ip`` is an IPv4 address inside one of ``_IPV4_BLOCKS``.

    Input that does not parse as an IP address is reported as blocked (fail
    closed). A valid non-IPv4 address is reported as not blocked by this
    helper.
    """
    try:
        parsed = ipaddress.ip_address(ip)
    except ValueError:
        return True
    if isinstance(parsed, ipaddress.IPv4Address):
        for network in _IPV4_BLOCKS:
            if parsed in network:
                return True
    return False
|
||||
|
||||
|
||||
def _ip_blocked(ip: str) -> bool:
    """Return True when ``ip`` must not be contacted by recon requests.

    IPv4 addresses are judged by ``_ipv4_blocked``. IPv6 addresses are blocked
    when they are loopback, private, link-local, multicast, reserved or
    unspecified. An IPv4-mapped IPv6 address (``::ffff:a.b.c.d``) is
    additionally blocked whenever its embedded IPv4 address is rejected by
    ``_ipv4_blocked``, so the mapped spelling cannot reach a range that the
    plain IPv4 spelling is denied.

    Input that does not parse as an IP address is reported as blocked (fail
    closed).
    """
    try:
        addr = ipaddress.ip_address(ip)
    except ValueError:
        return True
    if isinstance(addr, ipaddress.IPv4Address):
        return _ipv4_blocked(ip)

    # How the is_* properties treat IPv4-mapped addresses depends on the
    # interpreter version, so apply the explicit IPv4 blocklist to the
    # embedded address as well. This only ever adds rejections.
    mapped = addr.ipv4_mapped
    if mapped is not None and _ipv4_blocked(str(mapped)):
        return True

    return (
        addr.is_loopback
        or addr.is_private
        or addr.is_link_local
        or addr.is_multicast
        or addr.is_reserved
        or addr.is_unspecified
    )
|
||||
|
||||
|
||||
def validate_host(host: str) -> dict[str, Any]:
    """Decide whether ``host`` is an acceptable recon target.

    Returns ``{"ok": True, "resolved": [...]}`` with the literal address or the
    addresses the name resolved to, or ``{"ok": False, "reason": ...}`` when
    the host is empty, matches a reserved name pattern, is (or resolves to) a
    blocked address, has invalid hostname syntax, or cannot be resolved.

    IP literals (optionally wrapped in ``[]``) are judged directly; any other
    host is resolved with ``socket.getaddrinfo`` and rejected as soon as one
    returned address is blocked.
    """
    candidate = (host or "").strip()
    if not candidate:
        return {"ok": False, "reason": "empty host"}

    lowered = candidate.lower()
    for pattern in _NAME_BLOCKLIST:
        if pattern.search(lowered):
            return {"ok": False, "reason": "hostname matches reserved name pattern"}

    # IP literal path: no DNS involved, the address itself is judged.
    unbracketed = candidate.strip("[]")
    try:
        ipaddress.ip_address(unbracketed)
    except ValueError:
        pass
    else:
        if _ip_blocked(unbracketed):
            return {"ok": False, "reason": "IP in reserved range"}
        return {"ok": True, "resolved": [unbracketed]}

    if _HOSTNAME_RE.match(candidate) is None:
        return {"ok": False, "reason": "invalid hostname syntax"}

    try:
        answers = socket.getaddrinfo(candidate, None, proto=socket.IPPROTO_TCP)
    except OSError as exc:
        return {"ok": False, "reason": f"DNS lookup failed: {exc}"}
    if not answers:
        return {"ok": False, "reason": "hostname has no A/AAAA records"}

    # Every resolved address must be acceptable; one bad record rejects the host.
    resolved: list[str] = []
    for answer in answers:
        addr = answer[4][0]
        if _ip_blocked(addr):
            return {"ok": False, "reason": f"hostname resolves to reserved IP {addr}"}
        resolved.append(addr)
    return {"ok": True, "resolved": resolved}
|
||||
|
||||
|
||||
def safe_get(
    url: str,
    *,
    timeout: float = 8.0,
    headers: dict[str, str] | None = None,
    max_redirects: int = 3,
) -> requests.Response:
    """GET ``url`` while re-validating the target on every redirect hop.

    Automatic redirects are disabled; each hop's scheme must be ``http`` or
    ``https`` and its host must pass ``validate_host`` before the request is
    sent. A 3xx response without a ``location`` header is returned as-is.

    Raises:
        ValueError: for a non-HTTP(S) scheme, a host rejected by
            ``validate_host``, or more than ``max_redirects`` redirects.

    NOTE(review): the host is vetted here and then resolved again by
    ``requests`` for the actual connection, so the vetted address is not
    pinned.
    """
    target = url
    attempts_left = max_redirects + 1
    while attempts_left > 0:
        attempts_left -= 1

        parts = urlparse(target)
        if parts.scheme not in ("http", "https"):
            raise ValueError(f"blocked protocol {parts.scheme}")

        verdict = validate_host(parts.hostname or "")
        if not verdict.get("ok"):
            raise ValueError(f"blocked target — {verdict.get('reason')}")

        response = requests.get(
            target,
            timeout=timeout,
            headers=headers or {},
            allow_redirects=False,
        )

        is_redirect = 300 <= response.status_code < 400
        location = response.headers.get("location") if is_redirect else None
        if not location:
            # Final answer, or a redirect that names no destination.
            return response

        # Relative redirect targets are resolved against the current URL.
        target = urljoin(target, location)

    raise ValueError("too many redirects")
|
||||
|
||||
|
||||
# One or more dot-terminated labels (alphanumeric at both ends, hyphens allowed
# inside) followed by an alphabetic top-level label of at least two letters.
_DOMAIN_RE = re.compile(
    r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}"
)


def validate_domain(domain: str) -> bool:
    """Return True when ``domain`` is a syntactically plausible domain name.

    The whole string must match: a trailing newline or any other stray
    character makes the name invalid. Labels may not be empty (``a..com``) and
    may not start or end with a hyphen; single-character labels such as the
    one in ``x.com`` are accepted. ``None`` and the empty string are invalid.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing "\n", which would let "example.com\n" through.
    return _DOMAIN_RE.fullmatch(domain or "") is not None
|
||||
@@ -77,3 +77,62 @@ def test_ingest_updates_existing_rows_in_persistent_data_dir(tmp_path, monkeypat
|
||||
assert len(cameras) == 1
|
||||
assert cameras[0]["media_url"] == "https://example.com/live.m3u8"
|
||||
assert cameras[0]["media_type"] == "hls"
|
||||
|
||||
|
||||
def test_scheduled_cctv_ingestors_include_asfinag_and_alpr():
    """The scheduled ingestor set contains the expected classes, 21 in total."""
    scheduled = cctv_pipeline.scheduled_cctv_ingestors()
    names = {ing.__class__.__name__ for ing, _ in scheduled}

    required = (
        "AsfinagIngestor",
        "OSMALPRCameraIngestor",
        "OSMTrafficCameraIngestor",
        "Ontario511Ingestor",
        "Alberta511Ingestor",
        "Florida511Ingestor",
        "AustraliaLiveTrafficIngestor",
        "NetherlandsRWSIngestor",
    )
    for class_name in required:
        assert class_name in names
    assert len(names) == 21
|
||||
|
||||
|
||||
def test_fetch_traveliq_v2_cameras_parses_views(monkeypatch):
    """One camera with one enabled view yields one record with a joined URL."""

    def camera_feed():
        # A single camera carrying a single view, as served by the stubbed API.
        view = {
            "Id": 42,
            "Url": "/map/Cctv/42",
            "Status": "Enabled",
            "Description": "Northbound",
        }
        camera = {
            "Id": 9,
            "Latitude": 45.0,
            "Longitude": -75.0,
            "Location": "Test Highway",
            "Views": [view],
        }
        return [camera]

    class FakeResp:
        status_code = 200
        json = staticmethod(camera_feed)

    monkeypatch.setattr(cctv_pipeline, "fetch_with_curl", lambda *a, **k: FakeResp())

    cameras = cctv_pipeline._fetch_traveliq_v2_cameras(
        api_url="https://511on.ca/api/v2/get/cameras",
        base_url="https://511on.ca",
        id_prefix="ON511",
        source_agency="511 Ontario",
    )

    assert len(cameras) == 1
    only = cameras[0]
    assert only["id"] == "ON511-9-42"
    assert only["media_url"] == "https://511on.ca/map/Cctv/42"
|
||||
|
||||
|
||||
def test_ensure_https_upgrades_http_media_urls():
    """``http://`` media URLs are upgraded; ``https://`` ones pass through."""
    cases = (
        ("http://example.com/camera.jpg", "https://example.com/camera.jpg"),
        (
            "https://secure.example.com/live.m3u8",
            "https://secure.example.com/live.m3u8",
        ),
    )
    for given, wanted in cases:
        assert cctv_pipeline._ensure_https_url(given) == wanted
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
"""Datacenters load from static JSON regardless of layer toggle."""
|
||||
from services.fetchers import _store
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
|
||||
def test_fetch_datacenters_populates_store_when_layer_disabled(monkeypatch):
    """``fetch_datacenters`` fills the store even with the layer switched off.

    Both store mutations go through ``monkeypatch`` so pytest restores the
    previous ``datacenters`` entry afterwards instead of leaking this test's
    data into later tests.
    """
    monkeypatch.setitem(_store.active_layers, "datacenters", False)
    # setitem (not plain assignment) registers the key for restoration on teardown.
    monkeypatch.setitem(_store.latest_data, "datacenters", [])
    fetch_datacenters()
    assert len(_store.latest_data.get("datacenters") or []) > 0
|
||||
@@ -113,3 +113,52 @@ def test_fetch_fishing_activity_dedupes_to_latest_event_per_vessel(monkeypatch):
|
||||
assert latest_data["fishing_activity"][0]["vessel_ssvid"] == "ssvid-1"
|
||||
finally:
|
||||
latest_data["fishing_activity"] = original
|
||||
|
||||
|
||||
def test_fetch_fishing_activity_respects_max_pages(monkeypatch):
    """With ``GFW_EVENTS_MAX_PAGES=2`` only two pages of 500 events are fetched.

    The stub always advertises more data (``total`` 5000 and a ``nextOffset``),
    so only the page cap can stop the fetch loop.
    """
    from services.fetchers import geo
    from services.fetchers._store import latest_data

    saved_activity = list(latest_data.get("fishing_activity") or [])
    seen_urls: list[str] = []

    def page_entry(offset, i):
        # Vessel identifiers are unique across both pages; positions repeat per page.
        serial = offset + i
        return {
            "id": f"evt-{serial}",
            "position": {"lat": 10.0 + i, "lon": 20.0 + i},
            "event": {"duration": 3600},
            "vessel": {
                "id": f"v-{serial}",
                "ssvid": f"ssvid-{serial}",
                "name": f"Vessel-{serial}",
                "flag": "US",
            },
        }

    def fake_fetch(url, timeout=30, headers=None):
        seen_urls.append(url)
        offset = 500 if "offset=500" in url else 0
        page = {
            "total": 5000,
            "entries": [page_entry(offset, i) for i in range(500)],
            "nextOffset": offset + 500,
        }
        return SimpleNamespace(status_code=200, json=lambda p=page: p)

    monkeypatch.setenv("GFW_API_TOKEN", "test-token")
    monkeypatch.setenv("GFW_EVENTS_PAGE_SIZE", "500")
    monkeypatch.setenv("GFW_EVENTS_MAX_PAGES", "2")
    monkeypatch.setattr("services.fetchers._store.is_any_active", lambda *args: True)
    monkeypatch.setattr(geo, "fetch_with_curl", fake_fetch)
    monkeypatch.setattr(geo, "_mark_fresh", lambda *args, **kwargs: None)
    monkeypatch.setattr(geo, "_last_fishing_fetch_ts", 0.0)

    try:
        geo.fetch_fishing_activity()
        assert len(latest_data["fishing_activity"]) == 1000
        assert len(seen_urls) == 2
        assert all("offset=0" in url or "offset=500" in url for url in seen_urls)
    finally:
        # Put the shared store back the way it was found.
        latest_data["fishing_activity"] = saved_activity
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Tests for Osiris-ported security and sanctions modules."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from services.ssrf_guard import validate_host, validate_domain
|
||||
from services.sanctions.ofac import norm_name, search_sanctions
|
||||
|
||||
|
||||
def test_ssrf_blocks_localhost():
    """The name ``localhost`` is refused by the SSRF guard."""
    assert validate_host("localhost")["ok"] is False
|
||||
|
||||
|
||||
def test_ssrf_blocks_private_ip():
    """The literal address ``192.168.1.1`` is refused by the SSRF guard."""
    assert validate_host("192.168.1.1")["ok"] is False
|
||||
|
||||
|
||||
def test_ssrf_blocks_metadata_endpoint():
    """The name ``metadata.google.internal`` is refused by the SSRF guard."""
    assert validate_host("metadata.google.internal")["ok"] is False
|
||||
|
||||
|
||||
def test_validate_domain_rejects_garbage():
    """Free text is not a domain; a plain ``example.com`` is."""
    expectations = {
        "not a domain": False,
        "example.com": True,
    }
    for candidate, verdict in expectations.items():
        assert validate_domain(candidate) is verdict
|
||||
|
||||
|
||||
def test_norm_name_strips_punctuation():
    """Punctuated, mixed-case input normalizes like the plain lowercase form."""
    punctuated = norm_name("ACME, Inc.")
    plain = norm_name("acme inc")
    assert punctuated == plain
|
||||
|
||||
|
||||
def test_search_sanctions_requires_min_length():
    """A two-character query returns no sanctions hits."""
    hits = search_sanctions("ab")
    assert hits == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize("query", ["127.0.0.1", "10.0.0.1"])
def test_sweep_init_rejects_private(query: str):
    """``sweep_init`` raises ``ValueError`` for loopback and private targets."""
    from services.osint.lookups import sweep_init

    rejection = pytest.raises(ValueError, match="Private|reserved|Invalid")
    with rejection:
        sweep_init(query, 24)
|
||||
@@ -0,0 +1,13 @@
|
||||
from services.scm.suppliers import _seismic_risk_level
|
||||
|
||||
|
||||
def test_micro_quakes_ignored():
    """Second arguments of 3.9 and 4.4 yield no risk level at all."""
    # presumably the second argument is the magnitude — verify against _seismic_risk_level
    for second_arg in (3.9, 4.4):
        assert _seismic_risk_level(10.0, second_arg) is None
|
||||
|
||||
|
||||
def test_meaningful_quake_thresholds():
    """Pin the HIGH / CRITICAL outcomes for four representative argument pairs."""
    # (first argument, second argument, expected level); presumably
    # distance in km and magnitude — verify against _seismic_risk_level.
    cases = (
        (30.0, 4.6, "HIGH"),
        (80.0, 5.2, "HIGH"),
        (50.0, 5.6, "CRITICAL"),
        (150.0, 6.1, "CRITICAL"),
    )
    for first, second, expected in cases:
        assert _seismic_risk_level(first, second) == expected
|
||||
@@ -0,0 +1,103 @@
|
||||
"""Telegram OSINT HTML parsing and geoparsing."""
|
||||
|
||||
from services.fetchers import telegram_osint
|
||||
|
||||
|
||||
SAMPLE_HTML = """
|
||||
<div class="tgme_widget_message_wrap js-widget_message_wrap">
|
||||
<div class="tgme_widget_message_text">Missile strike reported near Kyiv overnight.</div>
|
||||
<a class="tgme_widget_message_date" href="https://t.me/osintdefender/12345">
|
||||
<time datetime="2026-06-02T12:00:00+00:00"></time>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
|
||||
SAMPLE_VIDEO_HTML = """
|
||||
<div class="tgme_widget_message_wrap js-widget_message_wrap">
|
||||
<div class="tgme_widget_message_text">Drone footage from Kharkiv.</div>
|
||||
<video src="https://cdn4.telesco.pe/file/sample.mp4?token=abc" class="tgme_widget_message_video js-message_video"></video>
|
||||
<a class="tgme_widget_message_date" href="https://t.me/osintdefender/99999">
|
||||
<time datetime="2026-06-02T13:00:00+00:00"></time>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
|
||||
|
||||
def test_parse_telegram_channel_html_extracts_geolocated_post():
    """The sample page parses into one post with coordinates, risk and link."""
    parsed = telegram_osint.parse_telegram_channel_html(SAMPLE_HTML, "osintdefender")
    assert len(parsed) == 1

    entry = parsed[0]
    assert "Kyiv" in entry["title"]
    assert entry["coords"] == [50.45, 30.523]
    assert entry["risk_score"] >= 3
    assert entry["link"].startswith("https://t.me/")
|
||||
|
||||
|
||||
def test_resolve_telegram_coords_handles_cyrillic():
    """A Cyrillic place mention resolves to the expected coordinate pair."""
    located = telegram_osint._resolve_telegram_coords("Обстріл біля Харкова")
    assert located == (49.993, 36.231)
|
||||
|
||||
|
||||
def test_resolve_telegram_coords_uses_metro_anchors_for_country_tags():
    """Country-level mentions resolve to the pinned anchor coordinates."""
    anchors = {
        "#Israel #Iran": (32.085, 34.781),
        "China announces policy": (39.904, 116.407),
        "#USA response": (40.712, -74.006),
    }
    for text, expected in anchors.items():
        assert telegram_osint._resolve_telegram_coords(text) == expected
|
||||
|
||||
|
||||
def test_resolve_telegram_coords_keeps_specific_cities_over_country_anchor():
    """Messages naming a specific place resolve to that place's coordinates."""
    cases = (
        ("Strike near Gaza", (31.416, 34.333)),
        ("Missile strike reported near Kyiv overnight", (50.45, 30.523)),
    )
    for text, expected in cases:
        assert telegram_osint._resolve_telegram_coords(text) == expected
|
||||
|
||||
|
||||
def test_parse_telegram_channel_html_extracts_video_media():
    """A post with a ``<video>`` tag exposes media type, media URL and embed URL."""
    parsed = telegram_osint.parse_telegram_channel_html(SAMPLE_VIDEO_HTML, "osintdefender")
    assert len(parsed) == 1

    entry = parsed[0]
    assert entry["media_type"] == "video"
    assert entry["media_url"].startswith("https://cdn4.telesco.pe/")
    assert entry["embed_url"] == "https://t.me/osintdefender/99999?embed=1"
|
||||
|
||||
|
||||
def test_telegram_media_host_allowed():
    """The two CDN hosts are accepted; an unrelated host is refused."""
    for cdn_host in ("cdn4.telesco.pe", "cdn4.telegram-cdn.org"):
        assert telegram_osint.telegram_media_host_allowed(cdn_host)
    assert not telegram_osint.telegram_media_host_allowed("evil.example.com")
|
||||
|
||||
|
||||
def test_extract_new_channel_posts_stops_at_known_links():
    """No post is returned when the sample page's only link is already known."""
    already_seen = {"https://t.me/osintdefender/12345"}
    unseen = telegram_osint._extract_new_channel_posts(
        SAMPLE_HTML, "osintdefender", already_seen
    )
    assert unseen == []
|
||||
|
||||
|
||||
def test_merge_telegram_posts_keeps_existing_and_adds_only_new():
    """An incoming post with an already-stored link is not added; a new one is.

    Also pins that the newly added post comes first in the merged list.
    """
    stored_post = {
        "id": "old",
        "link": "https://t.me/osintdefender/111",
        "published": "2026-06-01T12:00:00+00:00",
    }
    duplicate_post = {
        "id": "dup",
        "link": "https://t.me/osintdefender/111",
        "published": "2026-06-02T12:00:00+00:00",
    }
    new_post = {
        "id": "new",
        "link": "https://t.me/osintdefender/222",
        "published": "2026-06-03T12:00:00+00:00",
    }

    merged, added = telegram_osint._merge_telegram_posts(
        [stored_post], [duplicate_post, new_post]
    )

    assert added == 1
    assert len(merged) == 2
    assert merged[0]["link"] == "https://t.me/osintdefender/222"
|
||||
Vendored
+14
@@ -0,0 +1,14 @@
|
||||
# Osiris-derived components — third-party notice
|
||||
|
||||
Portions of the recon toolkit, sanctions index, SCM overlay, entity graph,
|
||||
malware feeds, and related UI were adapted from:
|
||||
|
||||
- **OSIRIS** — MIT License — Copyright (c) 2026 simplifaisoul
|
||||
https://github.com/simplifaisoul/osiris
|
||||
|
||||
Additional data attribution:
|
||||
|
||||
- **OpenSanctions** `us_ofac_sdn` dataset — CC-BY 4.0
|
||||
https://www.opensanctions.org/
|
||||
- **TeleGeography** submarine cable map data (static GeoJSON)
|
||||
- **abuse.ch** Feodo Tracker / URLhaus (malware feeds)
|
||||
Reference in New Issue
Block a user