mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-09 07:43:59 +02:00
Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 00f9e3f1fd | |||
| ffdfe0426b | |||
| 1583fd5715 | |||
| af9b3d08cc | |||
| b64b9e0962 | |||
| 76f4deb3a7 | |||
| 49d90eaf69 | |||
| 079ff7b737 | |||
| bd81a940ff | |||
| 9a0a9a116a | |||
| 80a01275ff | |||
| 3ac8442e4b | |||
| 5f322b0a79 | |||
| 363b5a49c8 | |||
| a3e5c98cd0 | |||
| 6a098e1c5f | |||
| f08781bdc9 | |||
| c3dd95f6a9 | |||
| 10a8c7b5be | |||
| f03ebbba11 | |||
| a16f22ed34 | |||
| 41e35e4da2 | |||
| be3ab5823a | |||
| ef52bd03d2 | |||
| 017f383096 | |||
| 41799f9891 | |||
| a1af9c3595 | |||
| c8a8fc56f8 | |||
| e6aba86ce1 |
+38
-2
@@ -10,6 +10,23 @@ OPENSKY_CLIENT_ID=
|
||||
OPENSKY_CLIENT_SECRET=
|
||||
AIS_API_KEY=
|
||||
|
||||
# Global Fishing Watch — fishing vessel activity events (Fishing Activity map layer).
|
||||
# Free API token from https://globalfishingwatch.org/our-apis/tokens
|
||||
# Without this the fishing_activity layer stays empty.
|
||||
# GFW_API_TOKEN=
|
||||
# Optional tuning — GFW can return 40k+ global events; the defaults cap how many are fetched so the map layer stays paintable.
|
||||
# GFW_EVENTS_PAGE_SIZE=500
|
||||
# GFW_EVENTS_MAX_PAGES=10
|
||||
# GFW_EVENTS_LOOKBACK_DAYS=7
|
||||
# GFW_EVENTS_TIMEOUT_S=90
|
||||
|
||||
# Windy Webcams global CCTV layer — free key from https://api.windy.com/webcams/docs
|
||||
# WINDY_API_KEY=
|
||||
|
||||
# Telegram OSINT map layer — scrapes public t.me/s channel previews (no bot token).
|
||||
# TELEGRAM_OSINT_ENABLED=true
|
||||
# TELEGRAM_OSINT_CHANNELS=osintdefender,insiderpaper,aljazeeraenglish,nexta_live,war_monitor
|
||||
|
||||
# Admin key to protect sensitive endpoints (settings, updates).
|
||||
# If blank, loopback/localhost requests still work for local single-host dev.
|
||||
# Remote/non-loopback admin access requires ADMIN_KEY, or ALLOW_INSECURE_ADMIN=true in debug-only setups.
|
||||
@@ -39,8 +56,8 @@ ADMIN_KEY=
|
||||
# NUFORC_MAPBOX_TOKEN=
|
||||
|
||||
# Optional startup-risk controls.
|
||||
# On Windows, external curl fallback and the Playwright LiveUAMap scraper are
|
||||
# disabled by default so blocked upstream feeds cannot interrupt start.bat.
|
||||
# On Windows, external curl fallback is off by default. LiveUAMap uses UI consent
|
||||
# when you enable Global Incidents (or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true).
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=false
|
||||
# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false
|
||||
# AIS starts by default when AIS_API_KEY is set. Set to 0/false to force-disable.
|
||||
@@ -77,6 +94,19 @@ ADMIN_KEY=
|
||||
# pip install earthengine-api
|
||||
# GEE_SERVICE_ACCOUNT_KEY=
|
||||
|
||||
# Copernicus CDSE — Sentinel-2 imagery (Settings → Imagery, or backend .env).
|
||||
# Free OAuth app at https://dataspace.copernicus.eu/
|
||||
# SENTINEL_CLIENT_ID=
|
||||
# SENTINEL_CLIENT_SECRET=
|
||||
|
||||
# Sentinel-2 road corridor freight trends (DrishX engine port — opt-in slow layer).
|
||||
# pip install -e "backend[road-corridor]" (or uv sync --extra road-corridor) — quote the extra so zsh does not glob the brackets
|
||||
# ROAD_CORRIDOR_SAT_ENABLED=false
|
||||
# ROAD_CORRIDOR_SCHEDULED_PRESETS=laredo_i35
|
||||
# ROAD_CORRIDOR_MONTHS=2
|
||||
# ROAD_CORRIDOR_MAX_FRAMES=6
|
||||
# ROAD_CORRIDOR_REFRESH_HOURS=24
|
||||
|
||||
# Override the backend URL the frontend uses (leave blank for auto-detect)
|
||||
# NEXT_PUBLIC_API_URL=http://192.168.1.50:8000
|
||||
|
||||
@@ -128,8 +158,14 @@ ADMIN_KEY=
|
||||
# MESH_DM_ROOT_TRANSPARENCY_LEDGER_READBACK_URI=backend/../ops/root_transparency_ledger.json
|
||||
|
||||
# ── Self Update ────────────────────────────────────────────────
|
||||
# Optional ZIP updater digest pin. The updater checks this first, then
|
||||
# backend/data/release_digests.json, then the release SHA256SUMS.txt asset.
|
||||
# MESH_UPDATE_SHA256=
|
||||
|
||||
# Optional strict nonce-only frontend CSP. Leave unset unless the exact build
|
||||
# has been verified to hydrate cleanly in your deployment.
|
||||
# SHADOWBROKER_STRICT_CSP=1
|
||||
|
||||
# ── Wormhole (Local Agent) ─────────────────────────────────────
|
||||
# WORMHOLE_URL=http://127.0.0.1:8787
|
||||
# WORMHOLE_TRANSPORT=direct
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
## Summary
|
||||
|
||||
<!-- What changed and why (1–3 bullets). -->
|
||||
|
||||
## Test plan
|
||||
|
||||
- [ ] <!-- How you verified the change -->
|
||||
|
||||
## Production hardening (data path / fetchers / unattended deploys only)
|
||||
|
||||
If this PR touches the data path, fetchers, or live-data APIs, walk through [docs/production-hardening.md](https://github.com/BigBodyCobain/Shadowbroker/blob/main/docs/production-hardening.md) and note any N/A items here.
|
||||
|
||||
- [ ] Checklist reviewed (or N/A — explain why)
|
||||
@@ -109,6 +109,9 @@ backend/data/*
|
||||
# release. Used ONLY on first-ever startup to bootstrap carrier_cache.json;
|
||||
# after that the cache reflects this install's own GDELT observations.
|
||||
!backend/data/carrier_seed.json
|
||||
# DrishX RF model weights (MIT — see backend/third_party/drishx/NOTICE.md)
|
||||
!backend/data/drishx/
|
||||
!backend/data/drishx/rf_model.pickle
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
@@ -198,6 +201,8 @@ graphify-out/
|
||||
# Internal docs & brainstorming (never commit)
|
||||
# ========================
|
||||
docs/*
|
||||
!docs/OUTBOUND_DATA.md
|
||||
!docs/production-hardening.md
|
||||
!docs/mesh/
|
||||
docs/mesh/*
|
||||
!docs/mesh/threat-model.md
|
||||
|
||||
+42
-12
@@ -13,13 +13,22 @@
|
||||
# 2. Reverse-mirrors main back to GitHub (only if commits land directly
|
||||
# on GitLab) so the two sources stay in sync.
|
||||
#
|
||||
# Pipelines on this repo were instant-failing for free-tier accounts until
|
||||
# identity verification was added — the May 2026 bump in this comment is
|
||||
# the marker commit that confirms runner allocation after verification.
|
||||
#
|
||||
# Auth notes:
|
||||
# - The image build/push uses $CI_JOB_TOKEN, which GitLab provides
|
||||
# automatically. No credentials need to be configured.
|
||||
# - The reverse mirror requires a GitHub personal access token stored
|
||||
# as the GitLab CI/CD variable GITHUB_MIRROR_TOKEN (Protected + Masked).
|
||||
# Scope: public_repo (or repo for private). If the variable isn't
|
||||
# set the mirror job is skipped — image builds still run.
|
||||
# - The reverse mirror authenticates to GitHub via a per-repo SSH
|
||||
# deploy key. The private half is stored as the File-type GitLab
|
||||
# CI/CD variable GITHUB_MIRROR_SSH_KEY (Protected). The matching
|
||||
# public key is added to github.com/BigBodyCobain/Shadowbroker/
|
||||
# settings/keys with write access. This is a tighter-scoped
|
||||
# replacement for a personal access token: it can ONLY push to
|
||||
# Shadowbroker, never expires, and rotating it is a one-click
|
||||
# delete on GitHub's deploy-keys page. If the variable isn't set,
|
||||
# the mirror job is skipped — image builds still run.
|
||||
|
||||
stages:
|
||||
- build
|
||||
@@ -48,7 +57,11 @@ variables:
|
||||
- docker info
|
||||
- docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
|
||||
- docker run --privileged --rm tonistiigi/binfmt --install all
|
||||
- docker buildx create --use --name multiarch --driver docker-container
|
||||
# buildx --driver docker-container can't read TLS from the env vars
|
||||
# the GitLab dind service exports. Wrap them in a docker context and
|
||||
# bind buildx to it. See https://docs.gitlab.com/ee/ci/docker/using_docker_build.html#use-docker-buildx
|
||||
- docker context create tls-env
|
||||
- docker buildx create --use --name multiarch --driver docker-container tls-env
|
||||
|
||||
# ── Backend image ────────────────────────────────────────────────────────
|
||||
build-backend:
|
||||
@@ -93,18 +106,35 @@ build-frontend:
|
||||
- .gitlab-ci.yml
|
||||
|
||||
# ── Reverse mirror to GitHub ─────────────────────────────────────────────
|
||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker.
|
||||
# Fast-forward-only — if GitLab main and GitHub main have diverged, this
|
||||
# fails loudly rather than silently overwriting either side.
|
||||
# Pushes refs/heads/main to github.com/BigBodyCobain/Shadowbroker via SSH
|
||||
# using a per-repo deploy key. Fast-forward-only by default — if GitLab
|
||||
# main and GitHub main have diverged, the push fails loudly rather than
|
||||
# silently overwriting either side.
|
||||
#
|
||||
# Only runs if GITHUB_MIRROR_TOKEN is set as a CI/CD variable. See the
|
||||
# header comment of this file for setup instructions.
|
||||
# Only runs if GITHUB_MIRROR_SSH_KEY is set as a File-type CI/CD variable.
|
||||
# See the header comment of this file for setup instructions.
|
||||
mirror-to-github:
|
||||
stage: mirror
|
||||
image: alpine:3.20
|
||||
needs: []
|
||||
before_script:
|
||||
- apk add --no-cache git openssh-client ca-certificates
|
||||
- mkdir -p ~/.ssh
|
||||
- chmod 700 ~/.ssh
|
||||
# Install the deploy key. File-type CI variable exposes the path; copy
|
||||
# to ~/.ssh/id_ed25519 with restrictive perms so ssh accepts it.
|
||||
- cp "$GITHUB_MIRROR_SSH_KEY" ~/.ssh/id_ed25519
|
||||
- chmod 600 ~/.ssh/id_ed25519
|
||||
# Pin github.com's current host keys so we never trust a man-in-the-
|
||||
# middle. Sourced from https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints
|
||||
# (GitHub replaced its RSA host key on 2023-03-24 after the previous RSA private key was exposed; the Ed25519 and ECDSA keys were unchanged).
|
||||
- |
|
||||
cat > ~/.ssh/known_hosts <<'EOF'
|
||||
github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
|
||||
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
|
||||
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=
|
||||
EOF
|
||||
- chmod 644 ~/.ssh/known_hosts
|
||||
script:
|
||||
- git config --global user.email "ci-mirror@gitlab.com"
|
||||
- git config --global user.name "GitLab CI Mirror"
|
||||
@@ -115,7 +145,7 @@ mirror-to-github:
|
||||
- cd repo
|
||||
- >
|
||||
git push
|
||||
"https://x-access-token:${GITHUB_MIRROR_TOKEN}@github.com/BigBodyCobain/Shadowbroker.git"
|
||||
"git@github.com:BigBodyCobain/Shadowbroker.git"
|
||||
"${CI_COMMIT_SHA}:refs/heads/main"
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_TOKEN
|
||||
- if: $CI_COMMIT_BRANCH == "main" && $GITHUB_MIRROR_SSH_KEY
|
||||
|
||||
+2
-1
@@ -44,7 +44,8 @@ These sources have their own terms; consult each link before redistributing.
|
||||
| aisstream.io | https://aisstream.io | Free-tier API terms (attribution required) | AIS vessel positions |
|
||||
| Global Fishing Watch | https://globalfishingwatch.org | CC BY 4.0 (for public data) | Fishing activity events |
|
||||
| Microsoft Planetary Computer | https://planetarycomputer.microsoft.com | Sentinel-2 / ESA Copernicus terms | Sentinel-2 imagery |
|
||||
| Copernicus CDSE (Sentinel Hub) | https://dataspace.copernicus.eu | ESA Copernicus open data terms | SAR + optical imagery |
|
||||
| Copernicus CDSE (Sentinel Hub) | https://dataspace.copernicus.eu | ESA Copernicus open data terms | SAR + optical imagery, optional road-corridor truck trends |
|
||||
| DrishX / Fisser et al. 2022 | https://github.com/sparkyniner/DRISH-X-Satellite-powered-freight-intelligence- | MIT (engine); research methodology attribution | Sentinel-2 motion-smear truck detection on major roads (opt-in) |
|
||||
| Shodan | https://www.shodan.io | Operator-supplied API key, Shodan ToS | Internet device search |
|
||||
| Smithsonian GVP | https://volcano.si.edu | Attribution required | Volcanoes |
|
||||
| OpenAQ | https://openaq.org | CC BY 4.0 | Air quality stations |
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
|
||||
**ShadowBroker** is a decentralized intelligence platform that aggregates real-time, multi-domain OSINT telemetry from 60+ live intelligence feeds into a single dark-ops map interface. Aircraft, ships, satellites, conflict zones, CCTV networks, GPS jamming, internet-connected devices, police scanners, mesh radio nodes, and breaking geopolitical events — all updating in real time on one screen. It also includes an obfuscated communications protocol and information exchange infrastructure.
|
||||
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 35+ toggleable data layers, including SAR ground-change detection. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, and the latest Sentinel-2 satellite photo. No user data is collected or transmitted — the dashboard runs entirely in your browser against a self-hosted backend.
|
||||
Built with **Next.js**, **MapLibre GL**, **FastAPI**, and **Python**. 40+ toggleable data layers, including SAR ground-change detection, **Telegram OSINT** (public channel previews geoparsed onto the map), a **server-side recon toolkit** (DNS, WHOIS, sanctions, BGP, IP sweep, and more), supply-chain risk overlays, and malware/C2 + CISA KEV cyber threat feeds. Multiple visual modes (DEFAULT / SATELLITE / FLIR / NVG / CRT). Right-click any point on Earth for a country dossier, head-of-state lookup, entity-graph expansion, and the latest Sentinel-2 satellite photo. ShadowBroker has no accounts, product telemetry, or analytics; the dashboard talks to your self-hosted backend. Sensitive recon and Shodan queries never hit third-party APIs from the browser — they are proxied through the backend with SSRF guards and local-operator auth. The **OpenClaw / agent command channel** exposes the same recon backends plus full telemetry search — no separate API integration required.
|
||||
|
||||
Designed for analysts, researchers, radio operators, and anyone who wants to see what the world looks like when every public signal is on the same map.
|
||||
|
||||
@@ -28,18 +28,20 @@ Designed for analysts, researchers, radio operators, and anyone who wants to see
|
||||
|
||||
A surprising amount of global telemetry is already public — aircraft ADS-B broadcasts, maritime AIS signals, satellite orbital data, earthquake sensors, mesh radio networks, police scanner feeds, environmental monitoring stations, internet infrastructure telemetry, and more. This data is scattered across dozens of tools and APIs. ShadowBroker combines all of it into a single interface.
|
||||
|
||||
The project does not introduce new surveillance capabilities — it aggregates and visualizes existing public datasets. It is fully open-source so anyone can audit exactly what data is accessed and how. No user data is collected or transmitted — everything runs locally against a self-hosted backend. No telemetry, no analytics, no accounts.
|
||||
The project does not introduce new surveillance capabilities — it aggregates and visualizes existing public datasets. It is fully open-source so anyone can audit exactly what data is accessed and how. ShadowBroker does not include product telemetry, analytics, or accounts. Operator-supplied keys stay in your local deployment, but live OSINT features necessarily make outbound requests to the public data providers you enable or query.
|
||||
|
||||
### Shodan Connector
|
||||
### Shodan & Recon (security-first)
|
||||
|
||||
ShadowBroker includes an optional Shodan connector for operator-supplied API access. Shodan results are fetched with your own `SHODAN_API_KEY`, rendered as a local investigative overlay (not merged into core feeds), and remain subject to Shodan’s terms of service.
|
||||
ShadowBroker includes an optional **Shodan connector** for operator-supplied API access (`SHODAN_API_KEY`) and a **Recon Toolkit** panel for keyless OSINT lookups. Both run **server-side only**: the browser calls your self-hosted `/api/osint/*` and `/api/tools/shodan/*` routes; outbound requests are made by the backend after SSRF validation. Recon requires **local-operator** access (same trust model as layer toggles and admin routes). Shodan results render as a separate map overlay and remain subject to Shodan’s terms of service.
|
||||
|
||||
> **Not included:** embedded live-news YouTube grids or a built-in Gemini AI analyst panel — use the **OpenClaw / agent channel** for AI-assisted analysis instead.
|
||||
|
||||
---
|
||||
|
||||
## Interesting Use Cases
|
||||
|
||||
* **Track Air Force One**, the private jets of billionaires and dictators, and every military tanker, ISR, and fighter broadcasting ADS-B. Air Force One and all of the accompanying Presidential/Vice Presidential planes are highlighted and monitored from the moment they leave the ground.
|
||||
* **Connect an AI agent as a co-analyst** through ShadowBroker's HMAC-signed agentic command channel — supports OpenClaw and any other agent that speaks the protocol (Claude, GPT, LangChain, custom). The agent gets full read/write access to all 35+ data layers, pin placement, map control, SAR ground-change, mesh networking, and alert delivery. It sees everything the operator sees and can take actions on the map in real time.
|
||||
* **Connect an AI agent as a co-analyst** through ShadowBroker's HMAC-signed agentic command channel — supports OpenClaw and any other agent that speaks the protocol (Claude, GPT, LangChain, custom). The agent gets full read/write access to all 40+ data layers, compact cross-layer search (`search_telemetry`, `search_news`), the full recon toolkit (`osint_lookup` for IP/DNS/WHOIS/sanctions/CVE/etc.), entity-graph expansion, pin placement, map control, SAR ground-change, mesh networking, and alert delivery. It sees everything the operator sees and can take actions on the map in real time.
|
||||
* **Communicate on the InfoNet testnet** — The first decentralized intelligence mesh built into an OSINT tool. Obfuscated messaging with gate personas, Dead Drop peer-to-peer exchange, and a built-in terminal CLI. No accounts, no signup. Privacy is not guaranteed yet — this is an experimental testnet — but the protocol is live and being hardened.
|
||||
* **Right-click anywhere on Earth** for a country dossier (head of state, population, languages), Wikipedia summary, and the latest Sentinel-2 satellite photo at 10m resolution
|
||||
* **Click a KiwiSDR node** and tune into live shortwave radio directly in the dashboard. Click a police scanner feed and eavesdrop in one click.
|
||||
@@ -55,6 +57,12 @@ ShadowBroker includes an optional Shodan connector for operator-supplied API acc
|
||||
* **Track trains** across the US (Amtrak) and Europe (DigiTraffic) in real time
|
||||
* **Estimate where US aircraft carriers are** using automated GDELT news scraping — no other open tool does this
|
||||
* **Search internet-connected devices worldwide** via Shodan — cameras, SCADA systems, databases — plotted as a live overlay on the map
|
||||
* **Run a full recon toolkit** from the left sidebar — IP geolocation, DNS, RDAP/WHOIS, certificate transparency, BGP/ASN, OFAC sanctions search, CVE lookup, Tor/OTX threat checks, and subnet sweeps (InternetDB proxied server-side)
|
||||
* **Expand an entity graph** when you select an aircraft, vessel, company, or IP — Wikidata + OFAC + live store cross-links rendered in the Entity Graph panel
|
||||
* **Monitor supply-chain risk** — Tier 1/2 semiconductor and battery fabs scored against nearby earthquakes, wildfires, and conflict events (SCM panel)
|
||||
* **Toggle malware C2 hotspots** — abuse.ch Feodo Tracker + URLhaus feeds mapped by country (opt-in layer)
|
||||
* **Monitor Telegram OSINT channels** — public `t.me/s` war/conflict feeds (OSINTdefender, NEXTA, etc.) scraped hourly, risk-scored, geoparsed to metro anchors, and plotted as clickable map pins with inline media
|
||||
* **Overlay global submarine cables** — static TeleGeography-derived cable routes (opt-in layer)
|
||||
|
||||
|
||||
---
|
||||
@@ -113,6 +121,20 @@ That's it. `pull` grabs the latest images, `up -d` restarts the containers.
|
||||
>
|
||||
> Podman users should run the equivalent provider command, for example `podman-compose pull` and `podman-compose up -d`, or use `./compose.sh --engine podman pull` and `./compose.sh --engine podman up -d` from a bash-compatible shell.
|
||||
|
||||
### Update Integrity
|
||||
|
||||
Docker updates are delivered through signed container registries. The legacy ZIP self-updater verifies release archives through this chain, in order:
|
||||
|
||||
* `MESH_UPDATE_SHA256` when an operator pins a digest explicitly.
|
||||
* `backend/data/release_digests.json` for bundled release pins.
|
||||
* The release `SHA256SUMS.txt` asset on GitHub when a bundled pin is not present.
|
||||
|
||||
Release maintainers should run `python backend/scripts/release_helper.py hash <ShadowBroker_vX.Y.Z.zip>` before publishing, then publish `SHA256SUMS.txt` and update `backend/data/release_digests.json` when shipping a ZIP updater target. The updater keeps the operator override path intact instead of failing closed on missing bundled digests, so existing installs do not get stranded by a release-process mistake.
|
||||
|
||||
### CSP Hardening
|
||||
|
||||
The production frontend ships with a hydration-compatible CSP and a strict nonce-only CSP in `Content-Security-Policy-Report-Only`. Set `SHADOWBROKER_STRICT_CSP=1` only after verifying the exact build hydrates correctly in your deployment. Runtime Google Fonts are not required; the bundled Next font pipeline serves the dashboard font from the app build.
|
||||
|
||||
### ⚠️ **Stuck on the old version?**
|
||||
|
||||
**If `git pull` fails or `docker compose up` keeps building from source instead of pulling images**, your clone predates a March 2026 repository migration that rewrote commit history. A normal `git pull` cannot fix this. Run:
|
||||
@@ -219,17 +241,34 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
**Privacy primitive runway (NEW in v0.9.7):**
|
||||
|
||||
* **Function Keys — Anonymous Citizenship Proof** — A citizen proves "I am an Infonet citizen" without revealing their Infonet identity. 5 of 6 pieces shipped: nullifiers, challenge-response, two-phase commit receipts, enumerated denial codes, batched settlement. Issuance via blind signatures waits on a primitive decision (RSA blind sigs vs BBS+ vs U-Prove vs Idemix).
|
||||
* **Function Keys — Anonymous Credential Scaffolding** — The plumbing is in place for nullifiers, challenge-response, two-phase commit receipts, enumerated denial codes, and batched settlement. Today's challenge-response is an HMAC-based placeholder for integration testing, not a production anonymous or zero-knowledge citizenship proof. True unlinkable issuance still waits on a primitive decision (RSA blind sigs vs BBS+ vs U-Prove vs Idemix).
|
||||
* **Locked Protocol Contracts** — Stable interfaces in `services/infonet/privacy/contracts.py` for ring signatures, stealth addresses, Pedersen commitments, range proofs, and DEX matching. The `privacy-core` Rust crate is the integration target — no caller of the privacy module needs to know which scheme is active.
|
||||
* **Sprint 11+ Path** — When the cryptographic scheme is chosen, primitives wire into the locked Protocols without API churn.
|
||||
|
||||
> **Experimental Testnet — No Privacy Guarantee:** InfoNet messages are obfuscated but NOT end-to-end encrypted. The Mesh network (Meshtastic/APRS) is NOT private — radio transmissions are inherently public. The privacy primitive contracts are scaffolded but not yet wired. Do not send anything sensitive on any channel. Treat all channels as open and public for now.
|
||||
|
||||
### 🔍 Shodan Device Search (NEW in v0.9.6)
|
||||
### 🔍 Recon Toolkit & Shodan (OSIRIS-derived, security-first)
|
||||
|
||||
* **Internet Device Search** — Query Shodan directly from ShadowBroker. Search by keyword, CVE, port, or service — results plotted as a live overlay on the map
|
||||
Adapted from the [OSIRIS](https://github.com/simplifaisoul/osiris) recon stack (MIT) with ShadowBroker’s proxy model. Attribution: `backend/third_party/osiris/NOTICE.md`.
|
||||
|
||||
**Recon Toolkit** (left sidebar — local operator only):
|
||||
|
||||
* **IP / DNS / WHOIS** — ip-api.com geolocation, Google DNS-over-HTTPS, RDAP registrant data with optional HTTP security header scoring
|
||||
* **Certificates & BGP** — crt.sh subdomain discovery, bgpview.io ASN/prefix lookups
|
||||
* **Threat intel** — AlienVault OTX pulses, Tor exit-node checks, optional per-IP/domain reputation
|
||||
* **Sanctions** — OpenSanctions `us_ofac_sdn` index (CC-BY); cross-checks on WHOIS entities and IP ISP/org strings
|
||||
* **CVE / MAC / GitHub / leaks** — MITRE CVE API, MAC vendor lookup, GitHub profile recon, public breach checks
|
||||
* **IP sweep** — `/api/osint/sweep/scan` geolocates a target /24–/32 and proxies Shodan InternetDB host discovery server-side (browser never contacts InternetDB directly)
|
||||
* **SSRF guard** — Private, loopback, link-local, and metadata hostnames are blocked before any user-supplied fetch
|
||||
|
||||
**Entity graph** — Select any map entity to open the Entity Graph panel (`GET /api/entity/expand`). Resolves aircraft, vessels, companies, persons, IPs, and countries into a node/link graph (Wikidata SPARQL + OFAC + in-memory flight/ship store).
|
||||
|
||||
**OpenClaw / agent access** — The same recon backends are available on the HMAC command channel (no browser local-operator gate): `osint_lookup` (passive IP/DNS/WHOIS/certs/BGP/sanctions/CVE/MAC/GitHub/leaks/threats), `entity_expand` (relationship graph), and `osint_sweep` (active subnet scan — **full** access tier only). Call `osint_tools` to list supported lookup types. Skill package: `openclaw-skills/shadowbroker/` (`SKILL.md` + `sb_query.py`).
|
||||
|
||||
**Shodan overlay** (unchanged):
|
||||
|
||||
* **Internet Device Search** — Query Shodan with your own API key; results plotted as a live overlay
|
||||
* **Configurable Markers** — Shape, color, and size customization for Shodan results
|
||||
* **Operator-Supplied API** — Uses your own `SHODAN_API_KEY`; results rendered as a local investigative overlay
|
||||
|
||||
### 🛩️ Aviation Tracking
|
||||
|
||||
@@ -317,11 +356,12 @@ The first decentralized intelligence communication and governance layer built di
|
||||
|
||||
### 📷 Surveillance
|
||||
|
||||
* **CCTV Mesh** — 11,000+ live traffic cameras from 13 sources across 6 countries:
|
||||
* **CCTV Mesh** — 22,000+ live traffic cameras from 21 ingestors across 10 countries (US, UK, Canada, Australia, Austria, Spain, Singapore, Netherlands when NDW feed is up, plus OSM):
|
||||
* 🇬🇧 Transport for London JamCams
|
||||
* 🇺🇸 NYC DOT, Austin TX (TxDOT)
|
||||
* 🇺🇸 California (12 Caltrans districts), Washington State (WSDOT), Georgia DOT, Illinois DOT, Michigan DOT
|
||||
* 🇪🇸 Spain DGT National (20 cities), Madrid City (357 cameras via KML)
|
||||
* 🇦🇹 Austria ASFINAG motorway webcams
|
||||
* 🇸🇬 Singapore LTA
|
||||
* 🌍 Windy Webcams
|
||||
* **Feed Rendering** — Automatic detection & rendering of video, MJPEG, HLS, embed, satellite tile, and image feeds
|
||||
@@ -342,6 +382,12 @@ The first decentralized intelligence communication and governance layer built di
|
||||
* **Data Center Mapping** — 2,000+ global data centers plotted from a curated dataset. Clustered purple markers with server-rack icons. Click for operator, location, and automatic internet outage cross-referencing by country.
|
||||
* **Military Bases** — Global military installation and missile facility database (NEW)
|
||||
* **Power Plants** — 35,000+ global power plants from the WRI database (NEW)
|
||||
* **Submarine Cables** — Global undersea cable routes from static TeleGeography-derived GeoJSON (`frontend/public/data/submarine-cables.json`). Opt-in line overlay.
|
||||
* **Malware C2 Layer** — Botnet C2 servers (Feodo Tracker) and recent malware URLs (URLhaus) from abuse.ch, refreshed on the slow tier when the layer is enabled.
|
||||
* **SCM Supplier Risk** — Tier 1/2 fabs and battery plants (TSMC, Samsung, CATL, etc.) cross-referenced against earthquakes, FIRMS fires, and GDELT conflict proximity. Alerts in the SCM panel; optional map layer.
|
||||
* **Cyber Threats Feed** — Recent CISA Known Exploited Vulnerabilities (KEV) entries exposed via `/api/cyber-threats` and the layer toggle.
|
||||
* **Country Risk Index** — Static geopolitical risk scores with USGS earthquake enrichment via `/api/country-risk`.
|
||||
* **Telegram OSINT** — Public channel web previews (`t.me/s/*`) from configurable war/OSINT feeds. Hourly incremental merge (no redundant re-scrape), keyword risk scoring, Cyrillic/Arabic place aliases, metro-anchor geocoding (separate from news centroids), inline photo/video via `/api/telegram/media` proxy. Layer key: `telegram_osint`.
|
||||
|
||||
### 🌐 Additional Layers & Tools
|
||||
|
||||
@@ -367,7 +413,9 @@ v0.9.7 turns ShadowBroker from a dashboard a human watches into an intelligence
|
||||
|
||||
**Capabilities:**
|
||||
|
||||
* **Full Telemetry Access** — The agent queries all 35+ data layers: flights, ships, satellites, SIGINT, conflict events, earthquakes, fires, wastewater, prediction markets, and more. Fast and slow tier endpoints return enriched data with geographic coordinates, timestamps, and source attribution.
|
||||
* **Full Telemetry Access** — The agent queries all 40+ data layers: flights, ships, satellites, SIGINT, conflict events, earthquakes, fires, wastewater, **Telegram OSINT**, malware/C2, **CISA KEV cyber threats**, SCM overlays, fishing activity (GFW), prediction markets, and more. Fast and slow tier endpoints return enriched data with geographic coordinates, timestamps, and source attribution.
|
||||
* **Compact Search (preferred over full dumps)** — `get_summary` → `get_layer_slice` with per-layer `since_layer_versions` (SSE `layer_changed` push tells the agent exactly which layers updated). `search_telemetry` is the Google-style cross-layer keyword index. `search_news` covers news, GDELT, CrowdThreat, LiveUAMap, frontlines, and Telegram posts. `entities_near`, `brief_area`, `find_flights`/`find_ships`/`find_entity`, and `correlate_entity` answer targeted questions without multi-megabyte pulls.
|
||||
* **Recon Toolkit on the Channel** — `osint_lookup` runs the same SSRF-guarded backends as the Recon panel (`ip`, `dns`, `whois`, `certs`, `bgp`, `sanctions`, `cve`, `mac`, `github`, `leaks`, `threats`, `sweep_init`). `entity_expand` builds Wikidata + OFAC relationship graphs. `osint_sweep` runs Shodan InternetDB subnet discovery (**full** tier). Layer aliases: `telegram`, `malware`/`botnet`, `cyber`/`cisa`/`kev`, `scm`/`suppliers`, `gfw`/`fishing`.
|
||||
* **AI Intel Pins** — Place color-coded investigation markers directly on the operator's map. 14 pin categories (threat, anomaly, military, maritime, aviation, SIGINT, infrastructure, etc.) with confidence scores, TTL expiry, source URLs, and batch placement up to 100 pins at once.
|
||||
* **Map Control** — Fly the operator's map view to any coordinate, trigger satellite imagery lookups, and open region dossiers. The agent can direct the operator's attention to specific locations in real time.
|
||||
* **SAR Ground-Change** — Query SAR anomaly feeds, inspect pin details, manage AOIs, and fly the map to watch areas. The agent can monitor for ground deformation, flood extent, or damage and promote anomalies to pins.
|
||||
@@ -380,7 +428,7 @@ v0.9.7 turns ShadowBroker from a dashboard a human watches into an intelligence
|
||||
* **Intelligence Reports** — Generate structured reports with summary stats, top military flights, correlations, earthquake activity, SIGINT counts, and pin inventories.
|
||||
* **Auditable** — Every channel call is logged; the operator can introspect what the agent has done.
|
||||
|
||||
**Connect an agent:** Open the AI Intel panel in the left sidebar, click **Connect Agent**, and copy the HMAC secret. From there, point any compatible agent at the channel — for OpenClaw, import `ShadowBrokerClient` from the OpenClaw skill package; for any other agent, use the same HMAC contract documented above (timestamp + nonce + body digest, tier-gated). The channel is the protocol, not the agent.
|
||||
**Connect an agent:** Open the AI Intel panel in the left sidebar, click **Connect Agent**, and copy the HMAC secret. From there, point any compatible agent at the channel — for OpenClaw, import `ShadowBrokerClient` from `openclaw-skills/shadowbroker/sb_query.py` (see `SKILL.md` for examples); for any other agent, use the same HMAC contract documented above (timestamp + nonce + body digest, tier-gated). Discovery: `GET /api/ai/tools` and `GET /api/ai/capabilities`. The channel is the protocol, not the agent.
|
||||
|
||||
### ⏱️ Time Machine — Snapshot Playback (NEW in v0.9.7)
|
||||
|
||||
@@ -529,9 +577,20 @@ ShadowBroker v0.9.7 is composed of three vertically-stacked planes — the **Ope
|
||||
| [GDELT Project](https://www.gdeltproject.org) | Global conflict events | ~6h | No |
|
||||
| [DeepState Map](https://deepstatemap.live) | Ukraine frontline | ~30min | No |
|
||||
| [Shodan](https://www.shodan.io) | Internet-connected device search | On-demand | **Yes** |
|
||||
| [OpenSanctions](https://www.opensanctions.org) | OFAC SDN sanctions index (recon + entity graph) | 24h cache | No |
|
||||
| [abuse.ch Feodo + URLhaus](https://abuse.ch) | Malware C2 / distribution URLs | ~5min (opt-in layer) | No |
|
||||
| [CISA KEV](https://www.cisa.gov/known-exploited-vulnerabilities-catalog) | Known exploited CVEs | ~5min (opt-in layer) | No |
|
||||
| [ip-api.com](https://ip-api.com) | IP geolocation (recon, entity graph) | On-demand | No |
|
||||
| [Google Public DNS](https://dns.google) | DNS-over-HTTPS lookups (recon) | On-demand | No |
|
||||
| [RDAP.org](https://rdap.org) | Domain registration data (recon) | On-demand | No |
|
||||
| [crt.sh](https://crt.sh) | Certificate transparency (recon) | On-demand | No |
|
||||
| [bgpview.io](https://bgpview.io) | BGP/ASN routing (recon) | On-demand | No |
|
||||
| TeleGeography (static) | Submarine cable routes | Static | No |
|
||||
| [ASFINAG](https://www.asfinag.at) | Austria motorway webcams | ~10min | No |
|
||||
| [Amtrak](https://www.amtrak.com) | US train positions | ~60s | No |
|
||||
| [DigiTraffic](https://www.digitraffic.fi) | European rail positions | ~60s | No |
|
||||
| [Global Fishing Watch](https://globalfishingwatch.org) | Fishing vessel activity events | ~10min | No |
|
||||
| [Global Fishing Watch](https://globalfishingwatch.org) | Fishing vessel activity events | ~1hr | **Yes** (`GFW_API_TOKEN`) |
|
||||
| [Telegram public previews](https://t.me/s) | War/OSINT channel posts (`telegram_osint`) | ~1hr | No (optional `TELEGRAM_OSINT_CHANNELS`) |
|
||||
| Transport for London, NYC DOT, TxDOT | CCTV cameras (UK, US) | ~10min | No |
|
||||
| Caltrans, WSDOT, GDOT, IDOT, MDOT | CCTV cameras (5 US states) | ~10min | No |
|
||||
| Spain DGT, Madrid City | CCTV cameras (Spain) | ~10min | No |
|
||||
@@ -563,6 +622,8 @@ ShadowBroker v0.9.7 is composed of three vertically-stacked planes — the **Ope
|
||||
| [OSM Nominatim](https://nominatim.openstreetmap.org) | Place name geocoding (LOCATE bar) | On-demand | No |
|
||||
| [CARTO Basemaps](https://carto.com) | Dark map tiles | Continuous | No |
|
||||
|
||||
**Outbound privacy & audit (#348–#366):** Each self-hosted install uses its own backend IP and per-install User-Agent handle. See [docs/OUTBOUND_DATA.md](docs/OUTBOUND_DATA.md) for what contacts third parties, opt-in/env controls, and accepted tradeoffs (CCTV Referer, basemap CDN, LiveUAMap, etc.).
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Getting Started
|
||||
@@ -584,9 +645,16 @@ Open `http://localhost:3000` to view the dashboard.
|
||||
> **Deploying publicly or on a LAN?** No configuration needed for most setups.
|
||||
> The frontend proxies all API calls through the Next.js server to `BACKEND_URL`,
|
||||
> which defaults to `http://backend:8000` (Docker internal networking).
|
||||
> Host port `8000` is only published for local API/debug access. If it conflicts
|
||||
> with another service, set `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL`
|
||||
> as `http://backend:8000` because that is the Docker-internal port.
|
||||
> Host port `8000` is only published for local API/debug access (`127.0.0.1:8000`
|
||||
> in `docker-compose.yml`). If it conflicts with another service, set
|
||||
> `BACKEND_PORT=8001` in `.env`; leave `BACKEND_URL` as `http://backend:8000`
|
||||
> because that is the Docker-internal port.
|
||||
>
|
||||
> **Running the backend outside Docker** (`cd backend && python main.py`):
|
||||
> the dev server binds **loopback only** (`127.0.0.1:8000`) so other machines on
|
||||
> your LAN cannot hit admin/local-trust routes with an empty `ADMIN_KEY`. Set
|
||||
> `SHADOWBROKER_DEV_BIND_ALL=true` in `.env` only when you deliberately need
|
||||
> `0.0.0.0` and use a strong `ADMIN_KEY` for any non-local callers.
|
||||
> The backend memory cap is controlled by `BACKEND_MEMORY_LIMIT` and defaults
|
||||
> to `4G`. If Docker reports OOM events, the backend will restart and slow
|
||||
> layers can look empty until they repopulate.
|
||||
@@ -798,7 +866,7 @@ AIS-catcher decodes VHF radio signals on 161.975 MHz and 162.025 MHz and POSTs d
|
||||
|
||||
## 🎛️ Data Layers
|
||||
|
||||
All 37 layers are independently toggleable from the left panel:
|
||||
All 41 layers are independently toggleable from the left panel:
|
||||
|
||||
| Layer | Default | Description |
|
||||
|---|---|---|
|
||||
@@ -840,6 +908,24 @@ All 37 layers are independently toggleable from the left panel:
|
||||
| VIIRS Nightlights | ❌ OFF | Night-time light change detection |
|
||||
| Power Plants | ❌ OFF | 35,000+ global power plants |
|
||||
| Shodan Overlay | ❌ OFF | Internet device search results |
|
||||
| Road Freight Trends | ❌ OFF | Sentinel-2 truck-motion trends on major highways (Analyze Here) |
|
||||
| Submarine Cables | ❌ OFF | Global undersea cable routes (static GeoJSON) |
|
||||
| Malware C2 | ❌ OFF | abuse.ch Feodo + URLhaus threat points |
|
||||
| SCM Suppliers | ❌ OFF | Tier 1/2 supply-chain risk markers + panel alerts |
|
||||
| Cyber Threats | ❌ OFF | Recent CISA KEV entries (stats in slow-tier payload) |
|
||||
| Telegram OSINT | ✅ ON | Public war/OSINT Telegram channels — hourly scrape, geoparsed pins |
|
||||
| SAR | ✅ ON | Synthetic aperture radar catalog + anomaly alerts |
|
||||
|
||||
**Recon & entity tools** (not map layers — left sidebar / selection):
|
||||
|
||||
| Tool | Dashboard access | OpenClaw command | Description |
|
||||
|---|---|---|---|
|
||||
| Recon Toolkit | Local operator (`/api/osint/*`) | `osint_lookup`, `osint_sweep`† | IP, DNS, WHOIS, certs, BGP, sanctions, CVE, MAC, GitHub, leaks, threats, subnet sweep |
|
||||
| Entity Graph | Local operator (`/api/entity/expand`) | `entity_expand` | Wikidata + OFAC + live-store relationship graph |
|
||||
| SCM Risk panel | Local operator (`/api/scm-suppliers`) | `get_layer_slice(["scm_suppliers"])` | Supplier threat rollup + map markers |
|
||||
| Tool discovery | — | `osint_tools` | Lists recon lookup types and entity-expand schemas |
|
||||
|
||||
† `osint_sweep` (active InternetDB scan) requires `OPENCLAW_ACCESS_TIER=full`.
|
||||
|
||||
---
|
||||
|
||||
@@ -863,6 +949,7 @@ The platform is optimized for handling massive real-time datasets:
|
||||
|
||||
```
|
||||
Shadowbroker/
|
||||
├── openclaw-skills/shadowbroker/ # OpenClaw skill — SKILL.md, sb_query.py client, alerts/monitor helpers
|
||||
├── backend/
|
||||
│ ├── main.py # FastAPI app, middleware, API routes (~4,000 lines)
|
||||
│ ├── cctv.db # SQLite CCTV camera database (auto-generated)
|
||||
@@ -872,7 +959,18 @@ Shadowbroker/
|
||||
│ │ ├── data_fetcher.py # Core scheduler — orchestrates all data sources
|
||||
│ │ ├── ais_stream.py # AIS WebSocket client (25K+ vessels)
|
||||
│ │ ├── carrier_tracker.py # OSINT carrier position estimator (GDELT news scraping)
|
||||
│ │ ├── cctv_pipeline.py # 13-source CCTV camera ingestion pipeline
|
||||
│ │ ├── cctv_pipeline.py # 14-source CCTV camera ingestion pipeline
|
||||
│ │ ├── ssrf_guard.py # SSRF validation for operator recon fetches
|
||||
│ │ ├── sanctions/ofac.py # OpenSanctions OFAC SDN index
|
||||
│ │ ├── osint/lookups.py # Server-side recon lookups (Osiris port)
|
||||
│ │ ├── osint/openclaw_recon.py # OpenClaw dispatch for recon + entity_expand
|
||||
│ │ ├── osint_intel/resolve.py # Entity graph resolver (Wikidata + OFAC)
|
||||
│ │ ├── scm/suppliers.py # Supply-chain risk overlay
|
||||
│ │ ├── intel_feeds/ # Country risk index helpers
|
||||
│ │ ├── fetchers/malware.py # abuse.ch Feodo + URLhaus
|
||||
│ │ ├── fetchers/cyber_status.py # CISA KEV feed
|
||||
│ │ ├── fetchers/telegram_osint.py # Public Telegram channel scrape + geoparse
|
||||
│ │ ├── third_party/osiris/ # MIT attribution for Osiris-derived code
|
||||
│ │ ├── geopolitics.py # GDELT + Ukraine frontline + air alerts
|
||||
│ │ ├── region_dossier.py # Right-click country/city intelligence
|
||||
│ │ ├── radio_intercept.py # Police scanner feeds + OpenMHZ
|
||||
@@ -910,7 +1008,14 @@ Shadowbroker/
|
||||
│ │ ├── mesh_reputation.py # Node reputation scoring
|
||||
│ │ ├── mesh_oracle.py # Oracle consensus protocol
|
||||
│ │ └── mesh_secure_storage.py # Secure credential storage
|
||||
│ ├── routers/
|
||||
│ │ ├── osint.py # /api/osint/* recon routes (local operator)
|
||||
│ │ ├── entity_graph.py # /api/entity/expand
|
||||
│ │ ├── scm.py # /api/scm-suppliers
|
||||
│ │ └── intel_feeds.py # /api/malware, /api/cyber-threats, /api/telegram-feed, /api/country-risk
|
||||
├── frontend/
|
||||
│ ├── public/data/
|
||||
│ │ └── submarine-cables.json # Static undersea cable GeoJSON
|
||||
│ ├── src/
|
||||
│ │ ├── app/
|
||||
│ │ │ └── page.tsx # Main dashboard — state, polling, layout
|
||||
@@ -919,7 +1024,12 @@ Shadowbroker/
|
||||
│ │ ├── MeshChat.tsx # InfoNet / Mesh / Dead Drop chat panel
|
||||
│ │ ├── MeshTerminal.tsx # Draggable CLI terminal
|
||||
│ │ ├── NewsFeed.tsx # SIGINT feed + entity detail panels
|
||||
│ │ ├── WorldviewLeftPanel.tsx # Data layer toggles (35+ layers)
|
||||
│ │ ├── WorldviewLeftPanel.tsx # Data layer toggles (40+ layers)
|
||||
│ │ ├── ShodanPanel.tsx # Shodan device search overlay
|
||||
│ │ ├── ReconPanel.tsx # Server-side OSINT recon toolkit
|
||||
│ │ ├── ScmPanel.tsx # Supply-chain risk command panel
|
||||
│ │ ├── EntityGraphPanel.tsx # Entity graph on map selection
|
||||
│ │ ├── MaplibreViewer/popups/TelegramOsintPopup.tsx # Threat-intercept styled Telegram pin popups
|
||||
│ │ ├── WorldviewRightPanel.tsx # Search + filter sidebar
|
||||
│ │ ├── AdvancedFilterModal.tsx # Airport/country/owner filtering
|
||||
│ │ ├── MapLegend.tsx # Dynamic legend with all icons
|
||||
@@ -956,6 +1066,9 @@ MESH_SAR_EARTHDATA_TOKEN= # NASA Earthdata token (paired wit
|
||||
MESH_SAR_COPERNICUS_USER= # Copernicus Data Space user (SAR Mode B — EGMS / EMS)
|
||||
MESH_SAR_COPERNICUS_TOKEN= # Copernicus token (paired with user above)
|
||||
OPENCLAW_ACCESS_TIER=restricted # OpenClaw agent tier: "restricted" (read-only) or "full"
|
||||
GFW_API_TOKEN=your_gfw_token # Global Fishing Watch — fishing_activity layer (Settings → Maritime)
|
||||
TELEGRAM_OSINT_ENABLED=true # Telegram OSINT layer (default on)
|
||||
TELEGRAM_OSINT_CHANNELS=osintdefender,... # Comma-separated public channel slugs (see .env.example)
|
||||
|
||||
# Private-lane privacy-core pinning (required when Arti or RNS is enabled)
|
||||
PRIVACY_CORE_MIN_VERSION=0.1.0
|
||||
|
||||
+56
-13
@@ -18,6 +18,15 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# AISHUB_USERNAME=
|
||||
# AISHUB_POLL_INTERVAL_MINUTES=20
|
||||
|
||||
# `python main.py` (uvicorn reload) binds 127.0.0.1:8000 by default so LAN clients
|
||||
# cannot reach a dev server with empty ADMIN_KEY (#375). Set true only when you
|
||||
# intentionally need 0.0.0.0 and understand the local-trust implications.
|
||||
# SHADOWBROKER_DEV_BIND_ALL=false
|
||||
#
|
||||
# Thread pool for GDELT, LiveUAMap, CCTV ingest, and slow-tier refresh batches.
|
||||
# Keeps heavy jobs from starving fast flight/ship workers (default 2).
|
||||
# SHADOWBROKER_HEAVY_FETCH_WORKERS=2
|
||||
|
||||
# Override allowed CORS origins (comma-separated). Defaults to localhost + LAN auto-detect.
|
||||
# CORS_ORIGINS=http://192.168.1.50:3000,https://my-domain.com
|
||||
|
||||
@@ -31,11 +40,9 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Requires MESH_DEBUG_MODE=true; do not enable this for ordinary use.
|
||||
# ALLOW_INSECURE_ADMIN=false
|
||||
|
||||
# Per-install operator handle. Round 7a: every outbound third-party API
|
||||
# call (Wikipedia, Wikidata, Nominatim, GDELT, OpenMHz, Broadcastify,
|
||||
# weather.gov, NUFORC, etc.) includes this handle in the User-Agent so
|
||||
# upstreams can rate-limit / contact the specific install instead of
|
||||
# treating every Shadowbroker user as one entity.
|
||||
# Per-install operator handle. Round 7a: outbound third-party API calls send
|
||||
# this handle as the User-Agent (e.g. operator-7f3a92), not a shared app name,
|
||||
# so upstreams rate-limit one install instead of blocking every user.
|
||||
#
|
||||
# Default empty -> a stable pseudonymous handle (e.g. "operator-7f3a92") is
|
||||
# auto-generated on first run and persisted to backend/data/operator_handle.json.
|
||||
@@ -43,10 +50,8 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# set it here. Special characters are sanitized to dashes.
|
||||
# OPERATOR_HANDLE=
|
||||
|
||||
# Default outbound User-Agent for all third-party HTTP fetchers. Operators
|
||||
# who run a public relay and want a completely custom UA can set this; it
|
||||
# bypasses the per-operator helper entirely. Most installs should leave it
|
||||
# unset and use OPERATOR_HANDLE instead.
|
||||
# Full User-Agent override (replaces the operator handle entirely). Rare;
|
||||
# most installs should use OPERATOR_HANDLE only.
|
||||
# SHADOWBROKER_USER_AGENT=
|
||||
|
||||
# Nominatim-specific User-Agent override (OSM usage policy). Leave unset to
|
||||
@@ -66,20 +71,48 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# FIMI_ENABLED=false
|
||||
#
|
||||
# Polymarket + Kalshi — US political/election prediction markets.
|
||||
# Default off; enable from Global Threat Intercept (MKT toggle) or set true here.
|
||||
# PREDICTION_MARKETS_ENABLED=false
|
||||
# When enabled, polls use a jittered schedule (not the fixed 5-minute slow tier):
|
||||
# PREDICTION_MARKETS_INTERVAL_MINUTES=7
|
||||
# PREDICTION_MARKETS_SCHEDULER_JITTER_S=240
|
||||
# PREDICTION_MARKETS_INITIAL_DELAY_MAX_S=180
|
||||
# PREDICTION_MARKETS_PRE_FETCH_JITTER_S=90
|
||||
# PREDICTION_MARKETS_PROVIDER_GAP_JITTER_S=45
|
||||
# MESH_POLYMARKET_PAGE_DELAY_JITTER_S=0.08
|
||||
# MESH_KALSHI_PAGE_DELAY_JITTER_S=0.2
|
||||
#
|
||||
# Finnhub (with yfinance fallback) — financial market data.
|
||||
# Set FINNHUB_API_KEY to enable Finnhub, or set FINANCIAL_ENABLED=true to allow
|
||||
# the unauthenticated yfinance fallback to call Yahoo Finance.
|
||||
# FINANCIAL_ENABLED=false
|
||||
#
|
||||
# NUFORC UAP sightings — huggingface.co dataset download.
|
||||
# NUFORC UAP map layer — live scrape from nuforc.org (rolling window, default 60 days).
|
||||
# Refreshed weekly (Mon 12:00 UTC); cache reused for up to 7 days between runs.
|
||||
# NUFORC_RECENT_DAYS=60
|
||||
# NUFORC_CACHE_TTL_HOURS=168
|
||||
# On Windows, live scrape uses Python requests by default; optional:
|
||||
# SHADOWBROKER_ENABLE_WINDOWS_CURL_FALLBACK=true
|
||||
# NUFORC enrichment index (HF dataset) is separate — opt-in only:
|
||||
# NUFORC_ENABLED=false
|
||||
#
|
||||
# News RSS aggregator — defaults ON. Set to "false" to disable all
|
||||
# configured news feeds (kill switch for the news layer).
|
||||
# NEWS_ENABLED=true
|
||||
|
||||
# Global Fishing Watch — fishing vessel activity events (Fishing Activity map layer).
|
||||
# Free API token from https://globalfishingwatch.org/our-apis/tokens
|
||||
# Without this the fishing_activity layer stays empty.
|
||||
# GFW_API_TOKEN=
|
||||
# Optional tuning — GFW can return 40k+ global events; defaults cap fetch for map paint.
|
||||
# GFW_EVENTS_PAGE_SIZE=500
|
||||
# GFW_EVENTS_MAX_PAGES=10
|
||||
# GFW_EVENTS_LOOKBACK_DAYS=7
|
||||
# GFW_EVENTS_TIMEOUT_S=90
|
||||
|
||||
# Windy Webcams global CCTV layer — free key from https://api.windy.com/webcams/docs
|
||||
# WINDY_API_KEY=
|
||||
|
||||
# LTA Singapore traffic cameras — leave blank to skip this data source.
|
||||
# LTA_ACCOUNT_KEY=
|
||||
|
||||
@@ -87,6 +120,12 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# Free MAP_KEY from https://firms.modaps.eosdis.nasa.gov/map/#d:24hrs;@0.0,0.0,3.0z
|
||||
# FIRMS_MAP_KEY=
|
||||
|
||||
# Ukraine frontline mirror (GitHub). Default follows cyterat/deepstate-map-data@main.
|
||||
# Pin an immutable commit SHA so ingest cannot silently change if main is force-pushed (#362).
|
||||
# Example (verify on GitHub before use): main @ b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_COMMIT=b479954e94696bc5622c7818fd20a64a699f4fe8
|
||||
# DEEPSTATE_MIRROR_REPO=cyterat/deepstate-map-data
|
||||
|
||||
# Ukraine air raid alerts from alerts.in.ua — free token from https://alerts.in.ua/
|
||||
# ALERTS_IN_UA_TOKEN=
|
||||
|
||||
@@ -116,12 +155,16 @@ AIS_API_KEY= # https://aisstream.io/ — free tier WebSocket key
|
||||
# can identify per-install traffic instead of aggregated "ShadowBroker" hits.
|
||||
# Leave blank to send a generic UA. If you set MESHTASTIC_OPERATOR_CALLSIGN,
|
||||
# it can be included in outbound headers to meshtastic.org so they
|
||||
# can rate-limit per-operator. Set MESHTASTIC_SEND_CALLSIGN_HEADER=false to
|
||||
# suppress the callsign while still using it locally (e.g. for APRS).
|
||||
# can rate-limit per-operator. Callsign is NOT sent upstream unless you opt in.
|
||||
# MESHTASTIC_OPERATOR_CALLSIGN=
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=true
|
||||
# MESHTASTIC_SEND_CALLSIGN_HEADER=false
|
||||
# MESH_MQTT_PSK= # hex-encoded, empty = default LongFast key
|
||||
|
||||
# LiveUAMap Playwright scraper (#348). Linux/macOS: on by default when Global
|
||||
# Incidents layer is active. Windows: off until the operator enables Global
|
||||
# Incidents in the UI (consent dialog) or sets SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=true.
|
||||
# SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=false forces off on all platforms.
|
||||
|
||||
# ── Mesh / Reticulum (RNS) ─────────────────────────────────────
|
||||
# Full-node / participant-node posture for public Infonet sync.
|
||||
# MESH_NODE_MODE=participant # participant | relay | perimeter
|
||||
|
||||
+1
-1
@@ -45,7 +45,7 @@ COPY uv.lock /workspace/uv.lock
|
||||
COPY backend/pyproject.toml /workspace/backend/pyproject.toml
|
||||
|
||||
# Install Python dependencies using the lockfile
|
||||
RUN cd /workspace/backend && uv sync --frozen --no-dev \
|
||||
RUN cd /workspace/backend && uv sync --frozen --no-dev --extra road-corridor \
|
||||
&& playwright install --with-deps chromium
|
||||
|
||||
# Copy backend source code
|
||||
|
||||
+31
-30
@@ -113,8 +113,14 @@ def _scoped_admin_tokens() -> dict[str, list[str]]:
|
||||
return normalized
|
||||
|
||||
|
||||
def _request_scope_path(request: Request) -> str:
|
||||
"""Return the ASGI request-line path, not the Host-derived URL path."""
|
||||
scope = getattr(request, "scope", {}) or {}
|
||||
return str(scope.get("path") or "")
|
||||
|
||||
|
||||
def _required_scope_for_request(request: Request) -> str:
|
||||
path = str(request.url.path or "")
|
||||
path = _request_scope_path(request)
|
||||
if path.startswith("/api/wormhole/gate/"):
|
||||
return "gate"
|
||||
if path.startswith("/api/wormhole/dm/"):
|
||||
@@ -443,7 +449,7 @@ async def _verify_openclaw_hmac(request: Request) -> bool:
|
||||
|
||||
# Compute expected signature: HMAC-SHA256(secret, METHOD|path|ts|nonce|body_digest)
|
||||
method = str(request.method or "").upper()
|
||||
path = str(request.url.path or "")
|
||||
path = _request_scope_path(request)
|
||||
message = f"{method}|{path}|{ts_str}|{nonce}|{body_digest}"
|
||||
expected = hmac.new(
|
||||
secret.encode("utf-8"),
|
||||
@@ -515,33 +521,32 @@ _KNOWN_COMPROMISED_PEER_PUSH_SECRET_SHA256 = (
|
||||
def _validate_admin_startup() -> None:
|
||||
admin_key = _current_admin_key()
|
||||
|
||||
if not admin_key or len(admin_key) < 32:
|
||||
import secrets
|
||||
if not admin_key:
|
||||
logger.warning(
|
||||
"ADMIN_KEY is not set. Local-operator/admin endpoints will reject "
|
||||
"remote callers until ADMIN_KEY is configured."
|
||||
)
|
||||
return
|
||||
|
||||
reason = "not set" if not admin_key else f"too short ({len(admin_key)} chars, minimum 32)"
|
||||
new_key = secrets.token_hex(32) # 64-char hex string
|
||||
if len(admin_key) < 32:
|
||||
reason = f"too short ({len(admin_key)} chars, minimum 32)"
|
||||
try:
|
||||
from routers.ai_intel import _write_env_value
|
||||
|
||||
_write_env_value("ADMIN_KEY", new_key)
|
||||
os.environ["ADMIN_KEY"] = new_key
|
||||
logger.info(
|
||||
"ADMIN_KEY was %s — auto-generated a strong 64-character key and "
|
||||
"saved it to .env. Admin/mesh endpoints are now secured.",
|
||||
reason,
|
||||
)
|
||||
# Clear settings cache so the rest of startup picks up the new key
|
||||
try:
|
||||
get_settings.cache_clear()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
debug_mode = bool(getattr(get_settings(), "MESH_DEBUG_MODE", False))
|
||||
except Exception:
|
||||
debug_mode = False
|
||||
if debug_mode:
|
||||
logger.warning(
|
||||
"ADMIN_KEY is %s and could not auto-generate: %s. "
|
||||
"Admin/mesh endpoints may be unavailable.",
|
||||
"ADMIN_KEY is %s. Debug mode is enabled, so startup will continue, "
|
||||
"but production deployments must use a 32+ character key.",
|
||||
reason,
|
||||
exc,
|
||||
)
|
||||
return
|
||||
logger.error(
|
||||
"ADMIN_KEY is %s. Refusing to start because auto-generating a backend-only "
|
||||
"replacement would desynchronize the frontend and backend containers.",
|
||||
reason,
|
||||
)
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
def _validate_insecure_admin_startup() -> None:
|
||||
@@ -744,8 +749,7 @@ def _is_debug_test_request(request: Request) -> bool:
|
||||
if not _debug_mode_enabled():
|
||||
return False
|
||||
client_host = (request.client.host or "").lower() if request.client else ""
|
||||
url_host = (request.url.hostname or "").lower() if request.url else ""
|
||||
return client_host == "test" or url_host == "test"
|
||||
return client_host == "test"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1397,10 +1401,7 @@ def _peer_hmac_url_from_request(request: Request) -> str:
|
||||
header_url = normalize_peer_url(str(request.headers.get("x-peer-url", "") or ""))
|
||||
if header_url:
|
||||
return header_url
|
||||
if not request.url:
|
||||
return ""
|
||||
base_url = f"{request.url.scheme}://{request.url.netloc}".rstrip("/")
|
||||
return normalize_peer_url(base_url)
|
||||
return ""
|
||||
|
||||
|
||||
def _verify_peer_push_hmac(request: Request, body_bytes: bytes) -> bool:
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
},
|
||||
{
|
||||
"name": "BBC",
|
||||
"url": "http://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"url": "https://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"weight": 3
|
||||
},
|
||||
{
|
||||
@@ -47,7 +47,7 @@
|
||||
},
|
||||
{
|
||||
"name": "Xinhua",
|
||||
"url": "http://www.news.cn/english/rss/worldrss.xml",
|
||||
"url": "https://www.news.cn/english/rss/worldrss.xml",
|
||||
"weight": 2
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:72b69418aa860a0d92ccae398a08722bc85e64a992b5515dd7bf9ae9f79f2fd1
|
||||
size 107194128
|
||||
@@ -46,5 +46,10 @@
|
||||
"ShadowBroker_v0.9.81.zip": "f81f454bdc88e9a32c351df38212b8cfa624704d65764b971bb091eef62259c6",
|
||||
"ShadowBroker_0.9.81_x64-setup.exe": "25e9a95d0d8ce959a7d08fe8e7406772ae24b596652793e81d1de5d02510a5a6",
|
||||
"ShadowBroker_0.9.81_x64_en-US.msi": "34e655fc0c0f195ee4ac978f228a4b2b9d5565253b8771aca9ef4693409e9e70"
|
||||
},
|
||||
"v0.9.82": {
|
||||
"ShadowBroker_v0.9.82.zip": "202ab043465741dcc06de57c19ec8314904332f8e818b891d7174655719d084c",
|
||||
"ShadowBroker_0.9.82_x64-setup.exe": "0eb9f2bda02ab691b39687641abc97e6bfb507b42f48de21970ad7dfb4ea15fc",
|
||||
"ShadowBroker_0.9.82_x64_en-US.msi": "ced08f930171c0c08009a958cc30b0171a09f982230fc217c6808c2ed7ab2e30"
|
||||
}
|
||||
}
|
||||
|
||||
+142
-448
@@ -15,7 +15,7 @@ from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
from json import JSONDecodeError
|
||||
|
||||
APP_VERSION = "0.9.81"
|
||||
APP_VERSION = "0.9.82"
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -310,6 +310,7 @@ from auth import (
|
||||
_private_plane_access_denied_payload,
|
||||
_private_infonet_policy_snapshot,
|
||||
_private_plane_refusal_response,
|
||||
_request_scope_path,
|
||||
_scoped_admin_tokens,
|
||||
_scoped_view_authenticated as _scoped_view_authenticated_auth,
|
||||
_security_headers,
|
||||
@@ -364,6 +365,11 @@ wormhole_router = _load_optional_router("routers.wormhole")
|
||||
ai_intel_router = _load_optional_router("routers.ai_intel")
|
||||
sar_router = _load_optional_router("routers.sar")
|
||||
infonet_router = _load_optional_router("routers.infonet")
|
||||
road_corridors_router = _load_optional_router("routers.road_corridors")
|
||||
osint_router = _load_optional_router("routers.osint")
|
||||
scm_router = _load_optional_router("routers.scm")
|
||||
entity_graph_router = _load_optional_router("routers.entity_graph")
|
||||
intel_feeds_router = _load_optional_router("routers.intel_feeds")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -2728,7 +2734,7 @@ async def json_decode_error_handler(_request: Request, _exc: JSONDecodeError):
|
||||
|
||||
@app.exception_handler(StarletteHTTPException)
|
||||
async def private_plane_http_exception_handler(request: Request, exc: StarletteHTTPException):
|
||||
if exc.status_code == 403 and _is_private_plane_access_path(request.url.path, request.method):
|
||||
if exc.status_code == 403 and _is_private_plane_access_path(_request_scope_path(request), request.method):
|
||||
return await _private_plane_refusal_response(
|
||||
request,
|
||||
status_code=403,
|
||||
@@ -2762,7 +2768,7 @@ async def mesh_security_headers(request: Request, call_next):
|
||||
@app.middleware("http")
|
||||
async def mesh_no_store_headers(request: Request, call_next):
|
||||
response = await call_next(request)
|
||||
if request.url.path.startswith("/api/mesh/"):
|
||||
if _request_scope_path(request).startswith("/api/mesh/"):
|
||||
response.headers["Cache-Control"] = "no-store, max-age=0"
|
||||
response.headers["Pragma"] = "no-cache"
|
||||
return response
|
||||
@@ -2911,24 +2917,16 @@ def _request_appears_private_infonet_transport(request) -> bool:
|
||||
if not _infonet_private_transport_required() or request is None:
|
||||
return False
|
||||
|
||||
client = getattr(request, "client", None)
|
||||
client_host = str(getattr(client, "host", "") or "")
|
||||
if not (_is_loopback_host(client_host) or _is_onion_host(client_host)):
|
||||
return False
|
||||
|
||||
forwarded_hosts = _forwarded_for_hosts(request)
|
||||
if forwarded_hosts and any(not (_is_loopback_host(host) or _is_onion_host(host)) for host in forwarded_hosts):
|
||||
return False
|
||||
|
||||
client = getattr(request, "client", None)
|
||||
client_host = str(getattr(client, "host", "") or "")
|
||||
headers = getattr(request, "headers", {}) or {}
|
||||
host_header = str(headers.get("host", "") or "")
|
||||
url_host = str(getattr(getattr(request, "url", None), "hostname", "") or "")
|
||||
return any(
|
||||
(
|
||||
_is_loopback_host(client_host),
|
||||
_is_loopback_host(host_header),
|
||||
_is_loopback_host(url_host),
|
||||
_is_onion_host(host_header),
|
||||
_is_onion_host(url_host),
|
||||
)
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
def _infonet_sync_response_events(events: list[dict], request=None) -> list[dict]:
|
||||
@@ -3464,7 +3462,7 @@ def _refresh_lookup_handle_rotation_background(*, reason: str) -> dict[str, Any]
|
||||
|
||||
@app.middleware("http")
|
||||
async def enforce_high_privacy_mesh(request: Request, call_next):
|
||||
path = request.url.path
|
||||
path = _request_scope_path(request)
|
||||
private_mesh_path = path.startswith("/api/mesh") and not _is_public_meshtastic_lane_path(
|
||||
path,
|
||||
request.method,
|
||||
@@ -3624,7 +3622,7 @@ async def enforce_high_privacy_mesh(request: Request, call_next):
|
||||
@app.middleware("http")
|
||||
async def apply_no_store_to_sensitive_paths(request: Request, call_next):
|
||||
response = await call_next(request)
|
||||
if _is_sensitive_no_store_path(request.url.path):
|
||||
if _is_sensitive_no_store_path(_request_scope_path(request)):
|
||||
for key, value in _NO_STORE_HEADERS.items():
|
||||
response.headers[key] = value
|
||||
return response
|
||||
@@ -3648,6 +3646,11 @@ app.include_router(wormhole_router)
|
||||
app.include_router(ai_intel_router)
|
||||
app.include_router(sar_router)
|
||||
app.include_router(infonet_router)
|
||||
app.include_router(road_corridors_router)
|
||||
app.include_router(osint_router)
|
||||
app.include_router(scm_router)
|
||||
app.include_router(entity_graph_router)
|
||||
app.include_router(intel_feeds_router)
|
||||
|
||||
from services.data_fetcher import update_all_data
|
||||
|
||||
@@ -3779,6 +3782,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
old_mesh = is_any_active("sigint_meshtastic")
|
||||
old_aprs = is_any_active("sigint_aprs")
|
||||
old_viirs = is_any_active("viirs_nightlights")
|
||||
old_datacenters = is_any_active("datacenters")
|
||||
old_fishing = is_any_active("fishing_activity")
|
||||
|
||||
# Update only known keys
|
||||
changed = False
|
||||
@@ -3797,6 +3802,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
new_mesh = is_any_active("sigint_meshtastic")
|
||||
new_aprs = is_any_active("sigint_aprs")
|
||||
new_viirs = is_any_active("viirs_nightlights")
|
||||
new_datacenters = is_any_active("datacenters")
|
||||
new_fishing = is_any_active("fishing_activity")
|
||||
|
||||
# Start/stop AIS stream on transition
|
||||
if old_ships and not new_ships:
|
||||
@@ -3852,435 +3859,21 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
_queue_viirs_change_refresh()
|
||||
logger.info("VIIRS change refresh queued (layer enabled)")
|
||||
|
||||
if not old_datacenters and new_datacenters:
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
fetch_datacenters()
|
||||
logger.info("Datacenters loaded (layer enabled)")
|
||||
|
||||
if not old_fishing and new_fishing:
|
||||
from services.fetchers.geo import fetch_fishing_activity
|
||||
|
||||
fetch_fishing_activity()
|
||||
logger.info("Fishing activity refresh queued (layer enabled)")
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.get("/api/live-data")
@limiter.limit("120/minute")
async def live_data(request: Request):
    """Serve the full latest-data snapshot.

    The handler returns whatever ``get_latest_data()`` produces; the
    ``request`` argument is accepted but not read by the body itself.
    """
    snapshot = get_latest_data()
    return snapshot
|
||||
|
||||
|
||||
def _etag_response(request: Request, payload: dict, prefix: str = "", default=None):
    """Answer with a 304 when the client's ETag is current, else with the JSON body.

    The ETag comes from ``_current_etag(prefix)`` rather than from hashing the
    payload, so the not-modified path returns before any serialization happens.

    Args:
        request: Incoming request; only its ``if-none-match`` header is read.
        payload: Data to serialize when a full response is required.
        prefix: Forwarded to ``_current_etag`` to namespace the tag.
        default: Forwarded to ``json_mod.dumps`` as its ``default`` hook.
    """
    etag = _current_etag(prefix)
    cache_headers = {"ETag": etag, "Cache-Control": "no-cache"}

    client_tag = request.headers.get("if-none-match")
    if client_tag == etag:
        return Response(status_code=304, headers=cache_headers)

    # Non-finite floats are replaced first so allow_nan=False cannot raise on them.
    body = json_mod.dumps(_json_safe(payload), default=default, allow_nan=False)
    return Response(content=body, media_type="application/json", headers=cache_headers)
|
||||
|
||||
|
||||
def _current_etag(prefix: str = "") -> str:
    """Compose the ETag from the store's data version and active-layers version.

    The result has the form ``<prefix>v<data_version>-l<layers_version>``.
    """
    from services.fetchers._store import get_active_layers_version, get_data_version

    data_part = f"v{get_data_version()}"
    layers_part = f"l{get_active_layers_version()}"
    return f"{prefix}{data_part}-{layers_part}"
|
||||
|
||||
|
||||
def _json_safe(value):
|
||||
"""Recursively replace non-finite floats with None so responses stay valid JSON."""
|
||||
if isinstance(value, float):
|
||||
return value if math.isfinite(value) else None
|
||||
if isinstance(value, dict):
|
||||
# Snapshot mutable mappings first so background fetcher updates do not
|
||||
# invalidate iteration while we serialize a response.
|
||||
return {k: _json_safe(v) for k, v in list(value.items())}
|
||||
if isinstance(value, list):
|
||||
return [_json_safe(v) for v in list(value)]
|
||||
if isinstance(value, tuple):
|
||||
return [_json_safe(v) for v in list(value)]
|
||||
return value
|
||||
|
||||
|
||||
def _sanitize_payload(value):
|
||||
"""Thread-safe snapshot with NaN→None. Cheaper than _json_safe: only deep-
|
||||
copies dicts (for thread safety) and replaces non-finite floats. Lists are
|
||||
shallow-copied — orjson handles the leaf serialisation natively."""
|
||||
if isinstance(value, float):
|
||||
return value if math.isfinite(value) else None
|
||||
if isinstance(value, dict):
|
||||
return {k: _sanitize_payload(v) for k, v in list(value.items())}
|
||||
if isinstance(value, (list, tuple)):
|
||||
return list(value)
|
||||
return value
|
||||
|
||||
|
||||
def _bbox_filter(
|
||||
items: list, s: float, w: float, n: float, e: float, lat_key: str = "lat", lng_key: str = "lng"
|
||||
) -> list:
|
||||
"""Filter a list of dicts to those within the bounding box (with 20% padding).
|
||||
Handles antimeridian crossing (e.g. w=170, e=-170)."""
|
||||
pad_lat = (n - s) * 0.2
|
||||
pad_lng = (e - w) * 0.2 if e > w else ((e + 360 - w) * 0.2)
|
||||
s2, n2 = s - pad_lat, n + pad_lat
|
||||
w2, e2 = w - pad_lng, e + pad_lng
|
||||
crosses_antimeridian = w2 > e2
|
||||
out = []
|
||||
for item in items:
|
||||
lat = item.get(lat_key)
|
||||
lng = item.get(lng_key)
|
||||
if lat is None or lng is None:
|
||||
out.append(item) # Keep items without coords (don't filter them out)
|
||||
continue
|
||||
if not (s2 <= lat <= n2):
|
||||
continue
|
||||
if crosses_antimeridian:
|
||||
if lng >= w2 or lng <= e2:
|
||||
out.append(item)
|
||||
else:
|
||||
if w2 <= lng <= e2:
|
||||
out.append(item)
|
||||
return out
|
||||
|
||||
|
||||
def _bbox_filter_geojson_points(items: list, s: float, w: float, n: float, e: float) -> list:
|
||||
"""Filter GeoJSON Point features to a padded bounding box."""
|
||||
pad_lat = (n - s) * 0.2
|
||||
pad_lng = (e - w) * 0.2 if e > w else ((e + 360 - w) * 0.2)
|
||||
s2, n2 = s - pad_lat, n + pad_lat
|
||||
w2, e2 = w - pad_lng, e + pad_lng
|
||||
crosses_antimeridian = w2 > e2
|
||||
out = []
|
||||
for item in items:
|
||||
geometry = item.get("geometry") if isinstance(item, dict) else None
|
||||
coords = geometry.get("coordinates") if isinstance(geometry, dict) else None
|
||||
if not isinstance(coords, (list, tuple)) or len(coords) < 2:
|
||||
out.append(item)
|
||||
continue
|
||||
lng, lat = coords[0], coords[1]
|
||||
if lat is None or lng is None:
|
||||
out.append(item)
|
||||
continue
|
||||
if not (s2 <= lat <= n2):
|
||||
continue
|
||||
if crosses_antimeridian:
|
||||
if lng >= w2 or lng <= e2:
|
||||
out.append(item)
|
||||
else:
|
||||
if w2 <= lng <= e2:
|
||||
out.append(item)
|
||||
return out
|
||||
|
||||
|
||||
def _bbox_spans(s: float | None, w: float | None, n: float | None, e: float | None) -> tuple[float, float]:
|
||||
if None in (s, w, n, e):
|
||||
return 180.0, 360.0
|
||||
lat_span = max(0.0, float(n) - float(s))
|
||||
lng_span = float(e) - float(w)
|
||||
if lng_span < 0:
|
||||
lng_span += 360.0
|
||||
if lng_span == 0 and w == -180 and e == 180:
|
||||
lng_span = 360.0
|
||||
return lat_span, max(0.0, lng_span)
|
||||
|
||||
|
||||
def _downsample_points(items: list, max_items: int) -> list:
|
||||
if max_items <= 0 or len(items) <= max_items:
|
||||
return items
|
||||
step = len(items) / float(max_items)
|
||||
return [items[min(len(items) - 1, int(i * step))] for i in range(max_items)]
|
||||
|
||||
|
||||
def _world_and_continental_scale(
    has_bbox: bool, s: float | None, w: float | None, n: float | None, e: float | None
) -> tuple[bool, bool]:
    """Classify a viewport as world-scale and/or continental-scale.

    Returns:
        ``(world_scale, continental_scale)``. World scale applies when there
        is no bbox, or the box spans at least 300 degrees of longitude or 120
        of latitude. Continental scale applies to a bbox that is not world
        scale but spans at least 120 degrees of longitude or 55 of latitude.
    """
    lat_extent, lng_extent = _bbox_spans(s, w, n, e)
    covers_world = (not has_bbox) or lng_extent >= 300 or lat_extent >= 120
    wide_view = lng_extent >= 120 or lat_extent >= 55
    covers_continent = has_bbox and not covers_world and wide_view
    return covers_world, covers_continent
|
||||
|
||||
|
||||
def _filter_sigint_by_layers(items: list, active_layers: dict[str, bool]) -> list:
|
||||
allow_aprs = bool(active_layers.get("sigint_aprs", True))
|
||||
allow_mesh = bool(active_layers.get("sigint_meshtastic", True))
|
||||
if allow_aprs and allow_mesh:
|
||||
return items
|
||||
|
||||
allowed_sources: set[str] = {"js8call"}
|
||||
if allow_aprs:
|
||||
allowed_sources.add("aprs")
|
||||
if allow_mesh:
|
||||
allowed_sources.update({"meshtastic", "meshtastic-map"})
|
||||
return [item for item in items if str(item.get("source") or "").lower() in allowed_sources]
|
||||
|
||||
|
||||
def _sigint_totals_for_items(items: list) -> dict[str, int]:
|
||||
totals = {
|
||||
"total": len(items),
|
||||
"meshtastic": 0,
|
||||
"meshtastic_live": 0,
|
||||
"meshtastic_map": 0,
|
||||
"aprs": 0,
|
||||
"js8call": 0,
|
||||
}
|
||||
for item in items:
|
||||
source = str(item.get("source") or "").lower()
|
||||
if source == "meshtastic":
|
||||
totals["meshtastic"] += 1
|
||||
if bool(item.get("from_api")):
|
||||
totals["meshtastic_map"] += 1
|
||||
else:
|
||||
totals["meshtastic_live"] += 1
|
||||
elif source == "aprs":
|
||||
totals["aprs"] += 1
|
||||
elif source == "js8call":
|
||||
totals["js8call"] += 1
|
||||
return totals
|
||||
|
||||
|
||||
def _cap_startup_items(items: list | None, max_items: int) -> list:
|
||||
if not items:
|
||||
return []
|
||||
if len(items) <= max_items:
|
||||
return items
|
||||
return items[:max_items]
|
||||
|
||||
|
||||
def _cap_fast_startup_payload(payload: dict) -> dict:
    """Build the reduced fast payload used for the first dashboard paint.

    High-volume layers are truncated to fixed sizes and CCTV is emptied so the
    initial response stays small; the next regular poll is expected to deliver
    the full dataset. The input dict is not modified; the returned shallow
    copy is tagged with ``startup_payload = True``.
    """
    trimmed = dict(payload)

    for layer, limit in (
        ("commercial_flights", 800),
        ("private_flights", 300),
        ("private_jets", 150),
        ("ships", 1500),
    ):
        trimmed[layer] = _cap_startup_items(trimmed.get(layer), limit)

    # CCTV is dropped entirely from the startup payload.
    trimmed["cctv"] = []

    for layer, limit in (("sigint", 500), ("trains", 100)):
        trimmed[layer] = _cap_startup_items(trimmed.get(layer), limit)

    trimmed["startup_payload"] = True
    return trimmed
|
||||
|
||||
|
||||
def _cap_fast_dashboard_payload(payload: dict) -> dict:
    """Downsample the high-volume layers of a regular fast payload.

    Works on a shallow copy of *payload*; each listed layer is passed through
    ``_downsample_points`` with its own ceiling, and a missing or falsy layer
    becomes an empty list.
    """
    layer_ceilings = (
        ("commercial_flights", 6000),
        ("private_flights", 1500),
        ("private_jets", 1500),
        ("ships", 8000),
        ("cctv", 2500),
        ("sigint", 5000),
    )
    capped = dict(payload)
    for layer, ceiling in layer_ceilings:
        capped[layer] = _downsample_points(capped.get(layer) or [], ceiling)
    return capped
|
||||
|
||||
|
||||
@app.get("/api/live-data/fast")
@limiter.limit("120/minute")
async def live_data_fast(
    request: Request,
    # bbox params accepted for backward compat but no longer used for filtering —
    # all cached data is returned and the frontend culls off-screen entities via MapLibre.
    s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
    w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
    n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
    e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
    initial: bool = Query(False, description="Return a capped startup payload for first paint"),
):
    """Serve the fast-moving layers (flights, ships, CCTV, SIGINT, trains, ...).

    Replies 304 when the client's ``if-none-match`` header equals the current
    ETag. Otherwise builds the payload from the store, blanking every layer
    that is switched off in ``active_layers``, caps it, and serializes it
    with orjson.

    ``initial=True`` selects the smaller startup cap and a separate ETag
    namespace. The bbox parameters are validated but never read.
    """
    # Startup and full payloads differ, so they must not share an ETag.
    etag = _current_etag(prefix="fast|initial|" if initial else "fast|full|")
    if request.headers.get("if-none-match") == etag:
        return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})

    from services.fetchers._store import (
        active_layers,
        get_latest_data_subset_refs,
        get_source_timestamps_snapshot,
    )

    # NOTE(review): "last_updated" and "sigint_totals" are requested here but
    # not read below — the totals are recomputed from the filtered items.
    d = get_latest_data_subset_refs(
        "last_updated",
        "commercial_flights",
        "military_flights",
        "private_flights",
        "private_jets",
        "tracked_flights",
        "ships",
        "cctv",
        "uavs",
        "liveuamap",
        "gps_jamming",
        "satellites",
        "satellite_source",
        "sigint",
        "sigint_totals",
        "trains",
    )
    freshness = get_source_timestamps_snapshot()

    # Ships are sent when at least one ship sub-layer is on (absent keys count as on).
    ships_enabled = any(
        active_layers.get(key, True)
        for key in (
            "ships_military",
            "ships_cargo",
            "ships_civilian",
            "ships_passenger",
            "ships_tracked_yachts",
        )
    )
    # Counted before layer gating and capping, so it reflects every cached camera.
    cctv_total = len(d.get("cctv") or [])
    sigint_items = _filter_sigint_by_layers(d.get("sigint") or [], active_layers)
    sigint_totals = _sigint_totals_for_items(sigint_items)

    payload = {
        "commercial_flights": (d.get("commercial_flights") or []) if active_layers.get("flights", True) else [],
        "military_flights": (d.get("military_flights") or []) if active_layers.get("military", True) else [],
        "private_flights": (d.get("private_flights") or []) if active_layers.get("private", True) else [],
        "private_jets": (d.get("private_jets") or []) if active_layers.get("jets", True) else [],
        "tracked_flights": (d.get("tracked_flights") or []) if active_layers.get("tracked", True) else [],
        "ships": (d.get("ships") or []) if ships_enabled else [],
        "cctv": (d.get("cctv") or []) if active_layers.get("cctv", True) else [],
        # UAVs share the "military" layer toggle with military flights.
        "uavs": (d.get("uavs") or []) if active_layers.get("military", True) else [],
        "liveuamap": (d.get("liveuamap") or []) if active_layers.get("global_incidents", True) else [],
        "gps_jamming": (d.get("gps_jamming") or []) if active_layers.get("gps_jamming", True) else [],
        "satellites": (d.get("satellites") or []) if active_layers.get("satellites", True) else [],
        "satellite_source": d.get("satellite_source", "none"),
        # With both SIGINT layers off the list is emptied, while sigint_totals
        # below still counts whatever the filter let through.
        "sigint": sigint_items
        if (active_layers.get("sigint_meshtastic", True) or active_layers.get("sigint_aprs", True))
        else [],
        "sigint_totals": sigint_totals,
        "cctv_total": cctv_total,
        "trains": (d.get("trains") or []) if active_layers.get("trains", True) else [],
        "freshness": freshness,
    }
    if initial:
        payload = _cap_fast_startup_payload(payload)
    else:
        payload = _cap_fast_dashboard_payload(payload)
    return Response(
        content=orjson.dumps(_sanitize_payload(payload)),
        media_type="application/json",
        headers={"ETag": etag, "Cache-Control": "no-cache"},
    )
|
||||
|
||||
|
||||
@app.get("/api/live-data/slow")
@limiter.limit("60/minute")
async def live_data_slow(
    request: Request,
    # bbox params accepted for backward compat but no longer used for filtering.
    s: float = Query(None, description="South bound (ignored)", ge=-90, le=90),
    w: float = Query(None, description="West bound (ignored)", ge=-180, le=180),
    n: float = Query(None, description="North bound (ignored)", ge=-90, le=90),
    e: float = Query(None, description="East bound (ignored)", ge=-180, le=180),
):
    """Serve the slow-moving layers (news, markets, infrastructure, alerts, ...).

    Replies 304 when the client's ``if-none-match`` header equals the current
    ETag. Otherwise builds the payload from the store and serializes it with
    orjson. Map layers are blanked when switched off in ``active_layers``;
    the feed-style entries at the top of the payload are always included.
    The bbox parameters are validated but never read.
    """
    etag = _current_etag(prefix="slow|full|")
    if request.headers.get("if-none-match") == etag:
        return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})

    from services.fetchers._store import (
        active_layers,
        get_latest_data_subset_refs,
        get_source_timestamps_snapshot,
    )

    d = get_latest_data_subset_refs(
        "last_updated",
        "news",
        "stocks",
        "financial_source",
        "oil",
        "weather",
        "traffic",
        "earthquakes",
        "frontlines",
        "gdelt",
        "airports",
        "kiwisdr",
        "satnogs_stations",
        "satnogs_observations",
        "tinygs_satellites",
        "space_weather",
        "internet_outages",
        "firms_fires",
        "datacenters",
        "military_bases",
        "power_plants",
        "viirs_change_nodes",
        "scanners",
        "weather_alerts",
        "ukraine_alerts",
        "air_quality",
        "volcanoes",
        "fishing_activity",
        "psk_reporter",
        "crowdthreat",
        "correlations",
        "threat_level",
        "trending_markets",
        "fimi",
        "uap_sightings",
        "wastewater",
        "sar_scenes",
        "sar_anomalies",
        "sar_aoi_coverage",
    )
    freshness = get_source_timestamps_snapshot()

    payload = {
        # Not gated by any layer toggle.
        "last_updated": d.get("last_updated"),
        "threat_level": d.get("threat_level"),
        "trending_markets": d.get("trending_markets", []),
        "fimi": d.get("fimi", {}),
        "news": d.get("news", []),
        "stocks": d.get("stocks", {}),
        "financial_source": d.get("financial_source", ""),
        "oil": d.get("oil", {}),
        "weather": d.get("weather"),
        "traffic": d.get("traffic", []),
        # Layer-gated entries: a disabled layer is sent empty (absent toggles count as on).
        "earthquakes": (d.get("earthquakes") or []) if active_layers.get("earthquakes", True) else [],
        # Frontlines is the one gated entry that falls back to None instead of [].
        "frontlines": d.get("frontlines") if active_layers.get("ukraine_frontline", True) else None,
        "gdelt": (d.get("gdelt") or []) if active_layers.get("global_incidents", True) else [],
        "airports": d.get("airports") or [],
        "kiwisdr": (d.get("kiwisdr") or []) if active_layers.get("kiwisdr", True) else [],
        "satnogs_stations": (d.get("satnogs_stations") or []) if active_layers.get("satnogs", True) else [],
        # The *_total counts are taken from the store, independent of the layer toggle.
        "satnogs_total": len(d.get("satnogs_stations") or []),
        "satnogs_observations": (d.get("satnogs_observations") or []) if active_layers.get("satnogs", True) else [],
        "tinygs_satellites": (d.get("tinygs_satellites") or []) if active_layers.get("tinygs", True) else [],
        "tinygs_total": len(d.get("tinygs_satellites") or []),
        "psk_reporter": (d.get("psk_reporter") or []) if active_layers.get("psk_reporter", True) else [],
        "space_weather": d.get("space_weather"),
        "internet_outages": (d.get("internet_outages") or []) if active_layers.get("internet_outages", True) else [],
        "firms_fires": (d.get("firms_fires") or []) if active_layers.get("firms", True) else [],
        "datacenters": (d.get("datacenters") or []) if active_layers.get("datacenters", True) else [],
        "military_bases": (d.get("military_bases") or []) if active_layers.get("military_bases", True) else [],
        "power_plants": (d.get("power_plants") or []) if active_layers.get("power_plants", True) else [],
        "viirs_change_nodes": (d.get("viirs_change_nodes") or []) if active_layers.get("viirs_nightlights", True) else [],
        "scanners": (d.get("scanners") or []) if active_layers.get("scanners", True) else [],
        # NOTE(review): these two use d.get(key, []) rather than `or []`, so a
        # stored None would pass through as None — confirm that is intended.
        "weather_alerts": d.get("weather_alerts", []) if active_layers.get("weather_alerts", True) else [],
        "ukraine_alerts": d.get("ukraine_alerts", []) if active_layers.get("ukraine_alerts", True) else [],
        "air_quality": (d.get("air_quality") or []) if active_layers.get("air_quality", True) else [],
        "volcanoes": (d.get("volcanoes") or []) if active_layers.get("volcanoes", True) else [],
        "fishing_activity": (d.get("fishing_activity") or []) if active_layers.get("fishing_activity", True) else [],
        "crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
        "correlations": (d.get("correlations") or []) if active_layers.get("correlations", True) else [],
        "uap_sightings": (d.get("uap_sightings") or []) if active_layers.get("uap_sightings", True) else [],
        "wastewater": (d.get("wastewater") or []) if active_layers.get("wastewater", True) else [],
        # All three SAR entries share the single "sar" layer toggle.
        "sar_scenes": (d.get("sar_scenes") or []) if active_layers.get("sar", True) else [],
        "sar_anomalies": (d.get("sar_anomalies") or []) if active_layers.get("sar", True) else [],
        "sar_aoi_coverage": (d.get("sar_aoi_coverage") or []) if active_layers.get("sar", True) else [],
        "freshness": freshness,
    }
    return Response(
        content=orjson.dumps(
            _sanitize_payload(payload),
            # Values orjson cannot encode natively are stringified; non-string dict keys are allowed.
            default=str,
            option=orjson.OPT_NON_STR_KEYS,
        ),
        media_type="application/json",
        headers={"ETag": etag, "Cache-Control": "no-cache"},
    )
|
||||
|
||||
|
||||
@app.get("/api/oracle/region-intel")
|
||||
@limiter.limit("30/minute")
|
||||
async def oracle_region_intel(
|
||||
@@ -8265,6 +7858,8 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.tripcheck.com",
|
||||
"infocar.dgt.es", # Spain DGT
|
||||
"informo.madrid.es", # Madrid
|
||||
"webcams2.asfinag.at", # Austria ASFINAG motorway cameras
|
||||
"odo.asfinag.at", # ASFINAG catalog API host
|
||||
"www.windy.com",
|
||||
"imgproxy.windy.com", # Windy preview image CDN
|
||||
"www.lakecountypassage.com", # Illinois Lake County PASSAGE snapshots
|
||||
@@ -8273,6 +7868,14 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.nps.gov", # WSDOT-linked Mount Rainier camera
|
||||
"home.lewiscounty.com", # WSDOT partner public camera
|
||||
"www.seattle.gov", # Seattle traffic camera media linked from WSDOT
|
||||
"511on.ca", # Ontario 511 cameras
|
||||
"511.alberta.ca", # Alberta 511 cameras
|
||||
"fl511.com", # Florida 511 cameras
|
||||
"www.fl511.com",
|
||||
"webcams.transport.nsw.gov.au", # NSW Live Traffic camera snapshots
|
||||
"www.livetraffic.com",
|
||||
"livetraffic.com",
|
||||
"opendata.ndw.nu", # Netherlands RWS legacy open-data host
|
||||
}
|
||||
|
||||
|
||||
@@ -8368,7 +7971,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
cache_seconds=15,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/",
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/",
|
||||
},
|
||||
)
|
||||
if host == "511ga.org":
|
||||
@@ -8388,7 +7991,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
cache_seconds=10,
|
||||
headers={
|
||||
"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/",
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/",
|
||||
},
|
||||
)
|
||||
if host in {"gettingaroundillinois.com", "cctv.travelmidwest.com"}:
|
||||
@@ -8470,6 +8073,16 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
"Referer": "https://informo.madrid.es/",
|
||||
},
|
||||
)
|
||||
if host in {"webcams2.asfinag.at", "odo.asfinag.at"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="asfinag-austria",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=60,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.asfinag.at/",
|
||||
},
|
||||
)
|
||||
if host in {"www.windy.com", "imgproxy.windy.com"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="windy-webcams",
|
||||
@@ -8480,6 +8093,56 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
"Referer": "https://www.windy.com/",
|
||||
},
|
||||
)
|
||||
if host == "511on.ca":
|
||||
return _CCTVProxyProfile(
|
||||
name="ontario-511",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=30,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511on.ca/",
|
||||
},
|
||||
)
|
||||
if host == "511.alberta.ca":
|
||||
return _CCTVProxyProfile(
|
||||
name="alberta-511",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=30,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511.alberta.ca/",
|
||||
},
|
||||
)
|
||||
if host in {"fl511.com", "www.fl511.com"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="florida-511",
|
||||
timeout=(5.0, 15.0),
|
||||
cache_seconds=30,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://fl511.com/",
|
||||
},
|
||||
)
|
||||
if host == "webcams.transport.nsw.gov.au":
|
||||
return _CCTVProxyProfile(
|
||||
name="nsw-live-traffic",
|
||||
timeout=(5.0, 12.0),
|
||||
cache_seconds=60,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.livetraffic.com/",
|
||||
},
|
||||
)
|
||||
if host in {"opendata.ndw.nu", "www.ndw.nu"}:
|
||||
return _CCTVProxyProfile(
|
||||
name="ndw-netherlands",
|
||||
timeout=(5.0, 12.0),
|
||||
cache_seconds=120,
|
||||
headers={
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.ndw.nu/",
|
||||
},
|
||||
)
|
||||
if host in {
|
||||
"webcam.forkswa.com",
|
||||
"webcam.sunmountainlodge.com",
|
||||
@@ -12051,5 +11714,36 @@ async def system_update(request: Request):
|
||||
return result
|
||||
|
||||
|
||||
def _dev_uvicorn_bind_host() -> str:
|
||||
"""Default loopback for `python main.py` so LAN clients cannot reach a dev server (#375).
|
||||
|
||||
Docker compose still publishes 127.0.0.1:8000; the dashboard stays on :3000.
|
||||
Set SHADOWBROKER_DEV_BIND_ALL=true only when you intentionally need LAN access
|
||||
(and use ADMIN_KEY for remote callers).
|
||||
"""
|
||||
if str(os.environ.get("SHADOWBROKER_DEV_BIND_ALL", "")).strip().lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}:
|
||||
return "0.0.0.0"
|
||||
return "127.0.0.1"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Dev entry point. The earlier unconditional
    # uvicorn.run(..., host="0.0.0.0", port=8000) call is gone: it started a
    # server on every interface before the loopback-aware launch below could
    # run, which defeated the loopback default.
    _host = _dev_uvicorn_bind_host()
    # BACKEND_PORT overrides the default port 8000.
    _port = int(os.environ.get("BACKEND_PORT", "8000"))
    if _host == "127.0.0.1":
        logger.info(
            "Dev server binding %s:%s (loopback). Set SHADOWBROKER_DEV_BIND_ALL=true for 0.0.0.0.",
            _host,
            _port,
        )
    uvicorn.run(
        "main:app",
        host=_host,
        port=_port,
        reload=True,
        timeout_keep_alive=120,
    )
|
||||
|
||||
+18
-6
@@ -7,15 +7,15 @@ py-modules = []
|
||||
|
||||
[project]
|
||||
name = "backend"
|
||||
version = "0.9.81"
|
||||
version = "0.9.82"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"apscheduler==3.10.3",
|
||||
"beautifulsoup4>=4.9.0",
|
||||
"cachetools==5.5.2",
|
||||
"cryptography>=41.0.0",
|
||||
"cryptography>=46.0.7",
|
||||
"defusedxml>=0.7.1",
|
||||
"fastapi==0.115.12",
|
||||
"fastapi==0.136.3",
|
||||
"feedparser==6.0.10",
|
||||
"httpx==0.28.1",
|
||||
"playwright==1.59.0",
|
||||
@@ -24,7 +24,7 @@ dependencies = [
|
||||
"pydantic-settings==2.8.1",
|
||||
"pystac-client==0.8.6",
|
||||
"python-dotenv==1.2.2",
|
||||
"requests==2.31.0",
|
||||
"requests==2.33.0",
|
||||
"PySocks==1.7.1",
|
||||
"reverse-geocoder==1.5.1",
|
||||
"sgp4==2.25",
|
||||
@@ -33,17 +33,29 @@ dependencies = [
|
||||
"paho-mqtt>=1.6.0,<2.0.0",
|
||||
"PyNaCl>=1.5.0",
|
||||
"slowapi==0.1.9",
|
||||
"starlette==1.0.1",
|
||||
"vaderSentiment>=3.3.0",
|
||||
"uvicorn==0.34.0",
|
||||
"yfinance==1.3.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
road-corridor = [
|
||||
"geopandas>=1.0.0",
|
||||
"imageio>=2.34.0",
|
||||
"osmnx>=2.0.0",
|
||||
"rasterio>=1.4.0",
|
||||
"scikit-learn>=1.5.0",
|
||||
"sentinelhub>=3.10.0",
|
||||
"shapely>=2.0.0",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = ["pytest>=8.3.4", "pytest-asyncio==0.25.0", "ruff>=0.9.0", "black>=24.0.0"]
|
||||
dev = ["pytest>=9.0.3", "pytest-asyncio>=1.4.0", "ruff>=0.9.0", "black>=24.0.0"]
|
||||
|
||||
[tool.ruff.lint]
|
||||
# The current backend carries historical style debt in large legacy modules.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.81 release.
|
||||
# Keep CI focused on actionable correctness checks for the v0.9.82 release.
|
||||
ignore = ["E401", "E402", "E701", "E731", "E741", "F401", "F402", "F541", "F811", "F841"]
|
||||
|
||||
[tool.black]
|
||||
|
||||
@@ -1590,7 +1590,7 @@ async def agent_tool_manifest(request: Request):
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.81",
|
||||
"version": "0.9.82",
|
||||
"access_tier": access_tier,
|
||||
"available_commands": available_commands,
|
||||
"transport": {
|
||||
@@ -1705,11 +1705,12 @@ async def agent_tool_manifest(request: Request):
|
||||
{
|
||||
"name": "search_news",
|
||||
"type": "read",
|
||||
"description": "Search news and event layers server-side by keyword. Includes news, GDELT, CrowdThreat, and major incident/event feeds without pulling the full slow telemetry feed.",
|
||||
"description": "Search news and event layers server-side by keyword. Includes news, GDELT, CrowdThreat, Telegram OSINT, and major incident/event feeds without pulling the full slow telemetry feed.",
|
||||
"parameters": {
|
||||
"query": {"type": "string", "required": True, "description": "Keyword or phrase to search for"},
|
||||
"limit": {"type": "integer", "required": False, "description": "Max results (default 10, max 50)"},
|
||||
"include_gdelt": {"type": "boolean", "required": False, "description": "Include GDELT matches (default true)"},
|
||||
"include_telegram": {"type": "boolean", "required": False, "description": "Include Telegram OSINT channel posts (default true)"},
|
||||
"compact": {"type": "boolean", "required": False, "description": "If true, strips empty/None fields from each result and rounds lat/lng to 3 decimals. Response includes format: 'compressed_v1'."},
|
||||
},
|
||||
"returns": "{results: [{source_layer, title, summary, source, link, lat, lng, risk_score}], version: int, truncated: bool}",
|
||||
@@ -1743,6 +1744,55 @@ async def agent_tool_manifest(request: Request):
|
||||
},
|
||||
"returns": "{center, radius_km, nearby, topic_news, context_layers}",
|
||||
},
|
||||
{
|
||||
"name": "osint_lookup",
|
||||
"type": "read",
|
||||
"description": "Run a passive OSINT recon lookup server-side (same backends as the Recon panel). SSRF-guarded outbound proxies for IP geolocation, DNS, WHOIS, certs, BGP/ASN, sanctions, CVE, MAC vendor, GitHub profile, breach checks, and threat feeds.",
|
||||
"parameters": {
|
||||
"tool": {"type": "string", "required": True, "description": "Lookup type: ip, dns, whois, certs, threats, bgp, sanctions, cve, mac, github, leaks, sweep_init"},
|
||||
"ip": {"type": "string", "required": False, "description": "IPv4/IPv6 for ip or sweep_init"},
|
||||
"domain": {"type": "string", "required": False, "description": "Domain for dns, whois, certs"},
|
||||
"query": {"type": "string", "required": False, "description": "Generic query (BGP ASN, sanctions name, optional threats filter)"},
|
||||
"cve": {"type": "string", "required": False, "description": "CVE id for cve lookup"},
|
||||
"mac": {"type": "string", "required": False, "description": "MAC address for mac lookup"},
|
||||
"username": {"type": "string", "required": False, "description": "GitHub username"},
|
||||
"email": {"type": "string", "required": False, "description": "Email for breach/leak lookup"},
|
||||
"schema": {"type": "string", "required": False, "description": "Sanctions schema filter: Person, Organization, Company, Vessel, Airplane, LegalEntity"},
|
||||
"limit": {"type": "integer", "required": False, "description": "Sanctions result cap (default 25, max 100)"},
|
||||
"cidr": {"type": "integer", "required": False, "description": "CIDR mask for sweep_init (24-32, default 24)"},
|
||||
},
|
||||
"returns": "Tool-specific JSON (geo, DNS records, WHOIS, sanctions hits, CVE details, etc.)",
|
||||
},
|
||||
{
|
||||
"name": "osint_tools",
|
||||
"type": "read",
|
||||
"description": "List available OSINT recon tools, entity-expand types, and sanctions schemas.",
|
||||
"parameters": {},
|
||||
"returns": "{tools: [...], entity_types: [...], sanctions_schemas: [...], notes: {...}}",
|
||||
},
|
||||
{
|
||||
"name": "entity_expand",
|
||||
"type": "read",
|
||||
"description": "Expand an entity relationship graph around an aircraft, vessel, IP, company, person, or country. Same backend as /api/entity/expand.",
|
||||
"parameters": {
|
||||
"type": {"type": "string", "required": True, "description": "Entity type: aircraft, vessel, company, person, ip, country"},
|
||||
"id": {"type": "string", "required": True, "description": "Entity identifier (tail number, MMSI, IP, company name, etc.)"},
|
||||
"registration": {"type": "string", "required": False, "description": "Aircraft registration hint"},
|
||||
"model": {"type": "string", "required": False, "description": "Aircraft model hint"},
|
||||
"icao24": {"type": "string", "required": False, "description": "ICAO24 hex for aircraft"},
|
||||
},
|
||||
"returns": "{nodes: [...], links: [...]}",
|
||||
},
|
||||
{
|
||||
"name": "osint_sweep",
|
||||
"type": "write",
|
||||
"description": "Active subnet device discovery via Shodan InternetDB (ports, vulns, hostnames). Requires full OpenClaw access tier. Private/reserved IPs blocked.",
|
||||
"parameters": {
|
||||
"ip": {"type": "string", "required": True, "description": "Public IPv4 anchor for the sweep"},
|
||||
"cidr": {"type": "integer", "required": False, "description": "Subnet size /24-/32 (default 24)"},
|
||||
},
|
||||
"returns": "{center, target_ip, cidr, subnet, devices, summary, sweep_time_ms}",
|
||||
},
|
||||
{
|
||||
"name": "what_changed",
|
||||
"type": "read",
|
||||
@@ -2194,6 +2244,11 @@ async def agent_tool_manifest(request: Request):
|
||||
"Prefer compact lookups first: search_telemetry, find_flights, find_ships, search_news, entities_near, get_layer_slice. Use get_telemetry/get_slow_telemetry/get_report only when focused commands are insufficient.",
|
||||
"ShadowBroker does expose UAP sightings, wastewater, and tracked_flights/VIP aircraft when those layers are populated. Verify with get_summary or get_layer_slice before claiming a layer is absent.",
|
||||
"ShadowBroker also exposes fishing_activity, which is the fishing-vessel activity layer backed by Global Fishing Watch data when GFW_API_TOKEN is configured. Do not confuse it with the AIS ships layer.",
|
||||
"telegram_osint, malware_threats, cyber_threats, and scm_suppliers are live map layers. Use get_summary or get_layer_slice(['telegram_osint']) before claiming they are absent. Aliases: telegram, malware/botnet, cyber/cisa/kev, scm/suppliers.",
|
||||
"search_telemetry and search_news both index Telegram OSINT posts. For malware C2, botnet IPs, CISA KEV CVEs, or semiconductor suppliers, use search_telemetry or get_layer_slice on the matching layer.",
|
||||
"The Recon toolkit is available via osint_lookup: IP geolocation, DNS, WHOIS, certs, BGP, sanctions, CVE, MAC vendor, GitHub, breach checks, threat feeds. Call osint_tools first to list supported tools.",
|
||||
"entity_expand builds relationship graphs for aircraft, vessels, IPs, companies, people, and countries — use after resolving an entity from telemetry or osint_lookup.",
|
||||
"osint_sweep runs active subnet discovery (Shodan InternetDB) and requires full OpenClaw access tier. Use osint_lookup tool=sweep_init for passive geolocation context only.",
|
||||
"Use search_telemetry as the Google-style entry point whenever the user gives you a person, place, company, topic, owner, nickname, or natural-language phrase and you do not already know the source layer.",
|
||||
"Example: for 'Where is Jerry Jones yacht?' search 'Jerry Jones' across all telemetry first, identify the ship match, then refine with find_ships or raw layer context only if needed.",
|
||||
"For fuzzy natural-language lookups like 'Patriots jet' or 'Jerry Jones yacht', use search_telemetry first and inspect the ranked candidate list before making a hard claim.",
|
||||
@@ -2226,7 +2281,7 @@ async def api_capabilities(request: Request):
|
||||
access_tier = str(get_settings().OPENCLAW_ACCESS_TIER or "restricted").strip().lower()
|
||||
return {
|
||||
"ok": True,
|
||||
"version": "0.9.81",
|
||||
"version": "0.9.82",
|
||||
"auth": {
|
||||
"method": "HMAC-SHA256",
|
||||
"headers": ["X-SB-Timestamp", "X-SB-Nonce", "X-SB-Signature"],
|
||||
@@ -2354,13 +2409,29 @@ async def api_capabilities(request: Request):
|
||||
"description": "Universal compact search across telemetry when the entity type or source layer is not obvious.",
|
||||
},
|
||||
"search_news": {
|
||||
"args": {"query": "str", "limit": "int (default 10)", "include_gdelt": "bool (default true)"},
|
||||
"description": "Search news and event layers by keyword without pulling the whole slow feed.",
|
||||
"args": {"query": "str", "limit": "int (default 10)", "include_gdelt": "bool (default true)", "include_telegram": "bool (default true)"},
|
||||
"description": "Search news and event layers by keyword without pulling the whole slow feed. Includes Telegram OSINT when include_telegram is true.",
|
||||
},
|
||||
"entities_near": {
|
||||
"args": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list[str] (optional)", "limit": "int (default 25)"},
|
||||
"description": "Compact proximity search around a point across selected layers.",
|
||||
},
|
||||
"osint_lookup": {
|
||||
"args": {"tool": "str (ip|dns|whois|certs|threats|bgp|sanctions|cve|mac|github|leaks|sweep_init)", "...": "tool-specific params"},
|
||||
"description": "Passive OSINT recon lookup — same backends as the Recon panel.",
|
||||
},
|
||||
"osint_tools": {
|
||||
"args": {},
|
||||
"description": "List available recon tools and entity-expand types.",
|
||||
},
|
||||
"entity_expand": {
|
||||
"args": {"type": "str", "id": "str", "registration": "str (optional)", "icao24": "str (optional)"},
|
||||
"description": "Entity relationship graph expansion.",
|
||||
},
|
||||
"osint_sweep": {
|
||||
"args": {"ip": "str", "cidr": "int (default 24)"},
|
||||
"description": "Active subnet scan — requires full access tier.",
|
||||
},
|
||||
"brief_area": {
|
||||
"args": {"lat": "float", "lng": "float", "radius_km": "float (default 50)", "entity_types": "list[str] (optional)", "query": "str (optional)", "limit": "int (default 25)", "context_limit": "int (default 10)"},
|
||||
"description": "One compact area brief: nearby aircraft/ships/entities, optional topic news, and selected context layers.",
|
||||
|
||||
+36
-2
@@ -47,6 +47,8 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.tripcheck.com",
|
||||
"infocar.dgt.es",
|
||||
"informo.madrid.es",
|
||||
"webcams2.asfinag.at",
|
||||
"odo.asfinag.at",
|
||||
"www.windy.com",
|
||||
"imgproxy.windy.com",
|
||||
"www.lakecountypassage.com",
|
||||
@@ -55,6 +57,14 @@ _CCTV_PROXY_ALLOWED_HOSTS = {
|
||||
"www.nps.gov",
|
||||
"home.lewiscounty.com",
|
||||
"www.seattle.gov",
|
||||
"511on.ca",
|
||||
"511.alberta.ca",
|
||||
"fl511.com",
|
||||
"www.fl511.com",
|
||||
"webcams.transport.nsw.gov.au",
|
||||
"www.livetraffic.com",
|
||||
"livetraffic.com",
|
||||
"opendata.ndw.nu",
|
||||
}
|
||||
|
||||
|
||||
@@ -120,7 +130,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
read_timeout = 18.0 if "/snapshots/" in path else 12.0
|
||||
return _CCTVProxyProfile(name="gdot-snapshot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, read_timeout), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/"})
|
||||
if host == "511ga.org":
|
||||
return _CCTVProxyProfile(name="gdot-511ga-image", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=15,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
@@ -128,7 +138,7 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
if host.startswith("vss") and host.endswith("dot.ga.gov"):
|
||||
return _CCTVProxyProfile(name="gdot-hls", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 20.0), cache_seconds=10,
|
||||
headers={"Accept": "application/vnd.apple.mpegurl,application/x-mpegURL,video/*,*/*;q=0.8",
|
||||
"Referer": "http://navigator-c2c.dot.ga.gov/"})
|
||||
"Referer": "https://navigator-c2c.dot.ga.gov/"})
|
||||
if host in {"gettingaroundillinois.com", "cctv.travelmidwest.com"}:
|
||||
return _CCTVProxyProfile(name="illinois-dot", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"})
|
||||
@@ -156,10 +166,34 @@ def _cctv_proxy_profile_for_url(target_url: str) -> _CCTVProxyProfile:
|
||||
return _CCTVProxyProfile(name="madrid-city", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://informo.madrid.es/"})
|
||||
if host in {"webcams2.asfinag.at", "odo.asfinag.at"}:
|
||||
return _CCTVProxyProfile(name="asfinag-austria", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.asfinag.at/"})
|
||||
if host in {"www.windy.com", "imgproxy.windy.com"}:
|
||||
return _CCTVProxyProfile(name="windy-webcams", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.windy.com/"})
|
||||
if host == "511on.ca":
|
||||
return _CCTVProxyProfile(name="ontario-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511on.ca/"})
|
||||
if host == "511.alberta.ca":
|
||||
return _CCTVProxyProfile(name="alberta-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://511.alberta.ca/"})
|
||||
if host in {"fl511.com", "www.fl511.com"}:
|
||||
return _CCTVProxyProfile(name="florida-511", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 15.0), cache_seconds=30,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://fl511.com/"})
|
||||
if host == "webcams.transport.nsw.gov.au":
|
||||
return _CCTVProxyProfile(name="nsw-live-traffic", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=60,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.livetraffic.com/"})
|
||||
if host in {"opendata.ndw.nu", "www.ndw.nu"}:
|
||||
return _CCTVProxyProfile(name="ndw-netherlands", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 12.0), cache_seconds=120,
|
||||
headers={"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Referer": "https://www.ndw.nu/"})
|
||||
return _CCTVProxyProfile(name="generic-cctv", timeout=(_CCTV_PROXY_CONNECT_TIMEOUT_S, 8.0), cache_seconds=30,
|
||||
headers={"Accept": "*/*"})
|
||||
|
||||
|
||||
+167
-7
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import threading
|
||||
from typing import Any
|
||||
from fastapi import APIRouter, Request, Response, Query, Depends
|
||||
@@ -8,7 +9,7 @@ from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from limiter import limiter
|
||||
from auth import require_admin, require_local_operator
|
||||
from services.data_fetcher import get_latest_data, update_all_data
|
||||
from services.data_fetcher import update_all_data
|
||||
import orjson
|
||||
import json as json_mod
|
||||
|
||||
@@ -30,6 +31,14 @@ class LayerUpdate(BaseModel):
|
||||
layers: dict[str, bool]
|
||||
|
||||
|
||||
# Request body for POST /api/liveuamap/scraper-opt-in.
class LiveUamapOptInUpdate(BaseModel):
    # True records operator consent for the LiveUAMap scraper; False withdraws it.
    opted_in: bool
|
||||
|
||||
|
||||
# Request body for POST /api/prediction-markets/opt-in.
class PredictionMarketsOptInUpdate(BaseModel):
    # True enables prediction-market fetches; False disables them.
    opted_in: bool
|
||||
|
||||
|
||||
_LAST_VIEWPORT_UPDATE: tuple | None = None
|
||||
_LAST_VIEWPORT_UPDATE_TS = 0.0
|
||||
_VIEWPORT_UPDATE_LOCK = threading.Lock()
|
||||
@@ -202,6 +211,15 @@ def _sanitize_payload(value):
|
||||
return value
|
||||
|
||||
|
||||
def _live_data_json_bytes(payload: dict) -> bytes:
    """Encode a dashboard payload to JSON bytes with one shared set of orjson options.

    The payload is first passed through ``_sanitize_payload``; values orjson
    cannot encode natively fall back to ``str`` and non-string dict keys are
    permitted.
    """
    cleaned = _sanitize_payload(payload)
    return orjson.dumps(cleaned, default=str, option=orjson.OPT_NON_STR_KEYS)
|
||||
|
||||
|
||||
def _bbox_filter(items: list, s: float, w: float, n: float, e: float,
|
||||
lat_key: str = "lat", lng_key: str = "lng") -> list:
|
||||
pad_lat = (n - s) * 0.2
|
||||
@@ -386,6 +404,95 @@ async def update_viewport(vp: ViewportUpdate, request: Request): # noqa: ARG001
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@router.get("/api/liveuamap/scraper-status", dependencies=[Depends(require_local_operator)])
async def api_liveuamap_scraper_status():
    """Whether LiveUAMap Playwright may run (Windows needs UI opt-in unless env forces)."""
    # Imported lazily, at request time, exactly as the route always did.
    from services.liveuamap_settings import liveuamap_scraper_status

    status = liveuamap_scraper_status()
    return status
|
||||
|
||||
|
||||
@router.post("/api/liveuamap/scraper-opt-in", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def api_liveuamap_scraper_opt_in(body: LiveUamapOptInUpdate, request: Request):
    """Persist operator consent for LiveUAMap scraper (#348)."""
    from services.liveuamap_settings import liveuamap_scraper_status, set_liveuamap_ui_opt_in

    consent = body.opted_in
    set_liveuamap_ui_opt_in(consent)

    # Withdrawing consent never triggers a refresh; just report the new status.
    if not consent:
        return liveuamap_scraper_status()

    from services.fetchers._store import is_any_active

    # Only kick off a background scrape when the global_incidents layer is active.
    if is_any_active("global_incidents"):
        refresher = threading.Thread(target=_run_liveuamap_refresh, daemon=True)
        refresher.start()
    return liveuamap_scraper_status()
|
||||
|
||||
|
||||
def _run_liveuamap_refresh() -> None:
    """Run one LiveUAMap update; used as a background-thread target after opt-in.

    Best effort: any failure (including a failed import) is logged as a
    warning and never propagates out of the thread.
    """
    try:
        from services.fetchers.geo import update_liveuamap

        update_liveuamap()
    except Exception as exc:
        logger.warning("LiveUAMap refresh after opt-in failed: %s", exc)
|
||||
|
||||
|
||||
@router.get("/api/prediction-markets/status", dependencies=[Depends(require_local_operator)])
async def api_prediction_markets_status():
    """Whether Polymarket/Kalshi fetches and news market correlation are enabled."""
    # Imported lazily, at request time, exactly as the route always did.
    from services.prediction_markets_settings import prediction_markets_status

    status = prediction_markets_status()
    return status
|
||||
|
||||
|
||||
@router.post("/api/prediction-markets/opt-in", dependencies=[Depends(require_local_operator)])
@limiter.limit("10/minute")
async def api_prediction_markets_opt_in(body: PredictionMarketsOptInUpdate, request: Request):
    """Enable or disable prediction market fetches + intercept story correlation."""
    from services.config import get_settings
    from services.prediction_markets_settings import (
        prediction_markets_status,
        set_prediction_markets_ui_opt_in,
    )
    from routers.ai_intel import _write_env_value

    enabled = body.opted_in
    env_flag = "true" if enabled else "false"

    # Record the choice in the UI opt-in store, the env file and this process's
    # environment, then drop the cached settings object.
    set_prediction_markets_ui_opt_in(enabled)
    _write_env_value("PREDICTION_MARKETS_ENABLED", env_flag)
    os.environ["PREDICTION_MARKETS_ENABLED"] = env_flag
    get_settings.cache_clear()

    # Refresh on enable, clean up on disable; either way off the request thread.
    follow_up = _run_prediction_markets_refresh if enabled else _run_prediction_markets_disable
    threading.Thread(target=follow_up, daemon=True).start()

    return prediction_markets_status()
|
||||
|
||||
|
||||
def _run_prediction_markets_refresh() -> None:
    """Fetch prediction markets, then news; background-thread target after opt-in.

    Best effort: any failure (including a failed import) is logged as a
    warning and never propagates out of the thread.
    """
    try:
        from services.fetchers.prediction_markets import fetch_prediction_markets
        from services.fetchers.news import fetch_news

        # Markets first, then news, in the same order as before.
        fetch_prediction_markets()
        fetch_news()
    except Exception as exc:
        logger.warning("Prediction markets refresh after opt-in failed: %s", exc)
|
||||
|
||||
|
||||
def _run_prediction_markets_disable() -> None:
    """Clear cached prediction-market data, then re-fetch news; runs after opt-out.

    Best effort: any failure is logged as a warning and never propagates out
    of the background thread.
    """
    try:
        from services.fetchers._store import _data_lock, _mark_fresh, latest_data
        from services.fetchers.news import fetch_news

        # NOTE(review): indentation was lost in the reviewed view of this file.
        # This assumes only the two assignments run under _data_lock and that
        # _mark_fresh / fetch_news run after it is released - confirm.
        with _data_lock:
            for stale_key in ("prediction_markets", "trending_markets"):
                latest_data[stale_key] = []
        _mark_fresh("prediction_markets")
        fetch_news()
    except Exception as exc:
        logger.warning("Prediction markets disable cleanup failed: %s", exc)
|
||||
|
||||
|
||||
@router.post("/api/layers", dependencies=[Depends(require_local_operator)])
|
||||
@limiter.limit("30/minute")
|
||||
async def update_layers(update: LayerUpdate, request: Request):
|
||||
@@ -395,6 +502,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
old_mesh = is_any_active("sigint_meshtastic")
|
||||
old_aprs = is_any_active("sigint_aprs")
|
||||
old_viirs = is_any_active("viirs_nightlights")
|
||||
old_datacenters = is_any_active("datacenters")
|
||||
old_fishing = is_any_active("fishing_activity")
|
||||
changed = False
|
||||
for key, value in update.layers.items():
|
||||
if key in active_layers:
|
||||
@@ -407,6 +516,8 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
new_mesh = is_any_active("sigint_meshtastic")
|
||||
new_aprs = is_any_active("sigint_aprs")
|
||||
new_viirs = is_any_active("viirs_nightlights")
|
||||
new_datacenters = is_any_active("datacenters")
|
||||
new_fishing = is_any_active("fishing_activity")
|
||||
if old_ships and not new_ships:
|
||||
from services.ais_stream import stop_ais_stream
|
||||
stop_ais_stream()
|
||||
@@ -450,13 +561,33 @@ async def update_layers(update: LayerUpdate, request: Request):
|
||||
if not old_viirs and new_viirs:
|
||||
_queue_viirs_change_refresh()
|
||||
logger.info("VIIRS change refresh queued (layer enabled)")
|
||||
if not old_datacenters and new_datacenters:
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
fetch_datacenters()
|
||||
logger.info("Datacenters loaded (layer enabled)")
|
||||
if not old_fishing and new_fishing:
|
||||
from services.fetchers.geo import fetch_fishing_activity
|
||||
|
||||
fetch_fishing_activity()
|
||||
logger.info("Fishing activity refresh queued (layer enabled)")
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@router.get("/api/live-data")
|
||||
@limiter.limit("120/minute")
|
||||
async def live_data(request: Request):
|
||||
return get_latest_data()
|
||||
etag = _current_etag(prefix="live|full|")
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return Response(status_code=304, headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
from services.fetchers._store import get_latest_data_deepcopy_snapshot
|
||||
|
||||
payload = get_latest_data_deepcopy_snapshot()
|
||||
return Response(
|
||||
content=_live_data_json_bytes(payload),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/api/bootstrap/critical")
|
||||
@@ -551,7 +682,7 @@ async def bootstrap_critical(request: Request):
|
||||
"bootstrap_payload": True,
|
||||
}
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
content=_live_data_json_bytes(payload),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
@@ -613,8 +744,11 @@ async def live_data_fast(
|
||||
# to the pre-#288 implementation.
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _FAST_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(content=orjson.dumps(_sanitize_payload(payload)), media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"})
|
||||
return Response(
|
||||
content=_live_data_json_bytes(payload),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/api/live-data/slow")
|
||||
@@ -638,7 +772,8 @@ async def live_data_slow(
|
||||
"firms_fires", "datacenters", "military_bases", "power_plants", "viirs_change_nodes",
|
||||
"scanners", "weather_alerts", "ukraine_alerts", "air_quality", "volcanoes",
|
||||
"fishing_activity", "psk_reporter", "correlations", "uap_sightings", "wastewater",
|
||||
"crowdthreat", "threat_level", "trending_markets",
|
||||
"crowdthreat", "threat_level", "trending_markets", "road_corridor_trends",
|
||||
"malware_threats", "cyber_threats", "scm_suppliers", "telegram_osint",
|
||||
)
|
||||
freshness = get_source_timestamps_snapshot()
|
||||
payload = {
|
||||
@@ -679,6 +814,31 @@ async def live_data_slow(
|
||||
"uap_sightings": (d.get("uap_sightings") or []) if active_layers.get("uap_sightings", True) else [],
|
||||
"wastewater": (d.get("wastewater") or []) if active_layers.get("wastewater", True) else [],
|
||||
"crowdthreat": (d.get("crowdthreat") or []) if active_layers.get("crowdthreat", True) else [],
|
||||
"road_corridor_trends": (
|
||||
d.get("road_corridor_trends") or {"updated_at": None, "corridors": []}
|
||||
)
|
||||
if active_layers.get("road_corridor_trends", False)
|
||||
else {"updated_at": None, "corridors": []},
|
||||
"malware_threats": (
|
||||
d.get("malware_threats") or {"threats": [], "total": 0}
|
||||
)
|
||||
if active_layers.get("malware_c2", False)
|
||||
else {"threats": [], "total": 0},
|
||||
"cyber_threats": (
|
||||
d.get("cyber_threats") or {"threats": [], "stats": {}}
|
||||
)
|
||||
if active_layers.get("cyber_threats", False)
|
||||
else {"threats": [], "stats": {}},
|
||||
"scm_suppliers": (
|
||||
d.get("scm_suppliers") or {"suppliers": [], "total": 0, "critical_count": 0}
|
||||
)
|
||||
if active_layers.get("scm_suppliers", False)
|
||||
else {"suppliers": [], "total": 0, "critical_count": 0},
|
||||
"telegram_osint": (
|
||||
d.get("telegram_osint") or {"posts": [], "total": 0, "geolocated": 0}
|
||||
)
|
||||
if active_layers.get("telegram_osint", True)
|
||||
else {"posts": [], "total": 0, "geolocated": 0},
|
||||
"freshness": freshness,
|
||||
}
|
||||
# Issue #288: bbox filter heavy/dense layers only when all four bounds
|
||||
@@ -688,7 +848,7 @@ async def live_data_slow(
|
||||
if _has_full_bbox(s, w, n, e):
|
||||
payload = _apply_bbox_to_payload(payload, _SLOW_BBOX_HEAVY_KEYS, s, w, n, e)
|
||||
return Response(
|
||||
content=orjson.dumps(_sanitize_payload(payload), default=str, option=orjson.OPT_NON_STR_KEYS),
|
||||
content=_live_data_json_bytes(payload),
|
||||
media_type="application/json",
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
"""Entity graph expansion (intel layer)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.osint_intel.resolve import resolve_entity
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/entity/expand")
@limiter.limit("30/minute")
async def entity_expand(
    request: Request,
    _: None = Depends(require_local_operator),
    type: str = Query(..., min_length=3, max_length=32),
    id: str = Query(..., min_length=2, max_length=200),
    registration: str | None = Query(default=None, max_length=32),
    model: str | None = Query(default=None, max_length=64),
    icao24: str | None = Query(default=None, max_length=16),
) -> dict:
    """Expand the relationship graph around one entity.

    ``type`` and ``id`` identify the entity; ``registration``, ``model`` and
    ``icao24`` are optional hints forwarded to the resolver unchanged.

    Returns whatever ``resolve_entity`` returns.

    Raises:
        HTTPException: 400 when the resolver rejects the input with
            ``ValueError``; 502 for any other resolver failure.
    """
    import logging

    props = {"label": id, "registration": registration, "model": model, "icao24": icao24}
    try:
        return resolve_entity(type, id, props)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        # The client only sees a generic 502, so record the real cause here.
        # The entity id is left out of the log line: it can be a person's name.
        logging.getLogger(__name__).exception("Entity expansion failed for type=%r", type)
        raise HTTPException(status_code=502, detail="Intelligence layer unavailable") from exc
|
||||
@@ -8,7 +8,7 @@ from services.data_fetcher import get_latest_data
|
||||
from services.schemas import HealthResponse
|
||||
import os
|
||||
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.81")
|
||||
APP_VERSION = os.environ.get("_HEALTH_APP_VERSION", "0.9.82")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
"""Malware, cyber threats, and country risk feeds."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from starlette.background import BackgroundTask
|
||||
|
||||
from limiter import limiter
|
||||
from services.fetchers._store import get_latest_data_subset_refs
|
||||
from services.fetchers.telegram_osint import telegram_media_host_allowed
|
||||
from services.intel_feeds.country_risk import build_country_risk_payload
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/malware")
@limiter.limit("60/minute")
async def malware_feed(request: Request) -> dict:
    # Serve the cached malware_threats payload when it is a dict carrying a
    # non-None "threats" entry; otherwise serve an empty placeholder feed.
    cached = get_latest_data_subset_refs("malware_threats").get("malware_threats")
    usable = isinstance(cached, dict) and cached.get("threats") is not None
    if not usable:
        return {"threats": [], "total": 0, "timestamp": None, "source": "abuse.ch"}
    return cached
|
||||
|
||||
|
||||
@router.get("/api/cyber-threats")
@limiter.limit("60/minute")
async def cyber_threats(request: Request) -> dict:
    # Serve the cached cyber_threats payload, or an empty skeleton when the
    # cache entry is missing or falsy.
    cached = get_latest_data_subset_refs("cyber_threats").get("cyber_threats")
    if cached:
        return cached
    return {"threats": [], "stats": {}}
|
||||
|
||||
|
||||
@router.get("/api/country-risk")
@limiter.limit("30/minute")
async def country_risk(request: Request) -> dict:
    # The payload is built on every request by the intel_feeds helper.
    risk_payload = build_country_risk_payload()
    return risk_payload
|
||||
|
||||
|
||||
@router.get("/api/telegram-feed")
@limiter.limit("30/minute")
async def telegram_feed(request: Request) -> dict:
    # Serve the cached telegram_osint payload when it is a dict carrying a
    # non-None "posts" entry; otherwise serve an empty placeholder feed.
    cached = get_latest_data_subset_refs("telegram_osint").get("telegram_osint")
    usable = isinstance(cached, dict) and cached.get("posts") is not None
    if not usable:
        return {"posts": [], "total": 0, "geolocated": 0, "timestamp": None}
    return cached
|
||||
|
||||
|
||||
def _infer_telegram_media_type(target_url: str, content_type: str) -> str:
    """Choose the media type to report for a proxied Telegram media URL.

    A specific upstream ``content_type`` is returned untouched, parameters
    included. When it is empty or a generic octet-stream type, the type is
    guessed from the URL path's file extension; if that fails too, the
    upstream value is returned, or ``application/octet-stream`` when empty.
    """
    declared = str(content_type or "").split(";", 1)[0].strip().lower()
    generic_types = {"application/octet-stream", "binary/octet-stream"}
    if declared and declared not in generic_types:
        return content_type

    # Only the path is inspected, so query strings never hide the extension.
    url_path = str(urlparse(target_url).path or "").lower()
    suffix_table = (
        ((".jpg", ".jpeg"), "image/jpeg"),
        ((".png",), "image/png"),
        ((".webp",), "image/webp"),
        ((".gif",), "image/gif"),
        ((".mp4",), "video/mp4"),
        ((".webm",), "video/webm"),
    )
    for suffixes, mime in suffix_table:
        if url_path.endswith(suffixes):
            return mime
    return content_type or "application/octet-stream"
|
||||
|
||||
|
||||
@router.get("/api/telegram/media")
@limiter.limit("60/minute")
async def telegram_media_proxy(request: Request, url: str = Query(...)) -> StreamingResponse:
    """Stream Telegram CDN media for in-app playback (host allowlist only).

    The caller's ``Range`` header is forwarded upstream. Redirects are followed
    by hand, at most ``max_redirects`` hops, and every hop is checked against
    the same scheme and host allowlist as the original URL, so an upstream
    redirect cannot steer the proxy to a host outside the allowlist.

    Raises:
        HTTPException: 400 for a non-http(s) URL, 403 for a host outside the
            allowlist, 502 when the upstream request fails or redirects
            somewhere disallowed, or the upstream status when it is >= 400.
    """
    from urllib.parse import urljoin

    from starlette.concurrency import run_in_threadpool

    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="Invalid scheme")
    if not telegram_media_host_allowed(parsed.hostname):
        raise HTTPException(status_code=403, detail="Host not allowed")

    headers = {
        "User-Agent": (
            f"Mozilla/5.0 (compatible; {outbound_user_agent('telegram-media')}) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "*/*",
    }
    if range_header := request.headers.get("range"):
        headers["Range"] = range_header

    max_redirects = 3
    target = url
    for _hop in range(max_redirects + 1):
        try:
            # requests is blocking; run it off the event loop so one slow
            # upstream cannot stall every other request for the read timeout.
            resp = await run_in_threadpool(
                requests.get,
                target,
                stream=True,
                timeout=(3, 45),
                headers=headers,
                allow_redirects=False,
            )
        except requests.RequestException as exc:
            logger.warning("Telegram media upstream failure %s: %s", target, exc)
            raise HTTPException(status_code=502, detail="Upstream fetch failed") from exc

        if not resp.is_redirect:
            break

        # Re-validate the redirect target before touching it.
        next_target = urljoin(target, resp.headers.get("Location", ""))
        resp.close()
        next_parsed = urlparse(next_target)
        if next_parsed.scheme not in ("http", "https") or not telegram_media_host_allowed(next_parsed.hostname):
            logger.warning("Telegram media redirect to disallowed target blocked: %s", next_target)
            raise HTTPException(status_code=502, detail="Upstream redirected to a disallowed host")
        target = next_target
    else:
        raise HTTPException(status_code=502, detail="Too many upstream redirects")

    if resp.status_code >= 400:
        resp.close()
        raise HTTPException(status_code=int(resp.status_code), detail=f"Upstream returned {resp.status_code}")

    media_type = _infer_telegram_media_type(url, resp.headers.get("Content-Type", "application/octet-stream"))
    response_headers = {
        "Cache-Control": "private, max-age=300",
        "Accept-Ranges": resp.headers.get("Accept-Ranges", "bytes"),
    }
    if content_length := resp.headers.get("Content-Length"):
        response_headers["Content-Length"] = content_length
    if content_range := resp.headers.get("Content-Range"):
        response_headers["Content-Range"] = content_range

    # The upstream connection is released only after the body has been streamed.
    return StreamingResponse(
        resp.iter_content(chunk_size=65536),
        status_code=resp.status_code,
        media_type=media_type,
        headers=response_headers,
        background=BackgroundTask(resp.close),
    )
|
||||
@@ -0,0 +1,151 @@
|
||||
"""Operator OSINT recon routes (server-side proxies, SSRF guarded)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.osint import lookups
|
||||
|
||||
router = APIRouter(dependencies=[Depends(require_local_operator)])
|
||||
|
||||
# Schema names accepted by the sanctions lookup route's `schema` query parameter.
_ALLOWED_SCHEMAS = {"Person", "Organization", "Company", "Vessel", "Airplane", "LegalEntity"}
|
||||
|
||||
|
||||
# JSON body for POST /api/osint/sweep/scan.
class SweepScanRequest(BaseModel):
    # Anchor address as text, 7 to 45 characters.
    ip: str = Field(min_length=7, max_length=45)
    # Prefix length, limited to /24 through /32; defaults to /24.
    cidr: int = Field(default=24, ge=24, le=32)
|
||||
|
||||
|
||||
def _bad_request(exc: ValueError) -> HTTPException:
    """Build (not raise) a 400 ``HTTPException`` whose detail is the error text."""
    message = str(exc)
    return HTTPException(status_code=400, detail=message)
|
||||
|
||||
|
||||
@router.get("/api/osint/ip")
@limiter.limit("20/minute")
async def osint_ip(request: Request, ip: str = Query(..., min_length=7, max_length=45)) -> dict:
    # IP lookup; a ValueError from the lookup layer becomes HTTP 400.
    try:
        result = lookups.lookup_ip(ip)
    except ValueError as err:
        raise _bad_request(err) from err
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/dns")
@limiter.limit("20/minute")
async def osint_dns(request: Request, domain: str = Query(..., min_length=4, max_length=253)) -> dict:
    # DNS lookup; a ValueError from the lookup layer becomes HTTP 400.
    try:
        result = lookups.lookup_dns(domain)
    except ValueError as err:
        raise _bad_request(err) from err
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/whois")
@limiter.limit("20/minute")
async def osint_whois(request: Request, domain: str = Query(..., min_length=4, max_length=253)) -> dict:
    # WHOIS lookup; a ValueError from the lookup layer becomes HTTP 400.
    try:
        result = lookups.lookup_whois(domain)
    except ValueError as err:
        raise _bad_request(err) from err
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/certs")
@limiter.limit("20/minute")
async def osint_certs(request: Request, domain: str = Query(..., min_length=4, max_length=253)) -> dict:
    # Certificate lookup; a ValueError from the lookup layer becomes HTTP 400.
    try:
        result = lookups.lookup_certs(domain)
    except ValueError as err:
        raise _bad_request(err) from err
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/threats")
@limiter.limit("20/minute")
async def osint_threats(request: Request, query: str | None = Query(default=None, max_length=253)) -> dict:
    # `query` is optional and is passed through as-is (None when omitted).
    result = lookups.lookup_threats(query)
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/bgp")
@limiter.limit("20/minute")
async def osint_bgp(request: Request, query: str = Query(..., min_length=2, max_length=64)) -> dict:
    # BGP lookup; a ValueError from the lookup layer becomes HTTP 400.
    try:
        result = lookups.lookup_bgp(query)
    except ValueError as err:
        raise _bad_request(err) from err
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/sanctions")
@limiter.limit("20/minute")
async def osint_sanctions(
    request: Request,
    query: str = Query(..., min_length=4, max_length=200),
    schema: str | None = Query(default=None),
    limit: int = Query(default=25, ge=1, le=100),
) -> dict:
    # A missing or empty schema means "no filter"; anything else must be one of
    # _ALLOWED_SCHEMAS or the request is rejected with HTTP 400.
    schema_rejected = bool(schema) and schema not in _ALLOWED_SCHEMAS
    if schema_rejected:
        raise HTTPException(status_code=400, detail=f"Invalid schema. Allowed: {', '.join(sorted(_ALLOWED_SCHEMAS))}")
    hits = lookups.lookup_sanctions(query, schema=schema, limit=limit)
    return hits
|
||||
|
||||
|
||||
@router.get("/api/osint/cve")
@limiter.limit("30/minute")
async def osint_cve(request: Request, cve: str = Query(..., min_length=10, max_length=32)) -> dict:
    # A ValueError whose text contains "not found" (any case) maps to HTTP 404;
    # every other ValueError maps to HTTP 400.
    try:
        record = lookups.lookup_cve(cve)
    except ValueError as err:
        message = str(err)
        status = 404 if "not found" in message.lower() else 400
        raise HTTPException(status_code=status, detail=message) from err
    return record
|
||||
|
||||
|
||||
@router.get("/api/osint/mac")
@limiter.limit("20/minute")
async def osint_mac(request: Request, mac: str = Query(..., min_length=5, max_length=32)) -> dict:
    # The lookup result is returned as-is; no error translation happens here.
    result = lookups.lookup_mac(mac)
    return result
|
||||
|
||||
|
||||
@router.get("/api/osint/github")
@limiter.limit("20/minute")
async def osint_github(request: Request, username: str = Query(..., min_length=1, max_length=64)) -> dict:
    # Unlike the sibling routes, every ValueError here is reported as HTTP 404.
    try:
        profile = lookups.lookup_github(username)
    except ValueError as err:
        raise HTTPException(status_code=404, detail=str(err)) from err
    return profile
|
||||
|
||||
|
||||
@router.get("/api/osint/leaks")
@limiter.limit("10/minute")
async def osint_leaks(request: Request, email: str = Query(..., min_length=5, max_length=254)) -> dict:
    """Return the leak lookup result for ``email``.

    A ``ValueError`` raised by the lookup is converted with ``_bad_request``
    and re-raised, chained to the original exception.
    """
    try:
        leak_report = lookups.lookup_leaks(email)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    return leak_report
|
||||
|
||||
|
||||
@router.get("/api/osint/sweep")
@limiter.limit("5/minute")
async def osint_sweep_init(
    request: Request,
    ip: str = Query(..., min_length=7, max_length=45),
    cidr: int = Query(default=24, ge=24, le=32),
) -> dict:
    """Return ``lookups.sweep_init`` output for ``ip`` and prefix length ``cidr``.

    ``cidr`` is validated to the range 24-32. A ``ValueError`` from the
    lookup is converted with ``_bad_request`` and re-raised.
    """
    try:
        sweep_plan = lookups.sweep_init(ip, cidr)
    except ValueError as exc:
        raise _bad_request(exc) from exc
    return sweep_plan
|
||||
|
||||
|
||||
@router.post("/api/osint/sweep/scan")
@limiter.limit("3/minute")
async def osint_sweep_scan(request: Request, payload: SweepScanRequest) -> dict:
    """Scan the subnet containing ``payload.ip`` and merge it with the init data.

    The response holds the ``sweep_init`` keys, overlaid by the
    ``sweep_scan`` keys, plus a ``"subnet"`` entry in ``<start>/<cidr>`` form.
    A ``ValueError`` from any lookup step is converted with ``_bad_request``.
    """
    try:
        prefix_len = payload.cidr
        subnet = lookups.subnet_start_for(payload.ip, prefix_len)
        scan = lookups.sweep_scan(subnet, prefix_len)
        init = lookups.sweep_init(payload.ip, prefix_len)
        # Later writes win: scan results override init fields of the same name.
        merged = dict(init)
        merged.update(scan)
        merged["subnet"] = f"{subnet}/{payload.cidr}"
        return merged
    except ValueError as exc:
        raise _bad_request(exc) from exc
|
||||
@@ -0,0 +1,105 @@
|
||||
"""Road corridor Sentinel-2 freight trend endpoints (opt-in slow layer)."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from limiter import limiter
|
||||
from services.road_corridor_sat.config import optional_deps_available, road_corridor_sat_enabled
|
||||
from services.road_corridor_sat.credentials import sentinel_credentials_configured
|
||||
from services.road_corridor_sat.jobs import enqueue_analyze, get_job, get_latest_job, job_to_dict
|
||||
from services.road_corridor_sat.presets import CORRIDOR_PRESETS, get_preset
|
||||
from services.road_corridor_sat.storage import build_trends_payload, preset_metadata
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _status_payload() -> dict:
    """Build the status dictionary shared by the road-corridor endpoints.

    ``active_job`` is the serialized latest job only while its status is
    ``"queued"`` or ``"running"``; otherwise it is ``None``.
    """
    latest = get_latest_job()
    status = {
        "enabled": road_corridor_sat_enabled(),
        "deps_installed": optional_deps_available(),
        "credentials_configured": sentinel_credentials_configured(),
        "preset_count": len(CORRIDOR_PRESETS),
        "attribution": "backend/third_party/drishx/NOTICE.md",
    }
    job_in_flight = bool(latest) and latest.status in {"queued", "running"}
    status["active_job"] = job_to_dict(latest) if job_in_flight else None
    return status
|
||||
|
||||
|
||||
def _require_analyze_ready() -> None:
    """Raise a 503 ``HTTPException`` unless analysis prerequisites are met.

    The checks run in order: optional dependencies first, then Sentinel
    credentials. The first failing check raises with its own remedy text.
    """
    prerequisites = (
        (
            optional_deps_available,
            "Install optional road-corridor dependencies (uv sync --extra road-corridor)",
        ),
        (
            sentinel_credentials_configured,
            "Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET in Imagery settings",
        ),
    )
    for is_satisfied, remedy in prerequisites:
        if not is_satisfied():
            raise HTTPException(status_code=503, detail=remedy)
|
||||
|
||||
|
||||
class AnalyzeRequest(BaseModel):
    """Request body for POST /api/road-corridors/analyze."""

    # Latitude of the point to analyze; validated to [-90, 90].
    lat: float = Field(ge=-90, le=90)
    # Longitude of the point to analyze; validated to [-180, 180].
    lon: float = Field(ge=-180, le=180)
    # Optional label, at most 120 characters, passed to enqueue_analyze.
    label: str | None = Field(default=None, max_length=120)
|
||||
|
||||
|
||||
@router.get("/api/road-corridors/status")
@limiter.limit("60/minute")
async def road_corridors_status(request: Request) -> dict:
    """Return the road-corridor status fields flattened beside ``"ok": True``."""
    response = {"ok": True}
    response.update(_status_payload())
    return response
|
||||
|
||||
|
||||
@router.get("/api/road-corridors")
@limiter.limit("60/minute")
async def list_road_corridors(request: Request) -> dict:
    """Return the status block, all corridor presets and the trends payload."""
    status = _status_payload()
    trends = build_trends_payload()
    return {
        "ok": True,
        "status": status,
        "presets": CORRIDOR_PRESETS,
        "trends": trends,
    }
|
||||
|
||||
|
||||
@router.post("/api/road-corridors/analyze")
@limiter.limit("6/minute")
async def analyze_road_corridor_here(request: Request, payload: AnalyzeRequest) -> dict:
    """Start an on-demand Sentinel-2 corridor analysis at map center.

    Raises:
        HTTPException: 503 when prerequisites are missing; 409 when
            ``enqueue_analyze`` reports ``analysis_already_running``. The 409
            carries the latest job id in the ``X-Job-Id`` header, or an empty
            string when no job is found.
        RuntimeError: any other ``RuntimeError`` from ``enqueue_analyze`` is
            re-raised unchanged.
    """
    _require_analyze_ready()
    try:
        job = enqueue_analyze(payload.lat, payload.lon, payload.label)
    except RuntimeError as exc:
        if str(exc) != "analysis_already_running":
            raise
        active = get_latest_job()
        running_job_id = active.job_id if active else ""
        raise HTTPException(
            status_code=409,
            detail="Analysis already in progress",
            headers={"X-Job-Id": running_job_id},
        ) from exc
    response = {"ok": True}
    response.update(job_to_dict(job))
    return response
|
||||
|
||||
|
||||
@router.get("/api/road-corridors/analyze/status")
@limiter.limit("120/minute")
async def analyze_road_corridor_status(
    request: Request,
    job_id: str | None = Query(default=None),
) -> dict:
    """Report one analysis job, or the latest job when ``job_id`` is omitted.

    ``"job"`` is ``None`` in the response when no matching job exists.
    """
    if job_id:
        job = get_job(job_id)
    else:
        job = get_latest_job()
    serialized = None if job is None else job_to_dict(job)
    return {"ok": True, "job": serialized}
|
||||
|
||||
|
||||
@router.get("/api/road-corridors/{preset_id}")
@limiter.limit("60/minute")
async def get_road_corridor(preset_id: str, request: Request) -> dict:
    """Return stored metadata, preset definition and status for one corridor.

    Raises:
        HTTPException: 404 when ``preset_metadata`` has nothing for
            ``preset_id``.
    """
    meta = preset_metadata(preset_id)
    if meta is None:
        raise HTTPException(status_code=404, detail="Unknown corridor preset")
    # Ad-hoc viewport runs are stored on disk but not in CORRIDOR_PRESETS, so
    # ``preset`` may legitimately be None here while ``meta`` is populated.
    preset = get_preset(preset_id)
    return {"ok": True, "preset": preset, "result": meta, "status": _status_payload()}
|
||||
@@ -0,0 +1,16 @@
|
||||
"""Supply-chain risk overlay."""
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
|
||||
from auth import require_local_operator
|
||||
from limiter import limiter
|
||||
from services.scm.suppliers import build_scm_payload
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/scm-suppliers")
@limiter.limit("30/minute")
async def scm_suppliers(request: Request, _: None = Depends(require_local_operator)) -> dict:
    """Return the supply-chain supplier payload.

    Access is gated by the ``require_local_operator`` dependency.
    """
    supplier_payload = build_scm_payload()
    return supplier_payload
|
||||
@@ -85,6 +85,39 @@ async def api_geocode_reverse(
|
||||
return await asyncio.to_thread(reverse_geocode, lat, lng, local_only)
|
||||
|
||||
|
||||
# ── Wikimedia proxy (#360) — browser calls these instead of wikipedia.org ───
|
||||
@router.get("/api/wikipedia/summary")
@limiter.limit("60/minute")
def api_wikipedia_summary(
    request: Request,
    title: str = Query(..., min_length=1, max_length=256),
):
    """Proxy Wikipedia REST summaries through the self-hosted backend.

    Returns the summary for ``title``, or a 404 JSON response with detail
    ``not_found`` when the fetch helper returns ``None``.
    """
    from services.region_dossier import fetch_wikipedia_page_summary

    page_summary = fetch_wikipedia_page_summary(title)
    if page_summary is not None:
        return page_summary
    return JSONResponse(status_code=404, content={"detail": "not_found"})
|
||||
|
||||
|
||||
class WikidataSparqlRequest(BaseModel):
    """Request body for POST /api/wikidata/sparql."""

    # SPARQL text to relay; the handler strips it and rejects over 12,000 characters.
    query: str
|
||||
|
||||
|
||||
@router.post("/api/wikidata/sparql")
@limiter.limit("30/minute")
def api_wikidata_sparql(request: Request, body: WikidataSparqlRequest):
    """Proxy Wikidata SPARQL so the browser never contacts query.wikidata.org.

    Raises:
        HTTPException: 400 when the stripped query exceeds 12,000 characters.
    """
    from services.region_dossier import fetch_wikidata_sparql_bindings

    max_query_chars = 12_000
    sparql = (body.query or "").strip()
    if len(sparql) > max_query_chars:
        raise HTTPException(400, "SPARQL query too large")
    return {"bindings": fetch_wikidata_sparql_bindings(sparql)}
|
||||
|
||||
|
||||
# ── Sentinel proxy routes (Issue #299/#300/#301, reported by tg12) ──────────
|
||||
# These three endpoints relay external Sentinel / Planetary Computer
|
||||
# requests through the backend to avoid browser CORS blocks. They are
|
||||
|
||||
@@ -29,7 +29,7 @@ def main() -> None:
|
||||
from services.network_utils import outbound_user_agent
|
||||
ua = outbound_user_agent("release-script-power-plants")
|
||||
except Exception:
|
||||
ua = "Shadowbroker/0.9 (release-script-power-plants; +https://github.com/BigBodyCobain/Shadowbroker/issues)"
|
||||
ua = "operator-release-script (purpose: power-plants)"
|
||||
req = urllib.request.Request(CSV_URL, headers={"User-Agent": ua})
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
raw = resp.read().decode("utf-8")
|
||||
|
||||
@@ -167,6 +167,11 @@ def cmd_hash(args: argparse.Namespace) -> int:
|
||||
print("")
|
||||
print("Updater pin:")
|
||||
print(f"MESH_UPDATE_SHA256={digest}")
|
||||
print("")
|
||||
print("Release checklist:")
|
||||
print(" - add this digest to SHA256SUMS.txt for the GitHub release")
|
||||
print(" - add/update backend/data/release_digests.json for bundled updater verification")
|
||||
print(" - keep MESH_UPDATE_SHA256 available as the operator override path")
|
||||
return 0 if asset_matches else 2
|
||||
|
||||
|
||||
|
||||
@@ -92,18 +92,37 @@ SECRET_REGEX+='pypi-[0-9a-zA-Z-]{50,}' # PyPI token
|
||||
TEXT_FILES=$(grep -ivE '\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|pbf|zip|tar|gz|db|sqlite|xlsx|pdf|mp[34]|wav|ogg|webm|webp|avif)$' "$FILELIST" | grep -v 'scan-secrets\.sh$' || true)
|
||||
|
||||
if [[ -n "$TEXT_FILES" ]]; then
|
||||
# Known-public exclusions: lines matching `<host-or-ip> ssh-<algo> <key>`
|
||||
# are SSH known_hosts entries — the host's PUBLIC fingerprint, which is
|
||||
# by definition safe to commit (the whole point of pinning known_hosts
|
||||
# is to publish the fingerprint widely so MITM is detectable). Filter
|
||||
# these out before flagging the file.
|
||||
KNOWN_HOSTS_LINE='^[[:space:]]*[a-zA-Z0-9._:,*-]+([[:space:]]+[a-zA-Z0-9._:,*-]+)?[[:space:]]+(ssh-rsa|ssh-ed25519|ssh-dss|ecdsa-sha2-nistp256|ecdsa-sha2-nistp384|ecdsa-sha2-nistp521)[[:space:]]+AAAA'
|
||||
|
||||
# Use grep with file list, skip missing/binary, limit output
|
||||
CONTENT_HITS=$(echo "$TEXT_FILES" | xargs grep -lE "$SECRET_REGEX" 2>/dev/null || true)
|
||||
if [[ -n "$CONTENT_HITS" ]]; then
|
||||
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
||||
echo "$CONTENT_HITS" | while read -r f; do
|
||||
echo -e " ${RED}$f${NC}"
|
||||
# Show first matching line for context
|
||||
grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | head -2 | while read -r line; do
|
||||
echo -e " ${YELLOW}$line${NC}"
|
||||
done
|
||||
done
|
||||
FOUND=1
|
||||
REAL_HITS=""
|
||||
REAL_REPORT=""
|
||||
while IFS= read -r f; do
|
||||
[[ -z "$f" ]] && continue
|
||||
# Re-grep this file, but filter out known_hosts-style lines.
|
||||
FILE_HITS=$(grep -nE "$SECRET_REGEX" "$f" 2>/dev/null | grep -vE "$KNOWN_HOSTS_LINE" || true)
|
||||
if [[ -n "$FILE_HITS" ]]; then
|
||||
REAL_HITS+="$f"$'\n'
|
||||
REAL_REPORT+=" ${RED}$f${NC}"$'\n'
|
||||
# Show first 2 matching lines for context
|
||||
while IFS= read -r line; do
|
||||
[[ -z "$line" ]] && continue
|
||||
REAL_REPORT+=" ${YELLOW}$line${NC}"$'\n'
|
||||
done < <(echo "$FILE_HITS" | head -2)
|
||||
fi
|
||||
done <<< "$CONTENT_HITS"
|
||||
if [[ -n "$REAL_HITS" ]]; then
|
||||
echo -e "\n${RED}BLOCKED: Embedded secrets/tokens found in:${NC}"
|
||||
echo -en "$REAL_REPORT"
|
||||
FOUND=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
@@ -51,6 +51,15 @@ API_REGISTRY = [
|
||||
"url": "https://aisstream.io/",
|
||||
"required": True,
|
||||
},
|
||||
{
|
||||
"id": "gfw_api_token",
|
||||
"env_key": "GFW_API_TOKEN",
|
||||
"name": "Global Fishing Watch",
|
||||
"description": "Bearer token for Global Fishing Watch fishing-vessel activity events (Fishing Activity map layer). Free registration at globalfishingwatch.org.",
|
||||
"category": "Maritime",
|
||||
"url": "https://globalfishingwatch.org/our-apis/",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"id": "adsb_lol",
|
||||
"env_key": None,
|
||||
|
||||
@@ -17,6 +17,9 @@ _KNOWN_CCTV_MEDIA_HOST_ALIASES = {
|
||||
# Trusted upstream occasionally publishes a typo for this Georgia camera
|
||||
# host. Normalize it at ingest so the proxy and client stay consistent.
|
||||
"navigatos-c2c.dot.ga.gov": "navigator-c2c.dot.ga.gov",
|
||||
# TravelIQ staging hosts occasionally appear in 511 catalog metadata.
|
||||
"on.stage.traveliq.co": "511on.ca",
|
||||
"ab.stage.traveliq.co": "511.alberta.ca",
|
||||
}
|
||||
|
||||
_POINT_WKT_RE = re.compile(
|
||||
@@ -40,6 +43,17 @@ def _normalize_cctv_media_url(raw_url: str) -> str:
|
||||
return urlunparse(parsed._replace(netloc=netloc))
|
||||
|
||||
|
||||
def _ensure_https_url(raw_url: str) -> str:
    """Upgrade http:// media/catalog URLs to https:// at ingest time.

    The input is stringified, stripped and passed through
    ``_normalize_cctv_media_url`` first. An empty result yields ``""``; URLs
    whose scheme is not ``http`` are returned as normalized.
    """
    normalized = _normalize_cctv_media_url(str(raw_url or "").strip())
    if not normalized:
        return ""
    parts = urlparse(normalized)
    if parts.scheme.lower() != "http":
        return normalized
    return urlunparse(parts._replace(scheme="https"))
|
||||
|
||||
|
||||
def _looks_like_direct_cctv_media_url(url: str) -> bool:
|
||||
candidate = str(url or "").strip().lower()
|
||||
if not candidate.startswith(("http://", "https://")):
|
||||
@@ -93,6 +107,165 @@ def _parse_wkt_point(raw_point: str) -> tuple[float | None, float | None]:
|
||||
return lat, lon
|
||||
|
||||
|
||||
def _fetch_traveliq_v2_cameras(
    *,
    api_url: str,
    base_url: str,
    id_prefix: str,
    source_agency: str,
) -> List[Dict[str, Any]]:
    """Parse TravelIQ-style GET /api/v2/get/cameras feeds (Ontario, Alberta).

    Args:
        api_url: Full URL of the camera list endpoint.
        base_url: Site root used to resolve relative view URLs.
        id_prefix: Prefix for generated camera ids (``<prefix>-<site>-<view>``).
        source_agency: Agency name stored on each camera and used in logs.

    Returns:
        One camera dict per usable view. Returns an empty list when the fetch
        fails or the payload is not a JSON list.
    """
    resp = fetch_with_curl(
        api_url,
        timeout=30,
        headers={"Accept": "application/json"},
    )
    if not resp or resp.status_code != 200:
        logger.error(
            "%s CCTV fetch failed: HTTP %s",
            source_agency,
            resp.status_code if resp else "no response",
        )
        return []

    data = resp.json()
    if not isinstance(data, list):
        return []

    cameras: List[Dict[str, Any]] = []
    for cam in data:
        if not isinstance(cam, dict):
            continue
        try:
            lat = float(cam.get("Latitude"))
            lon = float(cam.get("Longitude"))
        except (TypeError, ValueError):
            continue

        site_id = cam.get("Id")
        if site_id is None:
            # Without a site id no stable camera id can be built for any view.
            continue
        location = str(cam.get("Location") or cam.get("Roadway") or "Camera")[:120]
        views = cam.get("Views")
        if not isinstance(views, list) or not views:
            continue

        for view in views:
            if not isinstance(view, dict):
                continue
            status = str(view.get("Status") or "enabled").strip().lower()
            if status and status not in {"enabled", "active"}:
                continue
            relative_url = str(view.get("Url") or "").strip()
            if not relative_url:
                # urljoin(base_url, "") returns base_url itself, which would
                # store the site root as if it were a camera image.
                continue
            media_url = _ensure_https_url(urljoin(base_url, relative_url))
            if not media_url:
                continue
            view_id = view.get("Id") or site_id
            label = str(view.get("Description") or location or "Camera")[:120]
            cameras.append(
                {
                    "id": f"{id_prefix}-{site_id}-{view_id}",
                    "source_agency": source_agency,
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": label,
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 60,
                }
            )
    return cameras
|
||||
|
||||
|
||||
def _fetch_511_datatables_cameras(
    *,
    list_url: str,
    base_url: str,
    id_prefix: str,
    source_agency: str,
    referer: str,
    page_size: int = 500,
) -> List[Dict[str, Any]]:
    """Parse 511 DataTables POST /List/GetData/Cameras feeds (Georgia, Florida).

    Pages through the feed with ``draw``/``start``/``length`` until the
    reported ``recordsTotal`` is reached, a short page arrives, or a request
    fails. Cameras gathered before a failure are still returned.

    Args:
        list_url: Full URL of the DataTables list endpoint.
        base_url: Site root used for the Origin header and relative image URLs.
        id_prefix: Prefix for generated camera ids (``<prefix>-<site>``).
        source_agency: Agency name stored on each camera and used in logs.
        referer: Value sent in the Referer header.
        page_size: Number of rows requested per page.

    Returns:
        One camera dict per row that has an id, coordinates and an unblocked image.
    """
    cameras: List[Dict[str, Any]] = []
    start = 0
    draw = 1
    while True:
        resp = fetch_with_curl(
            list_url,
            method="POST",
            json_data={"draw": draw, "start": start, "length": page_size},
            timeout=30,
            headers={
                "Accept": "application/json",
                "Referer": referer,
                "Origin": base_url.rstrip("/"),
            },
        )
        if not resp or resp.status_code != 200:
            logger.error(
                "%s CCTV fetch failed: HTTP %s",
                source_agency,
                resp.status_code if resp else "no response",
            )
            break

        data = resp.json()
        if not isinstance(data, dict):
            # An unexpected payload shape ends paging but keeps earlier pages.
            logger.error(
                "%s CCTV fetch failed: unexpected payload type %s",
                source_agency,
                type(data).__name__,
            )
            break
        rows = data.get("data") or []
        if not isinstance(rows, list) or not rows:
            break

        for row in rows:
            if not isinstance(row, dict):
                continue
            site_id = row.get("id") or row.get("DT_RowId")
            location = row.get("location") or row.get("roadway") or source_agency
            lat_lng = row.get("latLng") or {}
            geography = lat_lng.get("geography") if isinstance(lat_lng, dict) else {}
            lat, lon = _parse_wkt_point(
                geography.get("wellKnownText") if isinstance(geography, dict) else ""
            )
            images = row.get("images")
            if not isinstance(images, list):
                images = []
            # First image that has a URL and is not flagged as blocked.
            image = next(
                (
                    candidate
                    for candidate in images
                    if isinstance(candidate, dict)
                    and str(candidate.get("imageUrl") or "").strip()
                    and not bool(candidate.get("blocked"))
                ),
                None,
            )
            if not (site_id and image and lat is not None and lon is not None):
                continue
            media_url = _ensure_https_url(
                urljoin(base_url, str(image.get("imageUrl") or "").strip())
            )
            if not media_url:
                continue
            cameras.append(
                {
                    "id": f"{id_prefix}-{site_id}",
                    "source_agency": source_agency,
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": str(location)[:120],
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 60,
                }
            )

        start += len(rows)
        draw += 1
        try:
            total = int(data.get("recordsTotal") or 0)
        except (TypeError, ValueError):
            # Treat an unparseable total as unknown and use the short-page rule.
            total = 0
        if total and start >= total:
            break
        if not total and len(rows) < page_size:
            break
    return cameras
|
||||
|
||||
|
||||
def init_db():
|
||||
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = sqlite3.connect(str(DB_PATH))
|
||||
@@ -169,7 +342,7 @@ class BaseCCTVIngestor(ABC):
|
||||
cam.get("lat"),
|
||||
cam.get("lon"),
|
||||
cam.get("direction_facing", "Unknown"),
|
||||
cam.get("media_url"),
|
||||
_ensure_https_url(cam.get("media_url", "")),
|
||||
cam.get("media_type", _detect_media_type(cam.get("media_url", ""))),
|
||||
cam.get("refresh_rate_seconds", 60),
|
||||
),
|
||||
@@ -454,77 +627,14 @@ class WSDOTIngestor(BaseCCTVIngestor):
|
||||
class GeorgiaDOTIngestor(BaseCCTVIngestor):
|
||||
"""Georgia cameras via the public 511GA list feed."""
|
||||
|
||||
URL = "https://511ga.org/List/GetData/Cameras"
|
||||
BASE_URL = "https://511ga.org"
|
||||
PAGE_SIZE = 500
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
cameras = []
|
||||
start = 0
|
||||
draw = 1
|
||||
while True:
|
||||
resp = fetch_with_curl(
|
||||
self.URL,
|
||||
method="POST",
|
||||
json_data={"draw": draw, "start": start, "length": self.PAGE_SIZE},
|
||||
timeout=30,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"Referer": "https://511ga.org/cctv",
|
||||
"Origin": "https://511ga.org",
|
||||
},
|
||||
)
|
||||
if not resp or resp.status_code != 200:
|
||||
logger.error(
|
||||
"Georgia CCTV fetch failed: HTTP %s",
|
||||
resp.status_code if resp else "no response",
|
||||
)
|
||||
break
|
||||
data = resp.json()
|
||||
rows = data.get("data") or []
|
||||
if not rows:
|
||||
break
|
||||
for row in rows:
|
||||
site_id = row.get("id") or row.get("DT_RowId")
|
||||
location = row.get("location") or row.get("roadway") or "GA Camera"
|
||||
lat_lng = row.get("latLng") or {}
|
||||
geography = lat_lng.get("geography") if isinstance(lat_lng, dict) else {}
|
||||
lat, lon = _parse_wkt_point(geography.get("wellKnownText") if isinstance(geography, dict) else "")
|
||||
images = row.get("images") or []
|
||||
image = next(
|
||||
(
|
||||
candidate
|
||||
for candidate in images
|
||||
if str(candidate.get("imageUrl") or "").strip()
|
||||
and not bool(candidate.get("blocked"))
|
||||
),
|
||||
None,
|
||||
)
|
||||
if not (site_id and image and lat is not None and lon is not None):
|
||||
continue
|
||||
media_url = _normalize_cctv_media_url(
|
||||
urljoin(self.BASE_URL, str(image.get("imageUrl") or "").strip())
|
||||
)
|
||||
cameras.append(
|
||||
{
|
||||
"id": f"GDOT-{site_id}",
|
||||
"source_agency": "Georgia DOT",
|
||||
"lat": lat,
|
||||
"lon": lon,
|
||||
"direction_facing": str(location)[:120],
|
||||
"media_url": media_url,
|
||||
"media_type": "image",
|
||||
"refresh_rate_seconds": 60,
|
||||
}
|
||||
)
|
||||
start += len(rows)
|
||||
draw += 1
|
||||
total = int(data.get("recordsTotal") or 0)
|
||||
if total and start >= total:
|
||||
break
|
||||
if not total and len(rows) < self.PAGE_SIZE:
|
||||
break
|
||||
return cameras
|
||||
return _fetch_511_datatables_cameras(
|
||||
list_url="https://511ga.org/List/GetData/Cameras",
|
||||
base_url="https://511ga.org",
|
||||
id_prefix="GDOT",
|
||||
source_agency="Georgia DOT",
|
||||
referer="https://511ga.org/cctv",
|
||||
)
|
||||
|
||||
|
||||
class IllinoisDOTIngestor(BaseCCTVIngestor):
|
||||
@@ -1009,17 +1119,72 @@ def _extract_img_src(html_fragment: str):
|
||||
return None
|
||||
|
||||
|
||||
class AsfinagIngestor(BaseCCTVIngestor):
    """Austria ASFINAG motorway webcams (Osiris port)."""

    API_URL = "https://odo.asfinag.at/odo/rest/sec/resource/001/json/webcams?language=atDE"
    HEADERS = {
        "User-Agent": "Shadowbroker-CCTV/1.0",
        "Accept": "application/json",
        "Referer": "https://www.asfinag.at/",
        "Authorization": "Basic bWFwX3dpZGdldDp0ZWdkaXc=",
    }

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the ASFINAG webcam list and convert it to camera dicts.

        Returns:
            One camera dict per usable entry. Returns an empty list when the
            request fails or the payload is not a JSON list. Malformed
            entries are skipped individually.
        """
        try:
            response = fetch_with_curl(self.API_URL, timeout=15, headers=self.HEADERS)
            response.raise_for_status()
            payload = response.json()
        except Exception as exc:
            logger.error("AsfinagIngestor: fetch failed: %s", exc)
            return []
        if not isinstance(payload, list):
            return []

        cameras: List[Dict[str, Any]] = []
        for cam in payload:
            if not isinstance(cam, dict):
                continue
            cam_id = cam.get("wcs_id")
            lat = cam.get("wgs84_lat")
            lon = cam.get("wgs84_lon")
            image_url = cam.get("url_campic")
            if not cam_id or lat is None or lon is None or not image_url:
                continue
            if str(cam_id).startswith("Utinform"):
                continue
            try:
                lat_f, lon_f = float(lat), float(lon)
            except (TypeError, ValueError):
                # One bad coordinate must not discard the rest of the feed.
                continue
            label = cam.get("position_txt") or cam.get("direction_txt") or "ASFINAG Webcam"
            secure_url = _ensure_https_url(image_url)
            if not secure_url:
                continue
            cameras.append(
                {
                    "id": f"ASFINAG-{cam_id}",
                    "source_agency": "ASFINAG Austria",
                    "lat": lat_f,
                    "lon": lon_f,
                    "direction_facing": label,
                    "media_url": secure_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 300,
                }
            )
        logger.info("AsfinagIngestor: parsed %s cameras", len(cameras))
        return cameras
|
||||
|
||||
|
||||
class MadridCityIngestor(BaseCCTVIngestor):
|
||||
"""Madrid City Hall traffic cameras from datos.madrid.es KML feed."""
|
||||
|
||||
KML_URL = "http://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
KML_URL = "https://datos.madrid.es/egob/catalogo/202088-0-trafico-camaras.kml"
|
||||
|
||||
def _fetch_kml(self):
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
def fetch_data(self) -> List[Dict[str, Any]]:
|
||||
import defusedxml.ElementTree as ET
|
||||
|
||||
try:
|
||||
response = fetch_with_curl(self.KML_URL, timeout=20)
|
||||
response.raise_for_status()
|
||||
response = self._fetch_kml()
|
||||
except Exception as e:
|
||||
logger.error(f"MadridCityIngestor: failed to fetch KML: {e}")
|
||||
return []
|
||||
@@ -1055,6 +1220,9 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
if desc_el is not None and desc_el.text:
|
||||
image_url = _extract_img_src(desc_el.text)
|
||||
|
||||
if not image_url:
|
||||
continue
|
||||
image_url = _ensure_https_url(image_url)
|
||||
if not image_url:
|
||||
continue
|
||||
|
||||
@@ -1076,6 +1244,153 @@ class MadridCityIngestor(BaseCCTVIngestor):
|
||||
return cameras
|
||||
|
||||
|
||||
class Ontario511Ingestor(BaseCCTVIngestor):
    """Ontario highway cameras via 511on.ca TravelIQ API."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the 511 Ontario feed through the shared TravelIQ v2 parser."""
        feed = {
            "api_url": "https://511on.ca/api/v2/get/cameras",
            "base_url": "https://511on.ca",
            "id_prefix": "ON511",
            "source_agency": "511 Ontario",
        }
        return _fetch_traveliq_v2_cameras(**feed)
|
||||
|
||||
|
||||
class Alberta511Ingestor(BaseCCTVIngestor):
    """Alberta highway cameras via 511 Alberta TravelIQ API."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the 511 Alberta feed through the shared TravelIQ v2 parser."""
        feed = {
            "api_url": "https://511.alberta.ca/api/v2/get/cameras",
            "base_url": "https://511.alberta.ca",
            "id_prefix": "AB511",
            "source_agency": "511 Alberta",
        }
        return _fetch_traveliq_v2_cameras(**feed)
|
||||
|
||||
|
||||
class Florida511Ingestor(BaseCCTVIngestor):
    """Florida cameras via FL511 DataTables feed (~4,800 sites)."""

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the FL511 feed through the shared 511 DataTables parser."""
        feed = {
            "list_url": "https://fl511.com/List/GetData/Cameras",
            "base_url": "https://fl511.com",
            "id_prefix": "FL511",
            "source_agency": "Florida 511",
            "referer": "https://fl511.com/",
        }
        return _fetch_511_datatables_cameras(**feed)
|
||||
|
||||
|
||||
class AustraliaLiveTrafficIngestor(BaseCCTVIngestor):
    """NSW / Australia live traffic cameras via Transport for NSW JSON feed."""

    URL = "https://www.livetraffic.com/datajson/all-feeds-web.json"

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the combined feed and keep only its ``liveCams`` entries.

        Returns:
            One camera dict per entry with coordinates and an ``href``.
            Returns an empty list when the fetch fails or the payload is not
            a JSON list.
        """
        resp = fetch_with_curl(self.URL, timeout=35, headers={"Accept": "application/json"})
        if not resp or resp.status_code != 200:
            logger.error(
                "Australia Live Traffic CCTV fetch failed: HTTP %s",
                resp.status_code if resp else "no response",
            )
            return []

        feed = resp.json()
        if not isinstance(feed, list):
            return []

        cameras: List[Dict[str, Any]] = []
        for entry in feed:
            if not isinstance(entry, dict) or entry.get("eventType") != "liveCams":
                continue

            geometry = entry.get("geometry")
            if not isinstance(geometry, dict):
                geometry = {}
            coords = geometry.get("coordinates")
            if not isinstance(coords, list):
                coords = []
            if len(coords) < 2:
                continue
            # The code reads index 0 as longitude and index 1 as latitude.
            try:
                lon = float(coords[0])
                lat = float(coords[1])
            except (TypeError, ValueError):
                continue

            props = entry.get("properties")
            if not isinstance(props, dict):
                props = {}
            media_url = _ensure_https_url(str(props.get("href") or "").strip())
            if not media_url:
                continue

            # Falls back to the running camera count when no path or id exists.
            raw_id = entry.get("path") or props.get("id") or len(cameras)
            cam_id = str(raw_id).strip("/")
            label = str(props.get("title") or props.get("headline") or "Australia Camera")[:120]
            cameras.append(
                {
                    "id": f"AUS-{cam_id}",
                    "source_agency": "NSW Live Traffic",
                    "lat": lat,
                    "lon": lon,
                    "direction_facing": label,
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 120,
                }
            )
        logger.info("AustraliaLiveTrafficIngestor: parsed %s cameras", len(cameras))
        return cameras
|
||||
|
||||
|
||||
class NetherlandsRWSIngestor(BaseCCTVIngestor):
    """Netherlands Rijkswaterstaat cameras from legacy NDW open-data JSON.

    The opendata.ndw.nu/cameras.json feed Osiris used is often offline; when
    unavailable this ingestor returns an empty set and logs a warning.
    """

    URL = "https://opendata.ndw.nu/cameras.json"
    MAX_CAMERAS = 1200

    def fetch_data(self) -> List[Dict[str, Any]]:
        """Fetch the feed and convert up to ``MAX_CAMERAS`` entries.

        Returns:
            One camera dict per entry with parseable coordinates and a media
            URL. Returns an empty list when the fetch fails or the payload is
            not a JSON list.
        """
        resp = fetch_with_curl(self.URL, timeout=25, headers={"Accept": "application/json"})
        if not resp or resp.status_code != 200:
            logger.warning(
                "Netherlands RWS cameras.json unavailable (HTTP %s) — "
                "NDW retired this open-data endpoint; no cameras ingested",
                resp.status_code if resp else "no response",
            )
            return []

        feed = resp.json()
        if not isinstance(feed, list):
            return []

        cameras: List[Dict[str, Any]] = []
        for index, entry in enumerate(feed):
            # Only the first MAX_CAMERAS feed entries are considered.
            if index >= self.MAX_CAMERAS:
                break
            if not isinstance(entry, dict):
                continue

            # Accept either the short or the long coordinate key names.
            lat = entry.get("lat")
            if lat is None:
                lat = entry.get("latitude")
            lon = entry.get("lng")
            if lon is None:
                lon = entry.get("longitude")
            raw_media = entry.get("imageUrl") or entry.get("feed_url") or entry.get("url") or ""
            media_url = _ensure_https_url(str(raw_media).strip())
            if lat is None or lon is None or not media_url:
                continue
            try:
                lat_f, lon_f = float(lat), float(lon)
            except (TypeError, ValueError):
                continue

            cameras.append(
                {
                    "id": f"NLRWS-{entry.get('id') or index}",
                    "source_agency": "Rijkswaterstaat",
                    "lat": lat_f,
                    "lon": lon_f,
                    "direction_facing": str(entry.get("name") or "Netherlands Camera")[:120],
                    "media_url": media_url,
                    "media_type": "image",
                    "refresh_rate_seconds": 120,
                }
            )
        logger.info("NetherlandsRWSIngestor: parsed %s cameras", len(cameras))
        return cameras
|
||||
|
||||
|
||||
def _detect_media_type(url: str) -> str:
|
||||
"""Detect the media type from a camera URL for proper frontend rendering."""
|
||||
if not url:
|
||||
@@ -1094,29 +1409,40 @@ def _detect_media_type(url: str) -> str:
|
||||
return "image"
|
||||
|
||||
|
||||
def scheduled_cctv_ingestors() -> List[tuple["BaseCCTVIngestor", str]]:
    """Canonical list of CCTV ingestors for startup, scheduler, and DB seeding.

    Each call instantiates every ingestor afresh, in the order listed, and
    pairs it with its name string.
    """
    registry = (
        (TFLJamCamIngestor, "cctv_tfl"),
        (LTASingaporeIngestor, "cctv_lta"),
        (AustinTXIngestor, "cctv_atx"),
        (NYCDOTIngestor, "cctv_nyc"),
        (CaltransIngestor, "cctv_caltrans"),
        (ColoradoDOTIngestor, "cctv_codot"),
        (WSDOTIngestor, "cctv_wsdot"),
        (GeorgiaDOTIngestor, "cctv_gdot"),
        (IllinoisDOTIngestor, "cctv_idot"),
        (MichiganDOTIngestor, "cctv_mdot"),
        (WindyWebcamsIngestor, "cctv_windy"),
        (DGTNationalIngestor, "cctv_dgt"),
        (MadridCityIngestor, "cctv_madrid"),
        (OSMTrafficCameraIngestor, "cctv_osm"),
        (AsfinagIngestor, "cctv_asfinag"),
        (OSMALPRCameraIngestor, "cctv_osm_alpr"),
        (Ontario511Ingestor, "cctv_on511"),
        (Alberta511Ingestor, "cctv_ab511"),
        (Florida511Ingestor, "cctv_fl511"),
        (AustraliaLiveTrafficIngestor, "cctv_australia"),
        (NetherlandsRWSIngestor, "cctv_nl_rws"),
    )
    return [(ingestor_cls(), job_name) for ingestor_cls, job_name in registry]
|
||||
|
||||
|
||||
def run_all_ingestors():
|
||||
"""Run all CCTV ingestors synchronously. Used for first-run DB seeding."""
|
||||
ingestors = [
|
||||
TFLJamCamIngestor(),
|
||||
LTASingaporeIngestor(),
|
||||
AustinTXIngestor(),
|
||||
NYCDOTIngestor(),
|
||||
CaltransIngestor(),
|
||||
ColoradoDOTIngestor(),
|
||||
WSDOTIngestor(),
|
||||
GeorgiaDOTIngestor(),
|
||||
IllinoisDOTIngestor(),
|
||||
MichiganDOTIngestor(),
|
||||
WindyWebcamsIngestor(),
|
||||
OSMTrafficCameraIngestor(),
|
||||
DGTNationalIngestor(),
|
||||
MadridCityIngestor(),
|
||||
]
|
||||
for ing in ingestors:
|
||||
for ingestor, _name in scheduled_cctv_ingestors():
|
||||
try:
|
||||
ing.ingest()
|
||||
ingestor.ingest()
|
||||
except Exception as e:
|
||||
logger.warning(f"Ingestor {ing.__class__.__name__} failed during seed: {e}")
|
||||
logger.warning(f"Ingestor {ingestor.__class__.__name__} failed during seed: {e}")
|
||||
|
||||
|
||||
def get_all_cameras() -> List[Dict[str, Any]]:
|
||||
|
||||
@@ -19,6 +19,7 @@ import concurrent.futures
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
@@ -75,6 +76,7 @@ from services.fetchers.infrastructure import ( # noqa: F401
|
||||
fetch_tinygs,
|
||||
fetch_psk_reporter,
|
||||
)
|
||||
from services.fetchers.road_corridor_sat import fetch_road_corridor_trends # noqa: F401
|
||||
from services.fetchers.geo import ( # noqa: F401
|
||||
fetch_ships,
|
||||
fetch_airports,
|
||||
@@ -99,6 +101,10 @@ from services.fetchers.crowdthreat import fetch_crowdthreat # noqa: F401
|
||||
from services.fetchers.wastewater import fetch_wastewater # noqa: F401
|
||||
from services.fetchers.sar_catalog import fetch_sar_catalog # noqa: F401
|
||||
from services.fetchers.sar_products import fetch_sar_products # noqa: F401
|
||||
from services.fetchers.malware import fetch_malware_threats # noqa: F401
|
||||
from services.fetchers.telegram_osint import fetch_telegram_osint # noqa: F401
|
||||
from services.fetchers.cyber_status import fetch_cyber_threats # noqa: F401
|
||||
from services.scm.suppliers import fetch_scm_suppliers # noqa: F401
|
||||
from services.ais_stream import prune_stale_vessels # noqa: F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -144,13 +150,18 @@ _STARTUP_HEAVY_REFRESH_DELAY_S = float(os.environ.get("SHADOWBROKER_STARTUP_HEAV
|
||||
_STARTUP_HEAVY_REFRESH_STARTED = False
|
||||
_STARTUP_HEAVY_REFRESH_LOCK = threading.Lock()
|
||||
_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_FETCH_WORKERS", "8"))
|
||||
_HEAVY_FETCH_WORKERS = int(os.environ.get("SHADOWBROKER_HEAVY_FETCH_WORKERS", "2"))
|
||||
_SLOW_FETCH_CONCURRENCY = int(os.environ.get("SHADOWBROKER_SLOW_FETCH_CONCURRENCY", "4"))
|
||||
_STARTUP_HEAVY_CONCURRENCY = int(os.environ.get("SHADOWBROKER_STARTUP_HEAVY_CONCURRENCY", "2"))
|
||||
|
||||
# Shared thread pool — reused across all fetch cycles instead of creating/destroying per tick
|
||||
# Fast-tier pool (flights, ships, sigint, …). Slow / heavy work uses a separate pool
|
||||
# so Playwright, GDELT, CCTV ingest, etc. cannot starve the 60s refresh path (#375).
|
||||
_SHARED_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=max(2, _FETCH_WORKERS), thread_name_prefix="fetch"
|
||||
)
|
||||
_SLOW_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=max(1, _HEAVY_FETCH_WORKERS), thread_name_prefix="fetch-slow"
|
||||
)
|
||||
|
||||
|
||||
def _cache_json_safe(value):
|
||||
@@ -319,10 +330,49 @@ def seed_startup_caches() -> None:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler & Orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
def _executor_for_task_label(label: str) -> concurrent.futures.ThreadPoolExecutor:
    """Choose the thread pool for a task group from its label prefix.

    Labels that start with "slow-tier" or "startup-heavy" run on the slow
    pool; every other label runs on the shared pool.
    """
    heavy_prefixes = ("slow-tier", "startup-heavy")
    uses_slow_pool = label.startswith(heavy_prefixes)
    return _SLOW_EXECUTOR if uses_slow_pool else _SHARED_EXECUTOR
|
||||
|
||||
|
||||
def _run_task_with_health_on_executor(
    executor: concurrent.futures.ThreadPoolExecutor,
    func,
    name: str | None = None,
) -> None:
    """Run a scheduled job on the given pool so it cannot starve fast-tier workers.

    Submits ``func`` to ``executor``, waits at most ``_TASK_HARD_TIMEOUT_S``
    seconds for it, and reports the outcome through ``services.fetch_health``.
    A failing or timed-out job is recorded and logged, not re-raised.

    Args:
        executor: Pool the job is submitted to.
        func: Zero-argument callable to run.
        name: Label for health records and log lines. Defaults to
            ``func.__name__``, or "task" when the callable has no name.
    """
    # Imported at call time, as the rest of this function's module does.
    from services.fetch_health import record_failure, record_success

    task_name = name or getattr(func, "__name__", "task")
    future = executor.submit(func)
    start = time.perf_counter()
    try:
        future.result(timeout=_TASK_HARD_TIMEOUT_S)
        duration = time.perf_counter() - start
        record_success(task_name, duration_s=duration)
        if duration > _SLOW_FETCH_S:
            logger.warning("task slow: %s took %.2fs", task_name, duration)
    except concurrent.futures.TimeoutError as e:
        duration = time.perf_counter() - start
        if future.done() and not future.cancelled():
            # On Python 3.11+ concurrent.futures.TimeoutError is the builtin
            # TimeoutError, so a job that raised TimeoutError itself also
            # lands here. A finished future did not hit the hard deadline:
            # report it as an ordinary failure and keep the real error.
            record_failure(task_name, error=e, duration_s=duration)
            logger.exception("task failed: %s", task_name)
            return
        # Still pending or running: the wait hit the hard deadline.
        # cancel() only prevents a not-yet-started job from running.
        future.cancel()
        record_failure(task_name, error=TimeoutError(f"{task_name} timed out"), duration_s=duration)
        logger.error("task timed out: %s (%.2fs)", task_name, duration)
    except Exception as e:
        duration = time.perf_counter() - start
        record_failure(task_name, error=e, duration_s=duration)
        logger.exception("task failed: %s", task_name)
|
||||
|
||||
|
||||
def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
|
||||
"""Run tasks concurrently and log any exceptions (do not fail silently)."""
|
||||
if not funcs:
|
||||
return
|
||||
executor = _executor_for_task_label(label)
|
||||
if max_concurrency is None:
|
||||
if label.startswith("slow-tier"):
|
||||
max_concurrency = _SLOW_FETCH_CONCURRENCY
|
||||
@@ -330,12 +380,13 @@ def _run_tasks(label: str, funcs: list, *, max_concurrency: int | None = None):
|
||||
max_concurrency = _STARTUP_HEAVY_CONCURRENCY
|
||||
else:
|
||||
max_concurrency = len(funcs)
|
||||
max_concurrency = max(1, min(max_concurrency, len(funcs)))
|
||||
pool_workers = getattr(executor, "_max_workers", len(funcs))
|
||||
max_concurrency = max(1, min(max_concurrency, len(funcs), pool_workers))
|
||||
|
||||
remaining_funcs = list(funcs)
|
||||
while remaining_funcs:
|
||||
batch, remaining_funcs = remaining_funcs[:max_concurrency], remaining_funcs[max_concurrency:]
|
||||
futures = {_SHARED_EXECUTOR.submit(func): (func.__name__, time.perf_counter()) for func in batch}
|
||||
futures = {executor.submit(func): (func.__name__, time.perf_counter()) for func in batch}
|
||||
_drain_task_futures(label, futures)
|
||||
|
||||
|
||||
@@ -352,6 +403,13 @@ def _drain_task_futures(label: str, futures: dict):
|
||||
record_success(name, duration_s=duration)
|
||||
if duration > _SLOW_FETCH_S:
|
||||
logger.warning(f"{label} task slow: {name} took {duration:.2f}s")
|
||||
except concurrent.futures.TimeoutError:
|
||||
future.cancel()
|
||||
duration = time.perf_counter() - start
|
||||
from services.fetch_health import record_failure
|
||||
|
||||
record_failure(name, error=TimeoutError(f"{name} timed out"), duration_s=duration)
|
||||
logger.error("%s task timed out: %s (%.2fs)", label, name, duration)
|
||||
except Exception as e:
|
||||
duration = time.perf_counter() - start
|
||||
from services.fetch_health import record_failure
|
||||
@@ -405,7 +463,6 @@ def update_slow_data():
|
||||
logger.info("Slow-tier data update starting...")
|
||||
slow_funcs = [
|
||||
fetch_news,
|
||||
fetch_prediction_markets,
|
||||
fetch_earthquakes,
|
||||
fetch_firms_fires,
|
||||
fetch_firms_country_fires,
|
||||
@@ -427,6 +484,9 @@ def update_slow_data():
|
||||
fetch_fishing_activity,
|
||||
fetch_power_plants,
|
||||
fetch_ukraine_air_raid_alerts,
|
||||
fetch_malware_threats,
|
||||
fetch_cyber_threats,
|
||||
fetch_scm_suppliers,
|
||||
]
|
||||
_run_tasks("slow-tier", slow_funcs)
|
||||
# Run correlation engine after all data is fresh
|
||||
@@ -470,6 +530,15 @@ def _load_cctv_cache_for_startup() -> None:
|
||||
logger.warning("Startup CCTV cache load failed (non-fatal): %s", e)
|
||||
|
||||
|
||||
def _load_static_infrastructure_for_startup() -> None:
    """Load the disk-backed reference layers at startup (instant, no network).

    Runs the datacenter, military-base and power-plant loaders in that
    order. A loader that raises is logged as a warning and the remaining
    loaders still run.
    """
    static_loaders = (fetch_datacenters, fetch_military_bases, fetch_power_plants)
    for loader in static_loaders:
        try:
            loader()
        except Exception as err:
            logger.warning(
                "Startup static infrastructure load failed for %s: %s",
                loader.__name__,
                err,
            )
|
||||
|
||||
|
||||
def _run_delayed_startup_heavy_refresh() -> None:
|
||||
if _STARTUP_HEAVY_REFRESH_DELAY_S > 0:
|
||||
logger.info(
|
||||
@@ -482,6 +551,7 @@ def _run_delayed_startup_heavy_refresh() -> None:
|
||||
"startup-heavy",
|
||||
[
|
||||
update_slow_data,
|
||||
fetch_telegram_osint,
|
||||
fetch_volcanoes,
|
||||
fetch_viirs_change_nodes,
|
||||
fetch_unusual_whales,
|
||||
@@ -520,6 +590,7 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
logger.info("Full data update starting (parallel)...")
|
||||
# Preload Meshtastic map cache immediately (instant, from disk)
|
||||
seed_startup_caches()
|
||||
_load_static_infrastructure_for_startup()
|
||||
with _data_lock:
|
||||
meshtastic_seeded = bool(latest_data.get("meshtastic_map_nodes"))
|
||||
if startup_mode:
|
||||
@@ -596,22 +667,9 @@ def update_all_data(*, startup_mode: bool = False):
|
||||
# (the scheduled job also runs every 10 min for ongoing refresh).
|
||||
if startup_mode:
|
||||
try:
|
||||
from services.cctv_pipeline import (
|
||||
TFLJamCamIngestor, LTASingaporeIngestor, AustinTXIngestor,
|
||||
NYCDOTIngestor, CaltransIngestor, ColoradoDOTIngestor,
|
||||
WSDOTIngestor, GeorgiaDOTIngestor, IllinoisDOTIngestor,
|
||||
MichiganDOTIngestor, WindyWebcamsIngestor, DGTNationalIngestor,
|
||||
MadridCityIngestor, OSMTrafficCameraIngestor, get_all_cameras,
|
||||
)
|
||||
from services.cctv_pipeline import OSMALPRCameraIngestor
|
||||
_startup_ingestors = [
|
||||
TFLJamCamIngestor(), LTASingaporeIngestor(), AustinTXIngestor(),
|
||||
NYCDOTIngestor(), CaltransIngestor(), ColoradoDOTIngestor(),
|
||||
WSDOTIngestor(), GeorgiaDOTIngestor(), IllinoisDOTIngestor(),
|
||||
MichiganDOTIngestor(), WindyWebcamsIngestor(), DGTNationalIngestor(),
|
||||
MadridCityIngestor(), OSMTrafficCameraIngestor(),
|
||||
OSMALPRCameraIngestor(),
|
||||
]
|
||||
from services.cctv_pipeline import get_all_cameras, scheduled_cctv_ingestors
|
||||
|
||||
_startup_ingestors = [ing for ing, _name in scheduled_cctv_ingestors()]
|
||||
logger.info("Running CCTV ingest at startup (%d ingestors)...", len(_startup_ingestors))
|
||||
ingest_futures = {
|
||||
_SHARED_EXECUTOR.submit(ing.ingest): ing.__class__.__name__
|
||||
@@ -747,6 +805,39 @@ def start_scheduler():
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
|
||||
# Telegram OSINT — hourly t.me/s channel scrape (kept off the 5-minute slow tier).
|
||||
_telegram_interval_m = max(15, int(os.environ.get("TELEGRAM_OSINT_INTERVAL_MINUTES", "60")))
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_telegram_osint, "fetch_telegram_osint"),
|
||||
"interval",
|
||||
minutes=_telegram_interval_m,
|
||||
next_run_time=datetime.utcnow() + timedelta(seconds=45),
|
||||
id="telegram_osint",
|
||||
max_instances=1,
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# Prediction markets — own jittered cadence (Polymarket/Kalshi clearnet egress).
|
||||
# Kept off the fixed 5-minute slow tier so poll timing is less fingerprintable.
|
||||
from services.fetchers.prediction_markets import fetch_prediction_markets
|
||||
|
||||
_pm_interval_m = max(5, int(os.environ.get("PREDICTION_MARKETS_INTERVAL_MINUTES", "7")))
|
||||
_pm_jitter_s = max(0, int(os.environ.get("PREDICTION_MARKETS_SCHEDULER_JITTER_S", "240")))
|
||||
_pm_initial_max_s = max(0, int(os.environ.get("PREDICTION_MARKETS_INITIAL_DELAY_MAX_S", "180")))
|
||||
_pm_first_run = datetime.utcnow() + timedelta(
|
||||
seconds=random.randint(30, max(30, _pm_initial_max_s))
|
||||
)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_prediction_markets, "fetch_prediction_markets"),
|
||||
"interval",
|
||||
minutes=_pm_interval_m,
|
||||
jitter=_pm_jitter_s,
|
||||
next_run_time=_pm_first_run,
|
||||
id="prediction_markets",
|
||||
max_instances=1,
|
||||
misfire_grace_time=300,
|
||||
)
|
||||
|
||||
# Weather alerts — every 5 minutes (time-critical, separate from slow tier)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_weather_alerts, "fetch_weather_alerts"),
|
||||
@@ -844,7 +935,7 @@ def start_scheduler():
|
||||
|
||||
# GDELT — every 30 minutes (downloads 32 ZIP files per call, avoid rate limits)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_gdelt, "fetch_gdelt"),
|
||||
lambda: _run_task_with_health_on_executor(_SLOW_EXECUTOR, fetch_gdelt, "fetch_gdelt"),
|
||||
"interval",
|
||||
minutes=30,
|
||||
id="gdelt",
|
||||
@@ -852,7 +943,9 @@ def start_scheduler():
|
||||
misfire_grace_time=120,
|
||||
)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(update_liveuamap, "update_liveuamap"),
|
||||
lambda: _run_task_with_health_on_executor(
|
||||
_SLOW_EXECUTOR, update_liveuamap, "update_liveuamap"
|
||||
),
|
||||
"interval",
|
||||
minutes=30,
|
||||
id="liveuamap",
|
||||
@@ -862,39 +955,9 @@ def start_scheduler():
|
||||
|
||||
# CCTV pipeline refresh — runs all ingestors, then refreshes in-memory data.
|
||||
# Delay the first run slightly so startup serves cached/DB-backed data first.
|
||||
from services.cctv_pipeline import (
|
||||
TFLJamCamIngestor,
|
||||
LTASingaporeIngestor,
|
||||
AustinTXIngestor,
|
||||
NYCDOTIngestor,
|
||||
CaltransIngestor,
|
||||
ColoradoDOTIngestor,
|
||||
WSDOTIngestor,
|
||||
GeorgiaDOTIngestor,
|
||||
IllinoisDOTIngestor,
|
||||
MichiganDOTIngestor,
|
||||
WindyWebcamsIngestor,
|
||||
DGTNationalIngestor,
|
||||
MadridCityIngestor,
|
||||
OSMTrafficCameraIngestor,
|
||||
)
|
||||
from services.cctv_pipeline import scheduled_cctv_ingestors
|
||||
|
||||
_cctv_ingestors = [
|
||||
(TFLJamCamIngestor(), "cctv_tfl"),
|
||||
(LTASingaporeIngestor(), "cctv_lta"),
|
||||
(AustinTXIngestor(), "cctv_atx"),
|
||||
(NYCDOTIngestor(), "cctv_nyc"),
|
||||
(CaltransIngestor(), "cctv_caltrans"),
|
||||
(ColoradoDOTIngestor(), "cctv_codot"),
|
||||
(WSDOTIngestor(), "cctv_wsdot"),
|
||||
(GeorgiaDOTIngestor(), "cctv_gdot"),
|
||||
(IllinoisDOTIngestor(), "cctv_idot"),
|
||||
(MichiganDOTIngestor(), "cctv_mdot"),
|
||||
(WindyWebcamsIngestor(), "cctv_windy"),
|
||||
(DGTNationalIngestor(), "cctv_dgt"),
|
||||
(MadridCityIngestor(), "cctv_madrid"),
|
||||
(OSMTrafficCameraIngestor(), "cctv_osm"),
|
||||
]
|
||||
_cctv_ingestors = scheduled_cctv_ingestors()
|
||||
|
||||
def _run_cctv_ingest_cycle():
|
||||
from services.fetchers._store import is_any_active
|
||||
@@ -913,7 +976,9 @@ def start_scheduler():
|
||||
logger.warning(f"CCTV post-ingest refresh failed: {e}")
|
||||
|
||||
_scheduler.add_job(
|
||||
_run_cctv_ingest_cycle,
|
||||
lambda: _run_task_with_health_on_executor(
|
||||
_SLOW_EXECUTOR, _run_cctv_ingest_cycle, "cctv_ingest_cycle"
|
||||
),
|
||||
"interval",
|
||||
minutes=10,
|
||||
id="cctv_ingest",
|
||||
@@ -983,6 +1048,16 @@ def start_scheduler():
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# Sentinel-2 road corridor freight trends — daily (opt-in, heavy CDSE usage)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_road_corridor_trends, "fetch_road_corridor_trends"),
|
||||
"interval",
|
||||
hours=24,
|
||||
id="road_corridor_trends",
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
|
||||
# FIMI disinformation index — every 12 hours (weekly editorial feed)
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(fetch_fimi, "fetch_fimi"),
|
||||
@@ -993,9 +1068,9 @@ def start_scheduler():
|
||||
misfire_grace_time=600,
|
||||
)
|
||||
|
||||
# UAP sightings (NUFORC) — weekly on Mondays at 12:00 UTC. The layer is a
|
||||
# rolling last-60-days digest; refreshing once a week is enough cadence
|
||||
# for human-readable map exploration and keeps load on nuforc.org light.
|
||||
# UAP sightings (NUFORC) — weekly Mondays 12:00 UTC. Rolling ~60-day window;
|
||||
# each self-hosted install pulls live nuforc.org so operators see current
|
||||
# reports (typically ~400–500 mappable pins). Disk cache TTL defaults to 7d.
|
||||
_scheduler.add_job(
|
||||
lambda: _run_task_with_health(
|
||||
lambda: fetch_uap_sightings(force_refresh=True),
|
||||
@@ -1130,7 +1205,10 @@ def start_scheduler():
|
||||
def stop_scheduler():
|
||||
if _scheduler:
|
||||
_scheduler.shutdown(wait=False)
|
||||
_SLOW_EXECUTOR.shutdown(wait=False, cancel_futures=True)
|
||||
|
||||
|
||||
def get_latest_data():
    """Return a deep-copied snapshot of the whole dashboard store.

    Delegates to ``get_latest_data_deepcopy_snapshot`` in the fetcher store.
    """
    from services.fetchers._store import get_latest_data_deepcopy_snapshot

    return get_latest_data_deepcopy_snapshot()
|
||||
|
||||
@@ -46,6 +46,7 @@ _CRITICAL_WARN = {
|
||||
|
||||
_OPTIONAL = {
|
||||
"AIS_API_KEY": "AIS vessel streaming (ships layer will be empty without it)",
|
||||
"GFW_API_TOKEN": "Global Fishing Watch fishing-vessel activity (fishing_activity layer)",
|
||||
"LTA_ACCOUNT_KEY": "Singapore LTA traffic cameras (CCTV layer)",
|
||||
"PUBLIC_API_KEY": "Optional client auth for public endpoints (recommended for exposed deployments)",
|
||||
}
|
||||
|
||||
@@ -69,6 +69,11 @@ class DashboardData(TypedDict, total=False):
|
||||
sar_scenes: List[Dict[str, Any]]
|
||||
sar_anomalies: List[Dict[str, Any]]
|
||||
sar_aoi_coverage: List[Dict[str, Any]]
|
||||
road_corridor_trends: Dict[str, Any]
|
||||
malware_threats: Dict[str, Any]
|
||||
cyber_threats: Dict[str, Any]
|
||||
scm_suppliers: Dict[str, Any]
|
||||
telegram_osint: Dict[str, Any]
|
||||
|
||||
|
||||
# In-memory store
|
||||
@@ -119,6 +124,11 @@ latest_data: DashboardData = {
|
||||
"sar_scenes": [],
|
||||
"sar_anomalies": [],
|
||||
"sar_aoi_coverage": [],
|
||||
"road_corridor_trends": {"updated_at": None, "corridors": []},
|
||||
"malware_threats": {"threats": [], "total": 0, "timestamp": None},
|
||||
"cyber_threats": {"threats": [], "stats": {}},
|
||||
"scm_suppliers": {"suppliers": [], "total": 0, "critical_count": 0},
|
||||
"telegram_osint": {"posts": [], "total": 0, "geolocated": 0, "timestamp": None},
|
||||
}
|
||||
|
||||
# Per-source freshness timestamps
|
||||
@@ -230,27 +240,35 @@ _active_layers_version: int = 0
|
||||
def bump_active_layers_version() -> None:
    """Increment the active-layer version when frontend toggles change response shape."""
    global _active_layers_version
    # One increment per call, taken under the store lock.
    with _data_lock:
        _active_layers_version += 1
|
||||
|
||||
|
||||
def get_active_layers_version() -> int:
    """Return the current active-layer version (for ETag generation)."""
    # Read under the same lock that guards the increment.
    with _data_lock:
        return _active_layers_version
|
||||
|
||||
|
||||
def get_latest_data_subset(*keys: str) -> DashboardData:
    """Return a deep snapshot of only the requested top-level keys.

    This avoids cloning the entire dashboard store for endpoints that only need
    a small tier-specific subset. Deep copy ensures callers cannot mutate
    nested structures (e.g. individual flight dicts) and affect the live store.
    Grabs references under the lock, then deep-copies outside it so fetcher
    writers are not blocked for the duration of a large clone (#375).

    A requested key that is missing from the store maps to ``None``.
    """
    with _data_lock:
        items = [(key, latest_data.get(key)) for key in keys]
    # NOTE(review): copying outside the lock assumes writers replace top-level
    # values instead of mutating them in place; confirm against the fetchers.
    snap: DashboardData = {}
    for key, value in items:
        snap[key] = copy.deepcopy(value)
    return snap
|
||||
|
||||
|
||||
def get_latest_data_deepcopy_snapshot() -> DashboardData:
    """Deep-copy the full dashboard for legacy /api/live-data consumers.

    The (key, value) pairs are collected while holding the store lock; the
    deep copies are made after the lock is released.
    """
    with _data_lock:
        pairs = list(latest_data.items())
    snapshot = {}
    for name, payload in pairs:
        snapshot[name] = copy.deepcopy(payload)
    return snapshot
|
||||
|
||||
|
||||
def get_latest_data_subset_refs(*keys: str) -> DashboardData:
|
||||
@@ -320,6 +338,12 @@ active_layers: dict[str, bool] = {
|
||||
"ai_intel": True,
|
||||
"crowdthreat": False,
|
||||
"sar": True,
|
||||
"road_corridor_trends": False,
|
||||
"malware_c2": False,
|
||||
"submarine_cables": False,
|
||||
"scm_suppliers": False,
|
||||
"cyber_threats": False,
|
||||
"telegram_osint": True,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -38,8 +38,6 @@ _S3_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_LIST_TIMEOUT_S = 30
|
||||
_DOWNLOAD_TIMEOUT_S = 600
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_aircraft_by_hex: dict[str, dict[str, str]] = {}
|
||||
_last_refresh = 0.0
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
"""CISA KEV + cyber threat stats (Osiris port)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _recent_kev_entries(
    vulns: list,
    now: datetime,
    *,
    max_age_days: float = 30,
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Return up to ``limit`` KEV entries added within ``max_age_days`` of ``now``, newest first.

    ``now`` must be timezone-aware. ``dateAdded`` values without an offset
    (for example a date-only string) are treated as UTC so they can be
    compared with ``now``. Entries that are not dicts, or whose ``dateAdded``
    is missing or unparseable, are skipped.
    """
    dated: list[tuple[datetime, dict[str, Any]]] = []
    for entry in vulns:
        if not isinstance(entry, dict):
            continue
        raw_added = str(entry.get("dateAdded") or "")
        try:
            added = datetime.fromisoformat(raw_added.replace("Z", "+00:00"))
        except ValueError:
            continue
        if added.tzinfo is None:
            # Naive minus aware raises TypeError; pin naive values to UTC.
            added = added.replace(tzinfo=timezone.utc)
        if (now - added).total_seconds() / 86400 <= max_age_days:
            dated.append((added, entry))
    # Stable sort: entries sharing a date keep their feed order.
    dated.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _added, entry in dated[:limit]]


def fetch_cyber_threats() -> dict[str, Any]:
    """Fetch the CISA KEV feed and publish a summary under ``cyber_threats``.

    When the ``cyber_threats`` layer is not active, nothing is fetched and
    the stored value (or an empty payload) is returned. Otherwise the feed
    is downloaded and the most recently added entries (last 30 days, at
    most 10) become ``threats``. ``stats`` carries ``cisa_total`` (only on
    a successful fetch), ``active_cves`` and ``threat_level``. The result is
    written to ``latest_data`` and marked fresh even when the fetch failed,
    in which case ``threats`` is empty.

    Returns:
        The payload stored under ``latest_data["cyber_threats"]``.
    """
    if not is_any_active("cyber_threats"):
        return latest_data.get("cyber_threats") or {"threats": [], "stats": {}}

    now = datetime.now(timezone.utc)
    results: dict[str, Any] = {"threats": [], "stats": {}, "timestamp": now.isoformat()}
    try:
        resp = fetch_with_curl(
            "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json",
            timeout=15,
        )
        if resp.status_code == 200:
            data = resp.json()
            vulns = data.get("vulnerabilities") or []
            results["stats"]["cisa_total"] = len(vulns)
            results["threats"] = [
                {
                    "id": v.get("cveID"),
                    "name": v.get("vulnerabilityName"),
                    "vendor": v.get("vendorProject"),
                    "product": v.get("product"),
                    "severity": "CRITICAL",
                    "date": v.get("dateAdded"),
                    "due": v.get("dueDate"),
                    "source": "CISA KEV",
                }
                for v in _recent_kev_entries(vulns, now)
            ]
        else:
            logger.warning("CISA KEV fetch returned HTTP %s", resp.status_code)
    except Exception as exc:
        # Best-effort layer: a failed fetch publishes an empty threat list.
        logger.warning("CISA KEV fetch failed: %s", exc)

    count = len(results["threats"])
    results["stats"]["active_cves"] = count
    results["stats"]["threat_level"] = "CRITICAL" if count >= 8 else "HIGH" if count >= 4 else "ELEVATED"

    with _data_lock:
        latest_data["cyber_threats"] = results
    _mark_fresh("cyber_threats")
    return results
|
||||
@@ -692,7 +692,8 @@ _NUFORC_TILESET = "nuforc.cmm18aqea06bu1mmselhpnano-0ce5v"
|
||||
_NUFORC_TOKEN = os.environ.get("NUFORC_MAPBOX_TOKEN", "").strip()
|
||||
_NUFORC_RADIUS_M = 200_000 # 200 km query radius
|
||||
_NUFORC_LIMIT = 50 # max features per tilequery call
|
||||
_NUFORC_RECENT_DAYS = int(os.environ.get("NUFORC_RECENT_DAYS", "60"))
|
||||
# Rolling window shown on the map (~2 calendar months). Override via NUFORC_RECENT_DAYS.
|
||||
_NUFORC_RECENT_DAYS = max(1, int(os.environ.get("NUFORC_RECENT_DAYS", "60")))
|
||||
_NUFORC_HF_FALLBACK_LIMIT = max(25, int(os.environ.get("NUFORC_HF_FALLBACK_LIMIT", "250")))
|
||||
_NUFORC_HF_GEOCODE_LIMIT = max(25, int(os.environ.get("NUFORC_HF_GEOCODE_LIMIT", "150")))
|
||||
_NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1")))
|
||||
@@ -700,6 +701,12 @@ _NUFORC_GEOCODE_WORKERS = max(1, int(os.environ.get("NUFORC_GEOCODE_WORKERS", "1
|
||||
# practice, so a 0.3s spacing keeps us well under any soft throttle while
|
||||
# still rebuilding a full 12-month window in ~10 minutes.
|
||||
_NUFORC_GEOCODE_SPACING_S = float(os.environ.get("NUFORC_GEOCODE_SPACING_S", "0.3"))
|
||||
# Disk cache TTL — match the weekly scheduler so restarts between fetches still
|
||||
# serve the same rolling 60-day snapshot without hammering nuforc.org daily.
|
||||
_NUFORC_CACHE_TTL_S = max(
|
||||
3600,
|
||||
int(os.environ.get("NUFORC_CACHE_TTL_HOURS", "168")) * 3600,
|
||||
)
|
||||
_NUFORC_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
|
||||
_NUFORC_SIGHTINGS_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_recent_sightings.json"
|
||||
_NUFORC_LOCATION_CACHE_FILE = _NUFORC_DATA_DIR / "nuforc_location_cache.json"
|
||||
@@ -766,6 +773,35 @@ def _fetch_nuforc_tilequery(lng: float, lat: float) -> list[dict]:
|
||||
return []
|
||||
|
||||
|
||||
def _uap_cutoff_date_str() -> str:
    """Return the oldest in-window date as YYYY-MM-DD.

    The window is ``_NUFORC_RECENT_DAYS`` days back from the current UTC time.
    """
    window = timedelta(days=_NUFORC_RECENT_DAYS)
    oldest_allowed = datetime.utcnow() - window
    return oldest_allowed.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def _uap_sighting_date_str(sighting: dict) -> str | None:
    """Normalize a sighting row to YYYY-MM-DD for window filtering.

    Reads ``date_time``, falling back to ``occurred``. The value goes
    through ``_parse_date`` first; when that yields nothing and the text
    has dashes at positions 4 and 7, its first ten characters are used.
    Returns ``None`` when no date can be derived.
    """
    from services.fetchers.nuforc_enrichment import _parse_date

    text = str(sighting.get("date_time") or sighting.get("occurred") or "").strip()
    if text == "":
        return None

    normalized = _parse_date(text)
    if normalized:
        return normalized

    # Fallback for text that already starts with a dashed date.
    looks_dashed = len(text) >= 10 and text[4] == "-" and text[7] == "-"
    return text[:10] if looks_dashed else None
|
||||
|
||||
|
||||
def _filter_uap_sightings_recent(sightings: list[dict]) -> list[dict]:
    """Drop anything outside the rolling NUFORC_RECENT_DAYS window.

    Dates are compared as YYYY-MM-DD strings against a cutoff computed once
    per call. A sighting without a usable date compares as the empty string
    and is therefore dropped.
    """
    oldest_allowed = _uap_cutoff_date_str()
    kept: list[dict] = []
    for row in sightings:
        row_date = _uap_sighting_date_str(row) or ""
        if row_date >= oldest_allowed:
            kept.append(row)
    return kept
|
||||
|
||||
|
||||
def _parse_nuforc_tile_date(value: str) -> datetime | None:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
@@ -802,19 +838,41 @@ def _load_nuforc_sightings_cache(*, force_refresh: bool = False) -> list[dict] |
|
||||
built_dt = datetime.fromisoformat(built) if built else None
|
||||
if built_dt is None:
|
||||
return None
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > 86400:
|
||||
if (datetime.utcnow() - built_dt).total_seconds() > _NUFORC_CACHE_TTL_S:
|
||||
return None
|
||||
if raw.get("cutoff_days") != _NUFORC_RECENT_DAYS:
|
||||
logger.info(
|
||||
"UAP sightings: cache cutoff_days mismatch (%s != %s); rebuilding",
|
||||
raw.get("cutoff_days"),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return None
|
||||
sightings = raw.get("sightings")
|
||||
if isinstance(sightings, list):
|
||||
if len(sightings) <= 0:
|
||||
logger.info("UAP sightings: cache is fresh but empty; rebuilding")
|
||||
return None
|
||||
filtered = _filter_uap_sightings_recent(sightings)
|
||||
if not filtered:
|
||||
logger.warning(
|
||||
"UAP sightings: cache had %d rows but none within last %d days; rebuilding",
|
||||
len(sightings),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return None
|
||||
if len(filtered) < len(sightings):
|
||||
logger.info(
|
||||
"UAP sightings: dropped %d stale cached rows outside %d-day window",
|
||||
len(sightings) - len(filtered),
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
logger.info(
|
||||
"UAP sightings: loaded %d cached reports from %s",
|
||||
len(sightings),
|
||||
"UAP sightings: loaded %d cached reports from %s (within %d-day window)",
|
||||
len(filtered),
|
||||
built,
|
||||
_NUFORC_RECENT_DAYS,
|
||||
)
|
||||
return sightings
|
||||
return filtered
|
||||
except Exception as e:
|
||||
logger.warning("UAP sightings: cache load error: %s", e)
|
||||
return None
|
||||
@@ -828,6 +886,7 @@ def _save_nuforc_sightings_cache(sightings: list[dict]) -> None:
|
||||
_NUFORC_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
payload = {
|
||||
"built": datetime.utcnow().isoformat(),
|
||||
"cutoff_days": _NUFORC_RECENT_DAYS,
|
||||
"count": len(sightings),
|
||||
"sightings": sightings,
|
||||
}
|
||||
@@ -1035,27 +1094,128 @@ def _nuforc_months_for_window(days: int) -> list[str]:
|
||||
return months
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
"""Pull one month of NUFORC sightings via the live wpDataTables AJAX.
|
||||
|
||||
Returns a list of raw row dicts with the fields we care about:
|
||||
id, occurred (YYYY-MM-DD), posted (YYYY-MM-DD), city, state, country,
|
||||
shape_raw, summary, explanation. Empty list on any failure — caller
|
||||
decides whether a failure is fatal.
|
||||
"""
|
||||
def _parse_nuforc_live_datatables_rows(raw_rows: list) -> list[dict]:
    """Parse wpDataTables ``data`` array into normalized row dicts.

    Each usable row is a list of at least 8 cells, read by position:
    0 link HTML (source of the sighting id), 1 occurred date, 2 city,
    3 state, 4 country, 5 shape, 6 summary, 7 reported date and, when
    present, 9 explanation.

    A row is skipped when it is not a list, has fewer than 8 cells, has an
    occurred date that ``_parse_date`` cannot parse, or has no city, state
    or country at all.

    Returns:
        One dict per kept row with the keys ``id``, ``occurred``, ``posted``,
        ``city``, ``state``, ``country``, ``shape_raw``, ``summary`` and
        ``explanation``.
    """
    from services.fetchers.nuforc_enrichment import _parse_date

    out: list[dict] = []
    for raw in raw_rows:
        if not isinstance(raw, list) or len(raw) < 8:
            continue
        link_html = str(raw[0] or "")
        occurred_raw = str(raw[1] or "")
        city = str(raw[2] or "").strip()
        state = str(raw[3] or "").strip()
        country = str(raw[4] or "").strip()
        shape_raw = (str(raw[5] or "").strip() or "Unknown")
        summary = str(raw[6] or "").strip()
        reported_raw = str(raw[7] or "")
        explanation = str(raw[9] or "").strip() if len(raw) > 9 and raw[9] else ""

        occurred_ymd = _parse_date(occurred_raw)
        if not occurred_ymd:
            continue
        if not city and not state and not country:
            continue

        id_match = _NUFORC_LIVE_SIGHTING_ID_RE.search(link_html)
        if id_match:
            sighting_id = f"NUFORC-{id_match.group(1)}"
        else:
            # No id in the link cell: derive a stable id from the row content
            # (computed from the full summary, before truncation).
            digest = hashlib.sha1(
                f"{occurred_ymd}|{city}|{state}|{summary}".encode("utf-8", "ignore")
            ).hexdigest()[:12]
            sighting_id = f"NUFORC-{digest}"

        # Cap the summary at 280 characters, ellipsis included.
        if summary and len(summary) > 280:
            summary = summary[:277] + "..."
        if not summary:
            summary = "Sighting reported"

        out.append({
            "id": sighting_id,
            "occurred": occurred_ymd,
            # Fall back to the occurred date when the reported date is unusable.
            "posted": _parse_date(reported_raw) or occurred_ymd,
            "city": city,
            "state": state,
            "country": country,
            "shape_raw": shape_raw,
            "summary": summary,
            "explanation": explanation,
        })
    return out
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live_requests(yyyymm: str) -> list[dict]:
    """Live NUFORC month fetch via requests (Windows-safe when curl is disabled).

    Two requests share one session: a GET of the month index page, from
    which the ``wdtNonce`` value is extracted, then a POST of the
    wpDataTables AJAX query carrying that nonce. The ``data`` array of the
    JSON reply is handed to ``_parse_nuforc_live_datatables_rows``.

    Args:
        yyyymm: Month identifier substituted into the index and AJAX URL
            templates.

    Returns:
        Parsed row dicts, or an empty list on any failure (request error,
        non-200 status, empty body, missing nonce, undecodable or
        unexpectedly shaped JSON). Each failure is logged as a warning.
    """
    import requests

    index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
    ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
    headers = {"User-Agent": _nuforc_live_user_agent()}

    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        session.headers.update(headers)
        try:
            index_res = session.get(index_url, timeout=60)
        except requests.RequestException as e:
            logger.warning("NUFORC live (requests): index fetch failed for %s: %s", yyyymm, e)
            return []
        if index_res.status_code != 200 or not index_res.text:
            logger.warning(
                "NUFORC live (requests): index HTTP %s for %s",
                index_res.status_code,
                yyyymm,
            )
            return []
        nonce_match = _NUFORC_LIVE_NONCE_RE.search(index_res.text)
        if not nonce_match:
            logger.warning("NUFORC live (requests): wdtNonce not found for %s", yyyymm)
            return []
        nonce = nonce_match.group(1)
        # Pre-encoded form body; length=-1 asks for every row in one reply.
        post_data = (
            "draw=1"
            "&columns%5B0%5D%5Bdata%5D=0&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=false"
            "&columns%5B1%5D%5Bdata%5D=1&columns%5B1%5D%5Bsearchable%5D=true&columns%5B1%5D%5Borderable%5D=true"
            "&order%5B0%5D%5Bcolumn%5D=1&order%5B0%5D%5Bdir%5D=desc"
            "&start=0&length=-1"
            "&search%5Bvalue%5D=&search%5Bregex%5D=false"
            f"&wdtNonce={nonce}"
        )
        try:
            ajax_res = session.post(
                ajax_url,
                data=post_data,
                headers={
                    **headers,
                    "Referer": index_url,
                    "X-Requested-With": "XMLHttpRequest",
                    "Content-Type": "application/x-www-form-urlencoded",
                },
                timeout=120,
            )
        except requests.RequestException as e:
            logger.warning("NUFORC live (requests): ajax failed for %s: %s", yyyymm, e)
            return []
        if ajax_res.status_code != 200 or not ajax_res.text:
            logger.warning(
                "NUFORC live (requests): ajax HTTP %s for %s",
                ajax_res.status_code,
                yyyymm,
            )
            return []
        try:
            payload = ajax_res.json()
        except ValueError as e:
            # ValueError covers json.JSONDecodeError and the simplejson-based
            # error requests raises when simplejson is installed.
            logger.warning("NUFORC live (requests): ajax JSON decode failed for %s: %s", yyyymm, e)
            return []

    if not isinstance(payload, dict):
        logger.warning("NUFORC live (requests): unexpected ajax payload type for %s", yyyymm)
        return []
    return _parse_nuforc_live_datatables_rows(payload.get("data") or [])
|
||||
|
||||
|
||||
def _nuforc_fetch_month_live_curl(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
"""Pull one month of NUFORC sightings via curl + wpDataTables AJAX."""
|
||||
curl_bin = shutil.which("curl") or "curl"
|
||||
index_url = _NUFORC_LIVE_INDEX_URL.format(yyyymm=yyyymm)
|
||||
ajax_url = _NUFORC_LIVE_AJAX_URL.format(yyyymm=yyyymm)
|
||||
|
||||
# Step 1: GET the month index to capture session cookies + fresh nonce.
|
||||
try:
|
||||
@@ -1125,65 +1285,27 @@ def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
|
||||
logger.warning("NUFORC live: ajax JSON decode failed for %s: %s", yyyymm, e)
|
||||
return []
|
||||
|
||||
raw_rows = payload.get("data") or []
|
||||
out: list[dict] = []
|
||||
for raw in raw_rows:
|
||||
if not isinstance(raw, list) or len(raw) < 8:
|
||||
continue
|
||||
link_html = str(raw[0] or "")
|
||||
occurred_raw = str(raw[1] or "")
|
||||
city = str(raw[2] or "").strip()
|
||||
state = str(raw[3] or "").strip()
|
||||
country = str(raw[4] or "").strip()
|
||||
shape_raw = (str(raw[5] or "").strip() or "Unknown")
|
||||
summary = str(raw[6] or "").strip()
|
||||
reported_raw = str(raw[7] or "")
|
||||
explanation = str(raw[9] or "").strip() if len(raw) > 9 and raw[9] else ""
|
||||
return _parse_nuforc_live_datatables_rows(payload.get("data") or [])
|
||||
|
||||
occurred_ymd = _parse_date(occurred_raw)
|
||||
if not occurred_ymd:
|
||||
continue
|
||||
if not city and not state and not country:
|
||||
continue
|
||||
|
||||
id_match = _NUFORC_LIVE_SIGHTING_ID_RE.search(link_html)
|
||||
if id_match:
|
||||
sighting_id = f"NUFORC-{id_match.group(1)}"
|
||||
else:
|
||||
digest = hashlib.sha1(
|
||||
f"{occurred_ymd}|{city}|{state}|{summary}".encode("utf-8", "ignore")
|
||||
).hexdigest()[:12]
|
||||
sighting_id = f"NUFORC-{digest}"
|
||||
|
||||
if summary and len(summary) > 280:
|
||||
summary = summary[:277] + "..."
|
||||
if not summary:
|
||||
summary = "Sighting reported"
|
||||
|
||||
out.append({
|
||||
"id": sighting_id,
|
||||
"occurred": occurred_ymd,
|
||||
"posted": _parse_date(reported_raw) or occurred_ymd,
|
||||
"city": city,
|
||||
"state": state,
|
||||
"country": country,
|
||||
"shape_raw": shape_raw,
|
||||
"summary": summary,
|
||||
"explanation": explanation,
|
||||
})
|
||||
return out
|
||||
def _nuforc_fetch_month_live(yyyymm: str, cookie_jar: Path) -> list[dict]:
    """Fetch one month of NUFORC sightings from the live wpDataTables AJAX endpoint.

    When the external curl fallback is enabled, the curl transport is tried
    first. The ``requests`` transport is used when curl is disabled or when
    it produced no rows.

    Args:
        yyyymm: Month key passed through to the transport helpers.
        cookie_jar: Cookie jar path handed to the curl transport.

    Returns:
        The rows returned by whichever transport was used (possibly empty).
    """
    curl_rows = (
        _nuforc_fetch_month_live_curl(yyyymm, cookie_jar)
        if external_curl_fallback_enabled()
        else None
    )
    # An empty curl result is treated the same as curl being disabled.
    return curl_rows if curl_rows else _nuforc_fetch_month_live_requests(yyyymm)
|
||||
|
||||
|
||||
def _build_recent_uap_sightings() -> list[dict]:
|
||||
"""Build the rolling 1-year UAP sightings layer from live NUFORC data.
|
||||
"""Build the rolling UAP sightings layer from live NUFORC data.
|
||||
|
||||
Hits nuforc.org's public sub-index once per month in the window, drops
|
||||
anything outside the exact day-precision cutoff, dedupes by sighting id,
|
||||
geocodes city+state via the existing location cache, and returns rows
|
||||
keyed to the same schema the frontend already renders.
|
||||
"""
|
||||
cutoff_dt = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
cutoff_str = cutoff_dt.strftime("%Y-%m-%d")
|
||||
cutoff_str = _uap_cutoff_date_str()
|
||||
months = _nuforc_months_for_window(_NUFORC_RECENT_DAYS)
|
||||
|
||||
try:
|
||||
@@ -1530,11 +1652,12 @@ def _build_uap_sightings_from_hf_mirror() -> list[dict]:
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
"""Fetch last-year UAP sightings from NUFORC.
|
||||
"""Fetch rolling-window UAP sightings from live NUFORC.
|
||||
|
||||
Startup reads the cached daily snapshot when it is still fresh. The daily
|
||||
scheduler forces a rebuild so this layer updates once per day instead of
|
||||
churning continuously.
|
||||
Startup reads the cached snapshot when still within NUFORC_CACHE_TTL_HOURS
|
||||
(default 168h / one week). The weekly scheduler forces a rebuild so every
|
||||
install refreshes the same ~60-day layer without daily load on nuforc.org.
|
||||
Operators can also POST /api/refresh (admin) to pull immediately.
|
||||
"""
|
||||
from services.fetchers._store import is_any_active
|
||||
|
||||
@@ -1567,12 +1690,16 @@ def fetch_uap_sightings(*, force_refresh: bool = False):
|
||||
live_error,
|
||||
)
|
||||
|
||||
if sightings:
|
||||
sightings = _filter_uap_sightings_recent(sightings)
|
||||
|
||||
with _data_lock:
|
||||
latest_data["uap_sightings"] = sightings or []
|
||||
if sightings:
|
||||
_mark_fresh("uap_sightings")
|
||||
return
|
||||
|
||||
# Unreachable legacy Mapbox tilequery path (kept for reference).
|
||||
cutoff = datetime.utcnow() - timedelta(days=_NUFORC_RECENT_DAYS)
|
||||
|
||||
# Query the grid concurrently (up to 8 threads)
|
||||
|
||||
@@ -20,17 +20,9 @@ def _env_flag(name: str) -> str:
|
||||
|
||||
|
||||
def liveuamap_scraper_enabled() -> bool:
    """Return whether the Playwright-based LiveUAMap scraper should run.

    The decision is delegated to
    ``services.liveuamap_settings.liveuamap_scraper_enabled`` so that this
    module and the settings module cannot disagree. The previous inline
    SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER / ``os.name`` check is removed; it
    left the delegation unreachable.
    """
    # Imported at call time rather than module import time.
    # NOTE(review): presumably to avoid an import cycle — confirm.
    from services.liveuamap_settings import liveuamap_scraper_enabled as _enabled

    return _enabled()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -210,10 +202,17 @@ def update_liveuamap():
|
||||
if not is_any_active("global_incidents"):
|
||||
return
|
||||
if not liveuamap_scraper_enabled():
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled for this runtime; set "
|
||||
"SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in."
|
||||
)
|
||||
from services.liveuamap_settings import liveuamap_requires_ui_opt_in
|
||||
|
||||
if liveuamap_requires_ui_opt_in():
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled: enable Global Incidents in the UI to "
|
||||
"consent, or set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1."
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Liveuamap scraper disabled; set SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER=1 to opt in."
|
||||
)
|
||||
return
|
||||
logger.info("Running scheduled Liveuamap scraper...")
|
||||
try:
|
||||
@@ -279,6 +278,16 @@ _FISHING_FETCH_INTERVAL_S = 3600 # once per hour — GFW data has ~5 day lag
|
||||
_last_fishing_fetch_ts: float = 0.0
|
||||
|
||||
|
||||
def _gfw_int_env(name: str, default: int, *, minimum: int = 1, maximum: int | None = None) -> int:
|
||||
try:
|
||||
value = int(os.environ.get(name, str(default)) or default)
|
||||
except (TypeError, ValueError):
|
||||
value = default
|
||||
if maximum is not None:
|
||||
value = min(maximum, value)
|
||||
return max(minimum, value)
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=5)
|
||||
def fetch_fishing_activity():
|
||||
"""Fetch recent fishing events from Global Fishing Watch (~5 day lag)."""
|
||||
@@ -301,10 +310,16 @@ def fetch_fishing_activity():
|
||||
try:
|
||||
import datetime as _dt
|
||||
|
||||
# GFW publishes with ~5 day lag; windows shorter than ~7 days often return 0 events.
|
||||
lookback_days = _gfw_int_env("GFW_EVENTS_LOOKBACK_DAYS", 7, minimum=1, maximum=14)
|
||||
max_pages = _gfw_int_env("GFW_EVENTS_MAX_PAGES", 10, minimum=1, maximum=100)
|
||||
timeout_s = _gfw_int_env("GFW_EVENTS_TIMEOUT_S", 90, minimum=30, maximum=180)
|
||||
_end = _dt.date.today().isoformat()
|
||||
_start = (_dt.date.today() - _dt.timedelta(days=7)).isoformat()
|
||||
page_size = max(1, int(os.environ.get("GFW_EVENTS_PAGE_SIZE", "500") or "500"))
|
||||
_start = (_dt.date.today() - _dt.timedelta(days=lookback_days)).isoformat()
|
||||
page_size = _gfw_int_env("GFW_EVENTS_PAGE_SIZE", 500, minimum=1, maximum=1000)
|
||||
offset = 0
|
||||
pages_fetched = 0
|
||||
total_available: int | None = None
|
||||
seen_offsets: set[int] = set()
|
||||
seen_ids: set[str] = set()
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
@@ -325,7 +340,7 @@ def fetch_fishing_activity():
|
||||
}
|
||||
)
|
||||
url = f"https://gateway.api.globalfishingwatch.org/v3/events?{query}"
|
||||
response = fetch_with_curl(url, timeout=30, headers=headers)
|
||||
response = fetch_with_curl(url, timeout=timeout_s, headers=headers)
|
||||
if response.status_code != 200:
|
||||
logger.warning(
|
||||
"Fishing activity fetch failed at offset=%s: HTTP %s",
|
||||
@@ -335,10 +350,16 @@ def fetch_fishing_activity():
|
||||
break
|
||||
|
||||
payload = response.json() or {}
|
||||
if total_available is None:
|
||||
try:
|
||||
total_available = int(payload.get("total")) if payload.get("total") is not None else None
|
||||
except (TypeError, ValueError):
|
||||
total_available = None
|
||||
entries = payload.get("entries", [])
|
||||
if not entries:
|
||||
break
|
||||
|
||||
pages_fetched += 1
|
||||
added_this_page = 0
|
||||
for e in entries:
|
||||
pos = e.get("position", {})
|
||||
@@ -373,6 +394,15 @@ def fetch_fishing_activity():
|
||||
if len(entries) < page_size:
|
||||
break
|
||||
|
||||
if pages_fetched >= max_pages:
|
||||
logger.info(
|
||||
"Fishing activity: capped at %s pages (%s events fetched; GFW total=%s)",
|
||||
max_pages,
|
||||
len(events),
|
||||
total_available if total_available is not None else "unknown",
|
||||
)
|
||||
break
|
||||
|
||||
next_offset = payload.get("nextOffset")
|
||||
if next_offset is None:
|
||||
next_offset = (payload.get("pagination") or {}).get("nextOffset")
|
||||
|
||||
@@ -235,11 +235,11 @@ _DC_GEOCODED_PATH = Path(__file__).parent.parent.parent / "data" / "datacenters_
|
||||
|
||||
|
||||
def fetch_datacenters():
|
||||
"""Load geocoded data centers (5K+ street-level precise locations)."""
|
||||
from services.fetchers._store import is_any_active
|
||||
"""Load geocoded data centers (5K+ street-level precise locations).
|
||||
|
||||
if not is_any_active("datacenters"):
|
||||
return
|
||||
Always loads from disk; /api/live-data/slow gates the payload on the
|
||||
datacenters layer toggle so enabling the layer can render immediately.
|
||||
"""
|
||||
dcs = []
|
||||
try:
|
||||
if not _DC_GEOCODED_PATH.exists():
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Malware C2 / URLhaus feed (abuse.ch, Osiris port)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
COUNTRY_CENTROIDS: dict[str, tuple[float, float]] = {
|
||||
"AF": (65, 33), "AL": (20, 41), "DZ": (3, 28), "AR": (-64, -34), "AU": (134, -25),
|
||||
"AT": (14, 47.5), "BE": (4, 50.8), "BR": (-51, -10), "CA": (-96, 62), "CN": (105, 35),
|
||||
"DE": (10, 51), "FR": (2, 46), "GB": (-2, 54), "IN": (79, 22), "IR": (53, 32),
|
||||
"IT": (12.5, 42.8), "JP": (138, 36), "KR": (128, 36), "MX": (-102, 23.5), "NL": (5.5, 52.5),
|
||||
"PL": (19.5, 52), "RU": (100, 60), "SG": (103.8, 1.35), "TW": (121, 23.7), "UA": (32, 49),
|
||||
"US": (-97, 38), "VN": (106, 16),
|
||||
}
|
||||
|
||||
|
||||
def fetch_malware_threats() -> list[dict[str, Any]]:
    """Fetch botnet C2 and malware-URL indicators from abuse.ch.

    Pulls the Feodo Tracker IP blocklist (first 200 entries) and the URLhaus
    recent-URLs feed, places each indicator near its country centroid with a
    deterministic jitter so markers do not stack, stores the combined payload
    under ``latest_data["malware_threats"]`` and marks the key fresh.

    Returns:
        The list of threat dicts. When the ``malware_c2`` layer is inactive
        nothing is fetched and the previously stored threat list (or an empty
        list) is returned.
    """
    if not is_any_active("malware_c2"):
        # The store holds the whole payload dict written at the end of this
        # function; unwrap it so both paths return a list as annotated.
        cached = latest_data.get("malware_threats")
        if isinstance(cached, dict):
            cached = cached.get("threats")
        return cached if isinstance(cached, list) else []

    threats: list[dict[str, Any]] = []
    # Running counter shared by both feeds: used for ids and as the jitter seed.
    threat_id = 0

    try:
        resp = fetch_with_curl(
            "https://feodotracker.abuse.ch/downloads/ipblocklist.json",
            timeout=10,
            headers={"User-Agent": "Shadowbroker/1.0", "Accept": "application/json"},
        )
        if resp.status_code == 200:
            entries = resp.json()
            if not isinstance(entries, list):
                entries = []
            for entry in entries[:200]:
                cc = entry.get("country")
                # Feodo entries without a known centroid are dropped.
                if not cc or cc not in COUNTRY_CENTROIDS:
                    continue
                # Centroids are stored as (lng, lat).
                lng, lat = COUNTRY_CENTROIDS[cc]
                # Deterministic pseudo-random offset in [-4, 4) degrees.
                j_lng = ((threat_id * 173.7) % 200 - 100) / 100 * 4
                j_lat = ((threat_id * 293.1) % 200 - 100) / 100 * 4
                threats.append(
                    {
                        "id": f"feodo-{threat_id}",
                        "lat": lat + j_lat,
                        "lng": lng + j_lng,
                        "ip": entry.get("ip_address") or "unknown",
                        "port": entry.get("dst_port") or 0,
                        "malware": entry.get("malware") or "unknown",
                        "status": entry.get("status") or "active",
                        "first_seen": entry.get("first_seen"),
                        "last_online": entry.get("last_online"),
                        "country": cc,
                        "threat_type": "botnet_c2",
                    }
                )
                threat_id += 1
    except Exception as exc:
        # Best-effort feed: a failure here must not prevent the URLhaus pull.
        logger.warning("Feodo fetch failed: %s", exc)

    try:
        resp = fetch_with_curl(
            "https://urlhaus-api.abuse.ch/v1/urls/recent/limit/100/",
            timeout=8,
        )
        if resp.status_code == 200:
            urls = (resp.json() or {}).get("urls") or []
            for u in urls:
                cc = u.get("country")
                if not cc or cc not in COUNTRY_CENTROIDS:
                    # NOTE(review): unknown countries are pinned to the first
                    # centroid key (and reported under that country code),
                    # unlike the Feodo branch which skips them — confirm this
                    # placeholder placement is intended.
                    cc = next(iter(COUNTRY_CENTROIDS))
                lng, lat = COUNTRY_CENTROIDS[cc]
                # Deterministic pseudo-random offset in [-5, 5) degrees.
                j_lng = ((threat_id * 137.3) % 200 - 100) / 100 * 5
                j_lat = ((threat_id * 211.7) % 200 - 100) / 100 * 5
                threats.append(
                    {
                        "id": f"urlhaus-{threat_id}",
                        "lat": lat + j_lat,
                        "lng": lng + j_lng,
                        "ip": u.get("host") or "unknown",
                        "port": 0,
                        "malware": ", ".join(u.get("tags") or []) or u.get("threat") or "malware",
                        "status": u.get("url_status") or "online",
                        "first_seen": u.get("dateadded"),
                        "country": cc,
                        "threat_type": "malware_url",
                    }
                )
                threat_id += 1
    except Exception as exc:
        # URLhaus is a supplement; failures are only logged at debug level.
        logger.debug("URLhaus supplement failed: %s", exc)

    payload = {
        "threats": threats,
        "total": len(threats),
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "source": "abuse.ch Feodo Tracker + URLhaus",
    }
    with _data_lock:
        latest_data["malware_threats"] = payload
    _mark_fresh("malware_threats")
    return threats
|
||||
@@ -188,8 +188,8 @@ def fetch_meshtastic_nodes():
|
||||
callsign = ""
|
||||
|
||||
send_callsign_header = str(
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
||||
).strip().lower() not in {"0", "false", "no", "off", ""}
|
||||
_os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "false")
|
||||
).strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
# Round 7a: outbound_user_agent already includes the per-install handle.
|
||||
# The optional Meshtastic callsign is appended as additional context so
|
||||
|
||||
@@ -158,21 +158,26 @@ _KEYWORD_COORDS = {
|
||||
_SORTED_KEYWORDS = sorted(_KEYWORD_COORDS.items(), key=lambda x: len(x[0]), reverse=True)
|
||||
|
||||
|
||||
def resolve_coords_match(text: str) -> tuple[tuple[float, float], str] | None:
    """Find the first keyword in ``_SORTED_KEYWORDS`` that occurs in ``text``.

    Keywords that begin or end with a space are matched as plain substrings
    of the space-padded text; every other keyword must match on word
    boundaries. ``_SORTED_KEYWORDS`` is ordered longest keyword first, so the
    first hit is the most specific one.

    Returns:
        ``((lat, lng), matched_keyword)`` for the first hit, or ``None``.
    """
    haystack = f" {text} "
    for keyword, latlng in _SORTED_KEYWORDS:
        is_space_padded = keyword[:1] == " " or keyword[-1:] == " "
        if is_space_padded:
            found = keyword in haystack
        else:
            found = re.search(r"\b" + re.escape(keyword) + r"\b", text) is not None
        if found:
            return latlng, keyword
    return None
|
||||
|
||||
|
||||
def _resolve_coords(text: str) -> tuple[float, float] | None:
    """Return (lat, lng) for the most specific keyword match, or None.

    Thin wrapper around ``resolve_coords_match`` that discards the matched
    keyword, so the matching rules live in one place: longer keywords are
    tried first, space-padded keywords (" us ", " uk ") use substring
    matching on padded text, and all others use a word-boundary regex.
    """
    match = resolve_coords_match(text)
    return match[0] if match else None
|
||||
|
||||
|
||||
@with_retry(max_retries=1, base_delay=2)
|
||||
|
||||
@@ -9,6 +9,7 @@ import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from urllib.parse import urlencode
|
||||
@@ -21,23 +22,34 @@ _prev_probabilities: dict[str, float] = {}
|
||||
_market_cache = TTLCache(maxsize=1, ttl=300)
|
||||
_POLYMARKET_PAGE_DELAY_S = float(os.environ.get("MESH_POLYMARKET_PAGE_DELAY_S", "0.02"))
|
||||
_KALSHI_PAGE_DELAY_S = float(os.environ.get("MESH_KALSHI_PAGE_DELAY_S", "0.08"))
|
||||
_POLYMARKET_PAGE_DELAY_JITTER_S = float(os.environ.get("MESH_POLYMARKET_PAGE_DELAY_JITTER_S", "0.08"))
|
||||
_KALSHI_PAGE_DELAY_JITTER_S = float(os.environ.get("MESH_KALSHI_PAGE_DELAY_JITTER_S", "0.2"))
|
||||
# Random delay before each full Polymarket+Kalshi cycle (decorrelates from other slow-tier jobs).
|
||||
_PRE_FETCH_JITTER_S = float(os.environ.get("PREDICTION_MARKETS_PRE_FETCH_JITTER_S", "90"))
|
||||
# Random pause between finishing Polymarket pagination and starting Kalshi.
|
||||
_PROVIDER_GAP_JITTER_S = float(os.environ.get("PREDICTION_MARKETS_PROVIDER_GAP_JITTER_S", "45"))
|
||||
_provider_pace_lock = threading.Lock()
|
||||
_provider_last_request_at: dict[str, float] = {}
|
||||
|
||||
|
||||
def prediction_markets_fetch_enabled() -> bool:
    """Return True when UI opt-in or PREDICTION_MARKETS_ENABLED enables pulls.

    The decision is delegated to
    ``services.prediction_markets_settings.prediction_markets_fetch_enabled``.
    The earlier env-only check is removed; it returned before the delegation
    could run.
    """
    # Imported at call time rather than module import time.
    # NOTE(review): presumably to avoid an import cycle — confirm.
    from services.prediction_markets_settings import prediction_markets_fetch_enabled as _enabled

    return _enabled()
|
||||
|
||||
|
||||
def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
if min_interval_s <= 0:
|
||||
return
|
||||
jitter_s = (
|
||||
_POLYMARKET_PAGE_DELAY_JITTER_S
|
||||
if provider == "polymarket"
|
||||
else _KALSHI_PAGE_DELAY_JITTER_S
|
||||
if provider == "kalshi"
|
||||
else 0.0
|
||||
)
|
||||
min_interval_s += random.uniform(0.0, jitter_s) if jitter_s > 0 else 0.0
|
||||
with _provider_pace_lock:
|
||||
now = time.monotonic()
|
||||
wait_s = min_interval_s - (now - _provider_last_request_at.get(provider, 0.0))
|
||||
@@ -47,6 +59,24 @@ def _pace_provider(provider: str, min_interval_s: float) -> None:
|
||||
_provider_last_request_at[provider] = now
|
||||
|
||||
|
||||
def _apply_pre_fetch_jitter() -> None:
    """Sleep a random time in [0, _PRE_FETCH_JITTER_S] before a fetch cycle.

    A non-positive ``_PRE_FETCH_JITTER_S`` disables the pause. Pauses of at
    least one second are reported at debug level.
    """
    ceiling_s = _PRE_FETCH_JITTER_S
    if ceiling_s <= 0:
        return
    pause_s = random.uniform(0.0, ceiling_s)
    if pause_s >= 1.0:
        logger.debug("Prediction markets: pre-fetch jitter %.1fs", pause_s)
    time.sleep(pause_s)
|
||||
|
||||
|
||||
def _apply_provider_gap_jitter() -> None:
    """Sleep a random time in [0, _PROVIDER_GAP_JITTER_S] between providers.

    A non-positive ``_PROVIDER_GAP_JITTER_S`` disables the pause. Pauses of
    at least one second are reported at debug level.
    """
    ceiling_s = _PROVIDER_GAP_JITTER_S
    if ceiling_s <= 0:
        return
    pause_s = random.uniform(0.0, ceiling_s)
    if pause_s >= 1.0:
        logger.debug("Prediction markets: provider gap jitter %.1fs", pause_s)
    time.sleep(pause_s)
|
||||
|
||||
|
||||
def _finite_or_none(value):
|
||||
try:
|
||||
n = float(value)
|
||||
@@ -750,7 +780,9 @@ def _merge_markets(poly_events: list[dict], kalshi_events: list[dict]) -> list[d
|
||||
@cached(_market_cache)
|
||||
def fetch_prediction_markets_raw() -> list[dict]:
|
||||
"""Fetch and merge prediction markets from both sources. Cached 5 min."""
|
||||
_apply_pre_fetch_jitter()
|
||||
poly = _fetch_polymarket_events()
|
||||
_apply_provider_gap_jitter()
|
||||
kalshi = _fetch_kalshi_events()
|
||||
merged = _merge_markets(poly, kalshi)
|
||||
logger.info(
|
||||
|
||||
@@ -11,15 +11,20 @@ import random
|
||||
import logging
|
||||
import functools
|
||||
import requests
|
||||
from requests.exceptions import ChunkedEncodingError, ConnectionError as RequestsConnectionError
|
||||
from requests.exceptions import Timeout as RequestsTimeout
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Only retry on transient network/OS errors — not on parse errors, key errors, etc.
|
||||
# Only retry on transient network/OS errors — not parse/key errors or HTTP 4xx/5xx.
|
||||
# requests.HTTPError (from raise_for_status) is intentionally excluded.
|
||||
TRANSIENT_ERRORS = (
|
||||
TimeoutError,
|
||||
ConnectionError,
|
||||
OSError,
|
||||
requests.RequestException,
|
||||
RequestsConnectionError,
|
||||
RequestsTimeout,
|
||||
ChunkedEncodingError,
|
||||
)
|
||||
|
||||
|
||||
@@ -43,6 +48,8 @@ def with_retry(max_retries: int = 3, base_delay: float = 2.0, max_delay: float =
|
||||
for attempt in range(1 + max_retries):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except requests.HTTPError:
|
||||
raise
|
||||
except TRANSIENT_ERRORS as exc:
|
||||
last_exc = exc
|
||||
if attempt < max_retries:
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
"""Scheduled Sentinel-2 road corridor freight trend fetcher (opt-in, slow tier)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_REFRESH_HOURS = float(os.environ.get("ROAD_CORRIDOR_REFRESH_HOURS", "24"))
|
||||
|
||||
|
||||
def _hours_since(iso_ts: str) -> float | None:
|
||||
try:
|
||||
dt = datetime.fromisoformat(iso_ts.replace("Z", "+00:00"))
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return (datetime.now(timezone.utc) - dt).total_seconds() / 3600.0
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _feature_ready() -> bool:
    """Report whether the road-corridor feature can run.

    Checks, in order: the feature flag, the optional dependencies, and the
    Sentinel credentials. The last two log a debug message when they fail;
    a disabled feature flag is silent.
    """
    from services.road_corridor_sat.config import optional_deps_available, road_corridor_sat_enabled
    from services.road_corridor_sat.credentials import sentinel_credentials_configured

    if not road_corridor_sat_enabled():
        ready = False
    elif not optional_deps_available():
        logger.debug("road_corridor_trends skipped — optional deps not installed")
        ready = False
    elif not sentinel_credentials_configured():
        logger.debug("road_corridor_trends skipped — Sentinel credentials missing")
        ready = False
    else:
        ready = True
    return ready
|
||||
|
||||
|
||||
def refresh_road_corridor_store() -> None:
    """Rebuild the corridor trends payload and publish it to the shared store.

    The payload from ``build_trends_payload`` is written to
    ``latest_data["road_corridor_trends"]`` under ``_data_lock`` and the key
    is then marked fresh.
    """
    from services.road_corridor_sat.storage import build_trends_payload

    trends = build_trends_payload()
    with _data_lock:
        latest_data["road_corridor_trends"] = trends
    # Freshness is recorded after the lock is released.
    _mark_fresh("road_corridor_trends")
|
||||
|
||||
|
||||
def fetch_road_corridor_trends(force: bool = False) -> None:
    """Re-run analysis for each scheduled corridor preset that is due.

    Does nothing unless the ``road_corridor_trends`` layer is active and the
    feature is ready. A preset is skipped when its last refresh is younger
    than ``_REFRESH_HOURS``, unless ``force`` is set. Analysis failures are
    logged per preset and do not stop the remaining presets. The shared store
    is refreshed once at the end.

    Args:
        force: Ignore the per-preset freshness check.
    """
    if not (is_any_active("road_corridor_trends") and _feature_ready()):
        return

    from services.road_corridor_sat.config import SCHEDULED_PRESET_IDS
    from services.road_corridor_sat.pipeline import analyze_preset
    from services.road_corridor_sat.presets import get_preset
    from services.road_corridor_sat.storage import load_refresh_state

    refresh_state = load_refresh_state()
    for scheduled_id in SCHEDULED_PRESET_IDS:
        if get_preset(scheduled_id) is None:
            logger.warning("Unknown scheduled road corridor preset: %s", scheduled_id)
            continue

        last_run = refresh_state.get(scheduled_id)
        # Age is only evaluated when there is a recorded run and no override.
        hours_old = _hours_since(last_run) if (last_run and not force) else None
        if hours_old is not None and hours_old < _REFRESH_HOURS:
            logger.info(
                "road_corridor %s fresh (%.1fh < %.1fh) — skipping",
                scheduled_id,
                hours_old,
                _REFRESH_HOURS,
            )
            continue

        try:
            logger.info("road_corridor analysis starting for %s", scheduled_id)
            analyze_preset(scheduled_id)
        except Exception as exc:
            logger.exception("road_corridor analysis failed for %s: %s", scheduled_id, exc)

    refresh_road_corridor_store()
|
||||
@@ -30,8 +30,6 @@ _AIRPORTS_URL = "https://vrs-standing-data.adsb.lol/airports.csv.gz"
|
||||
_REFRESH_INTERVAL_S = 5 * 24 * 3600
|
||||
_HTTP_TIMEOUT_S = 60
|
||||
|
||||
from services.network_utils import DEFAULT_USER_AGENT as _USER_AGENT
|
||||
|
||||
_lock = threading.RLock()
|
||||
_routes_by_callsign: dict[str, dict[str, Any]] = {}
|
||||
_airports_by_icao: dict[str, dict[str, Any]] = {}
|
||||
|
||||
@@ -0,0 +1,381 @@
|
||||
"""Telegram OSINT — public channel web previews (t.me/s) with keyword geoparsing."""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, is_any_active, latest_data
|
||||
from services.fetchers.news import resolve_coords_match
|
||||
from services.network_utils import fetch_with_curl, outbound_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_CHANNELS = (
|
||||
"osintdefender",
|
||||
"insiderpaper",
|
||||
"aljazeeraenglish",
|
||||
"nexta_live",
|
||||
"war_monitor",
|
||||
"OSINTtechnical",
|
||||
"Liveuamap",
|
||||
)
|
||||
|
||||
_MESSAGE_BLOCK_RE = re.compile(
|
||||
r'<div class="tgme_widget_message_wrap js-widget_message_wrap"[\s\S]*?</div>\s*</div>\s*</div>',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_TEXT_RE = re.compile(
|
||||
r'<div class="tgme_widget_message_text[^>]*>([\s\S]*?)</div>',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_DATE_RE = re.compile(
|
||||
r'<a class="tgme_widget_message_date" href="(https://t\.me/[^"]+)".*?<time datetime="([^"]+)"',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_HAS_VIDEO_RE = re.compile(
|
||||
r'tgme_widget_message_video|js-message_video|<video\s',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_HAS_PHOTO_RE = re.compile(r'tgme_widget_message_photo_wrap', re.IGNORECASE)
|
||||
_VIDEO_SRC_RE = re.compile(r'<video[^>]+src="([^"]+)"', re.IGNORECASE)
|
||||
_BG_IMAGE_RE = re.compile(r"background-image:url\('([^']+)'\)", re.IGNORECASE)
|
||||
|
||||
_TELEGRAM_MEDIA_HOST_SUFFIXES = (".telesco.pe", ".telegram-cdn.org")
|
||||
|
||||
# Non-English place aliases (Cyrillic, Arabic, Hebrew) plus extra metros; checked only when the English resolver finds nothing.
|
||||
_EXTRA_PLACE_KEYWORDS: dict[str, tuple[float, float]] = {
|
||||
"киев": (50.450, 30.523),
|
||||
"київ": (50.450, 30.523),
|
||||
"харьков": (49.993, 36.231),
|
||||
"харків": (49.993, 36.231),
|
||||
"одесса": (46.482, 30.724),
|
||||
"одеса": (46.482, 30.724),
|
||||
"донецк": (48.015, 37.803),
|
||||
"донецьк": (48.015, 37.803),
|
||||
"луганск": (48.574, 39.307),
|
||||
"луганськ": (48.574, 39.307),
|
||||
"москва": (55.755, 37.617),
|
||||
"крым": (45.000, 34.000),
|
||||
"крим": (45.000, 34.000),
|
||||
"бахмут": (48.595, 38.000),
|
||||
"запорожье": (47.838, 35.139),
|
||||
"запоріжжя": (47.838, 35.139),
|
||||
"غزة": (31.416, 34.333),
|
||||
"دمشق": (33.513, 36.276),
|
||||
"بيروت": (33.893, 35.501),
|
||||
"tel aviv": (32.085, 34.781),
|
||||
"תל אביב": (32.085, 34.781),
|
||||
}
|
||||
|
||||
# Country-level news geocodes sit on national centroids that stack with threat alerts.
|
||||
# Telegram uses major metro anchors so pins land on a different map cell than news.
|
||||
_TELEGRAM_ANCHOR_OVERRIDES: dict[str, tuple[float, float]] = {
|
||||
"israel": (32.085, 34.781), # Tel Aviv (news uses central Israel ~Jerusalem corridor)
|
||||
"middle east": (32.085, 34.781),
|
||||
"china": (39.904, 116.407), # Beijing (news uses country centroid)
|
||||
"united states": (40.712, -74.006), # New York (news uses Washington DC)
|
||||
"usa": (40.712, -74.006),
|
||||
"us": (40.712, -74.006),
|
||||
"america": (40.712, -74.006),
|
||||
"uk": (51.507, -0.127), # London
|
||||
"iran": (35.689, 51.389), # Tehran
|
||||
"russia": (55.755, 37.617), # Moscow
|
||||
"ukraine": (50.450, 30.523), # Kyiv
|
||||
"france": (48.856, 2.352), # Paris
|
||||
"germany": (52.520, 13.405), # Berlin
|
||||
"lebanon": (34.433, 35.844), # Tripoli (news uses Beirut corridor)
|
||||
}
|
||||
|
||||
_RISK_KEYWORDS = (
|
||||
"war",
|
||||
"missile",
|
||||
"strike",
|
||||
"attack",
|
||||
"crisis",
|
||||
"tension",
|
||||
"military",
|
||||
"conflict",
|
||||
"defense",
|
||||
"clash",
|
||||
"nuclear",
|
||||
"invasion",
|
||||
"bomb",
|
||||
"drone",
|
||||
"weapon",
|
||||
"sanctions",
|
||||
"ceasefire",
|
||||
"escalation",
|
||||
"killed",
|
||||
"destroyed",
|
||||
"operation",
|
||||
"casualty",
|
||||
"frontline",
|
||||
"threat",
|
||||
"explosion",
|
||||
"shelling",
|
||||
)
|
||||
|
||||
|
||||
def telegram_osint_enabled() -> bool:
    """Return whether the Telegram OSINT layer is enabled.

    Reads ``TELEGRAM_OSINT_ENABLED`` (default ``"true"``). The layer is on
    unless the value, trimmed and lower-cased, is one of ``0``, ``false``,
    ``no``, ``off`` or the empty string.
    """
    flag = str(os.environ.get("TELEGRAM_OSINT_ENABLED", "true")).strip().lower()
    disabled_values = ("0", "false", "no", "off", "")
    return flag not in disabled_values
|
||||
|
||||
|
||||
def _configured_channels() -> list[str]:
|
||||
raw = str(os.environ.get("TELEGRAM_OSINT_CHANNELS", "")).strip()
|
||||
if raw:
|
||||
return [part.strip().lstrip("@") for part in raw.split(",") if part.strip()]
|
||||
return list(_DEFAULT_CHANNELS)
|
||||
|
||||
|
||||
def telegram_media_host_allowed(hostname: str | None) -> bool:
    """Return whether ``hostname`` is on the Telegram media allow-list.

    The host is trimmed and lower-cased, then accepted only if it ends with
    one of ``_TELEGRAM_MEDIA_HOST_SUFFIXES``. Missing or blank hosts are
    rejected.
    """
    normalized = str(hostname or "").strip().lower()
    # str.endswith accepts the suffix tuple directly.
    return bool(normalized) and normalized.endswith(_TELEGRAM_MEDIA_HOST_SUFFIXES)
|
||||
|
||||
|
||||
def _extract_media(block: str, link: str) -> dict[str, Any]:
    """Describe the media attached to one Telegram message block.

    Video markers take precedence over photo markers. The direct media URL is
    taken from the ``<video src>`` attribute or the photo's CSS
    ``background-image`` and may be ``None`` even when a media type was
    detected.

    Args:
        block: HTML of a single message widget.
        link: Permalink of the post; used to build the embed URL.

    Returns:
        Dict with ``media_type`` ("video", "photo" or None), ``media_url``
        and ``embed_url`` (None when there is no media or no link).
    """
    media_type: str | None = None
    media_url: str | None = None

    if _HAS_VIDEO_RE.search(block):
        media_type = "video"
        src_match = _VIDEO_SRC_RE.search(block)
    elif _HAS_PHOTO_RE.search(block):
        media_type = "photo"
        src_match = _BG_IMAGE_RE.search(block)
    else:
        src_match = None
    if src_match:
        media_url = src_match.group(1).strip()

    embed_url: str | None = None
    if media_type and link:
        # Append to an existing query string instead of adding a second "?".
        separator = "&" if "?" in link else "?"
        embed_url = f"{link}{separator}embed=1"

    return {
        "media_type": media_type,
        "media_url": media_url,
        "embed_url": embed_url,
    }
|
||||
|
||||
|
||||
def _strip_html(text: str) -> str:
|
||||
cleaned = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
|
||||
cleaned = re.sub(r"<[^>]+>", "", cleaned)
|
||||
return (
|
||||
cleaned.replace(""", '"')
|
||||
.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
.strip()
|
||||
)
|
||||
|
||||
|
||||
def _score_risk(text: str) -> int:
    """Score a post from 1 to 10 by counting risk keywords.

    Starts at 1 and adds 2 for every entry of ``_RISK_KEYWORDS`` that occurs
    as a case-insensitive substring of ``text``; the result is capped at 10.
    """
    lowered = text.lower()
    hits = sum(1 for keyword in _RISK_KEYWORDS if keyword in lowered)
    return min(10, 1 + 2 * hits)
|
||||
|
||||
|
||||
def _refresh_post_coords(post: dict[str, Any]) -> dict[str, Any]:
    """Recompute a stored post's coordinates from its title and description.

    Returns the original dict unchanged when there is no text or no location
    is resolved; otherwise returns a shallow copy whose ``coords`` entry is
    the newly resolved two-element list.
    """
    fragments: list[str] = []
    for field in ("title", "description"):
        value = post.get(field)
        if not value:
            continue
        stripped = str(value).strip()
        if stripped:
            fragments.append(stripped)
    text = "\n".join(fragments)

    coords = _resolve_telegram_coords(text) if text else None
    if not coords:
        return post
    return {**post, "coords": [coords[0], coords[1]]}
|
||||
|
||||
|
||||
def _resolve_telegram_coords(text: str) -> tuple[float, float] | None:
    """Resolve a location for a Telegram post from its text.

    The lower-cased text is first run through the news keyword resolver. On a
    hit, the matched keyword's entry in ``_TELEGRAM_ANCHOR_OVERRIDES`` is
    preferred over the resolver's own coordinates. Otherwise the aliases in
    ``_EXTRA_PLACE_KEYWORDS`` are tried as substrings, longest alias first.

    Returns:
        A coordinate pair, or ``None`` when nothing matches.
    """
    lowered = text.lower()
    news_hit = resolve_coords_match(lowered)
    if news_hit:
        news_coords, matched = news_hit
        # Resolver keywords may be space-padded; override keys are not.
        return _TELEGRAM_ANCHOR_OVERRIDES.get(matched.strip().lower()) or news_coords

    longest_first = sorted(_EXTRA_PLACE_KEYWORDS, key=len, reverse=True)
    return next(
        (_EXTRA_PLACE_KEYWORDS[alias] for alias in longest_first if alias in lowered),
        None,
    )
|
||||
|
||||
|
||||
def _post_link(post: dict[str, Any]) -> str:
    """Return the post's ``link`` as a stripped string (``""`` when absent)."""
    raw = post.get("link")
    if not raw:
        return ""
    return str(raw).strip()
|
||||
|
||||
|
||||
def _extract_new_channel_posts(
    html: str,
    channel: str,
    known_links: set[str],
    *,
    bootstrap_limit: int = 12,
) -> list[dict[str, Any]]:
    """Return the unseen posts of a channel page, in page order.

    With no known links at all (first run) the last ``bootstrap_limit`` parsed
    posts are returned.  Otherwise the page is walked from its last post
    backwards and the walk stops at the first post whose link is already
    known; posts without a link are skipped.
    """
    parsed = parse_telegram_channel_html(html, channel)
    if not parsed:
        return []
    if not known_links:
        return parsed[-bootstrap_limit:]

    unseen_backwards: list[dict[str, Any]] = []
    for candidate in parsed[::-1]:
        url = _post_link(candidate)
        if not url:
            continue
        if url in known_links:
            # Everything before this post was stored on an earlier fetch.
            break
        unseen_backwards.append(candidate)
    return unseen_backwards[::-1]
|
||||
|
||||
|
||||
def _merge_telegram_posts(
    existing: list[dict[str, Any]],
    incoming: list[dict[str, Any]],
    *,
    max_posts: int = 120,
) -> tuple[list[dict[str, Any]], int]:
    """Merge ``incoming`` posts into ``existing``, de-duplicated by link.

    ``existing`` is extended and sorted in place (descending by the string
    form of ``published``).  Returns the first ``max_posts`` entries of the
    sorted list together with the number of posts that were actually added.
    """
    seen: set[str] = set()
    for stored in existing:
        stored_url = _post_link(stored)
        if stored_url:
            seen.add(stored_url)

    added = 0
    for candidate in incoming:
        url = _post_link(candidate)
        if url and url not in seen:
            seen.add(url)
            existing.append(candidate)
            added += 1

    existing.sort(key=lambda p: str(p.get("published") or ""), reverse=True)
    return existing[:max_posts], added
|
||||
|
||||
|
||||
def parse_telegram_channel_html(html: str, channel: str) -> list[dict[str, Any]]:
    """Parse public t.me/s channel preview HTML into a list of post dicts.

    Message blocks without a text element, or whose stripped text is shorter
    than 10 characters, are skipped.  When a block has no date match, the link
    falls back to the channel URL and ``published`` to the current UTC time.
    Each post carries the keys returned by ``_extract_media`` in addition to
    the fields built here.
    """
    fallback_link = f"https://t.me/{channel}"
    source_label = f"t.me/{channel}"
    results: list[dict[str, Any]] = []

    for fragment in _MESSAGE_BLOCK_RE.findall(html or ""):
        body_match = _TEXT_RE.search(fragment)
        if not body_match:
            continue
        body = _strip_html(body_match.group(1))
        if len(body) < 10:
            continue

        stamp = _DATE_RE.search(fragment)
        if stamp:
            link = stamp.group(1)
            published = stamp.group(2)
        else:
            link = fallback_link
            published = datetime.now(timezone.utc).isoformat()

        headline = body.split("\n", 1)[0][:160]
        risk = _score_risk(body)
        location = _resolve_telegram_coords(body)
        # Short id derived from link + publish time.
        digest = hashlib.sha1(f"{link}|{published}".encode("utf-8")).hexdigest()[:16]

        record: dict[str, Any] = {
            "id": digest,
            "title": headline,
            "description": body[:1200],
            "link": link,
            "published": published,
            "source": source_label,
            "channel": channel,
            "risk_score": risk,
            "coords": [location[0], location[1]] if location else None,
        }
        # Media keys are merged last, so they win on any key collision.
        record.update(_extract_media(fragment, link))
        results.append(record)

    return results
|
||||
|
||||
|
||||
def fetch_telegram_osint() -> dict[str, Any]:
    """Fetch configured Telegram channel previews and update the stored layer.

    Returns the previously stored payload (or an empty one) when the
    ``telegram_osint`` layer is not active, and stores/returns a ``disabled``
    payload when the feature is switched off.  Otherwise every configured
    channel page is fetched, unseen posts are merged into the retained posts,
    coordinates are re-resolved for all retained posts, and the new payload is
    stored in ``latest_data`` and returned.
    """
    if not is_any_active("telegram_osint"):
        return latest_data.get("telegram_osint") or {"posts": [], "total": 0, "timestamp": None}

    if not telegram_osint_enabled():
        disabled_payload = {"posts": [], "total": 0, "timestamp": None, "disabled": True}
        with _data_lock:
            latest_data["telegram_osint"] = disabled_payload
        # NOTE(review): the original indentation was not recoverable; the
        # freshness mark is assumed to sit outside the lock — confirm.
        _mark_fresh("telegram_osint")
        return latest_data["telegram_osint"]

    user_agent = (
        f"Mozilla/5.0 (compatible; {outbound_user_agent('telegram-osint')}) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
    headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml",
    }

    # Work on a copy of the stored posts so the merge below does not mutate
    # the list currently published in ``latest_data``.
    with _data_lock:
        prior = latest_data.get("telegram_osint") or {}
        existing_posts = list(prior.get("posts") or [])

    known_links = {_post_link(post) for post in existing_posts if _post_link(post)}
    incoming: list[dict[str, Any]] = []

    # ``known_links`` is shared across channels and grows as posts are
    # accepted, so the bootstrap limit in ``_extract_new_channel_posts`` only
    # applies while it is still empty.
    for channel in _configured_channels():
        page_url = f"https://t.me/s/{channel}"
        try:
            resp = fetch_with_curl(page_url, timeout=15, headers=headers)
            if not resp or resp.status_code != 200:
                logger.warning(
                    "Telegram channel %s fetch failed: HTTP %s",
                    channel,
                    resp.status_code if resp else "no response",
                )
                continue
            for post in _extract_new_channel_posts(resp.text, channel, known_links):
                link = _post_link(post)
                if link and link not in known_links:
                    known_links.add(link)
                    incoming.append(post)
        except Exception as exc:
            # One broken channel must not abort the remaining channels.
            logger.warning("Telegram channel %s parse failed: %s", channel, exc)

    merged_posts, added = _merge_telegram_posts(existing_posts, incoming)
    merged_posts = [_refresh_post_coords(post) for post in merged_posts]
    geolocated = sum(1 for p in merged_posts if p.get("coords"))

    payload = {
        "posts": merged_posts,
        "total": len(merged_posts),
        "geolocated": geolocated,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "channels": _configured_channels(),
        "last_fetch_new": added,
    }

    with _data_lock:
        latest_data["telegram_osint"] = payload
    _mark_fresh("telegram_osint")
    logger.info(
        "Telegram OSINT: +%s new, %s retained (%s geolocated)",
        added,
        len(merged_posts),
        geolocated,
    )
    return payload
|
||||
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
import requests
|
||||
import logging
|
||||
import zipfile
|
||||
@@ -20,6 +21,50 @@ logger = logging.getLogger(__name__)
|
||||
# Cache Frontline data for 30 minutes, it doesn't move that fast
|
||||
frontline_cache = TTLCache(maxsize=1, ttl=1800)
|
||||
|
||||
_DEFAULT_DEEPSTATE_MIRROR_REPO = "cyterat/deepstate-map-data"
|
||||
|
||||
|
||||
def _deepstate_mirror_ref() -> tuple[str, str]:
    """Return ``(github_repo_slug, git_ref)`` for the DeepState mirror.

    The slug comes from ``DEEPSTATE_MIRROR_REPO`` and falls back to the
    default mirror when it is unset or does not contain exactly one ``/``.
    When ``DEEPSTATE_MIRROR_COMMIT`` is set, ingest is pinned to that ref
    instead of following the mutable ``main`` branch (#362).
    """
    env = os.environ
    slug = (env.get("DEEPSTATE_MIRROR_REPO") or _DEFAULT_DEEPSTATE_MIRROR_REPO).strip()
    if slug.count("/") != 1:
        slug = _DEFAULT_DEEPSTATE_MIRROR_REPO

    pinned = (env.get("DEEPSTATE_MIRROR_COMMIT") or "").strip()
    return slug, (pinned or "main")
|
||||
|
||||
|
||||
def _latest_deepstate_geo_path(tree_items: list) -> str | None:
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_items
|
||||
if isinstance(item, dict)
|
||||
and str(item.get("path", "")).startswith("data/deepstatemap_data_")
|
||||
and str(item.get("path", "")).endswith(".geojson")
|
||||
]
|
||||
return sorted(geo_files)[-1] if geo_files else None
|
||||
|
||||
|
||||
def _annotate_deepstate_geojson(data: dict) -> dict:
    """Label each feature of a DeepState GeoJSON by its array position.

    Every feature gets ``properties["name"]`` (looked up by index, defaulting
    to "Russian-occupied areas" beyond the known zones) and
    ``properties["zone_id"]`` set to its index.  ``data`` is modified in place
    and returned; a payload without ``features`` is returned unchanged.
    """
    zone_labels = (
        "Russian-occupied areas",
        "Russian advance",
        "Liberated area",
        "Russian-occupied areas",  # Crimea / LPR / DPR
        "Directions of UA attacks",
    )
    if "features" not in data:
        return data

    for zone_id, feat in enumerate(data["features"]):
        if "properties" not in feat or feat["properties"] is None:
            feat["properties"] = {}
        props = feat["properties"]
        if zone_id < len(zone_labels):
            props["name"] = zone_labels[zone_id]
        else:
            props["name"] = "Russian-occupied areas"
        props["zone_id"] = zone_id
    return data
|
||||
|
||||
|
||||
@cached(frontline_cache)
|
||||
def fetch_ukraine_frontlines():
|
||||
@@ -27,67 +72,34 @@ def fetch_ukraine_frontlines():
|
||||
Fetches the latest GeoJSON data representing the Ukraine frontline.
|
||||
We use the cyterat/deepstate-map-data github mirror since the public API is locked.
|
||||
"""
|
||||
repo, ref = _deepstate_mirror_ref()
|
||||
try:
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror...")
|
||||
logger.info("Fetching DeepStateMap from GitHub mirror (%s @ %s)...", repo, ref)
|
||||
|
||||
# First, query the repo tree to find the latest file name
|
||||
tree_url = (
|
||||
"https://api.github.com/repos/cyterat/deepstate-map-data/git/trees/main?recursive=1"
|
||||
)
|
||||
tree_url = f"https://api.github.com/repos/{repo}/git/trees/{ref}?recursive=1"
|
||||
res_tree = requests.get(tree_url, timeout=10)
|
||||
|
||||
if res_tree.status_code == 200:
|
||||
tree_data = res_tree.json().get("tree", [])
|
||||
# Filter for geojson files in data folder
|
||||
geo_files = [
|
||||
item["path"]
|
||||
for item in tree_data
|
||||
if item["path"].startswith("data/deepstatemap_data_")
|
||||
and item["path"].endswith(".geojson")
|
||||
]
|
||||
|
||||
if geo_files:
|
||||
# Get the alphabetically latest file (since it's named with YYYYMMDD)
|
||||
latest_file = sorted(geo_files)[-1]
|
||||
|
||||
raw_url = f"https://raw.githubusercontent.com/cyterat/deepstate-map-data/main/{latest_file}"
|
||||
logger.info(f"Downloading latest DeepStateMap: {raw_url}")
|
||||
latest_file = _latest_deepstate_geo_path(res_tree.json().get("tree", []))
|
||||
if latest_file:
|
||||
raw_url = f"https://raw.githubusercontent.com/{repo}/{ref}/{latest_file}"
|
||||
logger.info("Downloading DeepStateMap: %s", raw_url)
|
||||
|
||||
res_geo = requests.get(raw_url, timeout=20)
|
||||
if res_geo.status_code == 200:
|
||||
data = res_geo.json()
|
||||
|
||||
# The Cyterat GitHub mirror strips all properties and just provides a raw array of Feature polygons.
|
||||
# Based on DeepStateMap's frontend mapping, the array index corresponds to the zone type:
|
||||
# 0: Russian-occupied areas
|
||||
# 1: Russian advance
|
||||
# 2: Liberated area
|
||||
# 3: Uncontested/Crimea (often folded into occupied)
|
||||
name_map = {
|
||||
0: "Russian-occupied areas",
|
||||
1: "Russian advance",
|
||||
2: "Liberated area",
|
||||
3: "Russian-occupied areas", # Crimea / LPR / DPR
|
||||
4: "Directions of UA attacks",
|
||||
}
|
||||
|
||||
if "features" in data:
|
||||
for idx, feature in enumerate(data["features"]):
|
||||
if "properties" not in feature or feature["properties"] is None:
|
||||
feature["properties"] = {}
|
||||
|
||||
feature["properties"]["name"] = name_map.get(
|
||||
idx, "Russian-occupied areas"
|
||||
)
|
||||
feature["properties"]["zone_id"] = idx
|
||||
|
||||
return data
|
||||
else:
|
||||
logger.error(
|
||||
f"Failed to fetch parsed Github Raw GeoJSON: {res_geo.status_code}"
|
||||
)
|
||||
return _annotate_deepstate_geojson(res_geo.json())
|
||||
logger.error(
|
||||
"Failed to fetch parsed Github Raw GeoJSON: %s", res_geo.status_code
|
||||
)
|
||||
else:
|
||||
logger.error("No deepstatemap_data_*.geojson files in mirror tree at %s", ref)
|
||||
else:
|
||||
logger.error(f"Failed to fetch Github Tree for Deepstatemap: {res_tree.status_code}")
|
||||
logger.error(
|
||||
"Failed to fetch Github tree for Deepstatemap (%s @ %s): %s",
|
||||
repo,
|
||||
ref,
|
||||
res_tree.status_code,
|
||||
)
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.error(f"Error fetching DeepStateMap: {e}")
|
||||
return None
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
"""Function Keys — anonymous citizenship proof.
|
||||
"""Function Keys — anonymous credential scaffolding.
|
||||
|
||||
Source of truth: ``infonet-economy/IMPLEMENTATION_PLAN.md`` §4.4,
|
||||
``infonet-economy/BRAINDUMP.md`` §11 item 9.
|
||||
|
||||
A citizen should be able to prove "I am a UBI-eligible Infonet
|
||||
citizen" to a real-world operator (food bank, community service)
|
||||
**without revealing their Infonet identity**. The naive approach
|
||||
(scramble a public key, record each redemption on chain) leaks
|
||||
identity through metadata correlation (time, location, operator,
|
||||
frequency).
|
||||
A citizen should eventually be able to prove "I am a UBI-eligible
|
||||
Infonet citizen" to a real-world operator (food bank, community
|
||||
service) **without revealing their Infonet identity**. The current
|
||||
Python implementation wires the accounting, nullifier, receipt, and
|
||||
operator flows, but its HMAC challenge-response is a placeholder for
|
||||
integration tests. It is not a production anonymous or zero-knowledge
|
||||
citizenship proof until blind signatures or anonymous credentials are
|
||||
selected and wired.
|
||||
|
||||
The naive approach (scramble a public key, record each redemption on
|
||||
chain) leaks identity through metadata correlation (time, location,
|
||||
operator, frequency).
|
||||
|
||||
The full design has six pieces; five are implemented in pure Python
|
||||
here. The remaining piece — issuance via blind signatures or
|
||||
@@ -27,7 +33,8 @@ Pieces:
|
||||
operator: tracked via ``NullifierTracker``.
|
||||
3. **Challenge-response** (`challenge_response.py`) — operator
|
||||
issues a fresh nonce, key-holder signs with the Function Key's
|
||||
secret. Prevents screenshot attacks, key sharing, replay.
|
||||
secret. This is HMAC placeholder plumbing for screenshot/replay
|
||||
resistance, not the final anonymous credential proof.
|
||||
4. **Two-phase commit receipts** (`receipt.py`) — Phase 1
|
||||
verification receipt (operator-signed, day-level date NOT
|
||||
timestamp, no node_id). Phase 2 fulfillment receipt (citizen
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
"""Country risk index (static scores + USGS quake enrichment)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
RISK_FACTORS: dict[str, dict[str, Any]] = {
|
||||
"UA": {"base": 85, "tags": ["active_conflict", "infrastructure_damage"]},
|
||||
"RU": {"base": 72, "tags": ["sanctions", "military_mobilization"]},
|
||||
"IL": {"base": 78, "tags": ["active_conflict", "regional_instability"]},
|
||||
"PS": {"base": 90, "tags": ["active_conflict", "humanitarian_crisis"]},
|
||||
"SY": {"base": 82, "tags": ["post_conflict", "infrastructure_damage"]},
|
||||
"YE": {"base": 88, "tags": ["active_conflict", "humanitarian_crisis"]},
|
||||
"MM": {"base": 76, "tags": ["civil_unrest", "military_junta"]},
|
||||
"SD": {"base": 84, "tags": ["active_conflict", "humanitarian_crisis"]},
|
||||
"AF": {"base": 80, "tags": ["post_conflict", "governance_collapse"]},
|
||||
"KP": {"base": 70, "tags": ["nuclear_risk", "isolation"]},
|
||||
"IR": {"base": 68, "tags": ["sanctions", "nuclear_program", "regional_proxy"]},
|
||||
"CN": {"base": 35, "tags": ["strategic_competition", "taiwan_tensions"]},
|
||||
"TW": {"base": 45, "tags": ["invasion_risk", "semiconductor_dependency"]},
|
||||
"VE": {"base": 60, "tags": ["economic_collapse", "political_instability"]},
|
||||
"HT": {"base": 85, "tags": ["gang_violence", "governance_collapse"]},
|
||||
"LB": {"base": 65, "tags": ["economic_crisis", "political_deadlock"]},
|
||||
"PK": {"base": 55, "tags": ["terrorism", "political_instability"]},
|
||||
"SO": {"base": 82, "tags": ["terrorism", "state_fragility"]},
|
||||
"LY": {"base": 72, "tags": ["divided_government", "militia_control"]},
|
||||
"ET": {"base": 62, "tags": ["ethnic_tensions", "regional_conflicts"]},
|
||||
}
|
||||
|
||||
EXCHANGES = [
|
||||
{"name": "NYSE", "tz": "America/New_York", "open": 9.5, "close": 16, "country": "US"},
|
||||
{"name": "NASDAQ", "tz": "America/New_York", "open": 9.5, "close": 16, "country": "US"},
|
||||
{"name": "LSE", "tz": "Europe/London", "open": 8, "close": 16.5, "country": "GB"},
|
||||
{"name": "TSE", "tz": "Asia/Tokyo", "open": 9, "close": 15, "country": "JP"},
|
||||
{"name": "SSE", "tz": "Asia/Shanghai", "open": 9.5, "close": 15, "country": "CN"},
|
||||
{"name": "HKEX", "tz": "Asia/Hong_Kong", "open": 9.5, "close": 16, "country": "HK"},
|
||||
{"name": "FRA", "tz": "Europe/Berlin", "open": 8, "close": 20, "country": "DE"},
|
||||
{"name": "TSX", "tz": "America/Toronto", "open": 9.5, "close": 16, "country": "CA"},
|
||||
{"name": "MOEX", "tz": "Europe/Moscow", "open": 10, "close": 18.5, "country": "RU"},
|
||||
]
|
||||
|
||||
|
||||
def _exchange_open(ex: dict[str, Any]) -> bool:
    """Report whether an exchange is inside its trading window right now.

    The current time is taken in the exchange's own time zone (``ex["tz"]``).
    Saturdays and Sundays are always closed; on other days the local time, as
    fractional hours, must fall in ``[ex["open"], ex["close"])``.  Any error
    (unknown zone, missing key) is reported as closed.
    """
    try:
        local_now = datetime.now(ZoneInfo(ex["tz"]))
        is_weekday = local_now.weekday() < 5
        if not is_weekday:
            return False
        hours = local_now.hour + local_now.minute / 60
        return hours >= ex["open"] and hours < ex["close"]
    except Exception:
        return False
|
||||
|
||||
|
||||
# Lower-cased country names as they are expected to appear in the trailing,
# comma-separated segment of a USGS ``place`` string
# (e.g. "23 km SW of Kabul, Afghanistan"), mapped to ISO codes.
_QUAKE_REGION_TO_CODE: dict[str, str] = {
    "ukraine": "UA",
    "russia": "RU",
    "israel": "IL",
    "palestine": "PS",
    "syria": "SY",
    "yemen": "YE",
    "myanmar": "MM",
    "burma": "MM",
    "sudan": "SD",
    "afghanistan": "AF",
    "north korea": "KP",
    "iran": "IR",
    "china": "CN",
    "taiwan": "TW",
    "venezuela": "VE",
    "haiti": "HT",
    "lebanon": "LB",
    "pakistan": "PK",
    "somalia": "SO",
    "libya": "LY",
    "ethiopia": "ET",
}


def _quake_country_code(place: str) -> str | None:
    """Map a USGS ``place`` string to a tracked ISO country code, if any."""
    region = place.rsplit(",", 1)[-1].strip().casefold()
    # Offshore events are sometimes reported as "<Country> region".
    region = region.removesuffix(" region")
    return _QUAKE_REGION_TO_CODE.get(region)


def build_country_risk_payload() -> dict[str, Any]:
    """Build the country risk index together with exchange open/closed status.

    Each country in ``RISK_FACTORS`` gets its static base score plus the summed
    magnitudes of the last day's M4.5+ USGS earthquakes located in it, capped
    at 100.  Quake enrichment is best-effort: a failed or malformed USGS
    response leaves the base scores untouched.

    Earthquakes are attributed by the country name at the end of the USGS
    ``place`` text.  Matching two-letter ISO codes as substrings (the previous
    behaviour) hit unrelated text such as "ru" in "Peru" or "so" in "south".

    Returns:
        A dict with ``countries`` (sorted by ``risk_score`` descending),
        ``exchanges``, ``open_exchanges``, ``total_exchanges`` and a UTC
        ``timestamp``.
    """
    import logging

    quake_risks: dict[str, float] = {}
    try:
        resp = fetch_with_curl(
            "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_day.geojson",
            timeout=5,
        )
        if resp is not None and resp.status_code == 200:
            for feature in resp.json().get("features") or []:
                props = feature.get("properties") or {}
                code = _quake_country_code(props.get("place") or "")
                if code is None or code not in RISK_FACTORS:
                    continue
                quake_risks[code] = quake_risks.get(code, 0) + (props.get("mag") or 0)
    except Exception as exc:
        # Deliberately best-effort, but leave a trace instead of failing silently.
        logging.getLogger(__name__).debug("USGS quake enrichment skipped: %s", exc)

    countries = []
    for code, data in RISK_FACTORS.items():
        base = data["base"]
        # NOTE(review): the level is derived from the static base, not from
        # the quake-enriched score — kept as in the original; confirm intent.
        if base >= 80:
            level = "CRITICAL"
        elif base >= 60:
            level = "HIGH"
        elif base >= 40:
            level = "ELEVATED"
        else:
            level = "LOW"
        countries.append(
            {
                "code": code,
                "risk_score": min(100, base + quake_risks.get(code, 0)),
                "risk_level": level,
                "tags": data["tags"],
            }
        )
    countries.sort(key=lambda c: c["risk_score"], reverse=True)

    exchanges = [
        {"name": e["name"], "country": e["country"], "open": _exchange_open(e)}
        for e in EXCHANGES
    ]
    return {
        "countries": countries,
        "exchanges": exchanges,
        "open_exchanges": sum(1 for e in exchanges if e["open"]),
        "total_exchanges": len(exchanges),
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
|
||||
@@ -32,14 +32,14 @@ logger = logging.getLogger(__name__)
|
||||
_REFRESH_SECONDS = 24 * 3600
|
||||
kiwisdr_cache: TTLCache = TTLCache(maxsize=1, ttl=_REFRESH_SECONDS)
|
||||
|
||||
_SOURCE_URL = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL_HTTP = "http://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_SOURCE_URL_HTTPS = "https://rx.linkfanel.net/kiwisdr_com.js"
|
||||
_CACHE_FILE = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_cache.json"
|
||||
# Bundled fallback — shipped with the codebase so the KiwiSDR layer always
|
||||
# has something to render even when the upstream is unreachable, returns
|
||||
# garbage, or appears to have been tampered with. Issue #206: the upstream
|
||||
# only speaks HTTP, so we can't rely on TLS for integrity — instead we
|
||||
# validate the response's shape and fall back to this bundle if it doesn't
|
||||
# look right.
|
||||
# garbage, or appears to have been tampered with. Issue #206 / #364: try HTTPS
|
||||
# first, then HTTP; we still validate shape and fall back to this bundle if the
|
||||
# payload does not look right.
|
||||
_BUNDLED_FALLBACK = Path(__file__).resolve().parent.parent / "data" / "kiwisdr_directory.json"
|
||||
|
||||
# Minimum number of receivers we expect from a healthy upstream response.
|
||||
@@ -184,6 +184,29 @@ def _validate_fetched_nodes(nodes: list[dict]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _fetch_mirror_payload_text() -> str | None:
    """Download the KiwiSDR mirror payload, HTTPS first and then HTTP (#364).

    Returns the body of the first HTTP 200 response.  When both URLs fail, the
    last failure is logged as a warning and ``None`` is returned.  The caller
    is still expected to shape-validate whatever comes back.
    """
    from services.network_utils import fetch_with_curl

    failure: Exception | None = None
    for url in (_SOURCE_URL_HTTPS, _SOURCE_URL_HTTP):
        try:
            response = fetch_with_curl(url, timeout=20)
            if not (response and response.status_code == 200):
                failure = RuntimeError(f"HTTP {getattr(response, 'status_code', 'unknown')}")
                continue
            if url == _SOURCE_URL_HTTP:
                # Reaching the plain-HTTP URL means the HTTPS attempt did not succeed.
                logger.info(
                    "KiwiSDR: HTTPS mirror unavailable; using HTTP with shape validation"
                )
            return response.text
        except Exception as exc:
            failure = exc
            logger.debug("KiwiSDR mirror fetch failed for %s: %s", url, exc)

    if failure is not None:
        logger.warning("KiwiSDR mirror fetch failed: %s", failure)
    return None
|
||||
|
||||
|
||||
def _load_bundled_fallback() -> list[dict]:
|
||||
"""Last-resort directory shipped with the codebase. Always returns a
|
||||
list (may be empty if the bundle is missing in older deployments)."""
|
||||
@@ -202,9 +225,8 @@ def _load_bundled_fallback() -> list[dict]:
|
||||
def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
"""Return the KiwiSDR receiver list, refreshed at most once per day.
|
||||
|
||||
Layered fallback (issue #206 — upstream is HTTP-only, so we defend with
|
||||
content validation + bundled static directory rather than trying to
|
||||
upgrade the transport):
|
||||
Layered fallback (issue #206 / #364 — HTTPS first, HTTP fallback, plus
|
||||
content validation + bundled static directory):
|
||||
|
||||
1. In-memory cache (handled by @cached on this function)
|
||||
2. On-disk cache if <24h old
|
||||
@@ -216,8 +238,6 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
tampered upstream returning garbage is caught by _validate_fetched_nodes()
|
||||
and falls through to whatever previously-trusted snapshot we have.
|
||||
"""
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
# 1. Trust on-disk cache if fresh.
|
||||
cached_nodes = _load_disk_cache()
|
||||
if cached_nodes is not None:
|
||||
@@ -230,14 +250,12 @@ def fetch_kiwisdr_nodes() -> list[dict]:
|
||||
fresh_nodes: list[dict] = []
|
||||
fetch_succeeded = False
|
||||
try:
|
||||
res = fetch_with_curl(_SOURCE_URL, timeout=20)
|
||||
if res and res.status_code == 200:
|
||||
fresh_nodes = _parse_mirror_payload(res.text)
|
||||
body = _fetch_mirror_payload_text()
|
||||
if body:
|
||||
fresh_nodes = _parse_mirror_payload(body)
|
||||
fetch_succeeded = True
|
||||
else:
|
||||
logger.warning(
|
||||
f"KiwiSDR fetch returned HTTP {res.status_code if res else 'no response'}"
|
||||
)
|
||||
logger.warning("KiwiSDR fetch returned no usable mirror payload")
|
||||
except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
|
||||
logger.warning(f"KiwiSDR fetch exception: {e}")
|
||||
|
||||
|
||||
@@ -27,11 +27,21 @@ def fetch_liveuamap():
|
||||
browser = p.chromium.launch(
|
||||
headless=True, args=["--disable-blink-features=AutomationControlled"]
|
||||
)
|
||||
from services.network_utils import outbound_user_agent
|
||||
|
||||
# Per-install handle (no shared Shadowbroker product token). Stealth remains
|
||||
# for Turnstile; see docs/OUTBOUND_DATA.md #348.
|
||||
playwright_ua = (
|
||||
f"Mozilla/5.0 (compatible; {outbound_user_agent('liveuamap')})"
|
||||
)
|
||||
context = browser.new_context(
|
||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
user_agent=playwright_ua,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
color_scheme="dark",
|
||||
)
|
||||
# Bound navigation and script evaluation so a stuck region cannot hang the slow pool.
|
||||
context.set_default_navigation_timeout(60_000)
|
||||
context.set_default_timeout(30_000)
|
||||
page = context.new_page()
|
||||
stealth_sync(page)
|
||||
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
"""LiveUAMap Playwright scraper opt-in (#348) — UI consent on Windows."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_OPT_IN_FILE = Path(__file__).resolve().parent.parent / "data" / "liveuamap_scraper_opt_in.json"
|
||||
_OPT_IN_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _env_flag(name: str) -> str:
    """Return environment variable *name* trimmed and lower-cased ("" if unset)."""
    raw = os.environ.get(name, "")
    trimmed = str(raw).strip()
    return trimmed.lower()
|
||||
|
||||
|
||||
def liveuamap_requires_ui_opt_in() -> bool:
    """Tell whether this platform needs explicit UI consent for the scraper.

    Only Windows (``os.name == "nt"``) requires the opt-in before Playwright
    contacts LiveUAMap.
    """
    running_on_windows = os.name == "nt"
    return running_on_windows
|
||||
|
||||
|
||||
def get_liveuamap_ui_opt_in() -> bool:
    """Read the persisted LiveUAMap UI consent flag.

    Returns ``False`` when the opt-in file does not exist, cannot be read or
    decoded, is not valid JSON, or does not contain a JSON object; otherwise
    the truthiness of its ``opted_in`` field.  Unreadable or malformed files
    are logged as a warning rather than raised.
    """
    try:
        # Read directly instead of checking ``exists()`` first, so a file
        # removed in between cannot cause an unexpected failure.
        raw = _OPT_IN_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        return False
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError for a non-UTF-8 file.
        logger.warning("LiveUAMap opt-in file unreadable: %s", e)
        return False

    try:
        payload = json.loads(raw)
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        logger.warning("LiveUAMap opt-in file unreadable: %s", e)
        return False

    if not isinstance(payload, dict):
        # Valid JSON such as ``[]`` or ``true`` has no ``opted_in`` field.
        logger.warning(
            "LiveUAMap opt-in file unreadable: %s",
            f"expected a JSON object, got {type(payload).__name__}",
        )
        return False
    return bool(payload.get("opted_in"))
|
||||
|
||||
|
||||
def set_liveuamap_ui_opt_in(opted_in: bool) -> None:
    """Persist the LiveUAMap UI consent flag to the opt-in file.

    The parent directory is created when missing; the write itself is
    serialized through ``_OPT_IN_LOCK``.
    """
    _OPT_IN_FILE.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps({"opted_in": bool(opted_in)}, indent=2)
    with _OPT_IN_LOCK:
        _OPT_IN_FILE.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def liveuamap_scraper_enabled() -> bool:
    """Whether the Playwright LiveUAMap scraper may run on this backend.

    ``SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER`` wins when it holds a recognised
    on/off value.  Otherwise the scraper is enabled, except on platforms that
    require UI consent, where the persisted opt-in decides.
    """
    flag = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
    if flag in {"1", "true", "yes", "on"}:
        return True
    if flag in {"0", "false", "no", "off"}:
        return False
    return get_liveuamap_ui_opt_in() if liveuamap_requires_ui_opt_in() else True
|
||||
|
||||
|
||||
def liveuamap_scraper_status() -> dict[str, Any]:
    """Summarise the scraper's opt-in state.

    Returns a dict with ``platform_requires_opt_in``, ``ui_opted_in``,
    ``scraper_enabled`` and ``env_override`` (``"on"``, ``"off"`` or ``None``
    when the environment variable holds no recognised value).
    """
    flag = _env_flag("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER")
    if flag in {"1", "true", "yes", "on"}:
        override = "on"
    else:
        override = "off" if flag in {"0", "false", "no", "off"} else None

    consent_given = get_liveuamap_ui_opt_in()
    consent_needed = liveuamap_requires_ui_opt_in()
    return {
        "platform_requires_opt_in": consent_needed,
        "ui_opted_in": consent_given,
        "scraper_enabled": liveuamap_scraper_enabled(),
        "env_override": override,
    }
|
||||
@@ -38,6 +38,11 @@ _REVOCATION_TTL_CACHE: dict[str, dict[str, Any]] = {}
|
||||
_REVOCATION_TTL_LOCK = threading.Lock()
|
||||
_REVOCATION_REFRESH_LOCK = threading.Lock()
|
||||
_REVOCATION_REFRESH_FAIL_FAST_WINDOW_S = 5.0
|
||||
|
||||
|
||||
def _request_scope_path(request: Request) -> str:
    """Return the ``path`` entry of the request's ``scope`` mapping.

    Yields ``""`` when the request has no ``scope`` attribute, the scope is
    empty, or it carries no truthy ``path``.
    """
    scope = getattr(request, "scope", None)
    if not scope:
        return ""
    path = scope.get("path")
    return str(path) if path else ""
|
||||
_REVOCATION_REFRESH_RETRY_AFTER_S = 5
|
||||
_REVOCATION_PRECHECK_UNAVAILABLE_DETAIL = "Signed event integrity preflight unavailable"
|
||||
|
||||
@@ -166,7 +171,7 @@ def _canonical_signed_write_retry_payload(
|
||||
signed_context = build_signed_context(
|
||||
event_type=prepared.event_type,
|
||||
kind=prepared.kind.value,
|
||||
endpoint=str(request.url.path or ""),
|
||||
endpoint=_request_scope_path(request),
|
||||
lane_floor=_content_private_required_transport_tier(prepared.kind),
|
||||
sequence_domain=_signed_context_sequence_domain(prepared),
|
||||
node_id=prepared.node_id,
|
||||
@@ -540,7 +545,7 @@ def _apply_signed_context_policy(prepared: "PreparedSignedWrite", request: Reque
|
||||
ok, reason = validate_signed_context(
|
||||
event_type=prepared.event_type,
|
||||
kind=prepared.kind.value,
|
||||
endpoint=str(request.url.path or ""),
|
||||
endpoint=_request_scope_path(request),
|
||||
lane_floor=_content_private_required_transport_tier(prepared.kind),
|
||||
sequence_domain=_signed_context_sequence_domain(prepared),
|
||||
node_id=prepared.node_id,
|
||||
|
||||
@@ -234,12 +234,12 @@ def _fetch_dm_prekey_bundle_from_public_lookup(lookup_token: str) -> dict[str, A
|
||||
# Generic UA: any peer-facing crypto request should not carry a
|
||||
# fork-specific identifier — that turns prekey lookups into a
|
||||
# software-fingerprinting beacon.
|
||||
from services.network_utils import DEFAULT_USER_AGENT
|
||||
from services.network_utils import default_user_agent
|
||||
request = urllib.request.Request(
|
||||
f"{normalized_peer_url}/api/mesh/dm/prekey-bundle?{encoded}",
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"User-Agent": default_user_agent(),
|
||||
},
|
||||
method="GET",
|
||||
)
|
||||
|
||||
@@ -34,9 +34,9 @@ _session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
||||
# upstream's only recourse was to block "Shadowbroker" as a whole — which
|
||||
# would take out every other install too.
|
||||
#
|
||||
# Fix: give each install a stable pseudonymous handle and include it in
|
||||
# the User-Agent. Now an upstream can rate-limit or block the offending
|
||||
# operator without affecting anyone else.
|
||||
# Fix: give each install a stable pseudonymous handle used as the entire
|
||||
# User-Agent product token (no shared "Shadowbroker" label). Upstreams see
|
||||
# ``operator-7f3a92`` (or ``OPERATOR_HANDLE``), not one monolithic app name.
|
||||
#
|
||||
# The handle:
|
||||
#
|
||||
@@ -51,7 +51,6 @@ _session.mount("http://", HTTPAdapter(max_retries=_retry, pool_maxsize=10))
|
||||
# - Is NEVER mixed into mesh / Wormhole / Infonet identity. This layer is
|
||||
# strictly for public third-party API attribution.
|
||||
|
||||
_SHADOWBROKER_VERSION = "0.9"
|
||||
_OPERATOR_HANDLE_FILE = (
|
||||
Path(__file__).parent.parent / "data" / "operator_handle.json"
|
||||
)
|
||||
@@ -146,7 +145,12 @@ def get_operator_handle() -> str:
|
||||
# 3. On-disk handle from a previous run.
|
||||
persisted = _load_persisted_operator_handle()
|
||||
if persisted:
|
||||
_OPERATOR_HANDLE_CACHE = _normalize_handle(persisted)
|
||||
normalized = _normalize_handle(persisted)
|
||||
# Migrate legacy auto-generated handles (pre-Round-7a ``shadow-`` prefix).
|
||||
if normalized.startswith("shadow-"):
|
||||
normalized = f"operator-{normalized[len('shadow-'):]}"
|
||||
_persist_operator_handle(normalized)
|
||||
_OPERATOR_HANDLE_CACHE = normalized
|
||||
return _OPERATOR_HANDLE_CACHE
|
||||
|
||||
# 4. Generate, persist, return.
|
||||
@@ -170,41 +174,21 @@ def _normalize_handle(raw: str) -> str:
|
||||
return safe[:48] if safe else "anonymous"
|
||||
|
||||
|
||||
_CONTACT_URL = "https://github.com/BigBodyCobain/Shadowbroker/issues"
|
||||
|
||||
|
||||
def outbound_user_agent(purpose: str = "") -> str:
|
||||
"""Build a User-Agent for an outbound third-party HTTP request.
|
||||
|
||||
Returns something like::
|
||||
Returns the per-install handle only, e.g. ``operator-7f3a92`` or
|
||||
``operator-7f3a92 (purpose: wikipedia)``. No shared project name — so
|
||||
upstream abuse teams cannot block every install with one ``Shadowbroker``
|
||||
rule.
|
||||
|
||||
Shadowbroker/0.9 (operator: shadow-7f3a92; purpose: wikipedia;
|
||||
+https://github.com/BigBodyCobain/Shadowbroker/issues)
|
||||
|
||||
The ``purpose`` is optional but recommended — it tells the upstream
|
||||
what feature of ours is making the call (``wikipedia``, ``openmhz``,
|
||||
``nominatim``, etc.), which makes their logs and our complaints
|
||||
actionable.
|
||||
|
||||
Every outbound call in the backend that previously sent a custom
|
||||
User-Agent should call this helper instead. Centralizing here means:
|
||||
- one place to change the contact URL,
|
||||
- one place to bump the version on release,
|
||||
- one place a Wikimedia / OpenMHz operator can reach to ask for
|
||||
the project to back off, with a per-install handle so they can
|
||||
target the specific install instead of the project as a whole.
|
||||
Set ``SHADOWBROKER_USER_AGENT`` to override the entire string if needed.
|
||||
"""
|
||||
handle = get_operator_handle()
|
||||
if purpose:
|
||||
purpose_clean = _normalize_handle(purpose)
|
||||
return (
|
||||
f"Shadowbroker/{_SHADOWBROKER_VERSION} "
|
||||
f"(operator: {handle}; purpose: {purpose_clean}; +{_CONTACT_URL})"
|
||||
)
|
||||
return (
|
||||
f"Shadowbroker/{_SHADOWBROKER_VERSION} "
|
||||
f"(operator: {handle}; +{_CONTACT_URL})"
|
||||
)
|
||||
return f"{handle} (purpose: {purpose_clean})"
|
||||
return handle
|
||||
|
||||
|
||||
def _reset_operator_handle_cache_for_tests() -> None:
|
||||
@@ -215,19 +199,13 @@ def _reset_operator_handle_cache_for_tests() -> None:
|
||||
_OPERATOR_HANDLE_CACHE = ""
|
||||
|
||||
|
||||
# Default outbound User-Agent. Retained for backwards compatibility with
|
||||
# call sites that haven't been migrated to ``outbound_user_agent()`` yet.
|
||||
# Operators who want full per-install attribution should set the
|
||||
# ``OPERATOR_HANDLE`` setting and migrate call sites incrementally.
|
||||
#
|
||||
# Operators who run a public-facing relay can also override the whole UA
|
||||
# string via the ``SHADOWBROKER_USER_AGENT`` env var. That override
|
||||
# completely bypasses the per-operator helper; only use it if you know
|
||||
# what you're doing.
|
||||
DEFAULT_USER_AGENT = os.environ.get(
|
||||
"SHADOWBROKER_USER_AGENT",
|
||||
f"Shadowbroker/{_SHADOWBROKER_VERSION}",
|
||||
)
|
||||
def default_user_agent() -> str:
    """Return the User-Agent used by ``fetch_with_curl`` and legacy callers.

    A non-blank ``SHADOWBROKER_USER_AGENT`` environment variable wins and is
    returned with surrounding whitespace removed; otherwise the value comes
    from ``outbound_user_agent()`` called without a purpose.
    """
    override = os.environ.get("SHADOWBROKER_USER_AGENT") or ""
    override = override.strip()
    return override if override else outbound_user_agent()
|
||||
|
||||
|
||||
# Find bash for curl fallback — Git bash's curl has the TLS features
|
||||
# needed to pass CDN fingerprint checks (brotli, zstd, libpsl)
|
||||
@@ -283,7 +261,7 @@ def fetch_with_curl(url, method="GET", json_data=None, timeout=15, headers=None,
|
||||
both Python requests and the barebones Windows system curl.
|
||||
"""
|
||||
default_headers = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"User-Agent": default_user_agent(),
|
||||
}
|
||||
if headers:
|
||||
default_headers.update(headers)
|
||||
|
||||
@@ -12,6 +12,8 @@ logger = logging.getLogger(__name__)
|
||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "news_feeds.json"
|
||||
MAX_FEEDS = 50
|
||||
_FEED_URL_REPLACEMENTS = {
|
||||
"http://feeds.bbci.co.uk/news/world/rss.xml": "https://feeds.bbci.co.uk/news/world/rss.xml",
|
||||
"http://www.news.cn/english/rss/worldrss.xml": "https://www.news.cn/english/rss/worldrss.xml",
|
||||
"https://www.channelnewsasia.com/rssfeed/8395986": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml",
|
||||
}
|
||||
_DEAD_FEED_URLS = {
|
||||
@@ -27,7 +29,7 @@ _DEAD_FEED_URLS = {
|
||||
|
||||
DEFAULT_FEEDS = [
|
||||
{"name": "NPR", "url": "https://feeds.npr.org/1004/rss.xml", "weight": 4},
|
||||
{"name": "BBC", "url": "http://feeds.bbci.co.uk/news/world/rss.xml", "weight": 3},
|
||||
{"name": "BBC", "url": "https://feeds.bbci.co.uk/news/world/rss.xml", "weight": 3},
|
||||
{"name": "AlJazeera", "url": "https://www.aljazeera.com/xml/rss/all.xml", "weight": 2},
|
||||
{"name": "NYT", "url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml", "weight": 1},
|
||||
{"name": "GDACS", "url": "https://www.gdacs.org/xml/rss.xml", "weight": 5},
|
||||
@@ -35,7 +37,7 @@ DEFAULT_FEEDS = [
|
||||
{"name": "Bellingcat", "url": "https://www.bellingcat.com/feed/", "weight": 4},
|
||||
{"name": "Guardian", "url": "https://www.theguardian.com/world/rss", "weight": 3},
|
||||
{"name": "TASS", "url": "https://tass.com/rss/v2.xml", "weight": 2},
|
||||
{"name": "Xinhua", "url": "http://www.news.cn/english/rss/worldrss.xml", "weight": 2},
|
||||
{"name": "Xinhua", "url": "https://www.news.cn/english/rss/worldrss.xml", "weight": 2},
|
||||
{"name": "CNA", "url": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml", "weight": 3},
|
||||
{"name": "Mercopress", "url": "https://en.mercopress.com/rss/", "weight": 3},
|
||||
{"name": "SCMP", "url": "https://www.scmp.com/rss/91/feed", "weight": 4},
|
||||
|
||||
@@ -83,6 +83,10 @@ READ_COMMANDS = frozenset({
|
||||
"sar_pin_click",
|
||||
# Analysis zones (OpenClaw map overlays)
|
||||
"list_analysis_zones",
|
||||
# Recon / OSINT toolkit (server-side proxies, SSRF guarded)
|
||||
"osint_lookup",
|
||||
"osint_tools",
|
||||
"entity_expand",
|
||||
})
|
||||
|
||||
WRITE_COMMANDS = frozenset({
|
||||
@@ -112,6 +116,8 @@ WRITE_COMMANDS = frozenset({
|
||||
"place_analysis_zone",
|
||||
"delete_analysis_zone",
|
||||
"clear_analysis_zones",
|
||||
# Active recon (subnet device discovery)
|
||||
"osint_sweep",
|
||||
})
|
||||
|
||||
|
||||
@@ -780,6 +786,7 @@ def _dispatch_command(cmd: str, args: dict[str, Any]) -> dict[str, Any]:
|
||||
query=str(args.get("query", "") or ""),
|
||||
limit=args.get("limit", 10),
|
||||
include_gdelt=bool(args.get("include_gdelt", True)),
|
||||
include_telegram=bool(args.get("include_telegram", True)),
|
||||
)
|
||||
if _wants_compact(args):
|
||||
return {"ok": True, "data": _compact_query_result(result), "format": "compressed_v1"}
|
||||
@@ -846,6 +853,26 @@ def _dispatch_command(cmd: str, args: dict[str, Any]) -> dict[str, Any]:
|
||||
return {"ok": True, "data": _compact_query_result(result), "format": "compressed_v1"}
|
||||
return {"ok": True, "data": result}
|
||||
|
||||
if cmd == "osint_lookup":
|
||||
from services.osint.openclaw_recon import run_osint_lookup
|
||||
tool = str(args.get("tool", "") or args.get("lookup", "") or args.get("type", "") or "")
|
||||
result = run_osint_lookup(tool, args)
|
||||
return {"ok": True, "data": result, "tool": tool.strip().lower()}
|
||||
|
||||
if cmd == "osint_tools":
|
||||
from services.osint.openclaw_recon import osint_tool_help
|
||||
return {"ok": True, "data": osint_tool_help()}
|
||||
|
||||
if cmd == "osint_sweep":
|
||||
from services.osint.openclaw_recon import run_osint_sweep
|
||||
result = run_osint_sweep(args)
|
||||
return {"ok": True, "data": result}
|
||||
|
||||
if cmd == "entity_expand":
|
||||
from services.osint.openclaw_recon import run_entity_expand
|
||||
result = run_entity_expand(args)
|
||||
return {"ok": True, "data": result}
|
||||
|
||||
if cmd == "get_report":
|
||||
from services.telemetry import get_cached_telemetry_refs, get_cached_slow_telemetry_refs
|
||||
fast = get_cached_telemetry_refs()
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
"""Operator-initiated OSINT lookups (server-side proxies)."""
|
||||
@@ -0,0 +1,492 @@
|
||||
"""Server-side OSINT lookups (Osiris port, HTTPS outbound only)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.sanctions.ofac import match_exact, search_sanctions
|
||||
from services.ssrf_guard import safe_get, validate_domain, validate_host
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_IPV4_RE = re.compile(r"^(\d{1,3}\.){3}\d{1,3}$")
|
||||
_IPV6_RE = re.compile(r"^[0-9a-fA-F:]+$")
|
||||
_CVE_RE = re.compile(r"^CVE-\d{4}-\d{4,}$", re.I)
|
||||
_ASN_RE = re.compile(r"^(AS)?\d+$", re.I)
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _json_get(url: str, *, timeout: float = 8.0, headers: dict[str, str] | None = None) -> Any:
    """Fetch ``url`` via ``fetch_with_curl`` and return the decoded JSON body.

    When ``headers`` is empty or omitted, ``{"Accept": "application/json"}``
    is sent. Returns ``None`` for any non-200 status or when the body cannot
    be decoded by ``resp.json()``.
    """
    request_headers = headers if headers else {"Accept": "application/json"}
    resp = fetch_with_curl(url, timeout=timeout, headers=request_headers)
    if resp.status_code == 200:
        try:
            payload = resp.json()
        except Exception:
            # Undecodable body is treated the same as "no data".
            payload = None
        return payload
    return None
|
||||
|
||||
|
||||
def _sanctions_hits(*values: str) -> list[dict[str, Any]] | None:
|
||||
hits: list[dict[str, Any]] = []
|
||||
seen: set[str] = set()
|
||||
for value in values:
|
||||
if not value or value in seen:
|
||||
continue
|
||||
seen.add(value)
|
||||
entries = match_exact(value)
|
||||
if entries:
|
||||
hits.append({"matched_value": value, "entries": entries})
|
||||
return hits or None
|
||||
|
||||
|
||||
def lookup_ip(ip: str) -> dict[str, Any]:
    """Geolocate an IP address and derive a coarse reputation summary.

    The address is parsed with ``ipaddress`` (surrounding whitespace and IPv6
    brackets are tolerated), screened by ``validate_host``, then looked up on
    ip-api.com. When that lookup reports success the result gains ``geo`` and
    ``reputation`` sections, plus ``sanctions_match`` when the org, ISP or AS
    name produces a hit in ``_sanctions_hits``.

    Args:
        ip: IPv4 or IPv6 address, optionally wrapped in ``[]``.

    Returns:
        Dict with ``ip`` and ``timestamp``; ``geo``, ``reputation`` and
        ``sanctions_match`` appear only when data was obtained.

    Raises:
        ValueError: If ``ip`` is not a parseable address, or ``validate_host``
            rejects it (its ``reason`` becomes the message).
    """
    candidate = str(ip or "").strip().strip("[]")
    try:
        # The former regex pre-check let through strings such as "dead",
        # ":::" or "999.999.999.999"; parse the address properly instead.
        ipaddress.ip_address(candidate)
    except ValueError as exc:
        raise ValueError("Invalid IP format") from exc

    check = validate_host(candidate)
    if not check.get("ok"):
        raise ValueError(check.get("reason", "blocked IP"))

    results: dict[str, Any] = {"ip": candidate, "timestamp": _now_iso()}
    fields = (
        "status,message,continent,country,countryCode,region,regionName,city,zip,"
        "lat,lon,timezone,isp,org,as,asname,mobile,proxy,hosting,query"
    )
    geo = _json_get(f"https://ip-api.com/json/{quote(candidate)}?fields={fields}", timeout=5)
    if not (isinstance(geo, dict) and geo.get("status") == "success"):
        return results

    results["geo"] = {
        "country": geo.get("country"),
        "country_code": geo.get("countryCode"),
        "region": geo.get("regionName"),
        "city": geo.get("city"),
        "lat": geo.get("lat"),
        "lon": geo.get("lon"),
        "timezone": geo.get("timezone"),
        "isp": geo.get("isp"),
        "org": geo.get("org"),
        "as_number": geo.get("as"),
        "as_name": geo.get("asname"),
        "is_mobile": geo.get("mobile"),
        "is_proxy": geo.get("proxy"),
        "is_hosting": geo.get("hosting"),
    }
    # Proxy outranks hosting when both flags are set.
    if geo.get("proxy"):
        risk_level = "HIGH"
    elif geo.get("hosting"):
        risk_level = "MEDIUM"
    else:
        risk_level = "LOW"
    results["reputation"] = {
        "is_proxy": bool(geo.get("proxy")),
        "is_hosting": bool(geo.get("hosting")),
        "is_mobile": bool(geo.get("mobile")),
        "risk_level": risk_level,
    }
    sm = _sanctions_hits(geo.get("org") or "", geo.get("isp") or "", geo.get("asname") or "")
    if sm:
        results["sanctions_match"] = {"source": "OFAC SDN", "hits": sm}
    return results
|
||||
|
||||
|
||||
def lookup_dns(domain: str) -> dict[str, Any]:
    """Resolve common DNS record types for ``domain`` via dns.google.

    Queries A, AAAA, MX, NS, TXT, CNAME and SOA one after another. Each
    record type maps to a list of ``{"name", "type", "ttl", "data"}`` dicts
    (empty when the query returned nothing usable). ``summary`` lists the
    ``data`` values of the A, MX and NS answers and the total answer count.

    Raises:
        ValueError: If ``validate_domain`` rejects ``domain``.
    """
    if not validate_domain(domain):
        raise ValueError("Invalid domain format")

    records: dict[str, list[dict[str, Any]]] = {}
    results: dict[str, Any] = {"domain": domain, "records": records, "timestamp": _now_iso()}

    for record_type in ("A", "AAAA", "MX", "NS", "TXT", "CNAME", "SOA"):
        payload = _json_get(
            f"https://dns.google/resolve?name={quote(domain)}&type={record_type}",
            timeout=5,
        )
        raw_answers = (payload.get("Answer") or []) if isinstance(payload, dict) else []
        records[record_type] = [
            {
                "name": entry.get("name"),
                "type": entry.get("type"),
                "ttl": entry.get("TTL"),
                "data": entry.get("data"),
            }
            for entry in raw_answers
        ]

    def _values(record_type: str) -> list[Any]:
        # Non-empty ``data`` fields of one record type.
        return [row["data"] for row in (records.get(record_type) or []) if row.get("data")]

    results["summary"] = {
        "ip_addresses": _values("A"),
        "mail_servers": _values("MX"),
        "nameservers": _values("NS"),
        "total_records": sum(len(rows) for rows in records.values()),
    }
    return results
|
||||
|
||||
|
||||
def lookup_whois(domain: str) -> dict[str, Any]:
    """Collect RDAP registration data and probe HTTP security headers.

    Two independent steps:

    1. RDAP (rdap.org): handle, status, events, nameservers and any entity
       that carries an ``fn`` or ``org`` vCard value. Entity names and orgs
       are passed to ``_sanctions_hits``.
    2. HTTPS probe of ``https://<domain>`` through ``safe_get``: selected
       response headers plus a weighted ``security_score``. Any exception in
       this step is logged at debug level and otherwise ignored, so the RDAP
       part is still returned.

    Args:
        domain: Domain name to inspect.

    Returns:
        Dict with ``domain`` and ``timestamp``; ``rdap``, ``registration``,
        ``expiration``, ``last_changed``, ``sanctions_match``, ``http`` and
        ``security_score`` are added when the corresponding data is available.

    Raises:
        ValueError: If ``validate_domain`` rejects ``domain``.
    """
    if not validate_domain(domain):
        raise ValueError("Invalid domain format")
    results: dict[str, Any] = {"domain": domain, "timestamp": _now_iso()}

    rdap = _json_get(f"https://rdap.org/domain/{quote(domain)}", timeout=8)
    if isinstance(rdap, dict):
        entities = []
        for ent in rdap.get("entities") or []:
            if not isinstance(ent, dict):
                continue
            vcard = ent.get("vcardArray")
            name = org = None
            if isinstance(vcard, list) and len(vcard) > 1 and isinstance(vcard[1], list):
                for row in vcard[1]:
                    # Index 0 is read as the property name and index 3 as its
                    # value; skip short or non-sequence rows instead of
                    # letting an IndexError/TypeError abort the whole lookup.
                    if not isinstance(row, (list, tuple)) or len(row) < 4:
                        continue
                    if row[0] == "fn":
                        name = row[3]
                    if row[0] == "org":
                        org = row[3]
            if name or org:
                entities.append(
                    {"handle": ent.get("handle"), "roles": ent.get("roles"), "name": name, "org": org}
                )
        events = [
            {"action": e.get("eventAction"), "date": e.get("eventDate")}
            for e in (rdap.get("events") or [])
            if isinstance(e, dict)
        ]
        results["rdap"] = {
            "handle": rdap.get("handle"),
            "name": rdap.get("ldhName"),
            "status": rdap.get("status"),
            "events": events,
            "nameservers": [
                ns.get("ldhName") for ns in (rdap.get("nameservers") or []) if isinstance(ns, dict)
            ],
            "entities": entities,
        }
        results["registration"] = next((e["date"] for e in events if e["action"] == "registration"), None)
        results["expiration"] = next((e["date"] for e in events if e["action"] == "expiration"), None)
        results["last_changed"] = next((e["date"] for e in events if e["action"] == "last changed"), None)
        # Only strings can be matched (and hashed) by _sanctions_hits; a
        # structured, list-valued vCard value is skipped rather than crashing.
        names = [e["name"] for e in entities if isinstance(e.get("name"), str)]
        orgs = [e["org"] for e in entities if isinstance(e.get("org"), str)]
        sm = _sanctions_hits(*names, *orgs)
        if sm:
            results["sanctions_match"] = {"source": "OFAC SDN", "hits": sm}

    # Same scoring as before (HSTS and CSP count 1 + 2 bonus each), expressed
    # as weights so the reported maximum cannot drift from the real one.
    # The old hard-coded "max": 7 was lower than the attainable score of 9.
    header_weights = {
        "strict-transport-security": 3,
        "content-security-policy": 3,
        "x-frame-options": 1,
        "x-content-type-options": 1,
        "referrer-policy": 1,
    }
    reported_headers = (
        "server",
        "x-powered-by",
        "x-frame-options",
        "strict-transport-security",
        "content-security-policy",
        "x-content-type-options",
        "x-xss-protection",
        "referrer-policy",
        "permissions-policy",
    )
    try:
        res = safe_get(f"https://{domain}", timeout=5, headers={"User-Agent": "Shadowbroker-OSINT/1.0"})
        headers = {}
        for h in reported_headers:
            val = res.headers.get(h)
            if val:
                headers[h] = val
        score = sum(weight for header, weight in header_weights.items() if header in headers)
        results["http"] = {"status": res.status_code, "headers": headers, "final_url": res.url}
        results["security_score"] = {
            "score": score,
            "max": sum(header_weights.values()),
            "grade": "A" if score >= 5 else "B" if score >= 3 else "C" if score >= 1 else "F",
        }
    except Exception as exc:
        # Best-effort probe: an unreachable site must not fail the lookup.
        logger.debug("WHOIS header probe failed for %s: %s", domain, exc)
    return results
|
||||
|
||||
|
||||
def lookup_certs(domain: str) -> dict[str, Any]:
    """List certificate-transparency entries for ``domain`` from crt.sh.

    Only the first 200 returned entries are inspected. Entries are
    de-duplicated on ``common_name`` + ``serial_number``; names found in
    ``name_value`` are collected as subdomains when they equal ``domain`` or
    end with ``.<domain>``.

    Args:
        domain: Registered domain to search (queried as ``%.<domain>``).

    Returns:
        Dict with ``domain``, up to 50 ``certificates``, up to 100 sorted
        ``subdomains``, ``total_found`` and ``timestamp``. When crt.sh does
        not answer 200 the dict instead carries an empty ``certificates``
        list and an ``error`` message.

    Raises:
        ValueError: If ``validate_domain`` rejects ``domain``.
    """
    if not validate_domain(domain):
        raise ValueError("Invalid domain format")
    resp = fetch_with_curl(
        f"https://crt.sh/?q=%25.{quote(domain)}&output=json",
        timeout=10,
        headers={"User-Agent": "Shadowbroker-OSINT/1.0"},
    )
    if resp.status_code != 200:
        return {"domain": domain, "certificates": [], "error": "crt.sh unavailable"}
    try:
        certs = resp.json()
    except Exception:
        certs = []
    if not isinstance(certs, list):
        # A JSON object or scalar body cannot be sliced or iterated as entries.
        certs = []

    apex = domain.lower().rstrip(".")
    dotted_apex = "." + apex
    seen: set[str] = set()
    subdomains: set[str] = set()
    unique: list[dict[str, Any]] = []
    for cert in certs[:200]:
        if not isinstance(cert, dict):
            continue
        key = f"{cert.get('common_name')}-{cert.get('serial_number')}"
        if key in seen:
            continue
        seen.add(key)
        for name in (cert.get("name_value") or "").split("\n"):
            clean = name.strip().replace("*.", "").lower()
            # Require a label boundary: a bare suffix test would also accept
            # "notexample.com" when the domain is "example.com".
            if clean == apex or clean.endswith(dotted_apex):
                subdomains.add(clean)
        unique.append(
            {
                "id": cert.get("id"),
                "issuer": cert.get("issuer_name"),
                "common_name": cert.get("common_name"),
                "not_before": cert.get("not_before"),
                "not_after": cert.get("not_after"),
            }
        )
    return {
        "domain": domain,
        "certificates": unique[:50],
        "subdomains": sorted(subdomains)[:100],
        "total_found": len(certs),
        "timestamp": _now_iso(),
    }
|
||||
|
||||
|
||||
def lookup_threats(query: str | None = None) -> dict[str, Any]:
    """Return recent OTX pulses and, optionally, indicator data for ``query``.

    Without ``query`` only the latest pulses (at most 10) are returned. With
    an IPv4-shaped query the Tor bulk exit list and the OTX IPv4 indicator
    are consulted; with anything ``validate_domain`` accepts, the OTX domain
    indicator. Whenever a query is given, ``threat_level`` is derived from
    the OTX pulse count (HIGH above 5, MEDIUM above 0, otherwise LOW).
    """
    results: dict[str, Any] = {"timestamp": _now_iso()}

    feed = _json_get("https://otx.alienvault.com/api/v1/pulses/activity?limit=10", timeout=8)
    if isinstance(feed, dict):
        summaries = []
        for pulse in (feed.get("results") or [])[:10]:
            summaries.append(
                {
                    "name": pulse.get("name"),
                    "description": (pulse.get("description") or "")[:200],
                    "created": pulse.get("created"),
                    "tags": (pulse.get("tags") or [])[:5],
                    "adversary": pulse.get("adversary"),
                    "indicators_count": pulse.get("indicator_count"),
                }
            )
        results["pulses"] = summaries

    if not query:
        return results

    if _IPV4_RE.match(query):
        try:
            tor_resp = fetch_with_curl("https://check.torproject.org/torbulkexitlist", timeout=5)
            if tor_resp.status_code == 200:
                results["tor_exit_node"] = query in (tor_resp.text or "").splitlines()
            else:
                results["tor_exit_node"] = None
        except Exception:
            # Unknown rather than "not an exit node" when the list is unavailable.
            results["tor_exit_node"] = None
        indicator = _json_get(
            f"https://otx.alienvault.com/api/v1/indicators/IPv4/{quote(query)}/general", timeout=5
        )
        if isinstance(indicator, dict):
            results["otx"] = {
                "reputation": indicator.get("reputation"),
                "pulse_count": (indicator.get("pulse_info") or {}).get("count", 0),
                "country": indicator.get("country_name"),
                "asn": indicator.get("asn"),
            }
    elif validate_domain(query):
        indicator = _json_get(
            f"https://otx.alienvault.com/api/v1/indicators/domain/{quote(query)}/general", timeout=5
        )
        if isinstance(indicator, dict):
            results["otx"] = {"pulse_count": (indicator.get("pulse_info") or {}).get("count", 0)}

    pulse_count = (results.get("otx") or {}).get("pulse_count", 0)
    if pulse_count > 5:
        results["threat_level"] = "HIGH"
    elif pulse_count > 0:
        results["threat_level"] = "MEDIUM"
    else:
        results["threat_level"] = "LOW"
    return results
|
||||
|
||||
|
||||
def lookup_bgp(query: str) -> dict[str, Any]:
    """Look up BGP data on api.bgpview.io for an IPv4 address or AS number.

    IPv4-shaped queries return ``type == "ip"`` with the raw ``data`` payload
    under ``ip``. AS queries (digits, optional ``AS`` prefix) return
    ``type == "asn"`` with ``asn``, truncated ``prefixes`` and ``peers``
    sections for whichever of the three requests reported ``status == "ok"``.

    Raises:
        ValueError: If ``query`` matches neither pattern.
    """
    results: dict[str, Any] = {"query": query, "timestamp": _now_iso()}

    if _IPV4_RE.match(query):
        payload = _json_get(f"https://api.bgpview.io/ip/{quote(query)}", timeout=8)
        if isinstance(payload, dict) and payload.get("status") == "ok":
            results["ip"] = payload.get("data")
        results["type"] = "ip"
        return results

    if not _ASN_RE.match(query):
        raise ValueError("Unrecognized query format. Use IP address or AS number.")

    asn_num = re.sub(r"^AS", "", query, flags=re.I)
    asn_url = f"https://api.bgpview.io/asn/{asn_num}"
    asn = _json_get(asn_url, timeout=8)
    prefixes = _json_get(f"{asn_url}/prefixes", timeout=8)
    peers = _json_get(f"{asn_url}/peers", timeout=8)

    def _ok(reply: Any) -> bool:
        # A reply counts only when it is a dict reporting status "ok".
        return isinstance(reply, dict) and reply.get("status") == "ok"

    if _ok(asn):
        results["asn"] = asn.get("data")
    if _ok(prefixes):
        prefix_data = prefixes.get("data") or {}
        v4 = prefix_data.get("ipv4_prefixes") or []
        v6 = prefix_data.get("ipv6_prefixes") or []
        results["prefixes"] = {
            "ipv4": v4[:20],
            "ipv6": v6[:10],
            "total_v4": len(v4),
            "total_v6": len(v6),
        }
    if _ok(peers):
        peer_data = peers.get("data") or {}
        v4_peers = peer_data.get("ipv4_peers") or []
        results["peers"] = {"upstream": v4_peers[:10], "total": len(v4_peers)}
    results["type"] = "asn"
    return results
|
||||
|
||||
|
||||
def lookup_sanctions(query: str, *, schema: str | None = None, limit: int = 25) -> dict[str, Any]:
    """Search the sanctions index and wrap the matches in a report dict.

    ``query``, ``schema`` and ``limit`` are forwarded unchanged to
    ``search_sanctions``; the report echoes the query and schema and adds the
    match count, the matches, a source label and a timestamp.
    """
    hits = search_sanctions(query, schema=schema, limit=limit)
    report: dict[str, Any] = {"query": query, "schema": schema}
    report["total"] = len(hits)
    report["matches"] = hits
    report["source"] = "OpenSanctions / US OFAC SDN"
    report["timestamp"] = _now_iso()
    return report
|
||||
|
||||
|
||||
def lookup_cve(cve: str) -> dict[str, Any]:
    """Fetch a CVE record, trying cveawg.mitre.org first and cve.circl.lu second.

    The MITRE answer yields ``id``, ``description`` (the last non-empty
    English CNA description, or "No description.") and ``timestamp``. The
    CIRCL fallback additionally carries ``cvss`` and up to five
    ``references``.

    Raises:
        ValueError: If ``cve`` is not CVE-shaped, or neither source returned
            a usable record.
    """
    if not _CVE_RE.match(cve):
        raise ValueError("Invalid CVE format")
    cve_id = cve.upper()

    primary = _json_get(f"https://cveawg.mitre.org/api/cve/{quote(cve_id)}", timeout=8)
    if isinstance(primary, dict) and primary.get("cveMetadata"):
        meta = primary["cveMetadata"]
        blocks = (primary.get("containers") or {}).get("cna", {}).get("descriptions") or []
        english = [blk.get("value") for blk in blocks if blk.get("lang") == "en"]
        # The last non-empty English text wins, as in a running overwrite.
        desc = next((text for text in reversed(english) if text), "")
        return {
            "id": meta.get("cveId", cve_id),
            "description": desc or "No description.",
            "timestamp": _now_iso(),
        }

    secondary = _json_get(f"https://cve.circl.lu/api/cve/{quote(cve_id)}", timeout=8)
    if not isinstance(secondary, dict):
        raise ValueError("CVE not found")
    return {
        "id": secondary.get("id", cve_id),
        "description": secondary.get("summary") or "No description.",
        "cvss": secondary.get("cvss"),
        "references": (secondary.get("references") or [])[:5],
        "timestamp": _now_iso(),
    }
|
||||
|
||||
|
||||
def lookup_mac(mac: str) -> dict[str, Any]:
    """Resolve the vendor of a MAC address via api.macvendors.com.

    The input is upper-cased and stripped of every character other than hex
    digits, ``:`` and ``-``. If nothing remains, no request is made.

    Args:
        mac: MAC address or OUI prefix in any common notation.

    Returns:
        ``{"mac": <cleaned input>, "vendor": <name or "Not Found">}``.
    """
    clean = re.sub(r"[^A-F0-9:-]", "", str(mac or "").strip().upper())
    not_found = {"mac": clean, "vendor": "Not Found"}
    if not clean:
        # Nothing to look up; avoid a pointless request to the bare endpoint.
        return not_found

    resp = fetch_with_curl(
        f"https://api.macvendors.com/{quote(clean)}",
        timeout=8,
        headers={"Accept": "application/json"},
    )
    if resp.status_code != 200:
        return not_found
    try:
        data = resp.json()
    except Exception:
        # NOTE(review): the endpoint appears to answer with a bare vendor
        # string rather than JSON - confirm against the API docs. Falling
        # back to the raw text keeps such answers from being reported as
        # "Not Found".
        data = (resp.text or "").strip()

    if isinstance(data, dict):
        return {"mac": clean, "vendor": data.get("company") or data.get("organization") or "Not Found"}
    if isinstance(data, str) and data:
        return {"mac": clean, "vendor": data}
    return not_found
|
||||
|
||||
|
||||
def lookup_github(username: str) -> dict[str, Any]:
    """Return public profile fields and recently updated repos of a GitHub user.

    Two api.github.com requests are made: the user record and up to ten repos
    sorted by update time. Non-dict repo entries are ignored.

    Raises:
        ValueError: If the user record is missing or reports "Not Found".
    """
    user_slug = quote(username)
    user = _json_get(f"https://api.github.com/users/{user_slug}", timeout=8)
    if not isinstance(user, dict) or user.get("message") == "Not Found":
        raise ValueError("GitHub user not found")

    repos = _json_get(
        f"https://api.github.com/users/{quote(username)}/repos?per_page=10&sort=updated", timeout=8
    )
    profile_keys = (
        "name",
        "bio",
        "company",
        "location",
        "public_repos",
        "followers",
        "created_at",
        "html_url",
    )
    repo_rows = []
    for repo in (repos or [])[:10]:
        if isinstance(repo, dict):
            repo_rows.append(
                {
                    "name": repo.get("name"),
                    "language": repo.get("language"),
                    "stars": repo.get("stargazers_count"),
                }
            )
    return {
        "username": username,
        "profile": {key: user.get(key) for key in profile_keys},
        "repos": repo_rows,
        "timestamp": _now_iso(),
    }
|
||||
|
||||
|
||||
def lookup_leaks(email: str) -> dict[str, Any]:
    """Check an e-mail address against the leakcheck.io public endpoint.

    Returns ``email``, ``found``, ``sources`` and ``timestamp``; when the
    service gives no usable answer, ``found`` is False and ``sources`` empty.

    Raises:
        ValueError: If ``email`` has no ``@`` or is shorter than 5 characters.
    """
    if not ("@" in email and len(email) >= 5):
        raise ValueError("Invalid email")
    # HIBP v3 needs an API key, so the key-less (rate-limited) leakcheck
    # public endpoint is used instead.
    reply = _json_get(f"https://leakcheck.io/api/public?check={quote(email)}", timeout=8)
    found = False
    sources: Any = []
    if isinstance(reply, dict):
        found = bool(reply.get("found"))
        sources = reply.get("sources") or []
    return {"email": email, "found": found, "sources": sources, "timestamp": _now_iso()}
|
||||
|
||||
|
||||
def sweep_init(ip: str, cidr: int = 24) -> dict[str, Any]:
    """Validate a sweep target and geolocate it on ip-api.com.

    Returns ``center`` (location, ISP, AS and org of the target) together
    with ``target_ip`` and ``cidr``.

    Raises:
        ValueError: If ``ip`` is not IPv4, lies in a private, loopback,
            link-local or reserved range, ``cidr`` is outside 24-32, or the
            geolocation request does not report success.
    """
    try:
        addr = ipaddress.IPv4Address(ip)
    except ValueError as exc:
        raise ValueError("Invalid IPv4 address format") from exc

    non_public = addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_reserved
    if non_public:
        raise ValueError("Private and reserved IP ranges are not allowed")
    if not 24 <= cidr <= 32:
        raise ValueError("CIDR must be between 24 and 32")

    fields = "status,message,country,countryCode,region,regionName,city,lat,lon,isp,org,as,proxy,hosting"
    geo = _json_get(f"https://ip-api.com/json/{quote(ip)}?fields={fields}", timeout=5)
    if not isinstance(geo, dict) or geo.get("status") != "success":
        raise ValueError(f"Geolocation failed: {(geo or {}).get('message', 'unknown')}")

    center = {
        "lat": geo.get("lat"),
        "lng": geo.get("lon"),
        "city": geo.get("city"),
        "region": geo.get("regionName"),
        "country": geo.get("country"),
        "countryCode": geo.get("countryCode"),
        "isp": geo.get("isp"),
        "asn": geo.get("as") or "",
        "org": geo.get("org") or "",
    }
    return {"center": center, "target_ip": ip, "cidr": cidr}
|
||||
|
||||
|
||||
def _internetdb_lookup(ip: str) -> dict[str, Any] | None:
    """Fetch the Shodan InternetDB record for ``ip``.

    Returns the decoded JSON body on HTTP 200 and ``None`` for every other
    status (404 included) or on any exception raised while fetching or
    decoding.
    """
    try:
        resp = fetch_with_curl(
            f"https://internetdb.shodan.io/{quote(ip)}",
            timeout=4,
            headers={"Accept": "application/json"},
        )
        # 404 and every other non-200 status are handled identically.
        if resp.status_code in (404,) or resp.status_code != 200:
            return None
        return resp.json()
    except Exception:
        return None
|
||||
|
||||
|
||||
def sweep_scan(subnet_start: str, cidr: int, *, max_workers: int = 12) -> dict[str, Any]:
    """Scan a /24-/32 via Shodan InternetDB (server-side proxy).

    Every address from ``subnet_start`` onward (``2 ** (32 - cidr)`` of them)
    is looked up concurrently with ``_internetdb_lookup``; addresses without
    a record are omitted.

    Args:
        subnet_start: First IPv4 address of the range.
        cidr: Prefix length; must describe at most 256 addresses.
        max_workers: Size of the lookup thread pool.

    Returns:
        Dict with ``devices`` (in ascending address order), ``summary``
        (``total_hosts``, ``total_responsive``) and ``sweep_time_ms``.

    Raises:
        ValueError: If ``cidr`` is not an integer in 0-32, the range exceeds
            256 addresses, or ``subnet_start`` is not a valid IPv4 address.
    """
    # A prefix above 32 used to produce a fractional host count and a
    # TypeError inside range(); reject it up front with a clear message.
    if not isinstance(cidr, int) or not 0 <= cidr <= 32:
        raise ValueError("CIDR must be between 24 and 32")
    host_count = 1 << (32 - cidr)
    if host_count > 256:
        raise ValueError("Subnet too large")

    base = int(ipaddress.IPv4Address(subnet_start))
    ips = [str(ipaddress.IPv4Address(base + i)) for i in range(host_count)]

    t0 = time.time()
    found: dict[str, dict[str, Any]] = {}
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(_internetdb_lookup, ip): ip for ip in ips}
        for fut in as_completed(futures):
            data = fut.result()
            if isinstance(data, dict) and data:
                found[futures[fut]] = data

    # Emit in address order so the result does not depend on which lookup
    # happened to finish first.
    devices: list[dict[str, Any]] = [
        {
            "ip": found[ip].get("ip") or ip,
            "ports": found[ip].get("ports") or [],
            "hostnames": found[ip].get("hostnames") or [],
            "cpes": found[ip].get("cpes") or [],
            "vulns": found[ip].get("vulns") or [],
            "tags": found[ip].get("tags") or [],
        }
        for ip in ips
        if ip in found
    ]
    return {
        "devices": devices,
        "summary": {"total_hosts": host_count, "total_responsive": len(devices)},
        "sweep_time_ms": int((time.time() - t0) * 1000),
    }
|
||||
|
||||
|
||||
def subnet_start_for(ip: str, cidr: int) -> str:
    """Return the network address of the ``cidr``-bit IPv4 network containing ``ip``.

    Host bits in ``ip`` are allowed (``strict=False``) and simply masked off.
    """
    network = ipaddress.IPv4Network(f"{ip}/{cidr}", strict=False)
    first_address = network.network_address
    return str(first_address)
|
||||
@@ -0,0 +1,135 @@
|
||||
"""OpenClaw dispatch for the operator recon / OSINT lookup toolkit."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from services.osint import lookups
|
||||
from services.osint_intel.resolve import ALLOWED_TYPES, resolve_entity
|
||||
|
||||
_OSINT_TOOLS: dict[str, str] = {
|
||||
"ip": "ip",
|
||||
"dns": "domain",
|
||||
"whois": "domain",
|
||||
"certs": "domain",
|
||||
"threats": "query",
|
||||
"bgp": "query",
|
||||
"sanctions": "query",
|
||||
"cve": "cve",
|
||||
"mac": "mac",
|
||||
"github": "username",
|
||||
"leaks": "email",
|
||||
"sweep_init": "ip",
|
||||
}
|
||||
|
||||
_ENTITY_SCHEMAS = frozenset({
|
||||
"Person",
|
||||
"Organization",
|
||||
"Company",
|
||||
"Vessel",
|
||||
"Airplane",
|
||||
"LegalEntity",
|
||||
})
|
||||
|
||||
|
||||
def _require_str(args: dict[str, Any], *keys: str) -> str:
|
||||
for key in keys:
|
||||
value = str(args.get(key, "") or "").strip()
|
||||
if value:
|
||||
return value
|
||||
joined = "/".join(keys)
|
||||
raise ValueError(f"Missing required argument: {joined}")
|
||||
|
||||
|
||||
def run_osint_lookup(tool: str, args: dict[str, Any]) -> dict[str, Any]:
    """Run a passive OSINT lookup (same backends as /api/osint/*).

    ``tool`` is trimmed, lower-cased and has ``-`` replaced by ``_`` before
    being matched against ``_OSINT_TOOLS``. The target value is taken from
    the first non-blank alias key in ``args`` for that tool.

    Raises:
        ValueError: For an unknown tool, a missing required argument, an
            invalid sanctions ``schema``, or anything the lookup itself
            rejects.
    """
    name = str(tool or "").strip().lower().replace("-", "_")
    if name not in _OSINT_TOOLS:
        allowed = ", ".join(sorted(_OSINT_TOOLS))
        raise ValueError(f"Unknown OSINT tool '{tool}'. Allowed: {allowed}")

    # Tools that take exactly one required string:
    # tool -> (function in ``lookups``, accepted argument keys in priority order)
    single_value_tools = {
        "ip": ("lookup_ip", ("ip", "query", "value")),
        "dns": ("lookup_dns", ("domain", "query", "value")),
        "whois": ("lookup_whois", ("domain", "query", "value")),
        "certs": ("lookup_certs", ("domain", "query", "value")),
        "bgp": ("lookup_bgp", ("query", "asn", "value")),
        "cve": ("lookup_cve", ("cve", "query", "value")),
        "mac": ("lookup_mac", ("mac", "query", "value")),
        "github": ("lookup_github", ("username", "user", "query", "value")),
        "leaks": ("lookup_leaks", ("email", "query", "value")),
    }
    spec = single_value_tools.get(name)
    if spec is not None:
        func_name, keys = spec
        handler = getattr(lookups, func_name)
        return handler(_require_str(args, *keys))

    if name == "threats":
        # The query is optional here: blank means "recent pulses only".
        raw_query = str(args.get("query", "") or args.get("value", "") or "").strip()
        return lookups.lookup_threats(raw_query or None)

    if name == "sanctions":
        query = _require_str(args, "query", "name", "value")
        schema = str(args.get("schema", "") or "").strip() or None
        if schema and schema not in _ENTITY_SCHEMAS:
            allowed = ", ".join(sorted(_ENTITY_SCHEMAS))
            raise ValueError(f"Invalid schema. Allowed: {allowed}")
        try:
            limit = int(args.get("limit", 25))
        except (TypeError, ValueError):
            limit = 25
        limit = min(100, limit)
        limit = max(1, limit)
        return lookups.lookup_sanctions(query, schema=schema, limit=limit)

    if name == "sweep_init":
        ip = _require_str(args, "ip", "query", "value")
        try:
            cidr = int(args.get("cidr", 24))
        except (TypeError, ValueError):
            cidr = 24
        return lookups.sweep_init(ip, cidr)

    raise ValueError(f"Unhandled OSINT tool: {name}")
|
||||
|
||||
|
||||
def run_osint_sweep(args: dict[str, Any]) -> dict[str, Any]:
    """Run subnet device discovery (Shodan InternetDB proxy). Requires full access tier.

    The target is validated and geolocated with ``lookups.sweep_init``
    *before* any scanning starts, so a private/reserved address, an
    out-of-range CIDR or a failed geolocation is rejected without first
    issuing one InternetDB request per host in the subnet.

    Args:
        args: Command arguments; the target is read from ``ip``, ``query`` or
            ``value`` and the prefix length from ``cidr`` (default 24, also
            used when the value is not an integer).

    Returns:
        The ``sweep_init`` fields merged with the ``sweep_scan`` fields, plus
        ``subnet`` as ``"<network address>/<cidr>"``.

    Raises:
        ValueError: If the target is missing or rejected by ``sweep_init`` /
            ``sweep_scan``.
    """
    ip = _require_str(args, "ip", "query", "value")
    try:
        cidr = int(args.get("cidr", 24))
    except (TypeError, ValueError):
        cidr = 24

    # Validation first: sweep_init enforces public IPv4 and 24 <= cidr <= 32.
    init = lookups.sweep_init(ip, cidr)
    subnet = lookups.subnet_start_for(ip, cidr)
    scan = lookups.sweep_scan(subnet, cidr)
    return {**init, **scan, "subnet": f"{subnet}/{cidr}"}
|
||||
|
||||
|
||||
def run_entity_expand(args: dict[str, Any]) -> dict[str, Any]:
    """Expand an entity graph node (aircraft, vessel, IP, company, person, country).

    The entity type comes from ``type`` / ``entity_type`` and the identifier
    from ``id`` / ``entity_id`` / ``query`` / ``value``. The identifier is
    also used as the node label; ``registration``, ``model`` and ``icao24``
    are forwarded only when non-blank.

    Raises:
        ValueError: If the type or identifier is missing.
    """
    entity_type = _require_str(args, "type", "entity_type")
    entity_id = _require_str(args, "id", "entity_id", "query", "value")

    props: dict[str, Any] = {"label": entity_id}
    for field in ("registration", "model", "icao24"):
        text = str(args.get(field, "") or "").strip()
        if text:
            props[field] = text
    return resolve_entity(entity_type, entity_id, props)
|
||||
|
||||
|
||||
def osint_tool_help() -> dict[str, Any]:
    """Discovery metadata for agents.

    Lists the sorted lookup tool names, entity types and sanctions schemas,
    with a one-line note per OpenClaw command.
    """
    notes = {
        "osint_lookup": "Passive lookups — same data as the Recon panel /api/osint/* routes.",
        "osint_sweep": "Active subnet scan via Shodan InternetDB — requires full OpenClaw access tier.",
        "entity_expand": "Build a relationship graph around aircraft, vessels, IPs, companies, people, or countries.",
    }
    tool_names = sorted(_OSINT_TOOLS)
    entity_types = sorted(ALLOWED_TYPES)
    schemas = sorted(_ENTITY_SCHEMAS)
    return {
        "tools": tool_names,
        "entity_types": entity_types,
        "sanctions_schemas": schemas,
        "notes": notes,
    }
|
||||
@@ -0,0 +1 @@
|
||||
"""Entity graph resolution (Osiris intel layer port)."""
|
||||
@@ -0,0 +1,268 @@
|
||||
"""Entity graph resolver (Python port of Osiris intel/server.js)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
from services.sanctions.ofac import match_exact, search_sanctions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ALLOWED_TYPES = frozenset({"aircraft", "vessel", "company", "person", "ip", "country"})
|
||||
_WD_CACHE: dict[str, tuple[float, dict[str, Any]]] = {}
|
||||
_WD_LOCK = threading.Lock()
|
||||
_WD_TTL = 24 * 60 * 60
|
||||
_WD_UA = "Shadowbroker-Intel/1.0 (ontology engine)"
|
||||
|
||||
|
||||
def _dedup(nodes: list[dict], links: list[dict]) -> dict[str, Any]:
|
||||
node_map: dict[str, dict] = {}
|
||||
for n in nodes:
|
||||
node_map[n["id"]] = n
|
||||
seen_links: set[str] = set()
|
||||
out_links: list[dict] = []
|
||||
for link in links:
|
||||
key = f"{link['source']}→{link['target']}→{link['label']}"
|
||||
if key in seen_links:
|
||||
continue
|
||||
seen_links.add(key)
|
||||
out_links.append(link)
|
||||
return {"nodes": list(node_map.values()), "links": out_links}
|
||||
|
||||
|
||||
def _wd_cache_get(key: str) -> dict[str, Any] | None:
    """Return the cached payload for *key*, or ``None`` if absent or expired.

    An entry older than ``_WD_TTL`` seconds is removed from the cache when it
    is looked up.
    """
    with _WD_LOCK:
        cached = _WD_CACHE.get(key)
        if not cached:
            return None
        stored_at, payload = cached
        if time.time() - stored_at <= _WD_TTL:
            return payload
        # Expired: evict so the caller refetches.
        _WD_CACHE.pop(key, None)
        return None
|
||||
|
||||
|
||||
def _wd_cache_set(key: str, data: dict[str, Any]) -> None:
    """Store *data* under *key*, stamped with the current time.

    When the cache already holds more than 5000 entries, the first key in
    dict iteration order is dropped before the insert.
    """
    with _WD_LOCK:
        if len(_WD_CACHE) > 5000:
            for stale_key in _WD_CACHE:
                _WD_CACHE.pop(stale_key, None)
                break
        _WD_CACHE[key] = (time.time(), data)
|
||||
|
||||
|
||||
def _add_sanctions(id_label: str, root_id: str, nodes: list, links: list) -> None:
    """Append sanctions search hits for *id_label* to the graph in place.

    ``search_sanctions`` is queried with ``limit=3``. Every hit becomes a
    ``sanction`` node and a ``SANCTIONS MATCH`` link from *root_id*.
    """
    for match in search_sanctions(id_label, limit=3):
        sanction_id = f"sanction:{match['id']}"
        sanction_node = {
            "id": sanction_id,
            "label": match["name"],
            "type": "sanction",
            "properties": {"programs": match.get("programs"), "source": "OFAC SDN"},
        }
        nodes.append(sanction_node)
        links.append({"source": root_id, "target": sanction_id, "label": "SANCTIONS MATCH"})
|
||||
|
||||
|
||||
def _sparql(query: str) -> list[dict[str, Any]]:
    """Run *query* on the Wikidata SPARQL endpoint and return its bindings.

    Returns an empty list when the response status is not 200 or when
    decoding the response body raises.
    """
    request_headers = {"User-Agent": _WD_UA, "Accept": "application/sparql-results+json"}
    endpoint = f"https://query.wikidata.org/sparql?query={quote(query)}&format=json"
    resp = fetch_with_curl(endpoint, timeout=10, headers=request_headers)
    if resp.status_code != 200:
        return []
    try:
        payload = resp.json()
    except Exception:
        return []
    results = payload.get("results", {})
    return results.get("bindings", [])
|
||||
|
||||
|
||||
def _wd_search(label: str) -> str | None:
    """Return the id of the top Wikidata entity-search hit for *label*.

    Returns ``None`` when the response status is not 200, when decoding the
    body raises, or when the search returns no hits.
    """
    search_url = (
        "https://www.wikidata.org/w/api.php?action=wbsearchentities"
        f"&search={quote(label)}&language=en&limit=1&format=json"
    )
    resp = fetch_with_curl(search_url, timeout=5, headers={"User-Agent": _WD_UA})
    if resp.status_code != 200:
        return None
    try:
        matches = resp.json().get("search") or []
    except Exception:
        return None
    if not matches:
        return None
    return matches[0]["id"]
|
||||
|
||||
|
||||
def _resolve_ip(id_value: str) -> dict[str, Any]:
    """Build the relationship graph for one IP address.

    Looks the address up on ip-api.com (proxy/hosting/mobile flags, ISP,
    country) and in RIPEstat whois (network name), and adds a ``sanction``
    node for every ``match_exact`` hit on the ISP, organisation or AS name.

    Args:
        id_value: The address to resolve, as supplied by the caller.

    Returns:
        A ``{"nodes": [...], "links": [...]}`` graph rooted at
        ``ip:<id_value>``. The graph is cached only when at least one upstream
        lookup returned a decodable answer, so a transient outage is not
        remembered for the whole cache TTL.
    """
    cache_key = f"ip:{id_value}"
    cached = _wd_cache_get(cache_key)
    if cached:
        return cached

    root_id = f"ip:{id_value}"
    nodes: list[dict] = [{"id": root_id, "label": id_value, "type": "ip", "properties": {}}]
    links: list[dict] = []
    any_upstream_ok = False

    # quote() leaves "/" unescaped by default; inside a URL *path* segment that
    # would let the caller-supplied value address a different endpoint.
    path_safe_ip = quote(id_value, safe="")
    # NOTE(review): ip-api.com documents its free endpoint as HTTP-only —
    # confirm this HTTPS URL is served for the plan used in deployment.
    geo = fetch_with_curl(
        f"https://ip-api.com/json/{path_safe_ip}"
        "?fields=status,country,countryCode,city,lat,lon,isp,org,as,asname,proxy,hosting,mobile",
        timeout=8,
    )
    if geo.status_code == 200:
        try:
            data = geo.json()
        except Exception:
            data = {}
        else:
            any_upstream_ok = True
        if not isinstance(data, dict):
            data = {}
        if data.get("status") == "success":
            nodes[0]["properties"] = {
                "proxy": bool(data.get("proxy")),
                "hosting": bool(data.get("hosting")),
                "mobile": bool(data.get("mobile")),
                "source": "ip-api.com",
            }
            if data.get("isp"):
                iid = f"company:{data['isp']}"
                nodes.append({"id": iid, "label": data["isp"], "type": "company", "properties": {"role": "ISP"}})
                links.append({"source": root_id, "target": iid, "label": "HOSTED_BY"})
            if data.get("country"):
                cid = f"country:{data['country']}"
                nodes.append(
                    {
                        "id": cid,
                        "label": data["country"],
                        "type": "country",
                        "properties": {"code": data.get("countryCode")},
                    }
                )
                links.append({"source": root_id, "target": cid, "label": "LOCATED_IN"})
            for val in (data.get("isp"), data.get("org"), data.get("asname")):
                if not val:
                    continue
                for entry in match_exact(val):
                    sid = f"sanction:{entry['id']}"
                    nodes.append({"id": sid, "label": entry["name"], "type": "sanction", "properties": {}})
                    links.append({"source": root_id, "target": sid, "label": "SANCTIONS MATCH"})

    whois = fetch_with_curl(
        f"https://stat.ripe.net/data/whois/data.json?resource={quote(id_value)}",
        timeout=8,
    )
    if whois.status_code == 200:
        try:
            records = whois.json().get("data", {}).get("records") or []
        except Exception:
            records = []
        else:
            any_upstream_ok = True
        for record in records:
            for field in record:
                if field.get("key") not in ("netname", "NetName"):
                    continue
                netname = field.get("value")
                if not netname:
                    # A netname record without a value carries nothing to graph.
                    continue
                nid = f"company:{netname}"
                nodes.append({"id": nid, "label": netname, "type": "company", "properties": {"role": "Network"}})
                links.append({"source": root_id, "target": nid, "label": "HOSTED_BY"})

    result = _dedup(nodes, links)
    if any_upstream_ok:
        _wd_cache_set(cache_key, result)
    return result
|
||||
|
||||
|
||||
def _resolve_company(id_value: str) -> dict[str, Any]:
    """Build the relationship graph for a company name.

    Queries Wikidata for the company's country (P17), parent organisation
    (P749) and CEO (P169), then appends sanctions search hits. The result is
    served from and stored in the module cache.

    Returns:
        A ``{"nodes": [...], "links": [...]}`` graph rooted at
        ``company:<id_value>``.
    """
    cache_key = f"company:{id_value}"
    hit = _wd_cache_get(cache_key)
    if hit:
        return hit

    root_id = f"company:{id_value}"
    nodes = [{"id": root_id, "label": id_value, "type": "company", "properties": {}}]
    links: list[dict] = []

    # Strip everything except a conservative character set before the name is
    # embedded in the search URL and the SPARQL string literal.
    safe = re.sub(r'[^a-zA-Z0-9 \-._]', '', id_value).strip()
    qid = _wd_search(safe)
    if qid:
        filt = f"VALUES ?item {{ wd:{qid} }}"
    else:
        filt = f'?item rdfs:label "{safe}"@en . ?item wdt:P31/wdt:P279* wd:Q4830453 .'
    rows = _sparql(
        f"""
        SELECT ?countryLabel ?parentLabel ?ceoLabel WHERE {{
        {filt}
        OPTIONAL {{ ?item wdt:P17 ?country . }}
        OPTIONAL {{ ?item wdt:P749 ?parent . }}
        OPTIONAL {{ ?item wdt:P169 ?ceo . }}
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
        }} LIMIT 10
        """
    )

    # (binding name, node type, link label, node properties), in emit order.
    relations = (
        ("countryLabel", "country", "HEADQUARTERED", {}),
        ("parentLabel", "company", "PARENT ORG", {}),
        ("ceoLabel", "person", "CEO", {"role": "CEO"}),
    )
    for row in rows:
        for binding, node_type, link_label, node_props in relations:
            value = row.get(binding, {}).get("value")
            if not value:
                continue
            node_id = f"{node_type}:{value}"
            nodes.append({"id": node_id, "label": value, "type": node_type, "properties": dict(node_props)})
            links.append({"source": root_id, "target": node_id, "label": link_label})

    _add_sanctions(id_value, root_id, nodes, links)
    result = _dedup(nodes, links)
    _wd_cache_set(cache_key, result)
    return result
|
||||
|
||||
|
||||
def _resolve_from_store(entity_type: str, id_value: str, props: dict[str, Any]) -> dict[str, Any]:
    """Build a graph for an aircraft or vessel from the latest fetched data.

    Args:
        entity_type: Entity kind; ``"aircraft"`` and ``"vessel"`` get a store
            lookup, any other value only gets the sanctions search.
        id_value: Entity id; used as the ICAO24 / MMSI when *props* has none.
        props: Caller-supplied properties for the root node. The dict is
            copied, so enriching the root node never mutates the caller's
            object.

    Returns:
        A ``{"nodes": [...], "links": [...]}`` graph rooted at
        ``<entity_type>:<id_value>``.
    """
    from services.fetchers._store import get_latest_data_subset_refs

    root_id = f"{entity_type}:{id_value}"
    # Copy: "registration" is written into the root properties below, and the
    # original code wrote it straight into the caller's dict.
    root_props = dict(props)
    nodes = [{"id": root_id, "label": props.get("label") or id_value, "type": entity_type, "properties": root_props}]
    links: list[dict] = []
    data = get_latest_data_subset_refs("flights", "ships", "military_flights", "tracked_flights")

    if entity_type == "aircraft":
        # str() guards against a non-string icao24 arriving in props.
        icao = str(props.get("icao24") or id_value).lower()
        for bucket in ("military_flights", "tracked_flights", "flights"):
            for f in data.get(bucket) or []:
                if str(f.get("icao24", "")).lower() != icao:
                    continue
                if f.get("country"):
                    cid = f"country:{f['country']}"
                    nodes.append({"id": cid, "label": f["country"], "type": "country", "properties": {}})
                    links.append({"source": root_id, "target": cid, "label": "REGISTERED_IN"})
                if f.get("registration"):
                    root_props["registration"] = f["registration"]
                # First match per bucket is enough; later buckets are still scanned.
                break
    elif entity_type == "vessel":
        mmsi = str(props.get("mmsi") or id_value)
        for ship in data.get("ships") or []:
            if str(ship.get("mmsi")) != mmsi:
                continue
            if ship.get("country"):
                cid = f"country:{ship['country']}"
                nodes.append({"id": cid, "label": ship["country"], "type": "country", "properties": {}})
                links.append({"source": root_id, "target": cid, "label": "FLAG"})
            break

    _add_sanctions(id_value, root_id, nodes, links)
    return _dedup(nodes, links)
|
||||
|
||||
|
||||
def resolve_entity(entity_type: str, id_value: str, properties: dict[str, Any] | None = None) -> dict[str, Any]:
    """Resolve an entity into a ``{"nodes": [...], "links": [...]}`` graph.

    Args:
        entity_type: One of ``ALLOWED_TYPES``; case and surrounding whitespace
            are ignored.
        id_value: Entity id, 2-200 characters after trimming.
        properties: Optional properties, forwarded for aircraft and vessels.

    Raises:
        ValueError: If the type is not allowed or the id length is out of range.
    """
    etype = (entity_type or "").lower().strip()
    eid = (id_value or "").strip()
    if etype not in ALLOWED_TYPES:
        raise ValueError(f"Invalid type. Allowed: {', '.join(sorted(ALLOWED_TYPES))}")
    if not 2 <= len(eid) <= 200:
        raise ValueError("Invalid id (2-200 chars)")

    if etype == "ip":
        return _resolve_ip(eid)
    if etype == "company":
        return _resolve_company(eid)
    if etype in ("person", "country"):
        # People and countries only get a root node plus sanctions hits.
        root_id = f"{etype}:{eid}"
        nodes = [{"id": root_id, "label": eid, "type": etype, "properties": {}}]
        links: list[dict] = []
        _add_sanctions(eid, root_id, nodes, links)
        return _dedup(nodes, links)
    return _resolve_from_store(etype, eid, properties or {})
|
||||
@@ -0,0 +1,81 @@
|
||||
"""Operator opt-in for Polymarket/Kalshi outbound fetches (Global Threat Intercept)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# JSON file holding the UI opt-in flag: <parent of this module's directory>/data/.
_OPT_IN_FILE = Path(__file__).resolve().parent.parent / "data" / "prediction_markets_opt_in.json"
# Serialises writers of _OPT_IN_FILE.
_OPT_IN_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _env_flag(name: str) -> str:
    """Return environment variable *name* trimmed and lower-cased ("" if unset)."""
    raw_value = os.getenv(name, "")
    return str(raw_value).strip().lower()
|
||||
|
||||
|
||||
def get_prediction_markets_ui_opt_in() -> bool:
    """Return the UI opt-in flag stored in ``_OPT_IN_FILE``.

    Returns ``False`` when the file is missing. Also returns ``False``, after
    logging a warning, when the file cannot be read or decoded, is not valid
    JSON, or does not contain a JSON object.
    """
    try:
        raw = _OPT_IN_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Never opted in (or file removed between calls): the normal default.
        return False
    except (OSError, ValueError) as exc:
        # ValueError covers UnicodeDecodeError from a corrupted file.
        logger.warning("Prediction markets opt-in file unreadable: %s", exc)
        return False

    try:
        payload = json.loads(raw)
    except (json.JSONDecodeError, TypeError) as exc:
        logger.warning("Prediction markets opt-in file unreadable: %s", exc)
        return False

    if not isinstance(payload, dict):
        # e.g. "[]" or "true": valid JSON, but .get() would raise AttributeError.
        logger.warning(
            "Prediction markets opt-in file unreadable: %s",
            f"expected a JSON object, got {type(payload).__name__}",
        )
        return False
    return bool(payload.get("opted_in"))
|
||||
|
||||
|
||||
def set_prediction_markets_ui_opt_in(opted_in: bool) -> None:
    """Persist the UI opt-in flag to ``_OPT_IN_FILE``.

    The JSON is written to a sibling temp file and moved into place with
    ``os.replace``, so a concurrent reader (which takes no lock) sees either
    the old or the new file, never a half-written one.

    Args:
        opted_in: New flag value; coerced to ``bool`` before it is stored.
    """
    _OPT_IN_FILE.parent.mkdir(parents=True, exist_ok=True)
    body = json.dumps({"opted_in": bool(opted_in)}, indent=2)
    tmp_path = _OPT_IN_FILE.with_name(_OPT_IN_FILE.name + ".tmp")
    with _OPT_IN_LOCK:
        tmp_path.write_text(body, encoding="utf-8")
        os.replace(tmp_path, _OPT_IN_FILE)
|
||||
|
||||
|
||||
def prediction_markets_env_forced_on() -> bool:
    """Return True when PREDICTION_MARKETS_ENABLED holds a truthy flag value."""
    truthy_values = ("1", "true", "yes", "on")
    return _env_flag("PREDICTION_MARKETS_ENABLED") in truthy_values
|
||||
|
||||
|
||||
def prediction_markets_env_forced_off() -> bool:
    """Return True when PREDICTION_MARKETS_ENABLED holds a falsy flag value."""
    falsy_values = ("0", "false", "no", "off")
    return _env_flag("PREDICTION_MARKETS_ENABLED") in falsy_values
|
||||
|
||||
|
||||
def prediction_markets_fetch_enabled() -> bool:
    """Return True when Polymarket/Kalshi pulls are enabled.

    Precedence: an environment "off" override disables fetching regardless of
    the UI opt-in; an environment "on" override enables it; otherwise the UI
    opt-in decides. Previously the "off" override was ignored here, so the
    status payload could report ``env_override: "off"`` with ``enabled: True``.
    """
    if prediction_markets_env_forced_off():
        return False
    if prediction_markets_env_forced_on():
        return True
    return get_prediction_markets_ui_opt_in()
|
||||
|
||||
|
||||
def prediction_markets_status() -> dict[str, Any]:
    """Return the opt-in state and scheduler jitter settings as a dict.

    Keys: ``enabled``, ``ui_opted_in``, ``env_override`` (``"on"``, ``"off"``
    or ``None``) and ``jitter`` (the three tuning values read from the
    environment). A tuning variable that is empty or not a number is logged
    and replaced by its default instead of raising ``ValueError``.
    """

    def _env_number(name: str, default: str, cast):
        """Parse env var *name* with *cast*, falling back to *default*."""
        raw = os.environ.get(name, default)
        try:
            return cast(raw)
        except ValueError:
            logger.warning("Ignoring invalid %s=%r; using default %s", name, raw, default)
            return cast(default)

    ui_opted_in = get_prediction_markets_ui_opt_in()
    if prediction_markets_env_forced_on():
        env_override = "on"
    elif prediction_markets_env_forced_off():
        env_override = "off"
    else:
        env_override = None

    return {
        "enabled": prediction_markets_fetch_enabled(),
        "ui_opted_in": ui_opted_in,
        "env_override": env_override,
        "jitter": {
            "scheduler_interval_minutes": _env_number("PREDICTION_MARKETS_INTERVAL_MINUTES", "7", int),
            "scheduler_jitter_seconds": _env_number("PREDICTION_MARKETS_SCHEDULER_JITTER_S", "240", int),
            "pre_fetch_jitter_seconds": _env_number("PREDICTION_MARKETS_PRE_FETCH_JITTER_S", "90", float),
        },
    }
|
||||
@@ -301,3 +301,36 @@ def get_region_dossier(lat: float, lng: float) -> dict:
|
||||
|
||||
dossier_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
|
||||
def fetch_wikipedia_page_summary(title: str) -> dict | None:
    """Return a Wikipedia summary payload for *title* (backend-proxied for #360).

    Returns ``None`` for a blank title or when the fetched summary has neither
    an extract nor a description.
    """
    page_title = (title or "").strip()
    if not page_title:
        return None

    summary = _fetch_local_wiki_summary(page_title, "")
    has_text = bool(summary.get("extract")) or bool(summary.get("description"))
    if not has_text:
        return None

    text_fields = {name: summary.get(name, "") for name in ("description", "extract", "thumbnail")}
    return {"title": page_title, **text_fields, "type": "standard"}
|
||||
|
||||
|
||||
def fetch_wikidata_sparql_bindings(sparql: str) -> list:
    """Run a Wikidata SPARQL query and return its bindings list.

    Returns an empty list for a blank query, a non-200 response, a bindings
    value that is not a list, or any of the handled request/decoding errors
    (which are logged as warnings).
    """
    query = (sparql or "").strip()
    if not query:
        return []

    url = f"https://query.wikidata.org/sparql?query={quote(query)}&format=json"
    try:
        res = fetch_with_curl(url, timeout=8, headers=_wikimedia_request_headers())
        if res.status_code != 200:
            return []
        bindings = res.json().get("results", {}).get("bindings", [])
    except (ConnectionError, TimeoutError, ValueError, KeyError, OSError) as e:
        logger.warning("Wikidata SPARQL failed: %s", e)
        return []
    if isinstance(bindings, list):
        return bindings
    return []
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Sentinel-2 road corridor freight trend analysis (DrishX engine port)."""
|
||||
|
||||
from .config import optional_deps_available, road_corridor_sat_enabled
|
||||
|
||||
__all__ = ["optional_deps_available", "road_corridor_sat_enabled"]
|
||||
@@ -0,0 +1,4 @@
|
||||
from .cli import main
|
||||
|
||||
# Run the CLI when this module is executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -0,0 +1,53 @@
|
||||
"""CLI for manual road corridor analysis runs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from .config import optional_deps_available, road_corridor_sat_enabled
|
||||
from .credentials import sentinel_credentials_configured
|
||||
from .pipeline import analyze_preset
|
||||
from .presets import CORRIDOR_PRESETS
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Run one preset analysis from the command line.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when the analysis status is ``"ok"``, ``1`` for any other
        status, ``2`` when optional dependencies or Sentinel credentials are
        missing or the preset id is unknown.
    """
    parser = argparse.ArgumentParser(description="Run Sentinel-2 road corridor truck trend analysis")
    parser.add_argument("--preset", required=True, help="Preset id (e.g. laredo_i35)")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args(argv)

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    if not optional_deps_available():
        print(
            "Install optional deps: uv sync --extra road-corridor "
            "(geopandas, osmnx, rasterio, sentinelhub, scikit-learn, imageio)",
            file=sys.stderr,
        )
        return 2
    # The feature flag only gates scheduled runs; a manual run just gets a note.
    if not (road_corridor_sat_enabled() or args.verbose):
        print("Note: ROAD_CORRIDOR_SAT_ENABLED is off — CLI still runs for manual analysis.")
    if not sentinel_credentials_configured():
        print("Set SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET first.", file=sys.stderr)
        return 2

    known_ids = {preset["id"] for preset in CORRIDOR_PRESETS}
    if args.preset not in known_ids:
        print(f"Unknown preset {args.preset!r}. Choose from: {', '.join(sorted(known_ids))}", file=sys.stderr)
        return 2

    def progress(msg: str, pct: int | None = None) -> None:
        """Print a progress line, with the percentage when one is given."""
        print(msg if pct is None else f"{msg} ({pct}%)")

    result = analyze_preset(args.preset, progress_cb=progress)
    detection_total = result.get('total_detections', 0)
    day_total = len(result.get('daily_counts') or [])
    print(f"Done: {detection_total} detections across {day_total} days — status={result.get('status')}")
    if result.get("status") == "ok":
        return 0
    return 1
|
||||
|
||||
|
||||
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -0,0 +1,41 @@
|
||||
"""Configuration for Sentinel-2 road corridor trend analysis."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Directory two levels above this module's directory.
_BACKEND_ROOT = Path(__file__).resolve().parents[2]
# Root for road-corridor data; ROAD_CORRIDOR_DATA_DIR overrides the default
# <_BACKEND_ROOT>/data/road_corridors.
DATA_ROOT = Path(os.environ.get("ROAD_CORRIDOR_DATA_DIR", str(_BACKEND_ROOT / "data" / "road_corridors")))
CACHE_DIR = DATA_ROOT / "cache"
DETECTION_CROP_DIR = DATA_ROOT / "detection_crops"
STATE_PATH = DATA_ROOT / "_refresh_state.json"

# Analysis defaults, parsed once at import time; a non-integer value raises
# ValueError when this module is imported.
DEFAULT_MONTHS = int(os.environ.get("ROAD_CORRIDOR_MONTHS", "2"))
DEFAULT_MAX_FRAMES = int(os.environ.get("ROAD_CORRIDOR_MAX_FRAMES", "6"))
# Comma-separated preset ids from ROAD_CORRIDOR_SCHEDULED_PRESETS, with blank
# items dropped.
SCHEDULED_PRESET_IDS = [
    s.strip()
    for s in os.environ.get("ROAD_CORRIDOR_SCHEDULED_PRESETS", "laredo_i35").split(",")
    if s.strip()
]
|
||||
|
||||
|
||||
def road_corridor_sat_enabled() -> bool:
    """Return True when ROAD_CORRIDOR_SAT_ENABLED is set to 1, true, yes or on.

    The comparison ignores case and surrounding whitespace.
    """
    raw_flag = os.environ.get("ROAD_CORRIDOR_SAT_ENABLED", "")
    normalized = raw_flag.strip().lower()
    return normalized in ("1", "true", "yes", "on")
|
||||
|
||||
|
||||
def optional_deps_available() -> bool:
    """Return True when every optional road-corridor dependency imports.

    The probe covers the packages named in the CLI install hint, including
    ``imageio``, which the truck-detection module imports at module level.
    """
    try:
        import geopandas  # noqa: F401
        import imageio  # noqa: F401
        import osmnx  # noqa: F401
        import rasterio  # noqa: F401
        import sentinelhub  # noqa: F401
        import sklearn  # noqa: F401
    except ImportError:
        return False
    return True
|
||||
@@ -0,0 +1,37 @@
|
||||
"""Reuse Shadowbroker Sentinel Hub / Copernicus CDSE credentials."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
from .config import CACHE_DIR
|
||||
|
||||
|
||||
def resolve_sentinel_credentials() -> tuple[str, str]:
    """Return ``(client_id, client_secret)`` read from the environment.

    Values come from SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET, are
    stripped of surrounding whitespace, and are "" when unset.
    """
    env_names = ("SENTINEL_CLIENT_ID", "SENTINEL_CLIENT_SECRET")
    client_id, client_secret = ((os.environ.get(name) or "").strip() for name in env_names)
    return client_id, client_secret
|
||||
|
||||
|
||||
def sentinel_credentials_configured() -> bool:
    """Return True when both Sentinel credentials are non-empty."""
    return all(resolve_sentinel_credentials())
|
||||
|
||||
|
||||
def build_sh_config():
    """Return an ``SHConfig`` pointed at the Copernicus Data Space endpoints.

    The config carries the Sentinel client id and secret from the
    environment, and its cache directory is set to ``CACHE_DIR/sentinelhub``
    (``CACHE_DIR`` is created if missing).

    Raises:
        RuntimeError: If either Sentinel credential is missing.
    """
    from sentinelhub import SHConfig

    client_id, client_secret = resolve_sentinel_credentials()
    if not (client_id and client_secret):
        raise RuntimeError(
            "SENTINEL_CLIENT_ID and SENTINEL_CLIENT_SECRET are required for road corridor analysis"
        )

    sh_config = SHConfig()
    sh_config.sh_client_id = client_id
    sh_config.sh_client_secret = client_secret
    sh_config.sh_base_url = "https://sh.dataspace.copernicus.eu"
    sh_config.sh_token_url = (
        "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
    )
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    sh_config.cache_dir = str(CACHE_DIR / "sentinelhub")
    return sh_config
|
||||
@@ -0,0 +1,149 @@
|
||||
"""In-memory job queue for on-demand Analyze Here runs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Guards every access to _jobs and to the mutable fields of registered jobs.
_lock = threading.Lock()

# Statuses of jobs that have not finished yet.
_ACTIVE_STATUSES = frozenset({"queued", "running"})


@dataclass
class AnalyzeJob:
    """State of one on-demand analysis run."""

    job_id: str
    lat: float
    lon: float
    status: str = "queued"
    message: str = "Queued"
    progress: int = 0
    result: dict[str, Any] | None = None
    error: str | None = None


# job_id -> job. Dict insertion order is creation order, so the newest job is
# always last; job ids are random and carry no ordering information.
_jobs: dict[str, AnalyzeJob] = {}


def get_job(job_id: str) -> AnalyzeJob | None:
    """Return the registered job with *job_id*, or ``None``."""
    with _lock:
        return _jobs.get(job_id)


def get_latest_job() -> AnalyzeJob | None:
    """Return the most recently registered job, or ``None`` if there is none.

    "Latest" is decided by insertion order. Comparing job ids, as the previous
    implementation did, picked an arbitrary job because ids are random hex.
    """
    with _lock:
        if not _jobs:
            return None
        return next(reversed(_jobs.values()))


def _running_job() -> AnalyzeJob | None:
    """Return the first job that is still queued or running, if any."""
    with _lock:
        for job in _jobs.values():
            if job.status in _ACTIVE_STATUSES:
                return job
    return None


def _prune_jobs(max_keep: int = 8) -> None:
    """Drop the oldest finished jobs until at most *max_keep* jobs remain.

    Jobs are removed oldest-first by insertion order, and queued or running
    jobs are never removed, so the job that was just enqueued always survives.
    """
    with _lock:
        excess = len(_jobs) - max_keep
        if excess <= 0:
            return
        # Snapshot the keys: the dict is mutated while walking it.
        for job_id in list(_jobs):
            if excess <= 0:
                break
            if _jobs[job_id].status in _ACTIVE_STATUSES:
                continue
            del _jobs[job_id]
            excess -= 1
|
||||
|
||||
|
||||
def _worker(job_id: str, lat: float, lon: float, label: str | None) -> None:
    """Thread target: run one ad-hoc corridor analysis and record its outcome.

    Builds a bbox and preset id around (*lat*, *lon*), runs
    ``analyze_corridor``, refreshes the road-corridor store, and writes the
    status, result, error, message and progress onto the registered job.
    Any exception is logged and recorded as an ``"error"`` status.
    """
    from services.fetchers.road_corridor_sat import refresh_road_corridor_store

    from .pipeline import analyze_corridor
    from .viewport import adhoc_preset_id, bbox_around_point, default_label_for_point

    job = get_job(job_id)
    if job is None:
        return

    def _patch(**fields: Any) -> None:
        """Set *fields* on the job if it is still registered."""
        with _lock:
            target = _jobs.get(job_id)
            if target is None:
                return
            for attr, value in fields.items():
                setattr(target, attr, value)

    def progress(msg: str, pct: int | None = None) -> None:
        """Progress callback handed to the pipeline."""
        if pct is None:
            _patch(message=msg)
        else:
            _patch(message=msg, progress=pct)

    with _lock:
        job.status = "running"
        job.message = "Starting road corridor analysis"
        job.progress = 0

    try:
        bbox = bbox_around_point(lat, lon)
        preset_id = adhoc_preset_id(lat, lon)
        corridor_label = label or default_label_for_point(lat, lon)
        outcome = analyze_corridor(
            preset_id=preset_id,
            label=corridor_label,
            bbox=bbox,
            country="adhoc",
            category="viewport",
            progress_cb=progress,
        )
        refresh_road_corridor_store()
        summary = (
            f"{outcome.get('total_detections', 0)} signatures · "
            f"{len(outcome.get('daily_counts') or [])} days"
        )
        _patch(
            status="ok" if outcome.get("status") == "ok" else "error",
            result=outcome,
            error=outcome.get("error"),
            message=summary,
            progress=100,
        )
    except Exception as exc:
        logger.exception("road corridor analyze job %s failed", job_id)
        _patch(status="error", error=str(exc), message="Analysis failed", progress=100)
|
||||
|
||||
|
||||
def enqueue_analyze(lat: float, lon: float, label: str | None = None) -> AnalyzeJob:
    """Register a new analysis job and start its worker thread.

    The "is a job already active?" check and the registration happen under a
    single lock acquisition, so two concurrent callers cannot both start a job.

    Args:
        lat: Latitude of the point to analyze.
        lon: Longitude of the point to analyze.
        label: Optional corridor label passed through to the worker.

    Returns:
        The newly registered job.

    Raises:
        RuntimeError: ``"analysis_already_running"`` if a job is queued or running.
    """
    job_id = uuid.uuid4().hex[:12]
    job = AnalyzeJob(job_id=job_id, lat=lat, lon=lon)
    with _lock:
        # Inline check: _lock is not reentrant, so _running_job() cannot be
        # called while it is held.
        if any(existing.status in {"queued", "running"} for existing in _jobs.values()):
            raise RuntimeError("analysis_already_running")
        _jobs[job_id] = job
    _prune_jobs()

    thread = threading.Thread(
        target=_worker,
        args=(job_id, lat, lon, label),
        name=f"road-corridor-analyze-{job_id}",
        daemon=True,
    )
    thread.start()
    return job
|
||||
|
||||
|
||||
def job_to_dict(job: AnalyzeJob) -> dict[str, Any]:
    """Return the job's fields as a plain dict.

    The values are the job's own objects, not copies.
    """
    exported_fields = ("job_id", "lat", "lon", "status", "message", "progress", "result", "error")
    return {name: getattr(job, name) for name in exported_fields}
|
||||
@@ -0,0 +1,216 @@
|
||||
"""Run Sentinel-2 road-corridor truck trend analysis for a bbox preset."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from .config import CACHE_DIR, DEFAULT_MAX_FRAMES, DEFAULT_MONTHS, DETECTION_CROP_DIR
|
||||
from .storage import store_analysis_result
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ProgressCb = Callable[[str, int | None], None]
|
||||
|
||||
_EVALSCRIPT = """//VERSION=3
|
||||
function setup() {
|
||||
return {
|
||||
input: ["B02", "B03", "B04", "B08", "CLM"],
|
||||
output: { id: "default", bands: 5, sampleType: "FLOAT32" }
|
||||
};
|
||||
}
|
||||
function evaluatePixel(s) {
|
||||
return [s.B04, s.B03, s.B02, s.B08, s.CLM];
|
||||
}"""
|
||||
|
||||
|
||||
def _noop_progress(_msg: str, _pct: int | None = None) -> None:
    """Progress callback that discards its arguments."""
|
||||
|
||||
|
||||
def analyze_corridor(
    *,
    preset_id: str,
    label: str,
    bbox: list[float],
    country: str = "",
    category: str = "",
    months: int = DEFAULT_MONTHS,
    max_frames: int = DEFAULT_MAX_FRAMES,
    progress_cb: ProgressCb | None = None,
) -> dict[str, Any]:
    """Synchronously analyze one corridor bbox and persist daily truck-count trends.

    Steps: fetch roads for the bbox, search the catalog for scenes with
    ``eo:cloud_cover < 60`` in the look-back window, keep one scene per
    calendar date (newest first, capped at *max_frames*), download the newest
    frame to size the road mask, run truck detection on every frame, and hand
    the detections to ``store_analysis_result``.

    Args:
        preset_id: Identifier the result is stored under.
        label: Human-readable corridor name, used in progress messages.
        bbox: ``[min_lat, min_lon, max_lat, max_lon]``.
        country: Stored with the result.
        category: Stored with the result.
        months: Look-back window, counted as 30-day months (at least one).
        max_frames: Maximum number of scenes to process (at least one).
        progress_cb: Optional ``(message, percent)`` callback.

    Returns:
        Whatever ``store_analysis_result`` returns; it is called with
        ``status="error"`` when no roads, no imagery or no seed frame are
        available, and ``status="ok"`` otherwise.

    Raises:
        ValueError: If the bbox spans more than 0.5 degrees in either axis.
    """
    # Imported lazily: these packages are optional dependencies.
    from rasterio import features as rio_features
    from rasterio import transform as rio_transform
    from sentinelhub import BBox, CRS, DataCollection, MimeType, SentinelHubCatalog, SentinelHubRequest

    from .credentials import build_sh_config
    from .s2_truck_detect import S2TruckEngine

    progress = progress_cb or _noop_progress
    min_lat, min_lon, max_lat, max_lon = bbox
    if abs(max_lat - min_lat) > 0.5 or abs(max_lon - min_lon) > 0.5:
        raise ValueError("AOI too large. Max strategic sector is ~55 km x 55 km.")

    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    engine = S2TruckEngine(
        cache_dir=str(CACHE_DIR),
        detection_dir=str(DETECTION_CROP_DIR),
    )
    config = build_sh_config()

    progress(f"Road discovery for {label}", 10)
    roads = engine.fetch_roads(bbox)
    if roads.empty:
        return store_analysis_result(
            preset_id,
            label=label,
            bbox=bbox,
            country=country,
            category=category,
            road_count=0,
            frame_count=0,
            detections=[],
            status="error",
            error="No major roads found in AOI.",
        )

    progress(f"Found {len(roads)} road segments — querying Copernicus catalog", 25)
    # sentinelhub's BBox takes lon/lat order, unlike this function's bbox.
    sh_bbox = BBox(bbox=[min_lon, min_lat, max_lon, max_lat], crs=CRS.WGS84)
    catalog = SentinelHubCatalog(config=config)
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=max(1, months) * 30)
    cdse_collection = DataCollection.SENTINEL2_L2A.define_from(
        "s2l2a",
        service_url=config.sh_base_url,
    )
    search_results = list(
        catalog.search(
            cdse_collection,
            bbox=sh_bbox,
            datetime=(
                f"{start_date.strftime('%Y-%m-%dT00:00:00Z')}/"
                f"{end_date.strftime('%Y-%m-%dT23:59:59Z')}"
            ),
            filter="eo:cloud_cover < 60",
            fields={"include": ["properties.datetime", "id"], "exclude": []},
        )
    )
    # Keep the first scene returned for each calendar date (YYYY-MM-DD prefix).
    unique_scenes: dict[str, Any] = {}
    for res in search_results:
        date_key = res["properties"]["datetime"][:10]
        if date_key not in unique_scenes:
            unique_scenes[date_key] = res
    # Newest dates first, capped at max_frames.
    final_obs = [unique_scenes[d] for d in sorted(unique_scenes.keys(), reverse=True)]
    final_obs = final_obs[: max(1, max_frames)]
    if not final_obs:
        return store_analysis_result(
            preset_id,
            label=label,
            bbox=bbox,
            country=country,
            category=category,
            road_count=len(roads),
            frame_count=0,
            detections=[],
            status="error",
            error=f"No clear imagery found in the last {months} months.",
        )

    def _fetch_frame(idx: int, res_obs: dict[str, Any]):
        """Download one scene; return ``(idx, datetime, data)``.

        ``data`` is ``None`` when the request returns nothing; both
        ``datetime`` and ``data`` are ``None`` when the request raises.
        """
        try:
            date_str = res_obs["properties"]["datetime"]
            req_sh = SentinelHubRequest(
                evalscript=_EVALSCRIPT,
                input_data=[
                    SentinelHubRequest.input_data(
                        data_collection=cdse_collection,
                        time_interval=(date_str, date_str),
                    )
                ],
                responses=[SentinelHubRequest.output_response("default", MimeType.TIFF)],
                bbox=sh_bbox,
                config=config,
            )
            data_list = req_sh.get_data()
            if not data_list:
                return idx, date_str, None
            return idx, date_str, data_list[0]
        except Exception as exc:
            logger.error("Sentinel frame %s failed: %s", idx, exc)
            return idx, None, None

    # The newest frame is fetched first because its pixel dimensions size the
    # road mask that is reused for every other frame.
    progress(f"Seed frame 1/{len(final_obs)}", 35)
    _, seed_ts, seed_data = _fetch_frame(0, final_obs[0])
    if seed_data is None:
        return store_analysis_result(
            preset_id,
            label=label,
            bbox=bbox,
            country=country,
            category=category,
            road_count=len(roads),
            frame_count=0,
            detections=[],
            status="error",
            error="Failed to acquire seed spectral data.",
        )

    # Buffer roads by 20 units in EPSG:3857, then rasterize back in EPSG:4326.
    roads_buf = roads.to_crs(epsg=3857).buffer(20).to_crs(epsg=4326)
    h, w = seed_data.shape[:2]
    trans = rio_transform.from_bounds(min_lon, min_lat, max_lon, max_lat, w, h)
    road_mask = rio_features.rasterize(
        [(geom.__geo_interface__, 1) for geom in roads_buf.geometry],
        out_shape=(h, w),
        transform=trans,
        fill=0,
        all_touched=True,
    )

    detections: list[dict[str, Any]] = []
    detections.extend(engine.detect_trucks(seed_data, bbox, final_obs[0]["properties"]["datetime"], road_mask))

    if len(final_obs) > 1:
        progress(f"Parallel frames ({len(final_obs) - 1} remaining)", 45)
        with ThreadPoolExecutor(max_workers=3, thread_name_prefix="road-corridor") as executor:
            futures = {
                executor.submit(_fetch_frame, i, final_obs[i]): i for i in range(1, len(final_obs))
            }
            # The seed frame already counts as one completed frame.
            done = 1
            for future in as_completed(futures):
                idx, date_str, frame_data = future.result()
                done += 1
                # Frames that failed to download are skipped.
                if frame_data is not None and date_str:
                    detections.extend(engine.detect_trucks(frame_data, bbox, date_str, road_mask))
                progress(f"Frame {done}/{len(final_obs)}", 45 + int((done / len(final_obs)) * 50))

    progress(f"Complete — {len(detections)} truck signatures", 100)
    # NOTE(review): frame_count is the number of selected scenes, including
    # any whose download failed above — confirm that this is intended.
    return store_analysis_result(
        preset_id,
        label=label,
        bbox=bbox,
        country=country,
        category=category,
        road_count=len(roads),
        frame_count=len(final_obs),
        detections=detections,
        status="ok",
    )
|
||||
|
||||
|
||||
def analyze_preset(preset_id: str, progress_cb: ProgressCb | None = None) -> dict[str, Any]:
    """Run ``analyze_corridor`` for the preset registered under *preset_id*.

    Raises:
        KeyError: If no preset has that id.
    """
    from .presets import get_preset

    preset = get_preset(preset_id)
    if preset is None:
        raise KeyError(f"Unknown preset: {preset_id}")

    corridor_fields = {name: preset[name] for name in ("label", "bbox", "country", "category")}
    return analyze_corridor(preset_id=preset["id"], progress_cb=progress_cb, **corridor_fields)
|
||||
@@ -0,0 +1,59 @@
|
||||
"""Preset freight / chokepoint corridors for scheduled trend analysis."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TypedDict
|
||||
|
||||
|
||||
class CorridorPreset(TypedDict):
    """Shape of one entry in ``CORRIDOR_PRESETS``."""

    # Identifier used to look the preset up.
    id: str
    # Human-readable corridor name.
    label: str
    bbox: list[float]  # [min_lat, min_lon, max_lat, max_lon]
    country: str
    category: str
|
||||
|
||||
|
||||
# Bboxes are small (~5–10 km) highway segments suitable for 10 m Sentinel-2 analysis.
# Each bbox is [min_lat, min_lon, max_lat, max_lon].
CORRIDOR_PRESETS: list[CorridorPreset] = [
    {
        "id": "laredo_i35",
        "label": "Laredo I-35 (US–Mexico freight)",
        "bbox": [27.48, -99.58, 27.54, -99.48],
        "country": "USA / Mexico",
        "category": "border_crossing",
    },
    {
        "id": "bandar_abbas_feeder",
        "label": "Bandar Abbas port feeder (Highway 71)",
        "bbox": [27.12, 56.22, 27.22, 56.38],
        "country": "Iran",
        "category": "port_feeder",
    },
    {
        "id": "rotterdam_a15",
        "label": "Rotterdam A15 port feeder",
        "bbox": [51.88, 4.42, 51.96, 4.58],
        "country": "Netherlands",
        "category": "port_feeder",
    },
    {
        "id": "mombasa_nairobi_a109",
        "label": "Mombasa–Nairobi A109 corridor",
        # NOTE(review): this bbox lists min_lon (39.55) greater than max_lon
        # (37.00) and spans about 2.9 degrees of latitude. The pipeline's
        # analyze_corridor() raises ValueError for any span above 0.5 degrees,
        # so this preset cannot run as written — replace it with a short
        # highway segment like the other entries.
        "bbox": [-4.10, 39.55, -1.20, 37.00],
        "country": "Kenya",
        "category": "trade_corridor",
    },
    {
        "id": "braunschweig_a7",
        "label": "Braunschweig A7 (validation)",
        "bbox": [52.25, 10.45, 52.32, 10.55],
        "country": "Germany",
        "category": "validation",
    },
]
|
||||
|
||||
|
||||
def get_preset(preset_id: str) -> CorridorPreset | None:
    """Return the first preset whose ``id`` equals *preset_id*, or ``None`` if none matches."""
    matches = (entry for entry in CORRIDOR_PRESETS if entry["id"] == preset_id)
    return next(matches, None)
|
||||
@@ -0,0 +1,731 @@
|
||||
"""S2 truck motion detection core (DrishX / Fisser et al. 2022 — see third_party/drishx/NOTICE.md)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import imageio.v3 as imageio
|
||||
import numpy as np
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from shapely.geometry import LineString
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Offset in seconds between the B02 and B04 acquisitions; the speed estimate divides by it.
SECONDS_OFFSET_B02_B04 = 1.01

# Overpass API endpoints, tried in list order when fetching roads.
OVERPASS_MIRRORS = [
    "https://lz4.overpass-api.de/api/interpreter",
    "https://z.overpass-api.de/api/interpreter",
    "https://overpass.osm.ch/api/interpreter",
    "https://overpass-api.de/api/interpreter",
]

# Retry policy: up to two retries with backoff on rate-limit / server-error
# statuses, for both GET and POST.
_retry = Retry(
    total=2,
    backoff_factor=1.0,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["GET", "POST"],
)
_adapter = HTTPAdapter(max_retries=_retry)

# Shared session; the same retrying adapter serves both URL schemes.
_session = requests.Session()
_session.mount("http://", _adapter)
_session.mount("https://", _adapter)
|
||||
|
||||
|
||||
def _configure_osmnx(data_dir: str) -> None:
    """Point osmnx at the shared HTTP session and an on-disk cache under *data_dir*."""
    import osmnx as ox

    settings = ox.settings

    # Networking: reuse the module session, 30 s timeout, no rate-limit pausing.
    settings.requests_session = _session
    settings.requests_timeout = 30
    settings.overpass_rate_limit = False
    settings.max_query_area_size = 1_000_000_000_000

    # Quiet console output; cache responses in <data_dir>/osm_cache.
    settings.log_console = False
    settings.use_cache = True
    settings.cache_folder = os.path.join(data_dir, "osm_cache")
|
||||
|
||||
|
||||
def _default_rf_model_path() -> str:
|
||||
return str(Path(__file__).resolve().parents[2] / "data" / "drishx" / "rf_model.pickle")
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Helper math — mirrors S2TD.array_utils.math
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def normalized_ratio(a, b):
    """Return ``(a - b) / (a + b)`` as float32, with 0.0 wherever ``a + b`` is zero."""
    total = a + b
    difference = a - b
    # Division warnings are silenced; zero-denominator results are replaced below.
    with np.errstate(divide="ignore", invalid="ignore"):
        quotient = difference / total
        guarded = np.where(total != 0, quotient, 0.0)
    return guarded.astype(np.float32)
|
||||
|
||||
|
||||
def rescale_s2(bands):
    """Rescale Sentinel-2 L2A reflectance values (typically 0–10000 int) to 0–1 float.

    The input is copied to float32 first, so the caller's array is never
    modified. If the largest non-NaN value exceeds 10 the data is treated as
    DN-scaled and divided by 10000; otherwise it is returned as is.

    Empty and all-NaN inputs are returned unscaled. Previously the former
    raised ``ValueError`` inside ``np.nanmax`` and the latter emitted a
    ``RuntimeWarning``.
    """
    bands = bands.astype(np.float32)
    valid = bands[~np.isnan(bands)]
    if valid.size and valid.max() > 10:  # likely DN scale
        bands /= 10000.0
    return bands
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Array subset — exact replica of S2TD.pick_arr_subset
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def pick_arr_subset(arr, y, x, size):
    """Return a window of up to size×size centred on (y, x) from a 2D or 3D array.

    The window is clamped at index 0, so it is smaller near the top/left edge;
    slicing truncates it near the bottom/right edge. For 3D input the first
    axis is kept whole. Arrays of any other rank are returned unchanged.
    """
    before = size // 2
    after = size - before  # one extra cell on the far side when size is odd

    rows = slice(max(0, y - before), max(0, y + after))
    cols = slice(max(0, x - before), max(0, x + after))

    if arr.ndim == 2:
        return arr[rows, cols]
    if arr.ndim == 3:
        return arr[:, rows, cols]
    return arr
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Feature stack — exact 7 features as in S2TD._build_feature_stack (Table 1)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def build_feature_stack(data):
    """
    Build the 7-feature stack from Sentinel-2 bands.

    Input `data` shape: (H, W, 5) with channels [B04(R), B03(G), B02(B), B08(NIR), CLM].

    Feature order (Table 1, Fisser et al. 2022):
        0: variance of (B04, B03, B02)
        1: normalized_ratio(B04, B02)  — red / blue
        2: normalized_ratio(B03, B02)  — green / blue
        3: B04 - mean(B04)
        4: B03 - mean(B03)
        5: B02 - mean(B02)
        6: B08 - mean(B08)

    Returns a dict with "feature_stack" (7, H, W) float32, "bands" (the
    rescaled, cloud-masked R/G/B/NIR arrays) and "cloud_mask" (CLM > 0).
    """
    # Stack the four reflectance channels as float32 and rescale them together.
    reflectance = np.stack(
        [data[:, :, channel].astype(np.float32) for channel in range(4)],
        axis=0,
    )
    reflectance = rescale_s2(reflectance)
    red, green, blue, nir = reflectance[0], reflectance[1], reflectance[2], reflectance[3]

    # Cloud-flagged pixels become NaN in every band.
    cloud = data[:, :, 4] > 0
    for band in (red, green, blue, nir):
        band[cloud] = np.nan

    rows, cols = red.shape
    features = np.zeros((7, rows, cols), dtype=np.float32)

    if not np.any(~cloud):
        # Every pixel is cloud-masked: nothing to compute.
        features.fill(np.nan)
    else:
        import warnings

        # Silences warnings such as "Mean of empty slice" from the nan-aware reducers.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            features[0] = np.nanvar(np.stack([red, green, blue], axis=0), axis=0, ddof=0)
            features[1] = normalized_ratio(red, blue)
            features[2] = normalized_ratio(green, blue)
            features[3] = red - np.nanmean(red)
            features[4] = green - np.nanmean(green)
            features[5] = blue - np.nanmean(blue)
            features[6] = nir - np.nanmean(nir)

    # Wherever the centred red feature is NaN, blank all seven features.
    features[:, np.isnan(features[3])] = np.nan

    return {
        "feature_stack": features,
        "bands": {"R": red, "G": green, "B": blue, "NIR": nir},
        "cloud_mask": cloud,
    }
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# RF Model loading
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Path to the trained Random Forest model from S2TruckDetect
RF_MODEL_PATH = _default_rf_model_path()
# Process-wide cache: once a model has been loaded it is reused for every call.
_rf_model = None


def load_rf_model(path=None):
    """Load the trained RF model from pickle. Returns None if not found.

    :param path: optional pickle path; falls back to ``RF_MODEL_PATH``.
                 Ignored when a model is already cached from an earlier call.
    :return: the unpickled model, or ``None`` when the file is missing or
             cannot be loaded (the failure is logged, not raised).
    """
    global _rf_model
    if _rf_model is not None:
        return _rf_model

    p = path or RF_MODEL_PATH
    if not os.path.isfile(p):
        logger.warning(f"RF model not found at {p} — will use proxy classifier (lower accuracy)")
        return None

    try:
        # SECURITY: unpickling runs arbitrary code from the file. Only load
        # model files from a trusted location.
        # The context manager closes the handle even when unpickling fails.
        with open(p, "rb") as model_file:
            _rf_model = pickle.load(model_file)
    except Exception as e:
        logger.error(f"Failed to load RF model from {p}: {e}")
        return None

    logger.info(f"Loaded trained RF model from {p}")
    return _rf_model
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Classification — real RF (preferred) or proxy fallback
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def rf_classify(feature_stack, road_mask, rf_model):
    """
    Classify pixels using the trained Random Forest model.
    Port of S2TD._predict + _postprocess_prediction.

    :param feature_stack: (7, H, W) feature array
    :param road_mask: (H, W) binary road mask
    :param rf_model: trained classifier exposing ``predict_proba``
    :return: (probabilities (n_classes, H, W), prediction (H, W) int8).
             Labels are ``argmax + 1``; 0 marks pixels that are NaN, off-road
             or have no non-zero probability.
    """
    H, W = feature_stack.shape[1], feature_stack.shape[2]

    # One row per pixel, one column per feature — the layout sklearn expects.
    samples = feature_stack.reshape(feature_stack.shape[0], -1).T

    # A pixel is usable only when every feature is finite (neither NaN nor Inf).
    not_nan = np.isfinite(samples).all(axis=1)

    if not np.any(not_nan):
        # Graceful return if no valid pixels found (e.g., all cloud masked)
        probabilities_shaped = np.zeros((4, H, W), dtype=np.float32)
        classification = np.zeros((H, W), dtype=np.int8)
        return probabilities_shaped, classification

    # Run predict_proba on valid pixels only.
    predictions_flat = rf_model.predict_proba(samples[not_nan])

    # Scatter the per-class probabilities back onto the spatial grid.
    n_classes = predictions_flat.shape[1]
    probabilities_shaped = np.zeros((n_classes, H * W), dtype=np.float32)
    probabilities_shaped[:, not_nan] = predictions_flat.T
    probabilities_shaped = probabilities_shaped.reshape((n_classes, H, W))

    # Zero out NaN positions
    nan_2d = np.isnan(feature_stack[0])
    probabilities_shaped[:, nan_2d] = 0

    # Post-process: drop probabilities below 0.75 in class row 1.
    # NOTE(review): the original comment calls this "low-confidence
    # background", but proxy_classify applies the same threshold to row 0 —
    # confirm against the model's class order which row is background.
    probabilities_shaped[1][probabilities_shaped[1] < 0.75] = 0

    classification = np.nanargmax(probabilities_shaped, axis=0).astype(np.int8) + 1
    classification[np.max(probabilities_shaped, axis=0) == 0] = 0
    classification[nan_2d] = 0

    # Apply road mask
    rm = road_mask.astype(bool)
    classification[~rm] = 0

    return probabilities_shaped, classification
|
||||
|
||||
|
||||
def proxy_classify(feature_stack, road_mask):
    """
    Heuristic stand-in used when no RF model is available. Lower accuracy.

    Returns:
        probabilities:  (4, H, W) — class probs for [background, blue, green, red]
        prediction:     (H, W)    — int8 labels {0=nan, 1=background, 2=blue, 3=green, 4=red}
    """
    variance, ratio_rb, ratio_gb = feature_stack[0], feature_stack[1], feature_stack[2]
    red_c, green_c, blue_c = feature_stack[3], feature_stack[4], feature_stack[5]

    on_road = road_mask.astype(bool)
    invalid = np.isnan(red_c)
    excluded = ~on_road | invalid

    def colour_score(ratio_term, centred_band):
        # Weighted sum floored at zero; off-road and NaN pixels score nothing.
        score = np.clip(ratio_term * 2 + centred_band * 5 + variance * 10, 0, None)
        score[excluded] = 0
        return score

    blue_score = colour_score(-ratio_rb, blue_c)
    green_score = colour_score(ratio_gb, green_c)
    red_score = colour_score(ratio_rb, red_c)

    # The epsilon keeps the normalisation finite where all three scores are zero.
    total = blue_score + green_score + red_score + 1e-8

    height, width = feature_stack.shape[1], feature_stack.shape[2]
    probs = np.zeros((4, height, width), dtype=np.float32)
    probs[1] = blue_score / total
    probs[2] = green_score / total
    probs[3] = red_score / total
    probs[0] = 1.0 - np.max(probs[1:], axis=0)

    # Background below 0.75 is dropped so a colour class can win the argmax.
    background = probs[0]
    background[background < 0.75] = 0

    classification = np.nanargmax(probs, axis=0).astype(np.int8) + 1
    classification[np.max(probs, axis=0) == 0] = 0
    classification[invalid] = 0
    classification[~on_road] = 0

    return probs, classification
|
||||
|
||||
|
||||
def classify(feature_stack, road_mask, rf_model=None):
    """
    Single entry point for pixel classification.

    Delegates to the trained RF classifier when *rf_model* is given and to the
    heuristic proxy otherwise; returns whatever the chosen classifier returns.
    """
    if rf_model is None:
        logger.debug("Using proxy classifier (no RF model loaded)")
        return proxy_classify(feature_stack, road_mask)

    logger.debug("Using trained RF model for classification")
    return rf_classify(feature_stack, road_mask, rf_model)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Object extraction — faithful port of S2TD ObjectExtractor
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class ObjectExtractor:
    """
    Extracts truck objects from the RF prediction raster using recursive
    neighbourhood clustering, following the S2TD reference implementation.

    Prediction labels: 0 = none, 1 = background, 2 = blue, 3 = green, 4 = red.
    """

    def __init__(self, probabilities, lat_arr, lon_arr):
        """
        :param probabilities: (4, H, W) class probabilities
        :param lat_arr: 1-D array of latitude per row
        :param lon_arr: 1-D array of longitude per column
        """
        self.probabilities = probabilities
        self.lat = lat_arr
        self.lon = lon_arr

    def extract(self, predictions_arr):
        """Main extraction loop over all blue (class 2) seed pixels.

        :param predictions_arr: (H, W) integer label raster; it is copied, not modified.
        :return: list of detection dicts (see ``_postprocess_cluster``).
        """
        preds = predictions_arr.copy()
        probs = self.probabilities.copy()

        preds[preds == 1] = 0  # zero out background
        blue_ys, blue_xs = np.where(preds == 2)
        detections = []
        sub_size = 9
        new_value = 100  # marker written into cluster pixels

        for y_blue, x_blue in zip(blue_ys.tolist(), blue_xs.tolist()):
            # Seeds consumed by an earlier detection have been zeroed.
            if preds[y_blue, x_blue] == 0:
                continue

            subset_9 = pick_arr_subset(preds, y_blue, x_blue, sub_size).copy()
            subset_3 = pick_arr_subset(preds, y_blue, x_blue, 3).copy()
            subset_9_probs = pick_arr_subset(probs, y_blue, x_blue, sub_size).copy()

            # Index of the seed inside the window. The window starts at
            # max(0, coord - sub_size // 2), so the seed sits at
            # min(coord, sub_size // 2) even when the window is truncated at a
            # border. The previous code used the raw coordinate for truncated
            # windows and relied on an IndexError fallback that reset both axes.
            half_idx_y = min(y_blue, sub_size // 2)
            half_idx_x = min(x_blue, sub_size // 2)
            current_value = subset_9[half_idx_y, half_idx_x]

            # A candidate needs blue, green and red all present in its window.
            if not all(v in subset_9 for v in [2, 3, 4]):
                continue

            cluster, seen_idx, seen_vals, _ = self._cluster_array(
                arr=subset_9, probs=subset_9_probs,
                point=[half_idx_y, half_idx_x],
                new_value=new_value, current_value=current_value,
                yet_seen_indices=[], yet_seen_values=[],
                skipped_one=False,
            )

            if np.count_nonzero(cluster == new_value) < 3:
                continue

            det = self._postprocess_cluster(
                cluster, preds, probs, subset_3,
                y_blue, x_blue,
                half_idx_y, half_idx_x,
                new_value,
            )
            if det is not None:
                preds = det["updated_preds"]
                detections.append(det["detection"])

        return detections

    def _cluster_array(self, arr, probs, point, new_value, current_value,
                       yet_seen_indices, yet_seen_values, skipped_one):
        """Recursive neighbourhood clustering — port of S2TD._cluster_array.

        Starting at *point*, looks in the 3×3 neighbourhood for the next colour
        (``current_value + 1``) or, failing that, the same colour; marks the
        chosen neighbours with *new_value* and recurses from each of them.

        ``yet_seen_indices`` and ``yet_seen_values`` are mutated in place and
        shared across the whole recursion.

        :return: (marked array copy, seen indices, seen values, skipped_one)
        """
        if len(yet_seen_indices) == 0:
            yet_seen_indices.append(point)
            yet_seen_values.append(current_value)

        arr_mod = arr.copy()
        arr_mod[point[0], point[1]] = 0

        window_3x3 = pick_arr_subset(arr_mod, point[0], point[1], 3).copy()
        if window_3x3.shape[0] >= 2 and window_3x3.shape[1] >= 2:
            cy = min(1, window_3x3.shape[0] - 1)
            cx = min(1, window_3x3.shape[1] - 1)
            if window_3x3[cy, cx] == 2:
                window_3x3[window_3x3 == 4] = 1  # eliminate reds near blue

        y, x = point[0], point[1]
        window_3x3_probs = pick_arr_subset(probs, y, x, 3)

        windows = [window_3x3]
        windows_probs = [window_3x3_probs]
        if current_value == 4 or skipped_one:
            windows = windows[0:1]

        ys, xs = np.array([], dtype=int), np.array([], dtype=int)
        window_idx = 0
        offset_y, offset_x = 0, 0

        while len(ys) == 0 and window_idx < len(windows):
            window = windows[window_idx]
            window_p = windows_probs[window_idx]
            offset_y = window.shape[0] // 2
            offset_x = window.shape[1] // 2

            # Prefer stepping to the next colour; otherwise stay on the current one.
            go_next = (current_value + 1) in window or current_value == 2
            target_value = current_value + 1 if go_next else current_value
            match = window == target_value
            if np.count_nonzero(match) == 0:
                target_value = current_value
                match = window == target_value

            ys_found, xs_found = np.where(match)

            # Probability-based tie-breaking
            if len(ys_found) > 1 and window_p.ndim == 3 and window_p.shape[0] > (target_value - 1):
                wp_target = window_p[target_value - 1] * match
                max_prob_mask = (wp_target == np.max(wp_target))
                ys_found, xs_found = np.where(max_prob_mask)

            ys, xs = ys_found, xs_found
            window_idx += 1

        # Top-left corner of the 3×3 window in ``arr`` coordinates.
        ymin_w = max(0, point[0] - offset_y)
        xmin_w = max(0, point[1] - offset_x)

        for y_local, x_local in zip(ys, xs):
            ny, nx = ymin_w + int(y_local), xmin_w + int(x_local)
            if [ny, nx] in yet_seen_indices:
                continue
            if ny < 0 or ny >= arr.shape[0] or nx < 0 or nx >= arr.shape[1]:
                continue
            try:
                cv = arr[ny, nx]
            except IndexError:
                continue

            # Red already seen but this is green or blue → skip
            if 4 in yet_seen_values and cv <= 3:
                continue

            arr_mod[ny, nx] = new_value
            yet_seen_indices.append([ny, nx])
            yet_seen_values.append(cv)

            # Guard: avoid picking many more reds than blues and greens
            n_blue = sum(1 for v in yet_seen_values if v == 2)
            n_green = sum(1 for v in yet_seen_values if v == 3)
            n_red = sum(1 for v in yet_seen_values if v == 4)
            if n_red > n_blue and n_red > n_green:
                break

            arr_mod, yet_seen_indices, yet_seen_values, skipped_one = self._cluster_array(
                arr_mod, probs, [ny, nx], new_value, cv,
                yet_seen_indices, yet_seen_values, skipped_one,
            )

        arr_mod[point[0], point[1]] = new_value
        return arr_mod, yet_seen_indices, yet_seen_values, skipped_one

    def _postprocess_cluster(self, cluster, preds_copy, probs, subset_3,
                             y_blue, x_blue, half_idx_y, half_idx_x,
                             new_value):
        """Validate cluster and produce a detection dict — port of S2TD._postprocess_cluster.

        :return: ``None`` when the cluster is rejected, otherwise
                 ``{"updated_preds": raster with the detection zeroed,
                 "detection": dict}``. *cluster* and *preds_copy* are modified
                 in place.
        """
        # Add neighbouring blues from the 3×3 window.
        # NOTE(review): the "- 1" assumes the seed is at index 1 of subset_3,
        # which is not the case at the top/left border; the clip keeps the
        # index valid but may shift it by one pixel there.
        ys_ba, xs_ba = np.where(subset_3 == 2)
        ys_ba = ys_ba + half_idx_y - 1
        xs_ba = xs_ba + half_idx_x - 1
        for yb, xb in zip(ys_ba, xs_ba):
            yb_c = int(np.clip(yb, 0, cluster.shape[0] - 1))
            xb_c = int(np.clip(xb, 0, cluster.shape[1] - 1))
            cluster[yb_c, xb_c] = new_value

        cluster[cluster != new_value] = 0
        cys, cxs = np.where(cluster == new_value)
        if len(cys) == 0:
            return None

        # Map subset coords back to full array
        ymin_sub = int(np.clip(y_blue - half_idx_y, 0, np.inf))
        xmin_sub = int(np.clip(x_blue - half_idx_x, 0, np.inf))
        cys_full = cys + ymin_sub
        cxs_full = cxs + xmin_sub

        ymin = int(np.min(cys_full))
        xmin = int(np.min(cxs_full))
        ymax = int(np.max(cys_full)) + 1  # +1: box extends to upper bound of pixel
        xmax = int(np.max(cxs_full)) + 1

        H, W = preds_copy.shape
        ymin, ymax = max(0, ymin), min(H, ymax)
        xmin, xmax = max(0, xmin), min(W, xmax)

        box_preds = preds_copy[ymin:ymax, xmin:xmax].copy()
        box_probs = probs[1:, ymin:ymax, xmin:xmax].copy()  # classes 2,3,4 → indices 0,1,2

        # Spectral probability scores: best probability per colour inside the box.
        max_probs = []
        for cls_offset, cls_val in enumerate([2, 3, 4]):
            mask = (box_preds == cls_val)
            vals = box_probs[cls_offset] * mask
            mp = float(np.nanmax(vals)) if np.any(mask) else 0.0
            max_probs.append(mp)

        mean_max_spectral_probability = float(np.nanmean(max_probs))
        mean_spectral_probability = float(np.nanmean(np.nanmax(box_probs, axis=0)))

        # Validation checks: all three colours present, box between 3 and 5 px on its long side.
        all_given = all(v in box_preds for v in [2, 3, 4])
        large_enough = box_preds.shape[0] > 2 or box_preds.shape[1] > 2
        too_large = box_preds.shape[0] > 5 or box_preds.shape[1] > 5

        if too_large or not all_given or not large_enough:
            return None

        # Score: sum of the two probability terms; at most 1.2 is rejected.
        score = mean_max_spectral_probability + mean_spectral_probability
        if score <= 1.2:
            return None

        # Direction (blue → red vector), taken from the first blue and first red pixel.
        by, bx = np.where(box_preds == 2)
        ry, rx = np.where(box_preds == 4)
        blue_idx = np.array([by[0], bx[0]], dtype=np.int8)
        red_idx = np.array([ry[0], rx[0]], dtype=np.int8)
        vector = (blue_idx - red_idx) * np.array([1, -1], dtype=np.int8)
        heading = float(np.degrees(np.arctan2(vector[1], vector[0])) % 360)

        # Speed from the box's longest side (presumably 10 m per pixel — confirm).
        diameter = max(box_preds.shape) * 10 - 10
        speed_kmh = float(np.sqrt(diameter * 20) / SECONDS_OFFSET_B02_B04 * 3.6)

        # Geo-coordinates (centre of detection box)
        lat_centre = float((self.lat[ymin] + self.lat[min(ymax, len(self.lat) - 1)]) / 2)
        lon_centre = float((self.lon[xmin] + self.lon[min(xmax, len(self.lon) - 1)]) / 2)

        # Zero out detected pixels to prevent re-detection
        preds_copy[ymin:ymax, xmin:xmax] *= np.zeros_like(box_preds)
        # Also zero 3×3 around blue pixels
        blue_in_box = np.where(box_preds == 2)
        for yb, xb in zip(blue_in_box[0], blue_in_box[1]):
            y0, y1 = max(0, ymin + yb - 1), min(H, ymin + yb + 2)
            x0, x1 = max(0, xmin + xb - 1), min(W, xmin + xb + 2)
            preds_copy[y0:y1, x0:x1] *= (preds_copy[y0:y1, x0:x1] != 2).astype(np.int8)

        crop_id = f"truck_{int(time.time() * 1000)}_{ymin}_{xmin}.png"

        return {
            "updated_preds": preds_copy,
            "detection": {
                "lat": lat_centre,
                "lon": lon_centre,
                "confidence": float(min(score / 2.4, 1.0)),
                "s_score": round(score, 3),
                "speed_kmh": round(speed_kmh, 1),
                "heading": round(heading, 1),
                "heading_desc": self._direction_to_compass(heading),
                "id": crop_id,
                "image_url": f"/detections/{crop_id}",
                "box_shape": list(box_preds.shape),
                "max_probs": {"blue": max_probs[0], "green": max_probs[1], "red": max_probs[2]},
            },
        }

    @staticmethod
    def _direction_to_compass(deg):
        """Return the nearest 8-point compass label for a heading in degrees.

        Distance is measured around the circle, so headings just below 360°
        map to "N". The previous linear distance mapped e.g. 350° to "NW".
        """
        bins = np.arange(0, 360, 45, dtype=np.float32)
        labels = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]
        diff = np.abs(bins - (deg % 360))
        circular = np.minimum(diff, 360 - diff)
        return labels[int(np.argmin(circular))]
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# ARGUS Engine
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class S2TruckEngine:
    """Fetches road geometry and runs truck detection on Sentinel-2 band arrays."""

    def __init__(self, *, cache_dir: str, detection_dir: str, rf_model_path: str | None = None):
        """
        :param cache_dir: directory handed to the osmnx configuration for its cache
        :param detection_dir: directory where detection crops are written (created if missing)
        :param rf_model_path: optional path to the pickled RF model
        """
        self.cache_dir = cache_dir
        self.detection_dir = detection_dir
        os.makedirs(self.detection_dir, exist_ok=True)
        _configure_osmnx(cache_dir)
        self.rf_model = load_rf_model(rf_model_path)

    def fetch_roads(self, bbox_coords, progress_cb=None):
        """Fetch major roads with automatic mirror rotation and fallbacks.

        Tries osmnx against each Overpass mirror in turn, then a raw Overpass
        query against the first mirror.

        :param bbox_coords: [min_lat, min_lon, max_lat, max_lon]
        :param progress_cb: optional callable ``(message, percent)``
        :return: GeoDataFrame of road geometries; empty when every attempt fails
        """
        import geopandas as gpd
        import osmnx as ox

        def log(msg, level="info", pct=None):
            if level == "info":
                logger.info(msg)
            elif level == "warn":
                logger.warning(msg)
            if progress_cb:
                progress_cb(msg, pct)

        min_lat, min_lon, max_lat, max_lon = bbox_coords
        center_lat = (min_lat + max_lat) / 2
        center_lon = (min_lon + max_lon) / 2

        # Query radius: 60 % of the longer bbox side in metres, plus a 1 km margin.
        lat_span = (max_lat - min_lat) * 111000
        lon_span = (max_lon - min_lon) * 111000 * np.cos(np.radians(center_lat))
        dist_m = int(max(lat_span, lon_span) * 0.6) + 1000

        log(f"Starting road discovery (ROI: {center_lat:.4f}, {center_lon:.4f})", pct=5)

        for i, mirror in enumerate(OVERPASS_MIRRORS):
            log(f"Trying mirror {i+1}/{len(OVERPASS_MIRRORS)}: {mirror}", pct=10 + i * 5)
            ox.settings.overpass_url = mirror
            try:
                graph = ox.graph_from_point(
                    (center_lat, center_lon), dist=dist_m,
                    network_type="drive", simplify=True,
                    retain_all=False, truncate_by_edge=True,
                )
                roads = ox.graph_to_gdfs(graph, nodes=False)
                major_types = [
                    "motorway", "trunk", "primary", "secondary",
                    "motorway_link", "trunk_link", "primary_link",
                ]
                roads = roads[roads["highway"].isin(major_types)].copy()
                if not roads.empty:
                    logger.info(f"Fetched {len(roads)} major roads from {mirror}")
                    return roads
            except Exception as e:
                logger.warning(f"Mirror {mirror} failed: {e}")
                time.sleep(1)

        # Raw Overpass fallback
        logger.warning("All mirrors failed. Trying raw Overpass query.")
        try:
            query = f"""
            [out:json][timeout:60];
            (way["highway"~"motorway|trunk|primary"]({min_lat},{min_lon},{max_lat},{max_lon}););
            out body; >; out skel qt;
            """
            # Use the shared session so the fallback gets the same retry policy
            # as the rest of the module (it previously used bare requests.post).
            resp = _session.post(OVERPASS_MIRRORS[0], data={"data": query}, timeout=60)
            if resp.status_code == 200:
                data = resp.json()
                nodes = {n["id"]: (n["lon"], n["lat"]) for n in data["elements"] if n["type"] == "node"}
                ways = []
                for w in data["elements"]:
                    if w["type"] == "way" and "nodes" in w:
                        coords = [nodes[nid] for nid in w["nodes"] if nid in nodes]
                        if len(coords) > 1:
                            # A way without "tags" must not abort the whole fallback.
                            highway = w.get("tags", {}).get("highway")
                            ways.append({"geometry": LineString(coords), "highway": highway})
                if ways:
                    roads = gpd.GeoDataFrame(ways, crs="EPSG:4326")
                    logger.info(f"Raw fallback: {len(roads)} roads")
                    return roads
            else:
                logger.warning(f"Raw fallback returned HTTP {resp.status_code}")
        except Exception as e:
            logger.error(f"Raw fallback failed: {e}")

        return gpd.GeoDataFrame()

    def detect_trucks(self, data, bbox_coords, timestamp, road_mask):
        """
        Detect trucks using corrected Fisser et al. methodology.

        :param data: (H, W, 5) array — [B04, B03, B02, B08, CLM]
        :param bbox_coords: [min_lat, min_lon, max_lat, max_lon]
        :param timestamp: str ISO timestamp
        :param road_mask: (H, W) binary mask of road pixels
        :return: list of detection dicts, each with "timestamp" added
        """
        min_lat, min_lon, max_lat, max_lon = bbox_coords
        H, W = data.shape[:2]

        # 1. Build feature stack
        feat = build_feature_stack(data)
        feature_stack = feat["feature_stack"]

        # 2. Classify (real RF if loaded, proxy fallback otherwise)
        probs, prediction = classify(feature_stack, road_mask, self.rf_model)

        # 3. Lat/lon arrays for geo-referencing
        lat_arr = np.linspace(max_lat, min_lat, H)  # top to bottom
        lon_arr = np.linspace(min_lon, max_lon, W)  # left to right

        # 4. Object extraction
        extractor = ObjectExtractor(probs, lat_arr, lon_arr)
        detections = extractor.extract(prediction)

        # 5. Add timestamp and save crops; a failed crop never drops the detection.
        for det in detections:
            det["timestamp"] = timestamp
            try:
                self._save_crop(data, det, H, W, min_lat, min_lon, max_lat, max_lon)
            except Exception as e:
                logger.warning(f"Could not save crop for {det['id']}: {e}")

        return detections

    def _save_crop(self, data, det, H, W, min_lat, min_lon, max_lat, max_lon):
        """Save a 20×20 RGB crop centred on the detection (smaller at image borders)."""
        # Convert the detection's lat/lon back to a pixel position.
        cy = int((max_lat - det["lat"]) / (max_lat - min_lat + 1e-9) * H)
        cx = int((det["lon"] - min_lon) / (max_lon - min_lon + 1e-9) * W)
        cy, cx = int(np.clip(cy, 0, H - 1)), int(np.clip(cx, 0, W - 1))

        y0, y1 = max(0, cy - 10), min(H, cy + 10)
        x0, x1 = max(0, cx - 10), min(W, cx + 10)

        rgb = data[y0:y1, x0:x1, :3].astype(np.float32)
        rgb = rescale_s2(rgb)
        # Stretch reflectance 0–0.3 to the full 8-bit range.
        rgb = (np.clip(rgb, 0, 0.3) / 0.3 * 255).astype(np.uint8)

        path = os.path.join(self.detection_dir, det["id"])
        imageio.imwrite(path, rgb)
|
||||
@@ -0,0 +1,145 @@
|
||||
"""Disk persistence for road corridor trend runs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .config import DATA_ROOT, STATE_PATH
|
||||
from .presets import CORRIDOR_PRESETS, get_preset
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
return datetime.now(timezone.utc).replace(microsecond=0).isoformat()
|
||||
|
||||
|
||||
def preset_result_path(preset_id: str) -> Path:
    """Return the JSON result file for *preset_id* directly under ``DATA_ROOT``."""
    filename = f"{preset_id}.json"
    return DATA_ROOT / filename
|
||||
|
||||
|
||||
def load_preset_result(preset_id: str) -> dict[str, Any] | None:
    """Load the stored result for *preset_id*.

    Returns ``None`` when no result file exists, when it cannot be read or
    decoded (a warning is logged), or when its top-level JSON value is not an
    object.
    """
    path = preset_result_path(preset_id)
    if not path.is_file():
        return None
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError,
        # which a file with invalid UTF-8 previously raised uncaught.
        logger.warning("Could not read road corridor result %s: %s", path, exc)
        return None
    if not isinstance(payload, dict):
        # Callers treat the result as a mapping; reject lists, strings, etc.
        logger.warning("Could not read road corridor result %s: %s", path, "top-level JSON value is not an object")
        return None
    return payload
|
||||
|
||||
|
||||
def save_preset_result(preset_id: str, payload: dict[str, Any]) -> None:
    """Write *payload* as indented JSON to the preset's result file, creating ``DATA_ROOT`` if needed."""
    DATA_ROOT.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2)
    preset_result_path(preset_id).write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def load_refresh_state() -> dict[str, str]:
    """Load the refresh-state mapping from ``STATE_PATH``.

    Keys and values are coerced to ``str``. Returns an empty dict when the
    file is missing, unreadable, not valid JSON, or not a JSON object.
    """
    if not STATE_PATH.is_file():
        return {}
    try:
        raw = json.loads(STATE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(raw, dict):
        # A list or scalar has no .items(); previously this raised AttributeError.
        return {}
    return {str(k): str(v) for k, v in raw.items()}
|
||||
|
||||
|
||||
def save_refresh_state(state: dict[str, str]) -> None:
    """Write *state* as indented JSON to ``STATE_PATH``, creating ``DATA_ROOT`` if needed."""
    DATA_ROOT.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(state, indent=2)
    STATE_PATH.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def mark_preset_refreshed(preset_id: str) -> None:
    """Record the current UTC time as the refresh time of *preset_id* and persist the state."""
    refreshed = load_refresh_state()
    refreshed[preset_id] = _utc_now_iso()
    save_refresh_state(refreshed)
|
||||
|
||||
|
||||
def list_corridor_summaries() -> list[dict[str, Any]]:
    """Return one summary per preset, in preset order.

    A preset with a non-empty stored result contributes that result; any other
    preset contributes a ``never_run`` placeholder built from its metadata.
    """
    return [
        load_preset_result(preset["id"])
        or {
            "preset_id": preset["id"],
            "label": preset["label"],
            "bbox": preset["bbox"],
            "country": preset["country"],
            "category": preset["category"],
            "status": "never_run",
            "daily_counts": [],
            "total_detections": 0,
        }
        for preset in CORRIDOR_PRESETS
    ]
|
||||
|
||||
|
||||
def build_trends_payload() -> dict[str, Any]:
    """Return the trends payload: a generation timestamp plus every corridor summary."""
    generated_at = _utc_now_iso()
    corridors = list_corridor_summaries()
    return {"updated_at": generated_at, "corridors": corridors}
|
||||
|
||||
|
||||
def store_analysis_result(
    preset_id: str,
    *,
    label: str,
    bbox: list[float],
    country: str,
    category: str,
    road_count: int,
    frame_count: int,
    detections: list[dict[str, Any]],
    status: str = "ok",
    error: str | None = None,
) -> dict[str, Any]:
    """Summarise one analysis run, persist it, and mark the preset refreshed.

    Detections are bucketed per day using the first 10 characters of their
    ``timestamp``. Detections without a timestamp still count towards
    ``total_detections`` but are left out of ``daily_counts``.

    :return: the payload that was written to disk
    """
    daily: dict[str, int] = {}
    for det in detections:
        raw_ts = det.get("timestamp")
        if raw_ts is None:
            # str(None)[:10] would otherwise create a bogus "None" date bucket.
            continue
        day = str(raw_ts)[:10]
        if day:
            daily[day] = daily.get(day, 0) + 1
    daily_counts = [{"date": day, "count": daily[day]} for day in sorted(daily)]

    payload = {
        "preset_id": preset_id,
        "label": label,
        "bbox": bbox,
        "country": country,
        "category": category,
        "updated_at": _utc_now_iso(),
        "road_count": road_count,
        "frame_count": frame_count,
        "total_detections": len(detections),
        "daily_counts": daily_counts,
        "status": status,
        "error": error,
    }
    save_preset_result(preset_id, payload)
    mark_preset_refreshed(preset_id)
    return payload
|
||||
|
||||
|
||||
def preset_metadata(preset_id: str) -> dict[str, Any] | None:
    """Return the stored result for ``preset_id``, or a placeholder for it.

    Returns ``None`` when ``get_preset`` does not know the id. When the
    preset exists but has no stored result, a ``"never_run"`` placeholder
    built from the preset definition is returned.
    """
    preset = get_preset(preset_id)
    if preset is None:
        return None

    never_run = {
        "preset_id": preset["id"],
        "label": preset["label"],
        "bbox": preset["bbox"],
        "country": preset["country"],
        "category": preset["category"],
        "status": "never_run",
        "daily_counts": [],
        "total_detections": 0,
    }
    stored = load_preset_result(preset_id)
    return stored if stored else never_run
|
||||
@@ -0,0 +1,19 @@
|
||||
"""Map-viewport helpers for on-demand corridor analysis."""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
|
||||
|
||||
def bbox_around_point(lat: float, lon: float, *, half_span_deg: float = 0.04) -> list[float]:
    """Return a square AOI ``[lat_min, lon_min, lat_max, lon_max]`` centred on a point.

    ``half_span_deg`` is clamped to the range 0.02-0.24 degrees, so the box
    is never wider than 0.48 degrees (kept under the 0.5 degree engine cap
    noted by the original author).
    """
    span = min(max(half_span_deg, 0.02), 0.24)
    south, west = lat - span, lon - span
    north, east = lat + span, lon + span
    return [south, west, north, east]
|
||||
|
||||
|
||||
def adhoc_preset_id(lat: float, lon: float) -> str:
    """Return a stable ``adhoc_<12 hex chars>`` id for a map point.

    The id is derived from the coordinates formatted to four decimal
    places, so points that agree at that precision share an id.
    """
    key = f"{lat:.4f},{lon:.4f}"
    fingerprint = hashlib.sha256(key.encode()).hexdigest()
    return "adhoc_" + fingerprint[:12]
|
||||
|
||||
|
||||
def default_label_for_point(lat: float, lon: float) -> str:
    """Return the default display label for an ad-hoc map-centre analysis."""
    coords = f"{lat:.4f}, {lon:.4f}"
    return f"Map center ({coords})"
|
||||
@@ -0,0 +1 @@
|
||||
"""Sanctions screening (OpenSanctions OFAC SDN)."""
|
||||
@@ -0,0 +1,154 @@
|
||||
"""OFAC SDN index via OpenSanctions (adapted from Osiris sanctions.ts)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SDN_CSV_URL = "https://data.opensanctions.org/datasets/latest/us_ofac_sdn/targets.simple.csv"
|
||||
TTL_S = 24 * 60 * 60
|
||||
|
||||
_lock = threading.Lock()
|
||||
_cache: dict[str, Any] | None = None
|
||||
_cache_at: float = 0.0
|
||||
_inflight: threading.Event | None = None
|
||||
|
||||
|
||||
@dataclass
class SanctionEntry:
    """One sanctioned target parsed from a row of the SDN CSV."""

    # Values taken directly from the "id", "schema" and "name" columns.
    id: str
    schema: str
    name: str
    # Semicolon-separated CSV columns, already split into lists.
    aliases: list[str] = field(default_factory=list)
    countries: list[str] = field(default_factory=list)
    programs: list[str] = field(default_factory=list)
    # Raw text of the "sanctions" column.
    sanctions: str = ""
    first_seen: str | None = None
    last_seen: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the entry as a plain dict (list fields are shared, not copied)."""
        exported = (
            "id",
            "schema",
            "name",
            "aliases",
            "countries",
            "programs",
            "sanctions",
            "first_seen",
            "last_seen",
        )
        return {attr: getattr(self, attr) for attr in exported}
|
||||
|
||||
|
||||
def norm_name(s: str) -> str:
    """Normalise a name for matching.

    Lower-cases the text, replaces each run of characters that are neither
    word characters nor whitespace with a single space, then collapses
    whitespace runs and trims both ends.
    """
    lowered = s.lower()
    without_punct = re.sub(r"[^\w\s]+", " ", lowered, flags=re.UNICODE)
    collapsed = re.sub(r"\s+", " ", without_punct)
    return collapsed.strip()
|
||||
|
||||
|
||||
def _split_semi(val: str) -> list[str]:
|
||||
return [x.strip() for x in (val or "").split(";") if x.strip()]
|
||||
|
||||
|
||||
def _load_list() -> dict[str, Any]:
    """Return the SDN index, downloading and parsing the CSV when needed.

    A cached index younger than ``TTL_S`` seconds is returned as-is.
    Otherwise the CSV at ``SDN_CSV_URL`` is fetched and parsed into
    ``{"entries": [...], "by_norm": {...}, "fetched_at": <epoch>}``, where
    ``by_norm`` maps each normalised name or alias to its entries.

    Raises:
        Exception: Whatever the fetch or parse raised, but only when no
            earlier index is cached; with a cached index the failure is
            logged and the cached index is returned instead.
    """
    global _cache, _cache_at

    with _lock:
        cache_is_fresh = bool(_cache) and (time.time() - _cache_at) < TTL_S
        if cache_is_fresh:
            return _cache

    try:
        resp = fetch_with_curl(SDN_CSV_URL, timeout=45, headers={"Accept": "text/csv"})
        if resp.status_code != 200:
            raise RuntimeError(f"OpenSanctions HTTP {resp.status_code}")

        entries: list[SanctionEntry] = []
        by_norm: dict[str, list[SanctionEntry]] = {}
        for row in csv.DictReader(io.StringIO(resp.text)):
            name = (row.get("name") or "").strip()
            if not name:
                # Rows without a primary name cannot be matched; skip them.
                continue
            entry = SanctionEntry(
                id=row.get("id") or "",
                schema=row.get("schema") or "LegalEntity",
                name=name,
                aliases=_split_semi(row.get("aliases") or ""),
                countries=_split_semi(row.get("countries") or ""),
                programs=_split_semi(row.get("program_ids") or ""),
                sanctions=row.get("sanctions") or "",
                first_seen=row.get("first_seen") or None,
                last_seen=row.get("last_seen") or None,
            )
            entries.append(entry)

            # Index under the name and every alias; the set de-duplicates
            # keys so an entry is listed once per distinct normalised form.
            lookup_keys = {norm_name(name)}
            lookup_keys.update(norm_name(alias) for alias in entry.aliases)
            for key in lookup_keys:
                if key:
                    by_norm.setdefault(key, []).append(entry)

        loaded = {"entries": entries, "by_norm": by_norm, "fetched_at": time.time()}
        with _lock:
            _cache = loaded
            _cache_at = time.time()
        logger.info("OFAC SDN index loaded: %s entries", len(entries))
        return loaded
    except Exception as exc:
        logger.error("OFAC SDN load failed: %s", exc)
        # Prefer a stale index over failing the caller outright.
        with _lock:
            if _cache:
                return _cache
        raise
|
||||
|
||||
|
||||
def match_exact(query: str) -> list[dict[str, Any]]:
    """Return entries whose normalised name or alias equals the normalised query.

    Queries shorter than three characters return an empty list without
    loading the index.
    """
    if not (query and len(query) >= 3):
        return []
    index = _load_list()["by_norm"]
    matches = index.get(norm_name(query), [])
    return [entry.to_dict() for entry in matches]
|
||||
|
||||
|
||||
def search_sanctions(query: str, *, schema: str | None = None, limit: int = 50) -> list[dict[str, Any]]:
    """Search the SDN index by name or alias, best matches first.

    Results are ordered: exact name match, exact alias match, substring of
    the name, substring of an alias. Entries are de-duplicated by ``id``.

    Args:
        query: Free-text query. Queries shorter than four characters, or
            that normalise to an empty string (punctuation only), return
            an empty list.
        schema: When given, only entries whose ``schema`` equals it are kept.
        limit: Maximum number of results; non-positive values return ``[]``.

    Returns:
        Up to ``limit`` entry dicts as produced by ``SanctionEntry.to_dict``.
    """
    if not query or len(query) < 4 or limit <= 0:
        return []
    data = _load_list()
    q = norm_name(query)
    if not q:
        # An empty needle is a substring of every name and would otherwise
        # match the whole list.
        return []

    exact_name: list[SanctionEntry] = []
    exact_alias: list[SanctionEntry] = []
    sub_name: list[SanctionEntry] = []
    sub_alias: list[SanctionEntry] = []
    seen: set[str] = set()

    def push(bucket: list[SanctionEntry], entry: SanctionEntry) -> None:
        if entry.id in seen:
            return
        if schema and entry.schema != schema:
            return
        seen.add(entry.id)
        bucket.append(entry)

    # Exact hits come from the prebuilt index, so the scan cap below can
    # never cut them off.
    for entry in data["by_norm"].get(q, []):
        push(exact_name if norm_name(entry.name) == q else exact_alias, entry)

    # Substring hits need a linear scan; stop once enough candidates exist.
    scan_cap = limit * 4
    for entry in data["entries"]:
        if len(seen) >= scan_cap:
            break
        if entry.id in seen:
            continue
        if q in norm_name(entry.name):
            push(sub_name, entry)
        elif any(q in norm_name(alias) for alias in entry.aliases):
            push(sub_alias, entry)

    ordered = exact_name + exact_alias + sub_name + sub_alias
    return [entry.to_dict() for entry in ordered[:limit]]
|
||||
|
||||
|
||||
def index_size() -> int:
    """Return the number of entries in the SDN index, loading it if needed."""
    entries = _load_list()["entries"]
    return len(entries)
|
||||
@@ -0,0 +1 @@
|
||||
"""Supply-chain risk overlay."""
|
||||
@@ -0,0 +1,154 @@
|
||||
"""SCM supplier risk overlay (Osiris port, uses in-memory dashboard data)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from services.fetchers._store import _data_lock, _mark_fresh, get_latest_data_subset_refs, is_any_active, latest_data
|
||||
from services.network_utils import fetch_with_curl
|
||||
|
||||
# Static catalogue of monitored supplier sites. Each row below is
# (id, name, city, country, lat, lng, category) and is expanded into a dict
# with exactly those keys, in that order.
SUPPLIERS: list[dict[str, Any]] = [
    dict(zip(("id", "name", "city", "country", "lat", "lng", "category"), row))
    for row in (
        ("sup-tsmc-hsinchu", "TSMC Fab 12 (Tier 1)", "Hsinchu", "Taiwan", 24.774, 120.992, "Semiconductor"),
        ("sup-tsmc-tainan", "TSMC Fab 14 (Tier 1)", "Tainan", "Taiwan", 23.111, 120.273, "Semiconductor"),
        ("sup-sec-giheung", "Samsung Electronics (Tier 1)", "Giheung", "South Korea", 37.221, 127.098, "Semiconductor"),
        ("sup-sk-icheon", "SK Hynix (Tier 1)", "Icheon", "South Korea", 37.256, 127.483, "Semiconductor"),
        ("sup-sony-kumamoto", "Sony Semiconductor (Tier 2)", "Kikuyo", "Japan", 32.883, 130.825, "Electronics"),
        ("sup-mlcc-murata", "Murata MLCC (Tier 2)", "Izumo", "Japan", 35.361, 132.756, "Electronics"),
        ("sup-bosch-stuttgart", "Bosch Auto Parts (Tier 1)", "Stuttgart", "Germany", 48.815, 9.176, "Automotive"),
        ("sup-zf-bavaria", "ZF Friedrichshafen (Tier 1)", "Friedrichshafen", "Germany", 47.662, 9.489, "Automotive"),
        ("sup-valeo-paris", "Valeo R&D (Tier 2)", "Paris", "France", 48.878, 2.308, "Automotive"),
        ("sup-magna-celaya", "Magna Assembly (Tier 2)", "Celaya", "Mexico", 20.525, -100.814, "Automotive"),
        ("sup-denso-monterrey", "Denso Corp (Tier 1)", "Monterrey", "Mexico", 25.772, -100.174, "Automotive"),
        ("sup-catl-ningde", "CATL Battery HQ (Tier 1)", "Ningde", "China", 26.666, 119.544, "Battery"),
        ("sup-byd-shenzhen", "BYD Gigafactory (Tier 1)", "Shenzhen", "China", 22.684, 114.341, "Battery"),
        ("sup-panasonic-nevada", "Panasonic Giga (Tier 1)", "Sparks", "US", 39.539, -119.439, "Battery"),
    )
]
|
||||
|
||||
|
||||
def _distance_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
|
||||
dx = (lng1 - lng2) * math.cos(math.radians((lat1 + lat2) / 2))
|
||||
dy = lat1 - lat2
|
||||
return math.sqrt(dx * dx + dy * dy) * 111.32
|
||||
|
||||
|
||||
def _seismic_risk_level(distance_km: float, magnitude: float) -> str | None:
|
||||
"""Meaningful fab impact only — ignore routine micro-quakes (e.g. Taiwan M3.x)."""
|
||||
if magnitude < 4.5:
|
||||
return None
|
||||
if magnitude >= 6.0 and distance_km <= 200:
|
||||
return "CRITICAL"
|
||||
if magnitude >= 5.5 and distance_km <= 75:
|
||||
return "CRITICAL"
|
||||
if magnitude >= 5.0 and distance_km <= 100:
|
||||
return "HIGH"
|
||||
if magnitude >= 4.5 and distance_km <= 40:
|
||||
return "HIGH"
|
||||
return None
|
||||
|
||||
|
||||
def _apply_seismic_threats(suppliers: list[dict[str, Any]], earthquakes: list[dict[str, Any]]) -> None:
    """Raise supplier risk levels for nearby significant earthquakes, in place.

    For each supplier the most severe qualifying quake is chosen (ties are
    broken by larger magnitude). That quake may raise the supplier's
    ``risk_level`` and always appends one ``SEISMIC PROXIMITY`` entry to its
    ``active_threats``.

    Args:
        suppliers: Supplier dicts with ``lat``, ``lng``, ``risk_level`` and
            ``active_threats`` keys; mutated in place.
        earthquakes: Quake dicts read via ``lat``, ``lng``/``lon`` and
            ``mag``/``magnitude``. Records with missing or non-numeric
            values are skipped.
    """
    severity = {"HIGH": 1, "CRITICAL": 2}
    for sup in suppliers:
        best: tuple[str, float] | None = None
        for eq in earthquakes:
            lat = eq.get("lat")
            lng = eq.get("lng")
            if lng is None:
                # Explicit None test: a longitude of exactly 0.0 is valid.
                lng = eq.get("lon")
            if lat is None or lng is None:
                continue
            try:
                mag = float(eq.get("mag") or eq.get("magnitude") or 0)
                quake_lat, quake_lng = float(lat), float(lng)
            except (TypeError, ValueError):
                # One malformed record must not abort the whole pass.
                continue
            if mag < 4.5:
                continue
            dist = _distance_km(sup["lat"], sup["lng"], quake_lat, quake_lng)
            level = _seismic_risk_level(dist, mag)
            if not level:
                continue
            if best is None:
                best = (level, mag)
                continue
            cur, prev = severity[level], severity[best[0]]
            if cur > prev or (cur == prev and mag > best[1]):
                best = (level, mag)

        if best is None:
            continue
        level, mag = best
        current = sup["risk_level"]
        # Only ever escalate: NORMAL takes any level, HIGH becomes CRITICAL.
        if current == "NORMAL" or (level == "CRITICAL" and current != "CRITICAL"):
            sup["risk_level"] = level
        sup["active_threats"].append(f"SEISMIC PROXIMITY (M{mag:.1f})")
|
||||
|
||||
|
||||
def _scm_records(value: Any) -> list[dict[str, Any]]:
    """Return the dict records of a layer value, unwrapping ``{"features": [...]}``."""
    if isinstance(value, dict):
        value = value.get("features")
    if not isinstance(value, (list, tuple)):
        return []
    return [record for record in value if isinstance(record, dict)]


def _scm_coords(
    record: dict[str, Any],
    lat_keys: tuple[str, ...],
    lng_keys: tuple[str, ...],
) -> tuple[float, float] | None:
    """Return ``(lat, lng)`` from the first populated key of each group, else None."""
    lat = next((record[k] for k in lat_keys if record.get(k) not in (None, "")), None)
    lng = next((record[k] for k in lng_keys if record.get(k) not in (None, "")), None)
    if lat is None or lng is None:
        return None
    try:
        return float(lat), float(lng)
    except (TypeError, ValueError):
        return None


def build_scm_payload() -> dict[str, Any]:
    """Build the supplier risk overlay from the in-memory dashboard layers.

    Every supplier starts at ``"NORMAL"`` and is then escalated by:
    earthquakes (see ``_apply_seismic_threats``), fire hotspots closer than
    50 km (at least ``"HIGH"``), and ``gdelt`` events closer than 100 km
    (``"CRITICAL"``). When the earthquake layer is empty, the USGS M4.5+
    daily feed is tried as a best-effort fallback.

    Returns:
        A dict with ``suppliers``, ``total``, ``critical_count`` and an ISO
        ``timestamp`` in UTC.
    """
    import logging  # function-scope: this module does not import logging

    suppliers = [{**s, "risk_level": "NORMAL", "active_threats": []} for s in SUPPLIERS]
    refs = get_latest_data_subset_refs("earthquakes", "firms_fires", "gdelt")

    earthquakes = refs.get("earthquakes") or []
    _apply_seismic_threats(suppliers, earthquakes)

    # Resolve coordinates once per record rather than once per supplier.
    # Explicit missing-value tests keep coordinates that are exactly 0.0.
    fire_points = [
        point
        for point in (
            _scm_coords(fire, ("lat", "latitude"), ("lng", "lon", "longitude"))
            for fire in _scm_records(refs.get("firms_fires"))
        )
        if point is not None
    ]
    for sup in suppliers:
        count = sum(
            1
            for lat, lng in fire_points
            if _distance_km(sup["lat"], sup["lng"], lat, lng) < 50
        )
        if count:
            if sup["risk_level"] == "NORMAL":
                sup["risk_level"] = "HIGH"
            sup["active_threats"].append(f"WILDFIRE PROXIMITY ({count} hotspots)")

    # NOTE(review): only top-level lat/lng/lon keys are read, as before. If
    # gdelt records are GeoJSON features with geometry.coordinates, none
    # will match. Confirm the layer's record shape.
    conflict_points = [
        point
        for point in (
            _scm_coords(event, ("lat",), ("lng", "lon"))
            for event in _scm_records(refs.get("gdelt"))
        )
        if point is not None
    ]
    for sup in suppliers:
        near_conflict = any(
            _distance_km(sup["lat"], sup["lng"], lat, lng) < 100
            for lat, lng in conflict_points
        )
        if near_conflict:
            sup["risk_level"] = "CRITICAL"
            sup["active_threats"].append("ARMED CONFLICT / RIOT")

    # USGS fallback if earthquakes empty
    if not earthquakes:
        try:
            resp = fetch_with_curl(
                "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_day.geojson",
                timeout=5,
            )
            if resp.status_code == 200:
                usgs_quakes = []
                for feature in resp.json().get("features") or []:
                    # ``or {}`` tolerates an explicit null geometry/properties.
                    coords = (feature.get("geometry") or {}).get("coordinates") or []
                    if len(coords) < 2:
                        continue
                    usgs_quakes.append(
                        {
                            "lat": coords[1],
                            "lng": coords[0],
                            "mag": (feature.get("properties") or {}).get("mag") or 0,
                        }
                    )
                _apply_seismic_threats(suppliers, usgs_quakes)
        except Exception:
            # Best-effort enrichment: keep the payload, but record the failure.
            logging.getLogger(__name__).warning(
                "SCM USGS earthquake fallback failed", exc_info=True
            )

    critical = sum(1 for s in suppliers if s["risk_level"] == "CRITICAL")
    return {
        "suppliers": suppliers,
        "total": len(suppliers),
        "critical_count": critical,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
|
||||
|
||||
|
||||
def fetch_scm_suppliers() -> dict[str, Any]:
    """Refresh and return the ``scm_suppliers`` layer.

    When ``is_any_active("scm_suppliers")`` is false, nothing is rebuilt and
    the last stored payload (or an empty dict) is returned. Otherwise a new
    payload is built, stored in ``latest_data`` under the data lock, and
    the layer is marked fresh.
    """
    layer = "scm_suppliers"
    if is_any_active(layer):
        fresh = build_scm_payload()
        with _data_lock:
            latest_data[layer] = fresh
        # NOTE(review): the original indentation was lost; this call is
        # assumed to run after the lock is released. Confirm.
        _mark_fresh(layer)
        return fresh
    return latest_data.get(layer) or {}
|
||||
@@ -26,6 +26,46 @@ def _planetary_user_agent() -> str:
|
||||
return outbound_user_agent("sentinel2-planetary-computer")
|
||||
|
||||
|
||||
def _sign_planetary_href(href: str) -> str:
|
||||
"""Sign a Planetary Computer blob URL with a short-lived SAS token."""
|
||||
if not href or "blob.core.windows.net" not in href:
|
||||
return href
|
||||
try:
|
||||
account = href.split(".blob.core.windows.net")[0].split("//")[-1]
|
||||
token_resp = requests.get(
|
||||
f"https://planetarycomputer.microsoft.com/api/sas/v1/token/{account}",
|
||||
timeout=5,
|
||||
headers={"User-Agent": _planetary_user_agent()},
|
||||
)
|
||||
token_resp.raise_for_status()
|
||||
token = token_resp.json().get("token", "")
|
||||
if not token:
|
||||
return href
|
||||
sep = "&" if "?" in href else "?"
|
||||
return f"{href}{sep}{token}"
|
||||
except (requests.RequestException, ValueError, KeyError):
|
||||
return href
|
||||
|
||||
|
||||
def _scene_from_stac_feature(item: dict) -> dict:
    """Convert one STAC feature into the scene dict returned to callers.

    The thumbnail URL prefers the ``thumbnail`` asset and falls back to
    ``rendered_preview``; the full-resolution URL prefers them in the
    opposite order. Both are passed through ``_sign_planetary_href``.
    """
    props = item.get("properties", {}) or {}
    assets = item.get("assets", {}) or {}
    preview_href = (assets.get("rendered_preview") or {}).get("href")
    thumbnail_href = (assets.get("thumbnail") or {}).get("href")

    thumb_href = _sign_planetary_href(thumbnail_href or preview_href or "")
    full_href = _sign_planetary_href(preview_href or thumbnail_href or "")

    bbox = item.get("bbox")
    scene = {
        "found": True,
        "scene_id": item.get("id"),
        "datetime": props.get("datetime"),
        "cloud_cover": props.get("eo:cloud_cover"),
        "thumbnail_url": thumb_href or None,
        "fullres_url": full_href or None,
        "bbox": list(bbox) if bbox else None,
        "platform": props.get("platform", "Sentinel-2"),
    }
    return scene
|
||||
|
||||
|
||||
def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
||||
lat_span = 0.18
|
||||
lng_span = 0.24
|
||||
@@ -53,14 +93,14 @@ def _esri_imagery_fallback(lat: float, lng: float) -> dict:
|
||||
|
||||
|
||||
def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
||||
"""Search for the latest Sentinel-2 L2A scene covering a point."""
|
||||
"""Search for up to 3 recent Sentinel-2 L2A scenes covering a point."""
|
||||
cache_key = f"{round(lat, 2)}_{round(lng, 2)}"
|
||||
if cache_key in _sentinel_cache:
|
||||
return _sentinel_cache[cache_key]
|
||||
|
||||
try:
|
||||
end = datetime.utcnow()
|
||||
start = end - timedelta(days=30)
|
||||
start = end - timedelta(days=60)
|
||||
search_payload = {
|
||||
"collections": ["sentinel-2-l2a"],
|
||||
"intersects": {"type": "Point", "coordinates": [lng, lat]},
|
||||
@@ -83,26 +123,8 @@ def search_sentinel2_scene(lat: float, lng: float) -> dict:
|
||||
_sentinel_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
item = features[0]
|
||||
assets = item.get("assets", {}) or {}
|
||||
rendered = assets.get("rendered_preview") or {}
|
||||
thumbnail = assets.get("thumbnail") or {}
|
||||
|
||||
# Full-res image URL — what opens when user clicks
|
||||
fullres_url = rendered.get("href") or thumbnail.get("href")
|
||||
# Thumbnail URL — what shows in the popup card
|
||||
thumb_url = thumbnail.get("href") or rendered.get("href")
|
||||
|
||||
result = {
|
||||
"found": True,
|
||||
"scene_id": item.get("id"),
|
||||
"datetime": item.get("properties", {}).get("datetime"),
|
||||
"cloud_cover": item.get("properties", {}).get("eo:cloud_cover"),
|
||||
"thumbnail_url": thumb_url,
|
||||
"fullres_url": fullres_url,
|
||||
"bbox": list(item.get("bbox", [])) if item.get("bbox") else None,
|
||||
"platform": item.get("properties", {}).get("platform", "Sentinel-2"),
|
||||
}
|
||||
scenes = [_scene_from_stac_feature(item) for item in features[:3]]
|
||||
result = {**scenes[0], "scenes": scenes}
|
||||
_sentinel_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ SLO_REGISTRY: Dict[str, SLO] = {
|
||||
"uap_sightings": SLO(
|
||||
max_age_s=26 * _HOUR,
|
||||
min_rows=50,
|
||||
description="NUFORC rolling 60-day window (daily refresh)",
|
||||
description="NUFORC rolling 60-day window (weekly refresh)",
|
||||
),
|
||||
"wastewater": SLO(
|
||||
max_age_s=30 * _HOUR,
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
"""SSRF guard for operator-initiated recon (ported from Osiris ssrf-guard.ts)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import re
|
||||
import socket
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import requests
|
||||
|
||||
_IPV4_BLOCKS = [
|
||||
ipaddress.ip_network("0.0.0.0/8"),
|
||||
ipaddress.ip_network("10.0.0.0/8"),
|
||||
ipaddress.ip_network("100.64.0.0/10"),
|
||||
ipaddress.ip_network("127.0.0.0/8"),
|
||||
ipaddress.ip_network("169.254.0.0/16"),
|
||||
ipaddress.ip_network("172.16.0.0/12"),
|
||||
ipaddress.ip_network("192.0.0.0/24"),
|
||||
ipaddress.ip_network("192.0.2.0/24"),
|
||||
ipaddress.ip_network("192.168.0.0/16"),
|
||||
ipaddress.ip_network("198.18.0.0/15"),
|
||||
ipaddress.ip_network("198.51.100.0/24"),
|
||||
ipaddress.ip_network("203.0.113.0/24"),
|
||||
ipaddress.ip_network("224.0.0.0/4"),
|
||||
ipaddress.ip_network("240.0.0.0/4"),
|
||||
]
|
||||
|
||||
_NAME_BLOCKLIST = (
|
||||
re.compile(r"^localhost$", re.I),
|
||||
re.compile(r"\.localhost$", re.I),
|
||||
re.compile(r"^host\.docker\.internal$", re.I),
|
||||
re.compile(r"\.local$", re.I),
|
||||
re.compile(r"\.internal$", re.I),
|
||||
re.compile(r"^metadata\.google\.internal$", re.I),
|
||||
)
|
||||
|
||||
_HOSTNAME_RE = re.compile(
|
||||
r"^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)*$"
|
||||
)
|
||||
|
||||
|
||||
def _ipv4_blocked(ip: str) -> bool:
|
||||
try:
|
||||
addr = ipaddress.ip_address(ip)
|
||||
except ValueError:
|
||||
return True
|
||||
if not isinstance(addr, ipaddress.IPv4Address):
|
||||
return False
|
||||
return any(addr in net for net in _IPV4_BLOCKS)
|
||||
|
||||
|
||||
def _ip_blocked(ip: str) -> bool:
|
||||
try:
|
||||
addr = ipaddress.ip_address(ip)
|
||||
except ValueError:
|
||||
return True
|
||||
if isinstance(addr, ipaddress.IPv4Address):
|
||||
return _ipv4_blocked(ip)
|
||||
return (
|
||||
addr.is_loopback
|
||||
or addr.is_private
|
||||
or addr.is_link_local
|
||||
or addr.is_multicast
|
||||
or addr.is_reserved
|
||||
or addr.is_unspecified
|
||||
)
|
||||
|
||||
|
||||
def validate_host(host: str) -> dict[str, Any]:
    """Decide whether ``host`` is an acceptable outbound target.

    Returns ``{"ok": True, "resolved": [...]}`` with the literal IP or the
    addresses from DNS, or ``{"ok": False, "reason": "..."}``. A host is
    rejected when it is empty, matches a reserved name pattern, is or
    resolves to a blocked IP, is not a syntactically valid hostname, or
    fails DNS lookup. One blocked address among the resolved set rejects
    the whole host.
    """
    candidate = (host or "").strip()
    if not candidate:
        return {"ok": False, "reason": "empty host"}

    # IPv6 literals may arrive wrapped in brackets, as they appear in URLs.
    literal = candidate.strip("[]")
    lowered = candidate.lower()
    for pattern in _NAME_BLOCKLIST:
        if pattern.search(lowered):
            return {"ok": False, "reason": "hostname matches reserved name pattern"}

    try:
        ipaddress.ip_address(literal)
    except ValueError:
        pass  # Not an IP literal: fall through to hostname handling.
    else:
        if _ip_blocked(literal):
            return {"ok": False, "reason": "IP in reserved range"}
        return {"ok": True, "resolved": [literal]}

    if _HOSTNAME_RE.match(candidate) is None:
        return {"ok": False, "reason": "invalid hostname syntax"}

    try:
        infos = socket.getaddrinfo(candidate, None, proto=socket.IPPROTO_TCP)
    except OSError as exc:
        return {"ok": False, "reason": f"DNS lookup failed: {exc}"}
    if not infos:
        return {"ok": False, "reason": "hostname has no A/AAAA records"}

    resolved: list[str] = []
    for info in infos:
        sockaddr = info[4]
        address = sockaddr[0]
        if _ip_blocked(address):
            return {"ok": False, "reason": f"hostname resolves to reserved IP {address}"}
        resolved.append(address)
    return {"ok": True, "resolved": resolved}
|
||||
|
||||
|
||||
def safe_get(
    url: str,
    *,
    timeout: float = 8.0,
    headers: dict[str, str] | None = None,
    max_redirects: int = 3,
) -> requests.Response:
    """GET ``url`` while validating the target of every hop.

    Redirects are followed manually (at most ``max_redirects``) so each
    new location goes through the scheme check and ``validate_host`` before
    it is requested. A 3xx response without a ``Location`` header is
    returned as-is.

    NOTE(review): the host is validated here and then resolved again by
    ``requests``, so the two lookups can differ. Confirm whether that is
    acceptable for this use.

    Raises:
        ValueError: On a non-HTTP(S) scheme, a blocked host, or too many
            redirects.
    """
    target = url
    attempts_left = max_redirects + 1
    while attempts_left > 0:
        attempts_left -= 1

        parts = urlparse(target)
        if parts.scheme not in ("http", "https"):
            raise ValueError(f"blocked protocol {parts.scheme}")
        check = validate_host(parts.hostname or "")
        if not check.get("ok"):
            raise ValueError(f"blocked target — {check.get('reason')}")

        res = requests.get(
            target,
            timeout=timeout,
            headers=headers or {},
            allow_redirects=False,
        )
        if not (300 <= res.status_code < 400):
            return res
        location = res.headers.get("location")
        if not location:
            return res
        # Resolve relative redirects against the URL that produced them.
        target = urljoin(target, location)

    raise ValueError("too many redirects")
|
||||
|
||||
|
||||
def validate_domain(domain: str) -> bool:
    """Return True when ``domain`` looks like a dotted domain name.

    The whole string must match: an alphanumeric first character, at least
    one more character from letters, digits, dots and hyphens, then a dot
    and an alphabetic TLD of two or more letters. ``re.fullmatch`` is used
    rather than ``re.match`` with ``$`` because ``$`` also matches just
    before a trailing newline, which would accept ``"example.com\\n"``.
    """
    pattern = r"[a-zA-Z0-9][a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    return re.fullmatch(pattern, domain or "") is not None
|
||||
+200
-30
@@ -93,6 +93,10 @@ _SLOW_KEYS = (
|
||||
"sar_scenes",
|
||||
"sar_anomalies",
|
||||
"sar_aoi_coverage",
|
||||
"malware_threats",
|
||||
"cyber_threats",
|
||||
"scm_suppliers",
|
||||
"telegram_osint",
|
||||
)
|
||||
|
||||
|
||||
@@ -158,6 +162,20 @@ _ENTITY_LAYER_ALIASES = {
|
||||
"uap_sightings": "uap_sightings",
|
||||
"wastewater": "wastewater",
|
||||
"pins": "pins",
|
||||
"telegram": "telegram_osint",
|
||||
"telegram_osint": "telegram_osint",
|
||||
"osint_feed": "telegram_osint",
|
||||
"malware": "malware_threats",
|
||||
"malware_threats": "malware_threats",
|
||||
"malware_c2": "malware_threats",
|
||||
"botnet": "malware_threats",
|
||||
"cyber": "cyber_threats",
|
||||
"cyber_threats": "cyber_threats",
|
||||
"cisa": "cyber_threats",
|
||||
"kev": "cyber_threats",
|
||||
"scm": "scm_suppliers",
|
||||
"scm_suppliers": "scm_suppliers",
|
||||
"suppliers": "scm_suppliers",
|
||||
}
|
||||
|
||||
_SLICEABLE_LAYERS = tuple(dict.fromkeys(_FAST_KEYS + _SLOW_KEYS))
|
||||
@@ -188,6 +206,21 @@ _LAYER_ALIASES = {
|
||||
"sar_coverage": "sar_aoi_coverage",
|
||||
# Satellite analysis (maneuvers, decay, Starlink)
|
||||
"satellite_analysis": "satellite_analysis",
|
||||
# OSINT / cyber / supply-chain overlays
|
||||
"telegram": "telegram_osint",
|
||||
"telegram_osint": "telegram_osint",
|
||||
"osint_feed": "telegram_osint",
|
||||
"malware": "malware_threats",
|
||||
"malware_threats": "malware_threats",
|
||||
"malware_c2": "malware_threats",
|
||||
"botnet": "malware_threats",
|
||||
"cyber": "cyber_threats",
|
||||
"cyber_threats": "cyber_threats",
|
||||
"cisa": "cyber_threats",
|
||||
"kev": "cyber_threats",
|
||||
"scm": "scm_suppliers",
|
||||
"scm_suppliers": "scm_suppliers",
|
||||
"suppliers": "scm_suppliers",
|
||||
}
|
||||
|
||||
_UNIVERSAL_SEARCH_DEFAULT_LAYERS = (
|
||||
@@ -225,6 +258,10 @@ _UNIVERSAL_SEARCH_DEFAULT_LAYERS = (
|
||||
"tinygs_satellites",
|
||||
"psk_reporter",
|
||||
"ukraine_alerts",
|
||||
"telegram_osint",
|
||||
"malware_threats",
|
||||
"cyber_threats",
|
||||
"scm_suppliers",
|
||||
)
|
||||
|
||||
_GENERIC_QUERY_STOPWORDS = {
|
||||
@@ -269,7 +306,19 @@ _GENERIC_LAYER_HINTS: dict[str, tuple[str, ...]] = {
|
||||
"protest": ("crowdthreat", "gdelt", "news", "frontlines", "liveuamap"),
|
||||
"riot": ("crowdthreat", "gdelt", "news", "frontlines", "liveuamap"),
|
||||
"event": ("crowdthreat", "gdelt", "news", "frontlines", "liveuamap"),
|
||||
"news": ("news", "gdelt", "crowdthreat", "frontlines", "liveuamap"),
|
||||
"news": ("news", "gdelt", "crowdthreat", "frontlines", "liveuamap", "telegram_osint"),
|
||||
"telegram": ("telegram_osint",),
|
||||
"osint": ("telegram_osint", "gdelt", "news", "crowdthreat"),
|
||||
"channel": ("telegram_osint",),
|
||||
"malware": ("malware_threats",),
|
||||
"botnet": ("malware_threats",),
|
||||
"c2": ("malware_threats",),
|
||||
"cve": ("cyber_threats",),
|
||||
"cisa": ("cyber_threats",),
|
||||
"cyber": ("cyber_threats", "malware_threats"),
|
||||
"supplier": ("scm_suppliers",),
|
||||
"scm": ("scm_suppliers",),
|
||||
"semiconductor": ("scm_suppliers",),
|
||||
"plant": ("power_plants", "wastewater"),
|
||||
"datacenter": ("datacenters",),
|
||||
"data": ("datacenters",),
|
||||
@@ -314,6 +363,10 @@ _SEARCH_GROUP_BY_LAYER = {
|
||||
"kiwisdr": "signals",
|
||||
"psk_reporter": "signals",
|
||||
"ukraine_alerts": "events",
|
||||
"telegram_osint": "events",
|
||||
"malware_threats": "cyber",
|
||||
"cyber_threats": "cyber",
|
||||
"scm_suppliers": "infrastructure",
|
||||
}
|
||||
|
||||
_SEARCH_QUERY_SYNONYMS: dict[str, tuple[str, ...]] = {
|
||||
@@ -328,6 +381,9 @@ _SEARCH_QUERY_SYNONYMS: dict[str, tuple[str, ...]] = {
|
||||
"plants": ("plant",),
|
||||
"cameras": ("camera",),
|
||||
"radios": ("radio",),
|
||||
"telegrams": ("telegram",),
|
||||
"channels": ("channel",),
|
||||
"suppliers": ("supplier",),
|
||||
}
|
||||
|
||||
_SEARCH_INDEX_LOCK = threading.Lock()
|
||||
@@ -653,6 +709,42 @@ _UNIVERSAL_SEARCH_SPECS: dict[str, dict[str, Any]] = {
|
||||
"id_fields": ("id",),
|
||||
"time_fields": ("updated_at", "timestamp"),
|
||||
},
|
||||
"telegram_osint": {
|
||||
"fields": ("title", "description", "source", "channel", "link"),
|
||||
"primary_fields": ("title", "description", "channel"),
|
||||
"label_fields": ("title", "channel"),
|
||||
"summary_fields": ("description", "source"),
|
||||
"type_fields": ("channel", "source"),
|
||||
"id_fields": ("id", "link"),
|
||||
"time_fields": ("published", "timestamp"),
|
||||
},
|
||||
"malware_threats": {
|
||||
"fields": ("ip", "malware", "status", "country", "threat_type"),
|
||||
"primary_fields": ("ip", "malware", "country"),
|
||||
"label_fields": ("ip", "malware"),
|
||||
"summary_fields": ("status", "country", "threat_type"),
|
||||
"type_fields": ("threat_type", "malware"),
|
||||
"id_fields": ("id", "ip"),
|
||||
"time_fields": ("first_seen", "last_online", "timestamp"),
|
||||
},
|
||||
"cyber_threats": {
|
||||
"fields": ("id", "name", "vendor", "product", "severity", "source"),
|
||||
"primary_fields": ("id", "name", "vendor", "product"),
|
||||
"label_fields": ("id", "name"),
|
||||
"summary_fields": ("vendor", "product", "severity", "source"),
|
||||
"type_fields": ("severity", "source"),
|
||||
"id_fields": ("id",),
|
||||
"time_fields": ("date", "due", "timestamp"),
|
||||
},
|
||||
"scm_suppliers": {
|
||||
"fields": ("name", "city", "country", "category", "risk_level"),
|
||||
"primary_fields": ("name", "city", "country", "category"),
|
||||
"label_fields": ("name", "city"),
|
||||
"summary_fields": ("country", "category", "risk_level"),
|
||||
"type_fields": ("category", "risk_level"),
|
||||
"id_fields": ("id",),
|
||||
"time_fields": ("timestamp",),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -734,6 +826,11 @@ def _extract_coords(candidate: dict[str, Any]) -> tuple[float | None, float | No
|
||||
if isinstance(coords, (list, tuple)) and len(coords) >= 2:
|
||||
lng = lng if lng is not None else _coerce_float(coords[0])
|
||||
lat = lat if lat is not None else _coerce_float(coords[1])
|
||||
if lat is None or lng is None:
|
||||
coords = candidate.get("coords")
|
||||
if isinstance(coords, (list, tuple)) and len(coords) >= 2:
|
||||
lat = lat if lat is not None else _coerce_float(coords[0])
|
||||
lng = lng if lng is not None else _coerce_float(coords[1])
|
||||
return lat, lng
|
||||
|
||||
|
||||
@@ -832,6 +929,53 @@ def _layer_group(layer: str) -> str:
|
||||
return _SEARCH_GROUP_BY_LAYER.get(layer, "other")
|
||||
|
||||
|
||||
_LAYER_NESTED_LIST_KEYS: dict[str, tuple[str, ...]] = {
|
||||
"telegram_osint": ("posts",),
|
||||
"malware_threats": ("threats",),
|
||||
"cyber_threats": ("threats",),
|
||||
"scm_suppliers": ("suppliers",),
|
||||
}
|
||||
_DEFAULT_NESTED_LIST_KEYS = (
|
||||
"items",
|
||||
"results",
|
||||
"vessels",
|
||||
"features",
|
||||
"posts",
|
||||
"threats",
|
||||
"suppliers",
|
||||
)
|
||||
|
||||
|
||||
def _unwrap_layer_items(value: Any, layer: str = "") -> list[Any]:
|
||||
"""Return the searchable/geospatial item list inside a layer value."""
|
||||
if isinstance(value, list):
|
||||
return value
|
||||
if not isinstance(value, dict):
|
||||
return []
|
||||
keys = _LAYER_NESTED_LIST_KEYS.get(layer, _DEFAULT_NESTED_LIST_KEYS)
|
||||
for key in keys:
|
||||
nested = value.get(key)
|
||||
if isinstance(nested, list):
|
||||
return nested
|
||||
return []
|
||||
|
||||
|
||||
def _layer_record_count(value: Any, layer: str = "") -> int:
|
||||
if isinstance(value, list):
|
||||
return len(value)
|
||||
if isinstance(value, dict):
|
||||
total = value.get("total")
|
||||
if isinstance(total, (int, float)):
|
||||
return int(total)
|
||||
items = _unwrap_layer_items(value, layer)
|
||||
if items:
|
||||
return len(items)
|
||||
return len(value) if value else 0
|
||||
if value is None:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
|
||||
def _build_search_document(doc_id: int, layer: str, candidate: dict[str, Any], spec: dict[str, Any]) -> dict[str, Any]:
|
||||
fields = tuple(spec.get("fields", ()))
|
||||
text = _document_text(candidate, fields)
|
||||
@@ -880,9 +1024,7 @@ def _get_search_index() -> dict[str, Any]:
|
||||
|
||||
for layer in layers:
|
||||
spec = _UNIVERSAL_SEARCH_SPECS[layer]
|
||||
items = snap.get(layer) or []
|
||||
if isinstance(items, dict):
|
||||
items = items.get("items", []) or items.get("results", []) or items.get("vessels", [])
|
||||
items = _unwrap_layer_items(snap.get(layer), layer)
|
||||
if not isinstance(items, list):
|
||||
continue
|
||||
for item in items:
|
||||
@@ -1144,18 +1286,9 @@ def get_telemetry_summary() -> dict[str, Any]:
|
||||
|
||||
for layer in layer_names:
|
||||
value = snap.get(layer)
|
||||
if isinstance(value, list):
|
||||
counts[layer] = len(value)
|
||||
if value:
|
||||
non_empty_layers.append(layer)
|
||||
elif isinstance(value, dict):
|
||||
counts[layer] = len(value)
|
||||
if value:
|
||||
non_empty_layers.append(layer)
|
||||
elif value is None:
|
||||
counts[layer] = 0
|
||||
else:
|
||||
counts[layer] = 1
|
||||
count = _layer_record_count(value, layer)
|
||||
counts[layer] = count
|
||||
if count > 0:
|
||||
non_empty_layers.append(layer)
|
||||
|
||||
alias_examples = {
|
||||
@@ -1167,6 +1300,16 @@ def get_telemetry_summary() -> dict[str, Any]:
|
||||
"tracked": "tracked_flights",
|
||||
"military": "military_flights",
|
||||
"jets": "private_jets",
|
||||
"telegram": "telegram_osint",
|
||||
"osint_feed": "telegram_osint",
|
||||
"malware": "malware_threats",
|
||||
"malware_c2": "malware_threats",
|
||||
"botnet": "malware_threats",
|
||||
"cyber": "cyber_threats",
|
||||
"cisa": "cyber_threats",
|
||||
"kev": "cyber_threats",
|
||||
"scm": "scm_suppliers",
|
||||
"suppliers": "scm_suppliers",
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -1577,14 +1720,7 @@ def _nearby_items_from_layers(
|
||||
snap = get_latest_data_subset_refs(*layers)
|
||||
out: dict[str, list[dict[str, Any]]] = {}
|
||||
for layer in layers:
|
||||
value = snap.get(layer) or []
|
||||
if isinstance(value, dict):
|
||||
if layer == "gdelt" and isinstance(value.get("features"), list):
|
||||
items = value.get("features") or []
|
||||
else:
|
||||
items = value.get("items") or value.get("features") or value.get("vessels") or []
|
||||
else:
|
||||
items = value
|
||||
items = _unwrap_layer_items(snap.get(layer), layer)
|
||||
if not isinstance(items, list):
|
||||
continue
|
||||
matches: list[dict[str, Any]] = []
|
||||
@@ -1728,6 +1864,9 @@ def correlate_entity(
|
||||
"crowdthreat",
|
||||
"frontlines",
|
||||
"liveuamap",
|
||||
"telegram_osint",
|
||||
"malware_threats",
|
||||
"scm_suppliers",
|
||||
"military_bases",
|
||||
"datacenters",
|
||||
"power_plants",
|
||||
@@ -1809,13 +1948,17 @@ def search_news(
|
||||
query: str,
|
||||
limit: int = 10,
|
||||
include_gdelt: bool = True,
|
||||
include_telegram: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
"""Search news and event layers server-side and return a compact result set."""
|
||||
query_norm = _norm_text(query)
|
||||
if not query_norm:
|
||||
return {"results": [], "version": get_data_version(), "truncated": False}
|
||||
|
||||
snap = get_latest_data_subset_refs("news", "gdelt", "crowdthreat", "liveuamap", "frontlines")
|
||||
layer_keys = ["news", "gdelt", "crowdthreat", "liveuamap", "frontlines"]
|
||||
if include_telegram:
|
||||
layer_keys.append("telegram_osint")
|
||||
snap = get_latest_data_subset_refs(*layer_keys)
|
||||
out: list[dict[str, Any]] = []
|
||||
limit = _coerce_limit(limit, default=10, maximum=50)
|
||||
|
||||
@@ -1941,6 +2084,36 @@ def search_news(
|
||||
if len(out) >= limit:
|
||||
return {"results": out, "version": get_data_version(), "truncated": True}
|
||||
|
||||
if include_telegram:
|
||||
for post in _unwrap_layer_items(snap.get("telegram_osint"), "telegram_osint"):
|
||||
if not isinstance(post, dict):
|
||||
continue
|
||||
text = " ".join(
|
||||
(
|
||||
_norm_text(post.get("title")),
|
||||
_norm_text(post.get("description")),
|
||||
_norm_text(post.get("source")),
|
||||
_norm_text(post.get("channel")),
|
||||
)
|
||||
)
|
||||
if not _text_matches_query(query_norm, text):
|
||||
continue
|
||||
lat, lng = _extract_coords(post)
|
||||
out.append(
|
||||
{
|
||||
"source_layer": "telegram_osint",
|
||||
"title": post.get("title") or "",
|
||||
"summary": post.get("description") or "",
|
||||
"source": post.get("source") or post.get("channel") or "Telegram",
|
||||
"link": post.get("link") or "",
|
||||
"lat": lat,
|
||||
"lng": lng,
|
||||
"risk_score": post.get("risk_score"),
|
||||
}
|
||||
)
|
||||
if len(out) >= limit:
|
||||
return {"results": out, "version": get_data_version(), "truncated": True}
|
||||
|
||||
return {"results": out, "version": get_data_version(), "truncated": False}
|
||||
|
||||
|
||||
@@ -2205,16 +2378,13 @@ def entities_near(
|
||||
out: list[dict[str, Any]] = []
|
||||
|
||||
for layer in layers:
|
||||
items = snap.get(layer) or []
|
||||
if isinstance(items, dict):
|
||||
items = items.get("vessels", []) or items.get("items", [])
|
||||
items = _unwrap_layer_items(snap.get(layer), layer)
|
||||
if not isinstance(items, list):
|
||||
continue
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
item_lat = _coerce_float(item.get("lat") or item.get("latitude"))
|
||||
item_lng = _coerce_float(item.get("lng") or item.get("lon") or item.get("longitude"))
|
||||
item_lat, item_lng = _extract_coords(item)
|
||||
if item_lat is None or item_lng is None:
|
||||
continue
|
||||
distance = _haversine_km(center_lat, center_lng, item_lat, item_lng)
|
||||
|
||||
@@ -30,12 +30,6 @@ def client(_suppress_background_services):
|
||||
from main import app
|
||||
import asyncio
|
||||
|
||||
transport = ASGITransport(app=app)
|
||||
|
||||
async def _make_client():
|
||||
async with AsyncClient(transport=transport, base_url="http://test") as ac:
|
||||
return ac
|
||||
|
||||
# Return a sync-usable wrapper
|
||||
class SyncClient:
|
||||
def __init__(self):
|
||||
|
||||
@@ -38,18 +38,6 @@
|
||||
"main",
|
||||
"routers.health"
|
||||
],
|
||||
"GET /api/live-data": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/fast": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/live-data/slow": [
|
||||
"main",
|
||||
"routers.data"
|
||||
],
|
||||
"GET /api/mesh/channels": [
|
||||
"main",
|
||||
"routers.mesh_public"
|
||||
|
||||
@@ -22,9 +22,11 @@ class TestHealthEndpoint:
|
||||
|
||||
|
||||
class TestLiveDataEndpoints:
|
||||
def test_live_data_returns_200(self, client):
|
||||
def test_live_data_returns_200_or_304(self, client):
|
||||
r = client.get("/api/live-data")
|
||||
assert r.status_code == 200
|
||||
assert r.status_code in (200, 304)
|
||||
if r.status_code == 200:
|
||||
assert r.headers.get("etag")
|
||||
|
||||
def test_live_data_fast_returns_200_or_304(self, client):
|
||||
r = client.get("/api/live-data/fast")
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
from starlette.requests import Request
|
||||
|
||||
import auth
|
||||
|
||||
|
||||
async def _empty_receive():
|
||||
return {"type": "http.request", "body": b"", "more_body": False}
|
||||
|
||||
|
||||
def _request(path: str, *, host: str = "example.com/health?x=", client_host: str = "203.0.113.10") -> Request:
    """Build a minimal GET ``Request`` for the given ASGI path.

    The Host header is taken verbatim from ``host`` (the default embeds a
    path-and-query fragment) and the client address from ``client_host``.
    """
    scope = {
        "type": "http",
        "method": "GET",
        "scheme": "http",
        "server": ("127.0.0.1", 8000),
        "client": (client_host, 12345),
        "path": path,
        "raw_path": path.encode("ascii"),
        "query_string": b"",
        "headers": [(b"host", host.encode("ascii"))],
    }
    return Request(scope, receive=_empty_receive)
|
||||
|
||||
|
||||
def test_scope_auth_uses_asgi_path_not_host_derived_url_path():
    """The scope path and required scope come from the ASGI path, not the Host header."""
    asgi_path = "/api/mesh/gate/alpha/message"
    req = _request(asgi_path)

    assert auth._request_scope_path(req) == asgi_path
    assert auth._required_scope_for_request(req) == "mesh"
|
||||
|
||||
|
||||
def test_debug_test_request_does_not_trust_host_header(monkeypatch):
    """A Host header starting with ``test`` must not mark the request as a debug test request."""

    def _debug_on():
        return True

    monkeypatch.setattr(auth, "_debug_mode_enabled", _debug_on)

    spoofed = _request("/api/admin", host="test/api/public?x=")

    assert auth._is_debug_test_request(spoofed) is False
|
||||
|
||||
|
||||
def test_peer_hmac_identity_requires_explicit_peer_url_header():
    """The peer URL is read from ``x-peer-url`` only; a URL-shaped Host header yields ''."""
    push_path = "/api/mesh/infonet/push"

    host_only = _request(push_path, host="https://peer.example/api/public?x=")
    assert auth._peer_hmac_url_from_request(host_only) == ""

    with_header = _request(push_path)
    peer_header = (b"x-peer-url", b"https://peer.example/")
    with_header.scope["headers"].append(peer_header)
    # The expected value has no trailing slash.
    assert auth._peer_hmac_url_from_request(with_header) == "https://peer.example"
|
||||
@@ -77,3 +77,62 @@ def test_ingest_updates_existing_rows_in_persistent_data_dir(tmp_path, monkeypat
|
||||
assert len(cameras) == 1
|
||||
assert cameras[0]["media_url"] == "https://example.com/live.m3u8"
|
||||
assert cameras[0]["media_type"] == "hls"
|
||||
|
||||
|
||||
def test_scheduled_cctv_ingestors_include_asfinag_and_alpr():
    """The scheduled ingestor set contains the expected classes and 21 distinct ones overall."""
    scheduled = cctv_pipeline.scheduled_cctv_ingestors()
    names = {ingestor.__class__.__name__ for ingestor, _schedule in scheduled}

    required = (
        "AsfinagIngestor",
        "OSMALPRCameraIngestor",
        "OSMTrafficCameraIngestor",
        "Ontario511Ingestor",
        "Alberta511Ingestor",
        "Florida511Ingestor",
        "AustraliaLiveTrafficIngestor",
        "NetherlandsRWSIngestor",
    )
    for class_name in required:
        assert class_name in names
    assert len(names) == 21
|
||||
|
||||
|
||||
def test_fetch_traveliq_v2_cameras_parses_views(monkeypatch):
    """One camera with one enabled view becomes one record with a prefixed id and absolute URL."""

    class FakeResp:
        status_code = 200

        @staticmethod
        def json():
            northbound_view = {
                "Id": 42,
                "Url": "/map/Cctv/42",
                "Status": "Enabled",
                "Description": "Northbound",
            }
            camera = {
                "Id": 9,
                "Latitude": 45.0,
                "Longitude": -75.0,
                "Location": "Test Highway",
                "Views": [northbound_view],
            }
            return [camera]

    def _fake_fetch(*args, **kwargs):
        return FakeResp()

    monkeypatch.setattr(cctv_pipeline, "fetch_with_curl", _fake_fetch)
    parsed = cctv_pipeline._fetch_traveliq_v2_cameras(
        api_url="https://511on.ca/api/v2/get/cameras",
        base_url="https://511on.ca",
        id_prefix="ON511",
        source_agency="511 Ontario",
    )
    assert len(parsed) == 1
    first = parsed[0]
    assert first["id"] == "ON511-9-42"
    assert first["media_url"] == "https://511on.ca/map/Cctv/42"
|
||||
|
||||
|
||||
def test_ensure_https_upgrades_http_media_urls():
    """``http://`` URLs are upgraded to ``https://``; ``https://`` URLs are returned unchanged."""
    cases = (
        ("http://example.com/camera.jpg", "https://example.com/camera.jpg"),
        ("https://secure.example.com/live.m3u8", "https://secure.example.com/live.m3u8"),
    )
    for raw_url, expected in cases:
        assert cctv_pipeline._ensure_https_url(raw_url) == expected
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
"""Datacenters load from static JSON regardless of layer toggle."""
|
||||
from services.fetchers import _store
|
||||
from services.fetchers.infrastructure import fetch_datacenters
|
||||
|
||||
|
||||
def test_fetch_datacenters_populates_store_when_layer_disabled(monkeypatch):
    """``fetch_datacenters`` fills the store even when the layer toggle is off."""
    monkeypatch.setitem(_store.active_layers, "datacenters", False)
    # Use setitem rather than a bare assignment so the shared store entry is
    # restored on teardown and this test does not leak state into others.
    monkeypatch.setitem(_store.latest_data, "datacenters", [])
    fetch_datacenters()
    assert len(_store.latest_data.get("datacenters") or []) > 0
|
||||
@@ -0,0 +1,46 @@
|
||||
"""DeepState GitHub mirror pinning (#362)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import services.geopolitics as gp
|
||||
|
||||
|
||||
def test_deepstate_mirror_ref_defaults(monkeypatch):
    """With neither env var set, the mirror resolves to the default repo on ``main``."""
    for env_name in ("DEEPSTATE_MIRROR_COMMIT", "DEEPSTATE_MIRROR_REPO"):
        monkeypatch.delenv(env_name, raising=False)
    repo, ref = gp._deepstate_mirror_ref()
    assert (repo, ref) == ("cyterat/deepstate-map-data", "main")
|
||||
|
||||
|
||||
def test_deepstate_mirror_ref_pinned_commit(monkeypatch):
    """A commit set via ``DEEPSTATE_MIRROR_COMMIT`` is returned as the ref."""
    pinned_env = {
        "DEEPSTATE_MIRROR_COMMIT": "abc123def456",
        "DEEPSTATE_MIRROR_REPO": "cyterat/deepstate-map-data",
    }
    for env_name, env_value in pinned_env.items():
        monkeypatch.setenv(env_name, env_value)
    repo, ref = gp._deepstate_mirror_ref()
    assert (repo, ref) == ("cyterat/deepstate-map-data", "abc123def456")
|
||||
|
||||
|
||||
def test_fetch_ukraine_frontlines_uses_pinned_tree_url(monkeypatch):
    """Both the tree listing and the raw GeoJSON request use the pinned commit."""
    monkeypatch.setenv("DEEPSTATE_MIRROR_COMMIT", "deadbeef")
    # The raw-URL assertion below hardcodes the default repo, so make sure an
    # ambient override cannot change it.
    monkeypatch.delenv("DEEPSTATE_MIRROR_REPO", raising=False)
    gp.frontline_cache.clear()

    tree_resp = MagicMock(status_code=200)
    tree_resp.json.return_value = {
        "tree": [{"path": "data/deepstatemap_data_20260101.geojson"}]
    }
    geo_resp = MagicMock(status_code=200)
    geo_resp.json.return_value = {"features": []}

    try:
        with patch("services.geopolitics.requests.get", side_effect=[tree_resp, geo_resp]) as get:
            result = gp.fetch_ukraine_frontlines()

        assert result == {"features": []}
        # First positional argument of each call is the requested URL.
        tree_call = get.call_args_list[0][0][0]
        raw_call = get.call_args_list[1][0][0]
        assert "/git/trees/deadbeef" in tree_call
        assert "raw.githubusercontent.com/cyterat/deepstate-map-data/deadbeef/" in raw_call
    finally:
        # Always drop the cached result, even when an assertion fails, so it
        # cannot leak into other tests.
        gp.frontline_cache.clear()
|
||||
@@ -113,3 +113,52 @@ def test_fetch_fishing_activity_dedupes_to_latest_event_per_vessel(monkeypatch):
|
||||
assert latest_data["fishing_activity"][0]["vessel_ssvid"] == "ssvid-1"
|
||||
finally:
|
||||
latest_data["fishing_activity"] = original
|
||||
|
||||
|
||||
def test_fetch_fishing_activity_respects_max_pages(monkeypatch):
    """With page size 500 and max pages 2, exactly two pages (1000 events) are fetched."""
    from services.fetchers import geo
    from services.fetchers._store import latest_data

    saved_layer = list(latest_data.get("fishing_activity") or [])
    requested_urls: list[str] = []

    def _event(offset, i):
        # Ids are unique across pages; positions only depend on the in-page index.
        seq = offset + i
        return {
            "id": f"evt-{seq}",
            "position": {"lat": 10.0 + i, "lon": 20.0 + i},
            "event": {"duration": 3600},
            "vessel": {
                "id": f"v-{seq}",
                "ssvid": f"ssvid-{seq}",
                "name": f"Vessel-{seq}",
                "flag": "US",
            },
        }

    def fake_fetch(url, timeout=30, headers=None):
        requested_urls.append(url)
        offset = 500 if "offset=500" in url else 0
        payload = {
            "total": 5000,
            "entries": [_event(offset, i) for i in range(500)],
            "nextOffset": offset + 500,
        }
        return SimpleNamespace(status_code=200, json=lambda p=payload: p)

    monkeypatch.setenv("GFW_API_TOKEN", "test-token")
    monkeypatch.setenv("GFW_EVENTS_PAGE_SIZE", "500")
    monkeypatch.setenv("GFW_EVENTS_MAX_PAGES", "2")
    monkeypatch.setattr("services.fetchers._store.is_any_active", lambda *args: True)
    monkeypatch.setattr(geo, "fetch_with_curl", fake_fetch)
    monkeypatch.setattr(geo, "_mark_fresh", lambda *args, **kwargs: None)
    monkeypatch.setattr(geo, "_last_fishing_fetch_ts", 0.0)

    try:
        geo.fetch_fishing_activity()
        assert len(latest_data["fishing_activity"]) == 1000
        assert len(requested_urls) == 2
        for url in requested_urls:
            assert "offset=0" in url or "offset=500" in url
    finally:
        latest_data["fishing_activity"] = saved_layer
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
"""Regression tests for GitHub #375 production-readiness fixes."""
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestDevBindHost:
    """Host chosen by ``_dev_uvicorn_bind_host`` depending on ``SHADOWBROKER_DEV_BIND_ALL``."""

    ENV_NAME = "SHADOWBROKER_DEV_BIND_ALL"

    def test_defaults_to_loopback(self, monkeypatch):
        monkeypatch.delenv(self.ENV_NAME, raising=False)
        import main as app_main

        assert app_main._dev_uvicorn_bind_host() == "127.0.0.1"

    @pytest.mark.parametrize("value", ["1", "true", "yes", "on", "TRUE"])
    def test_bind_all_opt_in(self, monkeypatch, value):
        monkeypatch.setenv(self.ENV_NAME, value)
        import main as app_main

        assert app_main._dev_uvicorn_bind_host() == "0.0.0.0"
|
||||
|
||||
|
||||
class TestDataStoreSnapshots:
    """Snapshots handed out by the store must be independent deep copies."""

    def test_deepcopy_snapshot_isolated_from_store(self):
        from services.fetchers import _store

        original = [{"title": "baseline"}]
        with _store._data_lock:
            # Remember the previous value so the shared store can be restored.
            prior = _store.latest_data.get("news")
            _store.latest_data["news"] = list(original)
        try:
            snap = _store.get_latest_data_deepcopy_snapshot()
            snap["news"][0]["title"] = "mutated"
            with _store._data_lock:
                assert _store.latest_data["news"][0]["title"] == "baseline"
        finally:
            with _store._data_lock:
                _store.latest_data["news"] = prior

    def test_subset_deepcopy_isolated(self):
        from services.fetchers import _store

        with _store._data_lock:
            prior = _store.latest_data.get("news")
            _store.latest_data["news"] = [{"title": "subset"}]
        try:
            snap = _store.get_latest_data_subset("news")
            snap["news"][0]["title"] = "changed"
            with _store._data_lock:
                assert _store.latest_data["news"][0]["title"] == "subset"
        finally:
            # Same save/restore pattern as the other store-mutating tests in
            # this file, so the injected rows do not leak into later tests.
            with _store._data_lock:
                _store.latest_data["news"] = prior
|
||||
|
||||
|
||||
class TestHeavyFetchExecutorRouting:
    """Task labels are routed to the slow or the shared executor."""

    def test_slow_tier_uses_slow_executor(self):
        from services.data_fetcher import (
            _SHARED_EXECUTOR,
            _SLOW_EXECUTOR,
            _executor_for_task_label,
        )

        expected_routes = (
            ("slow-tier-refresh", _SLOW_EXECUTOR),
            ("startup-heavy-warm", _SLOW_EXECUTOR),
            ("fast-tier-refresh", _SHARED_EXECUTOR),
        )
        for label, executor in expected_routes:
            assert _executor_for_task_label(label) is executor
|
||||
|
||||
|
||||
class TestLiveDataFullEndpoint:
    """ETag handling and datetime serialization of the live-data endpoints."""

    def test_live_data_supports_etag_304(self, client):
        first = client.get("/api/live-data")
        assert first.status_code == 200
        etag = first.headers.get("etag")
        assert etag
        second = client.get("/api/live-data", headers={"If-None-Match": etag})
        assert second.status_code == 304
        assert second.headers.get("etag") == etag

    @staticmethod
    def _assert_datetime_row_is_served(client, layer, row, url):
        """Store ``[row]`` under ``layer``, GET ``url``, check the date appears, then restore."""
        from services.fetchers import _store

        with _store._data_lock:
            prior = _store.latest_data.get(layer)
            _store.latest_data[layer] = [row]
        try:
            response = client.get(url)
            assert response.status_code == 200
            assert "2026-01-01" in response.text
        finally:
            with _store._data_lock:
                _store.latest_data[layer] = prior

    def test_live_data_fast_serializes_non_json_native_values(self, client):
        from datetime import datetime, timezone

        row = {"source": "aprs", "observed": datetime(2026, 1, 1, tzinfo=timezone.utc)}
        self._assert_datetime_row_is_served(client, "sigint", row, "/api/live-data/fast")

    def test_live_data_serializes_non_json_native_values(self, client):
        from datetime import datetime, timezone

        row = {"observed": datetime(2026, 1, 1, tzinfo=timezone.utc)}
        self._assert_datetime_row_is_served(client, "gdelt", row, "/api/live-data")
|
||||
|
||||
|
||||
class TestSlowTaskConcurrency:
    """``_run_tasks`` batch sizes are limited by the executor's worker count."""

    def test_run_tasks_caps_batch_size_to_executor_workers(self, monkeypatch):
        from unittest.mock import MagicMock

        import services.data_fetcher as df

        class _FakeExecutor:
            # Fewer workers than the patched _SLOW_FETCH_CONCURRENCY of 8.
            _max_workers = 2

            def submit(self, func):
                return MagicMock()

        mock_executor = _FakeExecutor()
        drained_sizes = []

        def _pick_executor(_label):
            return mock_executor

        def _capture_drain(_label, futures):
            drained_sizes.append(len(futures))

        monkeypatch.setattr(df, "_executor_for_task_label", _pick_executor)
        monkeypatch.setattr(df, "_SLOW_FETCH_CONCURRENCY", 8)
        monkeypatch.setattr(df, "_drain_task_futures", _capture_drain)

        jobs = [lambda: None for _ in range(5)]
        df._run_tasks("slow-tier-test", jobs)

        # Five jobs with two workers: two full batches and a remainder.
        assert drained_sizes == [2, 2, 1]
|
||||
|
||||
|
||||
class TestFetcherRetryScope:
    """``with_retry`` must not retry ``requests.HTTPError``."""

    def test_http_error_is_not_retried(self, monkeypatch):
        import requests

        from services.fetchers.retry import with_retry

        calls = []

        @with_retry(max_retries=2, base_delay=0.01)
        def _raises_http():
            calls.append(1)
            raise requests.HTTPError("403 Client Error")

        with pytest.raises(requests.HTTPError):
            _raises_http()
        # A single recorded call means no retry happened.
        assert len(calls) == 1
|
||||
@@ -0,0 +1,29 @@
|
||||
"""KiwiSDR mirror prefers HTTPS (#364)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from services.kiwisdr_fetcher import (
|
||||
_SOURCE_URL_HTTP,
|
||||
_SOURCE_URL_HTTPS,
|
||||
_fetch_mirror_payload_text,
|
||||
)
|
||||
|
||||
|
||||
def test_fetch_mirror_tries_https_before_http():
    """When the HTTPS fetch raises, the HTTP URL is tried next and its body returned."""
    attempted: list[str] = []

    def fake_fetch(url, **kwargs):
        attempted.append(url)
        if url == _SOURCE_URL_HTTPS:
            raise ConnectionError("tls not available")
        return MagicMock(status_code=200, text="var kiwisdr_com = [];")

    with patch("services.network_utils.fetch_with_curl", side_effect=fake_fetch):
        payload_text = _fetch_mirror_payload_text()

    assert payload_text == "var kiwisdr_com = [];"
    assert attempted == [_SOURCE_URL_HTTPS, _SOURCE_URL_HTTP]
|
||||
@@ -0,0 +1,45 @@
|
||||
"""LiveUAMap scraper UI opt-in on Windows (#348)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from services import liveuamap_settings as settings
|
||||
|
||||
|
||||
@pytest.fixture
def opt_in_file(tmp_path, monkeypatch):
    """Point the settings module's ``_OPT_IN_FILE`` at a per-test temporary path."""
    target = tmp_path.joinpath("liveuamap_scraper_opt_in.json")
    monkeypatch.setattr(settings, "_OPT_IN_FILE", target)
    return target
|
||||
|
||||
|
||||
def test_windows_defaults_off_without_opt_in(monkeypatch, opt_in_file):
    """With ``os.name == "nt"`` and no env override, the scraper is off and needs a UI opt-in."""
    monkeypatch.setattr(settings.os, "name", "nt")
    monkeypatch.delenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", raising=False)
    enabled = settings.liveuamap_scraper_enabled()
    assert enabled is False
    needs_opt_in = settings.liveuamap_requires_ui_opt_in()
    assert needs_opt_in is True
|
||||
|
||||
|
||||
def test_windows_opt_in_enables_scraper(monkeypatch, opt_in_file):
    """Opting in through the UI setter enables the scraper and is written to the opt-in file."""
    monkeypatch.setattr(settings.os, "name", "nt")
    monkeypatch.delenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", raising=False)
    settings.set_liveuamap_ui_opt_in(True)
    assert settings.liveuamap_scraper_enabled() is True
    persisted = json.loads(opt_in_file.read_text())
    assert persisted["opted_in"] is True
|
||||
|
||||
|
||||
def test_linux_enabled_without_opt_in(monkeypatch, opt_in_file):
    """With ``os.name == "posix"`` the scraper is enabled and no UI opt-in is required."""
    monkeypatch.setattr(settings.os, "name", "posix")
    monkeypatch.delenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", raising=False)
    needs_opt_in = settings.liveuamap_requires_ui_opt_in()
    assert needs_opt_in is False
    enabled = settings.liveuamap_scraper_enabled()
    assert enabled is True
|
||||
|
||||
|
||||
def test_env_force_off_overrides_ui_opt_in(monkeypatch, opt_in_file):
    """An env value of ``false`` keeps the scraper off even after a UI opt-in."""
    monkeypatch.setattr(settings.os, "name", "nt")
    monkeypatch.setenv("SHADOWBROKER_ENABLE_LIVEUAMAP_SCRAPER", "false")
    settings.set_liveuamap_ui_opt_in(True)
    enabled = settings.liveuamap_scraper_enabled()
    assert enabled is False
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Madrid CCTV KML prefers HTTPS (#363)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from services.cctv_pipeline import MadridCityIngestor
|
||||
|
||||
|
||||
def test_madrid_fetch_kml_tries_https_before_http():
    """When the HTTPS KML fetch raises, the ingestor falls back to the HTTP URL."""
    ingestor = MadridCityIngestor()
    attempted: list[str] = []
    empty_kml = b'<?xml version="1.0"?><kml xmlns="http://www.opengis.net/kml/2.2"></kml>'

    def fake_fetch(url, **kwargs):
        attempted.append(url)
        if url == ingestor.KML_URL_HTTPS:
            raise ConnectionError("tls handshake failed")
        return MagicMock(status_code=200, content=empty_kml, raise_for_status=MagicMock())

    with patch("services.cctv_pipeline.fetch_with_curl", side_effect=fake_fetch):
        kml_response = ingestor._fetch_kml()

    assert kml_response.status_code == 200
    assert attempted == [ingestor.KML_URL_HTTPS, ingestor.KML_URL_HTTP]
|
||||
@@ -1,56 +1,27 @@
|
||||
"""Issue #203 (tg12): meshtastic_map.py was unconditionally including
|
||||
``MESHTASTIC_OPERATOR_CALLSIGN`` in the outbound User-Agent header,
|
||||
which contradicted the README's "no user data transmitted" claim.
|
||||
|
||||
The fix preserves the existing default behavior (callsign sent — that's
|
||||
what operators who configured the variable expected) but adds an
|
||||
opt-out env var ``MESHTASTIC_SEND_CALLSIGN_HEADER=false`` for
|
||||
privacy-conscious operators.
|
||||
"""
|
||||
import importlib
|
||||
import sys
|
||||
"""Issue #350: Meshtastic callsign in outbound UA is opt-in, not default."""
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _reload_meshtastic_module():
|
||||
"""Reload meshtastic_map so settings are re-read on demand."""
|
||||
if "services.fetchers.meshtastic_map" in sys.modules:
|
||||
del sys.modules["services.fetchers.meshtastic_map"]
|
||||
return importlib.import_module("services.fetchers.meshtastic_map")
|
||||
def _send_callsign_header_from_env() -> bool:
|
||||
raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "false")).strip().lower()
|
||||
return raw in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def test_default_behavior_includes_callsign(monkeypatch):
|
||||
"""Operators who set the callsign and don't change anything else
|
||||
keep their existing behavior (callsign sent in UA)."""
|
||||
# We test the UA construction logic by exercising the same branches
|
||||
# the fetcher uses. Direct fetch isn't run because it makes a real
|
||||
# network call — we just verify the env-var-driven decision.
|
||||
import os
|
||||
def test_default_does_not_send_callsign(monkeypatch):
|
||||
monkeypatch.setenv("MESHTASTIC_OPERATOR_CALLSIGN", "N0CALL")
|
||||
monkeypatch.delenv("MESHTASTIC_SEND_CALLSIGN_HEADER", raising=False)
|
||||
|
||||
raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")).strip().lower()
|
||||
send_callsign_header = raw not in {"0", "false", "no", "off", ""}
|
||||
assert send_callsign_header is True
|
||||
assert _send_callsign_header_from_env() is False
|
||||
|
||||
|
||||
def test_opt_out_suppresses_callsign(monkeypatch):
|
||||
"""Setting MESHTASTIC_SEND_CALLSIGN_HEADER=false suppresses the header."""
|
||||
import os
|
||||
def test_opt_in_sends_callsign(monkeypatch):
|
||||
monkeypatch.setenv("MESHTASTIC_OPERATOR_CALLSIGN", "N0CALL")
|
||||
monkeypatch.setenv("MESHTASTIC_SEND_CALLSIGN_HEADER", "false")
|
||||
|
||||
raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")).strip().lower()
|
||||
send_callsign_header = raw not in {"0", "false", "no", "off", ""}
|
||||
assert send_callsign_header is False
|
||||
monkeypatch.setenv("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")
|
||||
assert _send_callsign_header_from_env() is True
|
||||
|
||||
|
||||
def test_various_falsy_values_all_opt_out(monkeypatch):
|
||||
"""Common falsy strings should all suppress the callsign header."""
|
||||
import os
|
||||
for falsy in ("0", "false", "FALSE", "no", "off"):
|
||||
def test_various_falsy_values_do_not_opt_in(monkeypatch):
|
||||
for falsy in ("0", "false", "FALSE", "no", "off", ""):
|
||||
monkeypatch.setenv("MESHTASTIC_SEND_CALLSIGN_HEADER", falsy)
|
||||
raw = str(os.environ.get("MESHTASTIC_SEND_CALLSIGN_HEADER", "true")).strip().lower()
|
||||
send_callsign_header = raw not in {"0", "false", "no", "off", ""}
|
||||
assert send_callsign_header is False, f"value {falsy!r} did not opt out"
|
||||
assert _send_callsign_header_from_env() is False, f"value {falsy!r} should not opt in"
|
||||
|
||||
@@ -5,7 +5,7 @@ from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from services.fetchers.news import _resolve_coords
|
||||
from services.news_feed_config import DEFAULT_FEEDS
|
||||
from services.news_feed_config import DEFAULT_FEEDS, _normalise_feeds
|
||||
|
||||
|
||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "news_feeds.json"
|
||||
@@ -152,3 +152,14 @@ class TestFeedConfig:
|
||||
urls = {f["url"] for f in DEFAULT_FEEDS}
|
||||
assert "https://www.reutersagency.com/feed/?best-topics=world" not in urls
|
||||
assert "https://rsshub.app/apnews/topics/world-news" not in urls
|
||||
|
||||
def test_legacy_http_feeds_are_migrated_to_https(self):
    """``_normalise_feeds`` rewrites the two legacy ``http://`` feed URLs to ``https://``."""
    legacy_feeds = [
        {"name": "BBC", "url": "http://feeds.bbci.co.uk/news/world/rss.xml", "weight": 3},
        {"name": "Xinhua", "url": "http://www.news.cn/english/rss/worldrss.xml", "weight": 2},
    ]
    normalised = _normalise_feeds(legacy_feeds)
    urls = {feed["url"] for feed in normalised}
    expected_urls = (
        "https://feeds.bbci.co.uk/news/world/rss.xml",
        "https://www.news.cn/english/rss/worldrss.xml",
    )
    for expected in expected_urls:
        assert expected in urls
|
||||
|
||||
@@ -28,6 +28,10 @@ def sample_store():
|
||||
"weather_alerts": list(latest_data.get("weather_alerts") or []),
|
||||
"gps_jamming": list(latest_data.get("gps_jamming") or []),
|
||||
"military_bases": list(latest_data.get("military_bases") or []),
|
||||
"telegram_osint": dict(latest_data.get("telegram_osint") or {}),
|
||||
"malware_threats": dict(latest_data.get("malware_threats") or {}),
|
||||
"cyber_threats": dict(latest_data.get("cyber_threats") or {}),
|
||||
"scm_suppliers": dict(latest_data.get("scm_suppliers") or {}),
|
||||
}
|
||||
latest_data["tracked_flights"] = [
|
||||
{
|
||||
@@ -188,6 +192,66 @@ def sample_store():
|
||||
"lng": -76.87,
|
||||
}
|
||||
]
|
||||
latest_data["telegram_osint"] = {
|
||||
"posts": [
|
||||
{
|
||||
"id": "tg-1",
|
||||
"title": "Missile strike reported near Kyiv overnight",
|
||||
"description": "OSINT channel reports explosions near Kyiv",
|
||||
"channel": "osintdefender",
|
||||
"source": "t.me/osintdefender",
|
||||
"link": "https://t.me/osintdefender/123",
|
||||
"published": "2026-06-02T12:00:00+00:00",
|
||||
"risk_score": 0.8,
|
||||
"coords": [50.45, 30.52],
|
||||
}
|
||||
],
|
||||
"total": 1,
|
||||
"geolocated": 1,
|
||||
}
|
||||
latest_data["malware_threats"] = {
|
||||
"threats": [
|
||||
{
|
||||
"id": "feodo-1",
|
||||
"ip": "203.0.113.10",
|
||||
"malware": "Emotet",
|
||||
"country": "US",
|
||||
"threat_type": "botnet_c2",
|
||||
"lat": 38.95,
|
||||
"lng": -77.45,
|
||||
}
|
||||
],
|
||||
"total": 1,
|
||||
}
|
||||
latest_data["cyber_threats"] = {
|
||||
"threats": [
|
||||
{
|
||||
"id": "CVE-2026-1234",
|
||||
"name": "Example Vendor RCE",
|
||||
"vendor": "Example Vendor",
|
||||
"product": "Example Product",
|
||||
"severity": "CRITICAL",
|
||||
"source": "CISA KEV",
|
||||
}
|
||||
],
|
||||
"stats": {"active_cves": 1},
|
||||
}
|
||||
latest_data["scm_suppliers"] = {
|
||||
"suppliers": [
|
||||
{
|
||||
"id": "sup-tsmc-hsinchu",
|
||||
"name": "TSMC Fab 12 (Tier 1)",
|
||||
"city": "Hsinchu",
|
||||
"country": "Taiwan",
|
||||
"category": "Semiconductor",
|
||||
"risk_level": "NORMAL",
|
||||
"lat": 24.774,
|
||||
"lng": 120.992,
|
||||
}
|
||||
],
|
||||
"total": 1,
|
||||
"critical_count": 0,
|
||||
}
|
||||
|
||||
try:
|
||||
yield
|
||||
@@ -475,6 +539,89 @@ def test_correlate_entity_returns_evidence_pack_near_aircraft(sample_store, monk
|
||||
assert result["recommended_next"]
|
||||
|
||||
|
||||
def test_get_slow_telemetry_includes_new_osint_layers(sample_store, monkeypatch):
    """Cached slow telemetry exposes the three dict-shaped layers with ``total == 1``."""
    import services.telemetry as telemetry

    monkeypatch.setattr(telemetry, "get_data_version", lambda: 210)
    slow = telemetry.get_cached_slow_telemetry()

    for layer in ("telegram_osint", "malware_threats", "scm_suppliers"):
        assert layer in slow
        assert slow[layer]["total"] == 1
|
||||
|
||||
|
||||
def test_get_layer_slice_accepts_telegram_alias(sample_store, monkeypatch):
    """The ``telegram`` alias resolves to ``telegram_osint`` and returns its posts."""
    import services.telemetry as telemetry

    monkeypatch.setattr(telemetry, "get_data_version", lambda: 211)
    sliced = telemetry.get_layer_slice(layers=["telegram"], limit_per_layer=10)

    assert sliced["requested_layers"] == ["telegram_osint"]
    first_post = sliced["layers"]["telegram_osint"]["posts"][0]
    assert first_post["channel"] == "osintdefender"
|
||||
|
||||
|
||||
def test_get_telemetry_summary_counts_nested_layer_items(sample_store, monkeypatch):
    """Summary counts reflect records of dict-shaped layers, and the new aliases are listed."""
    import services.telemetry as telemetry

    monkeypatch.setattr(telemetry, "get_data_version", lambda: 212)
    summary = telemetry.get_telemetry_summary()

    counts = summary["counts"]
    for layer in ("telegram_osint", "malware_threats", "scm_suppliers"):
        assert counts[layer] == 1
    assert "telegram_osint" in summary["non_empty_layers"]
    aliases = summary["layer_aliases"]
    assert aliases["telegram"] == "telegram_osint"
    assert aliases["scm"] == "scm_suppliers"
|
||||
|
||||
|
||||
def test_search_news_matches_telegram_osint(sample_store, monkeypatch):
    """``search_news`` returns the Telegram post first for a matching query, with its latitude."""
    import services.telemetry as telemetry

    monkeypatch.setattr(telemetry, "get_data_version", lambda: 213)
    found = telemetry.search_news(query="kyiv missile", limit=10, include_telegram=True)

    hits = found["results"]
    assert hits
    top_hit = hits[0]
    assert top_hit["source_layer"] == "telegram_osint"
    assert top_hit["lat"] == 50.45
|
||||
|
||||
|
||||
def test_search_telemetry_finds_telegram_malware_and_scm(sample_store, monkeypatch):
    """Telemetry search must return hits from each expected source layer.

    Every query below is run with ``limit=10`` and must yield at least one
    result whose ``source_layer`` equals the paired layer name.
    """
    import services.telemetry as telemetry

    # Pin the data version to a fixed value for this test
    # (presumably to sidestep version-keyed caching -- confirm).
    monkeypatch.setattr(telemetry, "get_data_version", lambda: 214)

    # (query text, layer that must appear among the results), checked in order.
    expectations = (
        ("osintdefender kyiv", "telegram_osint"),
        ("emotet", "malware_threats"),
        ("tsmc hsinchu", "scm_suppliers"),
        ("CVE-2026-1234", "cyber_threats"),
    )
    for query_text, expected_layer in expectations:
        found = telemetry.search_telemetry(query=query_text, limit=10)
        assert any(
            hit["source_layer"] == expected_layer for hit in found["results"]
        )
|
||||
|
||||
|
||||
def test_entities_near_finds_telegram_and_malware(sample_store, monkeypatch):
    """Proximity search with telegram/malware entity types returns malware hits.

    Queries a 50 km radius around (38.95, -77.45) using the ``telegram`` and
    ``malware`` entity-type aliases. Only the presence of ``malware_threats``
    in the results is asserted; no telegram hit is required here.
    """
    import services.telemetry as telemetry

    # Pin the data version to a fixed value for this test
    # (presumably to sidestep version-keyed caching -- confirm).
    monkeypatch.setattr(telemetry, "get_data_version", lambda: 215)

    search_args = {
        "lat": 38.95,
        "lng": -77.45,
        "radius_km": 50,
        "entity_types": ["telegram", "malware"],
        "limit": 10,
    }
    nearby = telemetry.entities_near(**search_args)

    seen_layers = {hit["source_layer"] for hit in nearby["results"]}
    assert "malware_threats" in seen_layers
|
||||
|
||||
|
||||
def test_openclaw_correlate_entity_command(sample_store, monkeypatch):
|
||||
import services.telemetry as telemetry
|
||||
from services.openclaw_channel import _dispatch_command
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
"""Tests for OpenClaw recon / OSINT command dispatch."""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_osint_tools_lists_supported_lookups():
    """The OSINT help payload must advertise the core tools and entity types."""
    from services.osint.openclaw_recon import osint_tool_help

    catalog = osint_tool_help()

    for tool_name in ("ip", "sanctions"):
        assert tool_name in catalog["tools"]
    assert "aircraft" in catalog["entity_types"]
|
||||
|
||||
|
||||
def test_osint_lookup_ip(monkeypatch):
    """``run_osint_lookup("ip", ...)`` must return the stubbed IP lookup result."""
    from services.osint import openclaw_recon

    def fake_lookup_ip(ip):
        # Stand-in for the real lookup: echo the address with a fixed geo record.
        return {"ip": ip, "geo": {"country": "US"}}

    monkeypatch.setattr(openclaw_recon.lookups, "lookup_ip", fake_lookup_ip)

    outcome = openclaw_recon.run_osint_lookup("ip", {"ip": "8.8.8.8"})

    assert outcome["ip"] == "8.8.8.8"
    assert outcome["geo"]["country"] == "US"
|
||||
|
||||
|
||||
def test_osint_lookup_sanctions_passes_schema(monkeypatch):
    """The sanctions lookup must receive ``query``, ``schema`` and ``limit``.

    A recording stub replaces ``lookups.lookup_sanctions`` so the arguments
    that ``run_osint_lookup`` passes along can be inspected afterwards.
    """
    from services.osint import openclaw_recon

    captured = {}

    def fake_sanctions(query, *, schema=None, limit=25):
        # Record what the dispatcher passed through, then return an empty hit list.
        captured.update(query=query, schema=schema, limit=limit)
        return {"query": query, "results": []}

    monkeypatch.setattr(openclaw_recon.lookups, "lookup_sanctions", fake_sanctions)

    lookup_args = {"query": "Example Corp", "schema": "Company", "limit": 10}
    openclaw_recon.run_osint_lookup("sanctions", lookup_args)

    assert captured["query"] == "Example Corp"
    assert captured["schema"] == "Company"
    assert captured["limit"] == 10
|
||||
|
||||
|
||||
def test_osint_lookup_rejects_unknown_tool():
    """An unrecognised tool name must raise ``ValueError`` with a clear message."""
    from services.osint.openclaw_recon import run_osint_lookup

    expected_message = "Unknown OSINT tool"
    with pytest.raises(ValueError, match=expected_message):
        run_osint_lookup("not_a_tool", {})
|
||||
|
||||
|
||||
def test_openclaw_osint_lookup_command(monkeypatch):
    """The ``osint_lookup`` channel command must wrap the lookup result.

    ``run_osint_lookup`` is stubbed; the dispatcher is expected to answer
    with ``ok`` set to ``True`` and the stub's payload under ``data``.
    """
    from services import openclaw_channel

    def fake_run_osint_lookup(tool, args):
        # Echo back the requested IP and tool so the dispatch path is observable.
        return {"ip": args["ip"], "tool": tool}

    monkeypatch.setattr(
        "services.osint.openclaw_recon.run_osint_lookup",
        fake_run_osint_lookup,
    )

    command_args = {"tool": "ip", "ip": "1.1.1.1"}
    reply = openclaw_channel._dispatch_command("osint_lookup", command_args)

    assert reply["ok"] is True
    assert reply["data"]["ip"] == "1.1.1.1"
|
||||
|
||||
|
||||
def test_openclaw_entity_expand_command(monkeypatch):
    """The ``entity_expand`` channel command must wrap the expansion graph.

    ``run_entity_expand`` is stubbed with a one-node graph; the dispatcher is
    expected to answer with ``ok`` set to ``True`` and that graph under ``data``.
    """
    from services import openclaw_channel

    def fake_run_entity_expand(args):
        # Fixed single-node graph, independent of the incoming arguments.
        return {"nodes": [{"id": "ip:1.1.1.1"}], "links": []}

    monkeypatch.setattr(
        "services.osint.openclaw_recon.run_entity_expand",
        fake_run_entity_expand,
    )

    command_args = {"type": "ip", "id": "1.1.1.1"}
    reply = openclaw_channel._dispatch_command("entity_expand", command_args)

    assert reply["ok"] is True
    first_node = reply["data"]["nodes"][0]
    assert first_node["id"] == "ip:1.1.1.1"
|
||||
|
||||
|
||||
def test_osint_sweep_requires_full_tier_for_restricted():
    """``osint_sweep`` is a write command: denied on ``restricted``, allowed on ``full``."""
    from services.openclaw_channel import WRITE_COMMANDS, allowed_commands

    command = "osint_sweep"

    assert command in WRITE_COMMANDS
    assert command not in allowed_commands("restricted")
    assert command in allowed_commands("full")
|
||||
|
||||
|
||||
def test_osint_lookup_available_on_restricted_tier():
    """``osint_lookup`` and ``entity_expand`` must be usable on the restricted tier."""
    from services.openclaw_channel import allowed_commands

    for command in ("osint_lookup", "entity_expand"):
        assert command in allowed_commands("restricted")
|
||||
@@ -0,0 +1,38 @@
|
||||
"""Regression coverage for OpenClaw skill HMAC environment names."""
|
||||
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _load_sb_query(monkeypatch):
    """Load the skill's ``sb_query.py`` from disk and return it as a module.

    The file is located at ``openclaw-skills/shadowbroker/sb_query.py`` under
    ``parents[2]`` of this test file's resolved path. The module is executed
    on every call, so environment variables set before the call are the ones
    its top-level code sees.

    ``monkeypatch`` is accepted but not used by this helper.
    """
    base_dir = Path(__file__).resolve().parents[2]
    skill_file = base_dir / "openclaw-skills" / "shadowbroker" / "sb_query.py"

    skill_spec = importlib.util.spec_from_file_location(
        "shadowbroker_skill_sb_query_test",
        skill_file,
    )
    assert skill_spec and skill_spec.loader

    sb_query = importlib.util.module_from_spec(skill_spec)
    skill_spec.loader.exec_module(sb_query)
    return sb_query
|
||||
|
||||
|
||||
def test_openclaw_skill_prefers_hmac_secret_env(monkeypatch):
    """With both env vars set, ``SHADOWBROKER_HMAC_SECRET`` must win over ``SHADOWBROKER_KEY``."""
    environment = (
        ("SHADOWBROKER_HMAC_SECRET", "preferred-hmac-secret"),
        ("SHADOWBROKER_KEY", "legacy-hmac-secret"),
    )
    for variable, value in environment:
        monkeypatch.setenv(variable, value)

    sb_query = _load_sb_query(monkeypatch)
    client = sb_query.ShadowBrokerClient()

    assert client._hmac_secret == "preferred-hmac-secret"
|
||||
|
||||
|
||||
def test_openclaw_skill_accepts_legacy_key_as_hmac_secret_alias(monkeypatch):
    """With only ``SHADOWBROKER_KEY`` set, it must serve as the HMAC secret.

    Also checks that signing a request yields the three ``X-SB-*`` headers
    and neither an ``Authorization`` nor an ``X-Admin-Key`` header.
    """
    # Make sure the preferred variable is absent so only the legacy one applies.
    monkeypatch.delenv("SHADOWBROKER_HMAC_SECRET", raising=False)
    monkeypatch.setenv("SHADOWBROKER_KEY", "legacy-hmac-secret")

    sb_query = _load_sb_query(monkeypatch)
    client = sb_query.ShadowBrokerClient()
    signed = client._sign_headers("GET", "/api/ai/tools")

    assert client._hmac_secret == "legacy-hmac-secret"

    for required in ("X-SB-Timestamp", "X-SB-Nonce", "X-SB-Signature"):
        assert required in signed
    for forbidden in ("Authorization", "X-Admin-Key"):
        assert forbidden not in signed
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user